In [2]:
#imports
import requests
import json
import pandas as pd
import numpy as np
import warnings
import random
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
from datetime import datetime
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [18]:
aircon_units = 6

# Data Cleaning ( GETTING THE NECESSARY DATA )

# Getting MongoDB data ( JSON )
Aircon_Data = pd.read_json("data/W512.spgg_aircon_status.json", convert_dates=False)
Sensors_Data = pd.read_json("data/W512.spgg_readings.json", convert_dates=False)

# Aircon_Data = pd.read_json("data/W512.w512_readings.json", convert_dates=False)
# Sensors_Data = pd.read_json("data/W512.w512_aircon_status.json", convert_dates=False)

# Normalize the data
Aircon_rows = []

for _, row in Aircon_Data.iterrows():
    date = row['date']
    time = row['time']
    
    flattened_row = {
        "date": date,
        "time": time
    }
    
    fc_readings = row['FC_FullStatus_Readings']
    
    for unit, data in fc_readings.items():
        flattened_row[f"{unit}_status"] = data.get("Status", None)
        flattened_row[f"{unit}_fan_status"] = data.get("Fan_Status", None)
        flattened_row[f"{unit}_set_point"] = data.get("Set_Point", None)
        flattened_row[f"{unit}_operation_mode"] = data.get("Operation_Mode", None)
    
    Aircon_rows.append(flattened_row)

Sensors_rows = []
for _, row in Sensors_Data.iterrows():
    invalid_input = False
    
    date = row['date']
    time = row['time']
    
    flattened_row = {
        "date": date,
        "time": time
    }
    
    
    lorawan_readings = row['Lorawan_Readings']
    
    if isinstance(lorawan_readings, dict):
        for unit, data in lorawan_readings.items():
            if isinstance(data, dict):  # Ensure that each item in Lorawan_Readings is a dictionary
                for key, value in data.items():
                    flattened_row[f"{unit}_{key}"] = value
            
    energy_readings = row['Energy_Readings']
    total_power = 0
    total_energy = 0
    invalid_input_power = False
    invalid_input_energy = False
    
    for unit, data in energy_readings.items():
        power = data.get('Power', None)
        energy = data.get('Energy', None)
        if power is None:
            invalid_input_power = True
        if energy is None:
            invalid_input_energy = True
        total_power += power
        total_energy += energy
        
    if invalid_input_power:
        total_power = None
    if invalid_input_energy:
        total_energy = None
        
    flattened_row["Total_Energy"] = total_energy
    flattened_row["Total_Power"] = total_power
    
    Sensors_rows.append(flattened_row)

    
Aircon_Normalize_Data = pd.DataFrame(Aircon_rows)
Sensors_Normalize_Data = pd.DataFrame(Sensors_rows)

Aircon_Normalize_Data['datetime_str'] = Aircon_Normalize_Data['date'] + ' ' + Aircon_Normalize_Data['time']
Aircon_Normalize_Data['datetime'] = Aircon_Normalize_Data['datetime_str'].apply(lambda x: datetime.strptime(x, "%a %b %d %Y %I:%M:%S %p"))
Aircon_Normalize_Data['timestamp'] = Aircon_Normalize_Data['datetime'].apply(lambda x: int(x.timestamp()))

Sensors_Normalize_Data['datetime_str'] = Sensors_Normalize_Data['date'] + ' ' + Sensors_Normalize_Data['time']
Sensors_Normalize_Data['datetime'] = Sensors_Normalize_Data['datetime_str'].apply(lambda x: datetime.strptime(x, "%a %b %d %Y %I:%M:%S %p"))
Sensors_Normalize_Data['timestamp'] = Sensors_Normalize_Data['datetime'].apply(lambda x: int(x.timestamp()))

merged_data = pd.merge_asof(
    Aircon_Normalize_Data,  # Left DataFrame
    Sensors_Normalize_Data,      # Right DataFrame
    on='timestamp',   # Key column
    direction='nearest'    # Match the nearest time
)

temperature_col = [
    col for col in merged_data.columns 
    if "temperature" in col.lower() and "24e124136d336145" not in col.lower()
]

outdoor_col = [
    col for col in merged_data.columns 
    if "24e124136d336145" in col.lower()
]

humidity_col = [
    col for col in merged_data.columns 
    if "humidity" in col.lower()
]

occupancy_col_total_in = [
    col for col in merged_data.columns
    if "line_1_total_in" in col.lower()
]

occupancy_col_total_out = [
    col for col in merged_data.columns
    if "line_1_total_out" in col.lower()
]

def get_unit_columns(unit_number, columns):
    return [col for col in columns if f"FC_Unit_{unit_number}" in col]

aircon_units_cols = {}

for unit in range(1, aircon_units + 1):
    aircon_units_cols[f'Unit_{unit}'] = get_unit_columns(unit, merged_data.columns)
    

# Creating the wanted fields
final_data = pd.DataFrame()
final_data["timestamp"] = merged_data["timestamp"]

final_data["temperature"] = merged_data[temperature_col].apply(lambda x: round(x.mean(), 3), axis=1)
final_data["humidity"] = merged_data[humidity_col].apply(lambda x: round(x.mean(),3), axis=1)

final_data['power_consumption'] = merged_data['Total_Power']
final_data['energy_consumption'] = merged_data['Total_Energy']

final_data["outdoor_temperature"] = merged_data[outdoor_col]['24E124136D336145_temperature'].ffill()
final_data["outdoor_humidity"] = merged_data[outdoor_col]['24E124136D336145_humidity'].ffill()

merged_data[occupancy_col_total_in] = merged_data[occupancy_col_total_in].fillna(method='bfill')
merged_data[occupancy_col_total_out] = merged_data[occupancy_col_total_out].fillna(method='bfill')

final_data['occupancy'] = (
    merged_data[occupancy_col_total_in].sum(axis=1) - merged_data[occupancy_col_total_out].sum(axis=1)
).clip(lower=0)

for unit, columns in aircon_units_cols.items():
    for column in columns:
        if 'set_point' in column:
            final_data[column] = merged_data[column].replace(0, pd.NA).ffill()
        else:
            final_data[column] = merged_data[column].replace("ERROR", pd.NA).ffill()

final_data.dropna(inplace=True)


# Convert all String values to Numberic Values
# final_data = final_data.replace("ON", 1)
# final_data = final_data.replace("OFF", 0)

# final_data = final_data.replace("AUTO", 1)
# final_data = final_data.replace("COOL", 2)
# final_data = final_data.replace("DRY", 3)
# final_data = final_data.replace("HEAT", 4)
# final_data = final_data.replace("FAN", 5)

# final_data = final_data.replace("AUTO", 1)
# final_data = final_data.replace("QUIET", 2)
# final_data = final_data.replace("LOW", 3)
# final_data = final_data.replace("MED", 4)
# final_data = final_data.replace("HIGH", 5)
# final_data = final_data.replace("MED-LOW", 6)
# final_data = final_data.replace("MED-HIGH", 7)

final_data.to_csv('final_data2.csv', index=False)

#print(final_data.columns)

#print(final_data.head())

def getFCData(data, row_index):
    settings = []
    for i in range(1, aircon_units + 1):
        settings.append(data[f"FC_Unit_{i}_status"].iloc[row_index])
        settings.append(data[f"FC_Unit_{i}_fan_status"].iloc[row_index])
        settings.append(data[f"FC_Unit_{i}_set_point"].iloc[row_index])
        settings.append(data[f"FC_Unit_{i}_operation_mode"].iloc[row_index])
        
    return settings

def is_same_settings(data, curr_row_index, next_row_index):   
    return True if (getFCData(data, curr_row_index) == getFCData(data, next_row_index)) else False

def is_all_off(data, curr_row_index, check_for_off):
    for i in range(1, aircon_units + 1):
        if data[f"FC_Unit_{i}_status"].iloc[curr_row_index] == "ON":
            return not check_for_off
        
    return check_for_off


# Get the time taken and energy consumed to get to the *target temp
# 0, 1, 2, 3
aircon_status_result = pd.DataFrame()
#aircon_status_result = pd.DataFrame(columns=['time_taken', 
#                                              'energy_consumption', 
#                                              'previous_temp', 
#                                              'previous_humi',
#                                              'current_temp',
#                                              'current_humi'])

total_final_rows = final_data.shape[0]
Aircon_Normalize_Data = Aircon_Normalize_Data.drop(['date', 'time', 'datetime_str', 'datetime', 'timestamp'], axis=1)
for i in range(total_final_rows - 1, -1, -1):
    
    rows = []
    time_taken = []
    energy_consumption = []
    previous_temp = []
    previous_humi = []
    
    curr_timestamp = final_data["timestamp"].iloc[i]
    curr_energy = final_data["energy_consumption"].iloc[i]
    curr_temperature = final_data["temperature"].iloc[i]
    curr_humidity = final_data["humidity"].iloc[i]
    
    while i >= 0 and is_same_settings(final_data, i - 1, i):
        rows.append(i - 1)
        time_taken.append(curr_timestamp - final_data["timestamp"].iloc[i - 1])
        energy_consumption.append(curr_energy - final_data["energy_consumption"].iloc[i - 1])
        previous_temp.append(final_data["temperature"].iloc[i - 1])
        previous_humi.append(final_data["humidity"].iloc[i - 1])
        
        i -= 1
        
    temp_df = pd.DataFrame({
            'timestamp': [curr_timestamp],
            'rows': [rows],
            'time_taken': [time_taken],
            'energy_consumption': [energy_consumption],
            'previous_temp': [previous_temp],
            'previous_humi': [previous_humi],
            'outdoor_temp': [final_data['outdoor_temperature'].iloc[i]],
            'outdoor_humi': [final_data['outdoor_humidity'].iloc[i]],
            'current_temp': [curr_temperature],
            'current_humi': [curr_humidity]
        })
    for col in Aircon_Normalize_Data.columns:
        temp_df[col] = final_data[col].iloc[i]
    
        
    aircon_status_result = pd.concat([aircon_status_result, temp_df], ignore_index=False)
        
        
        
        
        
print("Finished")
aircon_status_result = aircon_status_result.sort_values(by=['current_temp'], ascending=False)
aircon_status_result.to_csv('aircon_status.csv', index=False)
aircon_status_result.info()


  merged_data[occupancy_col_total_in] = merged_data[occupancy_col_total_in].fillna(method='bfill')
  merged_data[occupancy_col_total_out] = merged_data[occupancy_col_total_out].fillna(method='bfill')
  final_data[column] = merged_data[column].replace(0, pd.NA).ffill()
  final_data[column] = merged_data[column].replace(0, pd.NA).ffill()
  final_data[column] = merged_data[column].replace(0, pd.NA).ffill()
  final_data[column] = merged_data[column].replace(0, pd.NA).ffill()
  final_data[column] = merged_data[column].replace(0, pd.NA).ffill()


Finished
<class 'pandas.core.frame.DataFrame'>
Index: 2154 entries, 0 to 0
Data columns (total 34 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   timestamp                 2154 non-null   int64  
 1   rows                      2154 non-null   object 
 2   time_taken                2154 non-null   object 
 3   energy_consumption        2154 non-null   object 
 4   previous_temp             2154 non-null   object 
 5   previous_humi             2154 non-null   object 
 6   outdoor_temp              2154 non-null   float64
 7   outdoor_humi              2154 non-null   float64
 8   current_temp              2154 non-null   float64
 9   current_humi              2154 non-null   float64
 10  FC_Unit_1_status          2154 non-null   object 
 11  FC_Unit_1_fan_status      2154 non-null   object 
 12  FC_Unit_1_set_point       2154 non-null   float64
 13  FC_Unit_1_operation_mode  2154 non-null   object 
 14  FC_Unit

In [5]:
target_temp = 23.0
target_humi = 65

# Same Importance
# If energy_factor > time_factor, consider energy more
time_factor = 0.5
energy_factor = 0.5

# The lower the more accurate it is
acceptable_range = 0.8

            
def getRowData(row_index):
    temperature = aircon_status_result["current_temp"].iloc[row_index]
    humidity = aircon_status_result["current_humi"].iloc[row_index]
    
    return [temperature, humidity]

def getArrayData(row_index, array_index):
    time_taken = aircon_status_result["time_taken"].iloc[row_index]
    energy_consumption = aircon_status_result["energy_consumption"].iloc[row_index]
    temperature = aircon_status_result["previous_temp"].iloc[row_index]
    humidity = aircon_status_result["previous_humi"].iloc[row_index]
    
    return [temperature[array_index], humidity[array_index], time_taken[array_index], energy_consumption[array_index]]

def comparePath(best_path, current_path):    
    if best_path['factor'] > current_path['factor']:
        return True
    
    return False
    

total_rows = aircon_status_result.shape[0]

total_paths = 0

paths = {}


# returns energy consumption and time taken and best pave
# every combi would have different settings
def findBestCombi(current_row_index):
    global paths
    # Target not reached
    # ALl aircon status is OFF, not useful
    
    curr_temperature, curr_humidity = getRowData(current_row_index)
    
    if is_all_off(aircon_status_result, current_row_index, True):
        paths[current_row_index] = {'energy_consumption': [], 'starting_temp': curr_temperature, 'starting_humi': curr_humidity, 'time_taken': [], 'factor': float('inf'), 'path': []}
        return paths[current_row_index]
    
    # If current_row_index is already checked before
    if current_row_index in paths:
        # Can be EMPTY or VALID PATH
        return paths[current_row_index]
    
    # Check if using this status hit the target temp and humi
    for i in range(len(aircon_status_result['rows'].iloc[current_row_index])):
        
        array_data = getArrayData(current_row_index, i)
        if (abs(array_data[0] - target_temp) < acceptable_range and
            abs(array_data[1] - target_humi) < (acceptable_range*10)):
            print("Target Found")
            # Target Found
            # Put entry in paths
            curr_path = {
                'energy_consumption': [array_data[3]],
                'time_taken': [array_data[2]],
                'factor': array_data[3] * energy_factor + array_data[2] * time_factor,
                'starting_temp': curr_temperature,
                'starting_humi': curr_humidity,
                'ending_temp': array_data[0],
                'ending_humi': array_data[1],
                'path': [current_row_index]
            }
            paths[current_row_index] = curr_path
            return paths[current_row_index]
    
    # Start of Backtracking
    for i in range(len(aircon_status_result['rows'].iloc[current_row_index])):
        previous_data = getArrayData(current_row_index, i)
        for j in range(current_row_index + 1, total_rows):
            next_data = getRowData(j)
            if (abs(previous_data[0] - next_data[0]) < acceptable_range and
                abs(previous_data[1] - next_data[1]) < (acceptable_range*10)):

                path = findBestCombi(j)

                # There is a valid path
                if path and path['energy_consumption']:
                    curr_path = {
                        'energy_consumption': [previous_data[3]] + path['energy_consumption'],
                        'time_taken': [previous_data[2]] + path['time_taken'],
                        'starting_temp': previous_data[0],
                        'starting_humi': previous_data[1],
                        'ending_temp': path['ending_temp'],
                        'ending_humi': path['ending_humi'],
                        'path': [current_row_index] + path['path'] 
                    }
                    curr_path['factor'] = sum(curr_path['energy_consumption']) * energy_factor + sum(curr_path['time_taken']) * time_factor
                    if current_row_index in paths:
                        if comparePath(paths[current_row_index], curr_path):
                            paths[current_row_index] = curr_path
                    else:
                        paths[current_row_index] = curr_path
    # No valid paths to target temp and humi
    if current_row_index not in paths:
        paths[current_row_index] = {'energy_consumption': [], 'starting_temp': curr_temperature, 'starting_humi': curr_humidity, 'time_taken': [], 'factor': float('inf'), 'path': []}
    
    # return best path or empty path
    return paths[current_row_index]
   
    
# TO get best path
# find current temp and humi, then sort by 'comparison'


for i in range(total_rows):
    data_temperature, data_humidity = getRowData(i)
    print(i)
    if i not in paths:
        for j in range(len(aircon_status_result['rows'].iloc[i])):
            array_data = getArrayData(i, j)
            path = findBestCombi(i)

            # There is a valid path
            if path and path['energy_consumption']:
                curr_path = {
                    'energy_consumption': [array_data[3]] + path['energy_consumption'],
                    'time_taken': [array_data[2]] + path['time_taken'],
                    'starting_temp': array_data[0],
                    'starting_humi': array_data[1],
                    'ending_temp': path['ending_temp'],
                    'ending_humi': path['ending_humi'],
                    'path': [i] + path['path'] 
                }
                curr_path['factor'] = sum(curr_path['energy_consumption']) * energy_factor + sum(curr_path['time_taken']) * time_factor
                if i in paths:
                    if comparePath(paths[i], curr_path):
                        paths[i] = curr_path
                else:
                    paths[i] = curr_path
            else:
                paths[i] = {'energy_consumption': [], 'starting_temp': array_data[0], 'starting_humi': array_data[1], 'time_taken': [], 'factor': float('inf'), 'path': []}
    
print(paths)



0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [12]:
current_temperature, current_humidity = 28.0, 65

filtered_paths = {
    key: value for key, value in paths.items()
    if abs(value['starting_temp'] - current_temperature) < acceptable_range and
       abs(value['starting_humi'] - current_humidity) < acceptable_range*10
}
smallest_factor_path = min(filtered_paths.keys(), key=lambda x: filtered_paths[x]['factor'])

print(paths[smallest_factor_path])
def expandPath(row_index):
    for i in range(1, aircon_units + 1):
        print("UNIT " + str(i) + ":", end=' ')
        print(aircon_status_result[f"FC_Unit_{i}_status"].iloc[row_index], end=' ')
        print(aircon_status_result[f"FC_Unit_{i}_fan_status"].iloc[row_index], end=' ')
        print(aircon_status_result[f"FC_Unit_{i}_set_point"].iloc[row_index], end=' ')
        print(aircon_status_result[f"FC_Unit_{i}_operation_mode"].iloc[row_index], end=' ')
        print("")

for index, value in enumerate(paths[smallest_factor_path]['path']):
    expandPath(value)
    print("For", end = ' ')
    hours, remainder = divmod(paths[smallest_factor_path]['time_taken'][index], 3600)
    minutes, seconds = divmod(remainder, 60)

    print(f"Hours: {hours}, Minutes: {minutes}, Seconds: {seconds}")
    print("")

{'energy_consumption': [0.3500000000003638, 0.7100000000000364, 1.25, 0.680000000000291, 0.5, 0.06000000000040018], 'time_taken': [629, 629, 628, 628, 628, 36], 'starting_temp': 28.3, 'starting_humi': 61.929, 'ending_temp': 23.117, 'ending_humi': 69.643, 'path': [925, 1336, 1511, 1895, 2019, 2070], 'factor': 1590.7750000000005}
UNIT 1: ON MED 24.0 COOL 
UNIT 2: ON LOW 24.0 COOL 
UNIT 3: OFF MED 22.0 COOL 
UNIT 4: OFF HIGH 22.5 COOL 
UNIT 5: OFF QUIET 22.5 COOL 
UNIT 6: OFF LOW 22.5 COOL 
For Hours: 0, Minutes: 10, Seconds: 29

UNIT 1: OFF MED 22.0 COOL 
UNIT 2: OFF LOW 24.0 COOL 
UNIT 3: ON MED 22.0 COOL 
UNIT 4: ON HIGH 22.5 COOL 
UNIT 5: ON QUIET 22.5 COOL 
UNIT 6: ON LOW 22.5 COOL 
For Hours: 0, Minutes: 10, Seconds: 29

UNIT 1: OFF MED 22.0 COOL 
UNIT 2: OFF LOW 24.0 COOL 
UNIT 3: ON HIGH 22.0 COOL 
UNIT 4: ON HIGH 22.0 COOL 
UNIT 5: ON HIGH 22.0 COOL 
UNIT 6: ON HIGH 22.0 COOL 
For Hours: 0, Minutes: 10, Seconds: 28

UNIT 1: OFF MED 22.0 COOL 
UNIT 2: OFF LOW 24.0 COOL 
UNIT 3: ON