In [22]:
#imports
import requests
import json
import pandas as pd
import numpy as np
import warnings
import random
import matplotlib.pyplot as plt
from datetime import datetime
import time as clock

start_time = clock.time()
# Sensors_Data = pd.read_json('data/W512_readings.json')
# Aircon_Data = pd.read_json('data/W512_aircon_status.json')
# Weather_Data = pd.read_json('data/Weather_data.json')

Sensors_Data = pd.read_json('../test/W512_readings1.json')
Aircon_Data = pd.read_json('../test/W512_aircon_status1.json')
Weather_Data = pd.read_json('../test/Weather_data1.json')
# Aircon_Data = Aircon_Data.iloc[3194:] #testing out new dataset (data is cleaned before pulling)
# Normalize the data
Aircon_rows = []

for _, row in Aircon_Data.iterrows():
    date = row['date']
    time = row['time']
    
    flattened_row = {
        "date": date,
        "time": time
    }
    
    fc_readings = row['FC_FullStatus_Readings']
    if fc_readings and isinstance(fc_readings, dict):
        for unit, data in fc_readings.items():
            if any(data.get("Set_Point", None) == 404.0 for data in fc_readings.values()):
                continue
            flattened_row[f"{unit}_Status"] = data.get("Status", None)
            flattened_row[f"{unit}_Fan_Status"] = data.get("Fan_Status", None)
            flattened_row[f"{unit}_Set_Point"] = data.get("Set_Point", None)
            flattened_row[f"{unit}_Operation_Mode"] = data.get("Operation_Mode", None)
    
    Aircon_rows.append(flattened_row)

Sensors_rows = []
include_keys_1 = ["24E124725E285123", "24E124725E331695","24E124725E331744",
                      "24E124725E332483","24E124725E290348","24E124725E331733","24E124725E286745","24E124725E332564" # "24E124136D316361" is supposed to be outdoor but it is not outdoor yet
                         "24E124757E150866","24E124757E150896"]

include_keys_2 = ["Sensor_1","Sensor_3","Sensor_6"]
for _, row in Sensors_Data.iterrows():
    invalid_input = False
    
    date = row['date']
    time = row['time']
    
    flattened_row = {
        "date": date,
        "time": time
    }
    
    
    lorawan_readings = row['Lorawan_Readings']
    
    if isinstance(lorawan_readings, dict):
        for unit, data in lorawan_readings.items():
            if unit not in include_keys_1:
                continue
            if isinstance(data, dict):  # Ensure that each item in Lorawan_Readings is a dictionary
                for key, value in data.items():
                    
                    flattened_row[f"{unit}_{key}"] = value
            
    energy_readings = row['Energy_Readings']
    total_power = 0
    total_energy = 0
    invalid_input_power = False
    invalid_input_energy = False
    
    if energy_readings and isinstance(energy_readings, dict):
        for unit, data in energy_readings.items():
            if unit not in include_keys_2:
                continue
            power = data.get('Power', None)
            energy = data.get('Energy', None)
            if power is None:
                invalid_input_power = True
            if energy is None:
                invalid_input_energy = True
            total_power += power
            total_energy += energy
        
    if invalid_input_power:
        total_power = None
    if invalid_input_energy:
        total_energy = None
        
    flattened_row["Total_Energy"] = total_energy
    flattened_row["Total_Power"] = total_power
    
    Sensors_rows.append(flattened_row)


# Normalize the data
Weather_rows = []

for _, row in Weather_Data.iterrows():
    date = row['date']
    time = row['time']
    
    flattened_row = {
        "date": date,
        "time": time
    }
    
    flattened_row['weather_status']= row['result']['weather_status']
    flattened_row['weather_temp']= row['result']['weather_temp']
    flattened_row['weather_humidity']= row['result']['weather_humidity']
    
    Weather_rows.append(flattened_row)



Aircon_Normalize_Data = pd.DataFrame(Aircon_rows)
Sensors_Normalize_Data = pd.DataFrame(Sensors_rows)
Weather_Normalize_Data = pd.DataFrame(Weather_rows)
# For Aircon_Normalize_Data
Aircon_Normalize_Data['datetime_str'] = Aircon_Normalize_Data['date'].astype(str) + ' ' + Aircon_Normalize_Data['time']
Aircon_Normalize_Data['datetime'] = Aircon_Normalize_Data['datetime_str'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %I:%M:%S %p"))
Aircon_Normalize_Data['timestamp'] = Aircon_Normalize_Data['datetime'].apply(lambda x: int(x.timestamp()))

# For Sensors_Normalize_Data
Sensors_Normalize_Data['datetime_str'] = Sensors_Normalize_Data['date'].astype(str) + ' ' + Sensors_Normalize_Data['time']
Sensors_Normalize_Data['datetime'] = Sensors_Normalize_Data['datetime_str'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %I:%M:%S %p"))
Sensors_Normalize_Data['timestamp'] = Sensors_Normalize_Data['datetime'].apply(lambda x: int(x.timestamp()))

# For Weather_Normalize_Data
Weather_Normalize_Data['datetime_str'] = Weather_Normalize_Data['date'].astype(str) + ' ' + Weather_Normalize_Data['time']
Weather_Normalize_Data['datetime'] = Weather_Normalize_Data['datetime_str'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %I:%M:%S %p"))
Weather_Normalize_Data['timestamp'] = Weather_Normalize_Data['datetime'].apply(lambda x: int(x.timestamp()))

merged_data = pd.merge_asof(
    Aircon_Normalize_Data,  # Left DataFrame
    Sensors_Normalize_Data,      # Right DataFrame
    on='timestamp',   # Key column
    direction='nearest'    # Match the nearest time
)
merged_data = pd.merge_asof(
    merged_data,  # Left DataFrame
    Weather_Normalize_Data,      # Right DataFrame
    on='timestamp',   # Key column
    direction='nearest'    # Match the nearest time
)

temperature_col = [
    col for col in merged_data.columns 
    if "temperature" in col.lower()
]
humidity_col = [
    col for col in merged_data.columns 
    if "humidity" in col.lower()
]
c02_col = [
    col for col in merged_data.columns
    if "co2" in col.lower()
]

occupancy_col_total_in = [
    col for col in merged_data.columns
    if "total_in" in col.lower()
]

occupancy_col_total_out = [
    col for col in merged_data.columns
    if "total_out" in col.lower()
]

def get_unit_columns(unit_number, columns):
    return [col for col in columns if f"FC_Unit_{unit_number}" in col]

aircon_units = len([
    col for col in merged_data.columns
    if "FC_Unit_" in col and "_Status" in col and "Fan" not in col
])

aircon_units_cols = {}

for unit in range(1, aircon_units + 1):
    aircon_units_cols[f'Unit_{unit}'] = get_unit_columns(unit, merged_data.columns)


final_data = pd.DataFrame()
final_data["timestamp"] = merged_data["timestamp"]

final_data["temperature"] = merged_data[temperature_col].apply(lambda x: round(x.mean(), 3), axis=1)
final_data["humidity"] = merged_data[humidity_col].apply(lambda x: round(x.mean(),3), axis=1)
final_data["co2"] = merged_data[c02_col].apply(lambda x: round(x.mean(),3), axis=1)

final_data['power_consumption'] = merged_data['Total_Power']
final_data['energy_consumption'] = merged_data['Total_Energy']

final_data["weather_status"] = merged_data["weather_status"]
final_data["weather_temp"] = merged_data['weather_temp']
final_data["weather_humid"] = merged_data['weather_humidity']


merged_data[occupancy_col_total_in] = merged_data[occupancy_col_total_in].fillna(method='bfill')
merged_data[occupancy_col_total_out] = merged_data[occupancy_col_total_out].fillna(method='bfill')

final_data['occupancy'] = (
    merged_data[occupancy_col_total_in].sum(axis=1) - merged_data[occupancy_col_total_out].sum(axis=1)
).clip(lower=0)


for unit, columns in aircon_units_cols.items():
    for column in columns:
        if 'set_point' in column:
            final_data[column] = merged_data[column].replace(0, pd.NA).ffill()
        else:
            final_data[column] = merged_data[column].replace("ERROR", pd.NA).ffill()

final_data.dropna(inplace=True)
# final_data.reset_index(drop=True, inplace=True)#######################################


def getFCData(data, row_index):
    settings = []
    for i in range(1, aircon_units + 1):
        settings.append(data[f"FC_Unit_{i}_Status"].iloc[row_index])
        settings.append(data[f"FC_Unit_{i}_Fan_Status"].iloc[row_index])
        settings.append(data[f"FC_Unit_{i}_Set_Point"].iloc[row_index])
        settings.append(data[f"FC_Unit_{i}_Operation_Mode"].iloc[row_index])
        
    return settings

def is_same_settings(data, curr_row_index, next_row_index):   
    return True if (getFCData(data, curr_row_index) == getFCData(data, next_row_index)) else False


def is_all_off(data, curr_row_index, check_for_off):
    for i in range(1, aircon_units + 1):
        if data[f"FC_Unit_{i}_Status"].iloc[curr_row_index] == "ON":
            return not check_for_off
        
    return check_for_off
    
def is_within_temperature_range(current_temp, next_temp):
    range_factor = 0.5
    if current_temp - range_factor <= next_temp <= current_temp + range_factor:
        return True
    return False
final_data.to_csv('final_data_W512_Analysis.csv', index=False)

print("final_data_created")

aircon_status_result = pd.DataFrame()
aircon_status_getBestSettings_result = pd.DataFrame()
total_final_rows = final_data.shape[0]
Aircon_Normalize_Data = Aircon_Normalize_Data.drop(['date', 'time', 'datetime_str', 'datetime', 'timestamp'], axis=1)


for i in range(total_final_rows - 1):
    if is_all_off(final_data, i, True):
        continue
    
    rows = []
    time_taken = []
    energy_consumption = []
    previous_temp = []
    previous_humi = []
    
    curr_timestamp = final_data["timestamp"].iloc[i]
    curr_energy = final_data["energy_consumption"].iloc[i]
    curr_temperature = final_data["temperature"].iloc[i]
    curr_humidity = final_data["humidity"].iloc[i]

    

    
    while i < total_final_rows - 1 and is_same_settings(final_data, i + 1, i):
        timetaken = final_data["timestamp"].iloc[i + 1] - curr_timestamp
        energyConsumption = final_data["energy_consumption"].iloc[i + 1] - curr_energy
        if timetaken < 15 or timetaken > 3600:
            break
        if energyConsumption < 0:
            break
        rows.append(i + 1)
        time_taken.append(timetaken)
        energy_consumption.append(final_data["energy_consumption"].iloc[i + 1] - curr_energy)
        previous_temp.append(final_data["temperature"].iloc[i + 1])
        previous_humi.append(final_data["humidity"].iloc[i + 1])
        
        i += 1
        
    temp_df = pd.DataFrame({
            'timestamp': [curr_timestamp],
            'rows': [rows],
            'time_taken': [time_taken],
            'energy_consumption': [energy_consumption],
            'previous_temp': [previous_temp],
            'previous_humi': [previous_humi],
            'current_temp': [curr_temperature],
            'current_humi': [curr_humidity],
        })
    for col in Aircon_Normalize_Data.columns:
        temp_df[col] = final_data[col].iloc[i]

    
    
        
    aircon_status_result = pd.concat([aircon_status_result, temp_df], ignore_index=False)

end_time = clock.time()
elapsed_time = end_time - start_time
elapsed_time_minutes = elapsed_time / 60
print(f"Processing time: {elapsed_time_minutes:.2f} minutes")
        
        
        
print("Finished")
aircon_status_result = aircon_status_result.sort_values(by=['current_temp'], ascending=False)
aircon_status_result.to_csv('aircon_status_W512.csv', index=False)
aircon_status_result.info()



  merged_data[occupancy_col_total_in] = merged_data[occupancy_col_total_in].fillna(method='bfill')
  merged_data[occupancy_col_total_out] = merged_data[occupancy_col_total_out].fillna(method='bfill')


final_data_created
Processing time: 0.38 minutes
Finished
<class 'pandas.core.frame.DataFrame'>
Index: 2796 entries, 0 to 0
Data columns (total 40 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   timestamp                 2796 non-null   int64  
 1   rows                      2796 non-null   object 
 2   time_taken                2796 non-null   object 
 3   energy_consumption        2796 non-null   object 
 4   previous_temp             2796 non-null   object 
 5   previous_humi             2796 non-null   object 
 6   current_temp              2796 non-null   float64
 7   current_humi              2796 non-null   float64
 8   FC_Unit_1_Status          2796 non-null   object 
 9   FC_Unit_1_Fan_Status      2796 non-null   object 
 10  FC_Unit_1_Set_Point       2796 non-null   float64
 11  FC_Unit_1_Operation_Mode  2796 non-null   object 
 12  FC_Unit_2_Status          2796 non-null   object 
 13  FC_Unit_2_Fan

In [23]:
print(merged_data[occupancy_col_total_in])

      24E124757E150932_line_1_total_in  24E124757E150790_line_1_total_in  \
0                                  1.0                              21.0   
1                                  1.0                              21.0   
2                                  1.0                              21.0   
3                                  1.0                              21.0   
4                                  1.0                              21.0   
...                                ...                               ...   
8829                               3.0                              73.0   
8830                               3.0                              73.0   
8831                               3.0                              74.0   
8832                               3.0                              74.0   
8833                               3.0                              74.0   

      24E124757E150896_line_1_total_in  24E124725E286745_line_1_total_in  \
0          