In [211]:
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the raw CSV exports.
# Sensor readings export; the steps below expect `date` and `time` string columns.
temperature_readings = pd.read_csv('data/data.csv')

# Fill missing values by linear interpolation, restricted to numeric columns.
# Calling DataFrame.interpolate on a frame that still contains object (string)
# columns is deprecated in pandas and emits a FutureWarning; string columns
# cannot be linearly interpolated anyway, so they are left untouched.
numeric_columns = temperature_readings.select_dtypes(include='number').columns
if len(numeric_columns) > 0:
    temperature_readings[numeric_columns] = (
        temperature_readings[numeric_columns].interpolate(method='linear')
    )

# Weather readings export (loaded as-is, no gap filling).
weather_readings = pd.read_csv('data/weatherData.csv')

def _add_unix_timestamp(readings):
    """Add `datetime_str`, `datetime` and `unix_timestamp` columns to `readings` in place.

    `readings` must have string columns `date` and `time` which, joined by a
    single space, match the format "%a %b %d %Y %I:%M:%S %p".

    The parsed datetimes are timezone-naive; the epoch seconds are computed as
    if they were UTC, which is also what pandas' `Timestamp.timestamp()` does
    for naive values. Only differences between timestamps matter for the
    nearest-time merge that uses this column as its key.

    Raises:
        ValueError: if a value does not match the expected format.
    """
    readings['datetime_str'] = readings['date'] + ' ' + readings['time']
    # Vectorised parse instead of a per-row datetime.strptime call.
    readings['datetime'] = pd.to_datetime(
        readings['datetime_str'],
        format="%a %b %d %Y %I:%M:%S %p",
    )
    # Whole seconds since 1970-01-01, independent of the datetime resolution.
    readings['unix_timestamp'] = (
        (readings['datetime'] - pd.Timestamp("1970-01-01")) // pd.Timedelta(seconds=1)
    )


# Convert date and time in both frames to a UNIX timestamp so they can be compared.
_add_unix_timestamp(temperature_readings)
_add_unix_timestamp(weather_readings)

# Weather columns whose name contains "result" (case-insensitive).
weather_result_col = [col for col in weather_readings.columns if "result" in col.lower()]

# Attach to every sensor reading the weather row closest in time.
# pd.merge_asof raises a ValueError unless BOTH inputs are sorted by the key,
# so sort explicitly. A stable sort keeps the existing order of rows that
# share a timestamp, making this a no-op for data that is already ordered.
merged_data = pd.merge_asof(
    temperature_readings.sort_values('unix_timestamp', kind='stable'),  # Left DataFrame
    weather_readings[['unix_timestamp'] + weather_result_col].sort_values(
        'unix_timestamp', kind='stable'
    ),                     # Right DataFrame: key + weather result columns only
    on='unix_timestamp',   # Key column
    direction='nearest'    # Match the nearest time (before or after)
)

# Rebuild the "<date> <time>" text for every merged row and parse it into a
# proper datetime column used as the time axis of the frames derived below.
date_part = merged_data['date']
time_part = merged_data["time"]
datetime_string = date_part + " " + time_part

parsed_datetimes = pd.to_datetime(datetime_string, format="%a %b %d %Y %I:%M:%S %p")
merged_data["ISO_formatted_datetime"] = parsed_datetimes

print(merged_data["ISO_formatted_datetime"])

# Column groups used to slice the merged frame.
merged_columns = list(merged_data.columns)

# LoRaWAN sensor columns: the name must contain "lorawan_readings" plus the
# measurement keyword, both matched case-insensitively.
lorawan_columns = [name for name in merged_columns if "lorawan_readings" in name.lower()]
temperature_col = [name for name in lorawan_columns if "temperature" in name.lower()]
humidity_col = [name for name in lorawan_columns if "humidity" in name.lower()]
co2_col = [name for name in lorawan_columns if "co2" in name.lower()]

# Columns belonging to the selected sensors (case-sensitive substring match).
sensors_to_keep = ["Sensor_1", "Sensor_3", "Sensor_6"]
sensors_col = list(
    filter(
        lambda name: any(sensor in name for sensor in sensors_to_keep),
        merged_columns,
    )
)

# Weather columns to carry forward (case-sensitive substring match).
weather_cols_to_keep = ["weather_status","weather_temp","weather_humidity"]
weather_col = list(
    filter(
        lambda name: any(weathercol in name for weathercol in weather_cols_to_keep),
        merged_columns,
    )
)

# Split the merged frame into per-topic frames that share the same time column.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() only adds a stray "None" line to the output.

# Energy data: columns of the selected sensors
energy_data = merged_data[["ISO_formatted_datetime"] + sensors_col]
energy_data.info()

# Indoor data: LoRaWAN temperature, humidity and CO2 columns
indoor_data = merged_data[["ISO_formatted_datetime"] + temperature_col + humidity_col + co2_col]
indoor_data.info()

# Weather data: selected weather columns
weather_data = merged_data[["ISO_formatted_datetime"] + weather_col]
weather_data.info()


0     2024-11-15 16:25:07
1     2024-11-15 16:30:06
2     2024-11-15 16:35:06
3     2024-11-15 16:40:05
4     2024-11-15 16:45:06
              ...        
598   2024-11-21 15:49:03
599   2024-11-21 15:53:50
600   2024-11-21 16:00:02
601   2024-11-21 16:05:38
602   2024-11-21 16:11:04
Name: ISO_formatted_datetime, Length: 603, dtype: datetime64[ns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 603 entries, 0 to 602
Data columns (total 10 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   ISO_formatted_datetime            603 non-null    datetime64[ns]
 1   Energy_Readings.Sensor_1.Current  603 non-null    float64       
 2   Energy_Readings.Sensor_1.Energy   603 non-null    float64       
 3   Energy_Readings.Sensor_1.Power    603 non-null    float64       
 4   Energy_Readings.Sensor_3.Current  603 non-null    float64       
 5   Energy_Readings.Sensor_3.Energy   603 non-null   

  temperature_readings = temperature_readings.interpolate(method='linear')#fill in null values


In [213]:
# Current (A)
# Energy (kWh)
# Power (kW)

# Draft rename (disabled): each sensor/measurement pair gets its own name so that
# no two columns end up with the same label.
# energy_data = energy_data.rename(columns={"Energy_Readings.Sensor_1.Current":"compressor_current",
#                                          "Energy_Readings.Sensor_1.Energy":"compressor_energy",
#                                          "Energy_Readings.Sensor_1.Power":"compressor_power",
#                                          "Energy_Readings.Sensor_3.Current":"fancoils_1_current",
#                                          "Energy_Readings.Sensor_3.Energy":"fancoils_1_energy",
#                                          "Energy_Readings.Sensor_3.Power":"fancoils_1_power",
#                                          "Energy_Readings.Sensor_6.Current":"fancoils_2_current",
#                                          "Energy_Readings.Sensor_6.Energy":"fancoils_2_energy",
#                                          "Energy_Readings.Sensor_6.Power":"fancoils_2_power"})




    ISO_formatted_datetime  Energy_Readings.Sensor_1.Current  \
0      2024-11-15 16:25:07                          9.483001   
1      2024-11-15 16:30:06                          0.697000   
2      2024-11-15 16:35:06                          0.589000   
3      2024-11-15 16:40:05                          0.590000   
4      2024-11-15 16:45:06                          0.590000   
..                     ...                               ...   
598    2024-11-21 15:49:03                         10.073000   
599    2024-11-21 15:53:50                         15.647000   
600    2024-11-21 16:00:02                          8.908000   
601    2024-11-21 16:05:38                         12.504000   
602    2024-11-21 16:11:04                          4.730000   

     Energy_Readings.Sensor_1.Energy  Energy_Readings.Sensor_1.Power  \
0                            2143.98                          6.4799   
1                            2144.24                          0.1693   
2              