In [5]:
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler



#The preset temp
set_temp = 23

#Reading Data from CSV Files
temperature_readings = pd.read_csv('data/data.csv') #original dataset for temp readings 
temperature_readings = temperature_readings.interpolate(method='linear')#fill in null values


weather_readings = pd.read_csv('data/weatherData.csv') #orignal dataset for weather readings

# Converting date and time in temperature_readings to UNIX timestamp for comparision 
temperature_readings['datetime_str'] = temperature_readings['date'] + ' ' + temperature_readings['time']
temperature_readings['datetime'] = temperature_readings['datetime_str'].apply(lambda x: datetime.strptime(x, "%a %b %d %Y %I:%M:%S %p"))
temperature_readings['unix_timestamp'] = temperature_readings['datetime'].apply(lambda x: int(x.timestamp()))

# Converting date and time in weather_readings to UNIX timestamp for comparision 
weather_readings['datetime_str'] = weather_readings['date'] + ' ' + weather_readings['time']
weather_readings['datetime'] = weather_readings['datetime_str'].apply(lambda x: datetime.strptime(x, "%a %b %d %Y %I:%M:%S %p"))
weather_readings['unix_timestamp'] = weather_readings['datetime'].apply(lambda x: int(x.timestamp()))

weather_result_col = [col for col in weather_readings.columns if "result" in col.lower()]

# Merging both data for it to be on the same time
merged_data = pd.merge_asof(
    temperature_readings,  # Left DataFrame
    weather_readings[['unix_timestamp']+weather_result_col],      # Right DataFrame
    on='unix_timestamp',   # Key column
    direction='nearest'    # Match the nearest time
)

datetime_string = merged_data['date'] + " " + merged_data["time"]
merged_data["ISO_formatted_datetime"] = pd.to_datetime(
    datetime_string,
    format="%a %b %d %Y %I:%M:%S %p"
)


# #Columns for lorWan Sensors
temperature_col = [
    col for col in merged_data.columns 
    if "lorawan_readings" in col.lower() and "temperature" in col.lower()
]

humidity_col = [
    col for col in merged_data.columns 
    if "humidity" in col.lower() and "lorawan_readings" in col.lower()
]

co2_col = [
    col for col in merged_data.columns 
    if "co2" in col.lower() and "lorawan_readings" in col.lower()
]

sensors_to_keep = ["Sensor_1", "Sensor_3", "Sensor_6"]
sensors_col = [col for col in merged_data.columns if any(sensor in col for sensor in sensors_to_keep)]

weather_cols_to_keep = ["weather_status","weather_temp","weather_humidity"]
weather_col = [col for col in merged_data.columns if any(weathercol in col for weathercol in weather_cols_to_keep)]

#adding avg temp humid and co2
merged_data['avg_temperature'] = merged_data[temperature_col].mean(axis=1)
merged_data['avg_humidity'] = merged_data[humidity_col].mean(axis=1)
merged_data['avg_co2'] = merged_data[co2_col].mean(axis=1)

avg_col = [
    col for col in merged_data.columns
    if "avg" in col.lower()
]

#Energy(power,energy,current) Data
energy_data = merged_data[["ISO_formatted_datetime"]+ sensors_col]
energy_data.columns = energy_data.columns.str.replace(
    r"Energy_Readings.Sensor_1\.(Current|Energy|Power)", "compressor_\\1", regex=True
).str.replace(
    r"Energy_Readings.Sensor_3\.(Current|Energy|Power)", "fancoil_1_\\1", regex=True
).str.replace(
    r"Energy_Readings.Sensor_6\.(Current|Energy|Power)", "fancoil_2_\\1", regex=True
)

#indoor Data
indoor_data = merged_data[["ISO_formatted_datetime"] + temperature_col + humidity_col + co2_col + avg_col]


#Weather data
weather_data = merged_data[["ISO_formatted_datetime"]+ weather_col]



#merging all the needed data
energy_indoor_merged = pd.merge(energy_data, indoor_data, on='ISO_formatted_datetime', how='inner')
final_merged_data = pd.merge(energy_indoor_merged, weather_data, on='ISO_formatted_datetime', how='inner')

# Calculate the smallest absolute difference between each sensor's temperature and the set temperature (set_temp).
# Calculate the difference between sensor temperatures and set_temp
final_merged_data['temp_diff'] = final_merged_data[temperature_col].apply(
    lambda row: row.sub(set_temp).abs().min(), axis=1
)

final_merged_data['total_energy'] = (
    final_merged_data['compressor_Energy'] +
    final_merged_data['fancoil_1_Energy'] +
    final_merged_data['fancoil_2_Energy']
)

final_merged_data['score'] = (
    0.5 * final_merged_data['temp_diff'] +  # Weight for temperature accuracy
    0.5 * final_merged_data['total_energy']  # Weight for energy efficiency
)

# Identify the best sensor based on the smallest absolute temperature difference.
final_merged_data['best_sensor'] = final_merged_data[temperature_col].apply(
    lambda row: row.sub(set_temp).abs().idxmin(), axis=1
)

# Map sensor column names to numeric labels for model training.
sensor_mapping = {sensor: idx for idx, sensor in enumerate(temperature_col)}
final_merged_data['best_sensor'] = final_merged_data['best_sensor'].map(sensor_mapping)

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


print(final_merged_data.info())



  temperature_readings = temperature_readings.interpolate(method='linear')#fill in null values


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1803 entries, 0 to 1802
Data columns (total 38 columns):
 #   Column                                         Non-Null Count  Dtype         
---  ------                                         --------------  -----         
 0   ISO_formatted_datetime                         1803 non-null   datetime64[ns]
 1   compressor_Current                             1803 non-null   float64       
 2   compressor_Energy                              1803 non-null   float64       
 3   compressor_Power                               1803 non-null   float64       
 4   fancoil_1_Current                              1803 non-null   float64       
 5   fancoil_1_Energy                               1803 non-null   float64       
 6   fancoil_1_Power                                1803 non-null   float64       
 7   fancoil_2_Current                              1803 non-null   float64       
 8   fancoil_2_Energy                               1803 non-nu