In [14]:
import torch
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import torch.nn as nn
import joblib

class ComfortLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2):
        super(ComfortLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.1)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        out = self.fc(lstm_out[:, -1, :])  # Take the last LSTM output
        return out

# Function to load the trained model
def load_trained_model(filepath, input_dim, output_dim):
    model = ComfortLSTM(input_dim=input_dim, hidden_dim=32, output_dim=output_dim, num_layers=2)
    model.load_state_dict(torch.load(filepath))
    model.eval()
    return model
    
korean_file_path = "datasets/temp/merged_korean_corrected.csv"
korean_df = pd.read_csv(korean_file_path)

korean_df['timestamp'] = pd.to_datetime(korean_df['timestamp'])
korean_df = korean_df.drop(columns=['Unnamed: 0'], errors='ignore')
korean_df.rename(columns={'total_active_power' : 'total_energy'}, inplace=True)

minute_level_df = korean_df.copy()

korean_hourly_df = korean_df.groupby('house_id', group_keys=False).apply(lambda g: g.set_index('timestamp').resample('h').mean().reset_index()).sort_values(by=['house_id', 'timestamp'])

korean_hourly_df['day_of_week'] = korean_hourly_df['timestamp'].dt.dayofweek

korean_hourly_df['total_energy_lag_1h'] = korean_hourly_df.groupby('house_id')['total_energy'].shift(1).fillna(method='bfill')

korean_hourly_df.fillna(0, inplace=True)
korean_hourly_df["hour"] = korean_hourly_df["timestamp"].dt.hour
korean_hourly_df["day_of_week"] = korean_hourly_df["timestamp"].dt.dayofweek

X_korean = korean_hourly_df[['total_energy','hour', 'day_of_week', 'house_id', 'total_energy_lag_1h']]

scaler = StandardScaler()
X_korean = scaler.fit_transform(X_korean)

sequence_length = 3
X_seq_korean = []
timestamps_hourly = korean_hourly_df['timestamp'][sequence_length:].reset_index(drop=True)
for i in range(len(X_korean) - sequence_length):
    X_seq_korean.append(X_korean[i:i+sequence_length])
X_seq_korean = np.array(X_seq_korean)

X_korean_tensor = torch.tensor(X_seq_korean, dtype=torch.float32)

model = load_trained_model("comfort_lstm_model.pth", input_dim=X_korean.shape[1], output_dim=5) 

model.eval()
with torch.no_grad():
    predictions = model(X_korean_tensor).numpy()

target_scalers = joblib.load("target_scalers.pkl")

for i in range(predictions.shape[1]):
    predictions[:, i] = target_scalers[i].inverse_transform(predictions[:, i].reshape(-1, 1)).flatten()


predictions_df = pd.DataFrame(predictions, columns=['T_in[°C]', 'RH [%]', 'CO2[ppm]', 'PM2_5[ug/m3]', 'Lighting[lux]'])
predictions_df['Lighting[lux]'] = predictions_df['Lighting[lux]'].clip(lower=0)

predictions_df['timestamp'] = timestamps_hourly
print(predictions_df)
print(korean_hourly_df)
korean_hourly_df = korean_hourly_df.merge(predictions_df, on='timestamp', how='left')
# minute_level_df.interpolate(method='linear', inplace=True)  # Interpolating missing values at minute-level
# korean_hourly_df.head(100)
# minute_level_df.to_csv("korean_dataset_with_comfort_metrics.csv", index=False)
# print("Predictions merged and saved to korean_dataset_with_comfort_metrics.csv")

        T_in[°C]     RH [%]    CO2[ppm]  PM2_5[ug/m3]  Lighting[lux]  \
0      23.105227  59.336433  788.072327     61.793175      11.185279   
1      23.723141  58.102406  778.408325     70.258163       8.466960   
2      23.900393  58.157761  772.012939     74.086441      10.601519   
3      24.571644  55.986900  710.014221     88.515358      18.145378   
4      25.803263  53.349762  651.664124     83.554939      24.879156   
...          ...        ...         ...           ...            ...   
26278  21.840870  48.620373  647.622803     65.691612       0.000000   
26279  21.840870  48.620373  647.622803     65.691612       0.000000   
26280  21.840870  48.620373  647.622803     65.691612       0.000000   
26281  21.840870  48.620373  647.622803     65.691612       0.000000   
26282  21.767984  48.794136  644.748108     65.271469       0.000000   

                timestamp  
0     2016-11-01 03:00:00  
1     2016-11-01 04:00:00  
2     2016-11-01 05:00:00  
3     2016-11-01 06:00:

  korean_hourly_df = korean_df.groupby('house_id', group_keys=False).apply(lambda g: g.set_index('timestamp').resample('h').mean().reset_index()).sort_values(by=['house_id', 'timestamp'])
  korean_hourly_df['total_energy_lag_1h'] = korean_hourly_df.groupby('house_id')['total_energy'].shift(1).fillna(method='bfill')
