In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

import tensorflow as tf
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, SimpleRNN, LSTM



ModuleNotFoundError: No module named 'tensorflow'

### 1.Creating the Synthetic Dataset

Let's generate a dataset with the following features:

* Indoor Temperature
* Outdoor Temperature
* Outdoor Humidity
* Solar Radiation
* Time
* Outdoor Temperature History
* Façade Temperature History

$\large We'll create a time series data for 1000 hours.$

In [None]:
# Setting the seed for reproducibility
np.random.seed(42)

# Number of data points
num_points = 1000

# Generate datetime range for the dataset
time_index = pd.date_range(start=datetime.datetime(2024, 1, 1, 0, 0), periods=num_points, freq='H')

# Generate synthetic data
outdoor_temp = 15 + 10 * np.sin(np.linspace(0, 10, num_points)) + np.random.normal(0, 1, num_points)
outdoor_humidity = 50 + 20 * np.sin(np.linspace(0, 5, num_points)) + np.random.normal(0, 5, num_points)
solar_radiation = np.maximum(0, 1000 * np.sin(np.linspace(0, 5, num_points)) + np.random.normal(0, 100, num_points))
facade_temp = outdoor_temp + np.random.normal(0, 2, num_points)
indoor_temp = 20 + 5 * np.sin(np.linspace(0, 10, num_points)) + np.random.normal(0, 1, num_points)

# Create lagged features for outdoor temperature and facade temperature
lag_hours = 3
outdoor_temp_history = np.array([np.roll(outdoor_temp, shift) for shift in range(1, lag_hours + 1)]).T
facade_temp_history = np.array([np.roll(facade_temp, shift) for shift in range(1, lag_hours + 1)]).T

# Combine all data into a DataFrame
data = {
    'Time': time_index,
    'Outdoor Temp': outdoor_temp,
    'Outdoor Humidity': outdoor_humidity,
    'Solar Radiation': solar_radiation,
    'Indoor Temp': indoor_temp,
    'Facade Temp': facade_temp,
}

# Add lagged features to the DataFrame
for i in range(1, lag_hours + 1):
    data[f'Outdoor Temp(t-{i})'] = outdoor_temp_history[:, i - 1]
    data[f'Facade Temp(t-{i})'] = facade_temp_history[:, i - 1]

df = pd.DataFrame(data)

# Drop initial rows with NaN values due to lagging
df = df.dropna().reset_index(drop=True)

# Display the first few rows of the dataset
print(df.head())


                 Time  Outdoor Temp  Outdoor Humidity  Solar Radiation  \
0 2024-01-01 00:00:00     15.496714         56.996777              0.0   
1 2024-01-01 01:00:00     14.961834         54.723268              0.0   
2 2024-01-01 02:00:00     15.847875         50.498349              0.0   
3 2024-01-01 03:00:00     16.823285         47.065605              0.0   
4 2024-01-01 04:00:00     15.166140         53.891490              0.0   

   Indoor Temp  Facade Temp  Outdoor Temp(t-1)  Facade Temp(t-1)  \
0    19.136506    11.681099          10.132372         10.853667   
1    20.018846    13.241064          15.496714         11.681099   
2    20.118110    15.020664          14.961834         13.241064   
3    20.622758    20.598660          15.847875         15.020664   
4    18.833288    16.279246          16.823285         20.598660   

   Outdoor Temp(t-2)  Facade Temp(t-2)  Outdoor Temp(t-3)  Facade Temp(t-3)  
0           9.072872         12.359629          10.369693          9

In [None]:
df.describe()

Unnamed: 0,Outdoor Temp,Outdoor Humidity,Solar Radiation,Indoor Temp,Facade Temp,Outdoor Temp(t-1),Facade Temp(t-1),Outdoor Temp(t-2),Facade Temp(t-2),Outdoor Temp(t-3),Facade Temp(t-3),Hour,DayOfWeek,Month
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,16.853829,53.207072,401.349693,20.867975,16.816391,16.853829,16.816391,16.853829,16.816391,16.853829,16.816391,11.436,2.976,1.256
std,6.774697,15.143893,399.756321,3.45271,6.974753,6.774697,6.974753,6.774697,6.974753,6.774697,6.974753,6.913009,1.990826,0.43664
min,2.698106,15.51514,0.0,13.014947,-1.248627,2.698106,-1.248627,2.698106,-1.248627,2.698106,-1.248627,0.0,0.0,1.0
25%,11.057721,39.74626,0.0,17.983614,11.060063,11.057721,11.060063,11.057721,11.060063,11.057721,11.060063,5.0,1.0,1.0
50%,17.97247,56.296693,310.09783,21.510563,18.164857,17.97247,18.164857,17.97247,18.164857,17.97247,18.164857,11.0,3.0,1.0
75%,22.991996,65.814618,801.852535,23.88185,22.706189,22.991996,22.706189,22.991996,22.706189,22.991996,22.706189,17.0,5.0,2.0
max,27.524478,82.334404,1310.012108,27.755124,29.333112,27.524478,29.333112,27.524478,29.333112,27.524478,29.333112,23.0,6.0,2.0


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Outdoor Temp       1000 non-null   float64
 1   Outdoor Humidity   1000 non-null   float64
 2   Solar Radiation    1000 non-null   float64
 3   Indoor Temp        1000 non-null   float64
 4   Facade Temp        1000 non-null   float64
 5   Outdoor Temp(t-1)  1000 non-null   float64
 6   Facade Temp(t-1)   1000 non-null   float64
 7   Outdoor Temp(t-2)  1000 non-null   float64
 8   Facade Temp(t-2)   1000 non-null   float64
 9   Outdoor Temp(t-3)  1000 non-null   float64
 10  Facade Temp(t-3)   1000 non-null   float64
 11  Hour               1000 non-null   int32  
 12  DayOfWeek          1000 non-null   int32  
 13  Month              1000 non-null   int32  
dtypes: float64(11), int32(3)
memory usage: 97.8 KB


In [None]:
df.isnull()

Unnamed: 0,Outdoor Temp,Outdoor Humidity,Solar Radiation,Indoor Temp,Facade Temp,Outdoor Temp(t-1),Facade Temp(t-1),Outdoor Temp(t-2),Facade Temp(t-2),Outdoor Temp(t-3),Facade Temp(t-3),Hour,DayOfWeek,Month
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,False,False,False,False,False,False,False,False,False,False,False,False,False,False
996,False,False,False,False,False,False,False,False,False,False,False,False,False,False
997,False,False,False,False,False,False,False,False,False,False,False,False,False,False
998,False,False,False,False,False,False,False,False,False,False,False,False,False,False


$\large Code Explanation$

* Imports: We import necessary libraries such as pandas and numpy.
* Time Index: We create a time index starting from January 1, 2024, with hourly frequency.
* Synthetic Data Generation: We generate synthetic data for outdoor temperature, outdoor humidity, solar radiation, facade temperature, and indoor temperature. We use sinusoidal functions with added noise to simulate realistic variations.
* Lagged Features: We create lagged features for outdoor temperature and facade temperature to simulate historical data.
* Combining Data: We combine all features into a single DataFrame and drop initial rows with NaN values due to lagging.

$\large Next Steps$

We can now use this synthetic dataset to train and evaluate both the normal ANN and the autoregressive ANN models.

### 2.Feature Engineering for 'Time' Column

In [None]:
# Extract relevant time features (hour, day of week, month)
df['Hour'] = df['Time'].dt.hour
df['DayOfWeek'] = df['Time'].dt.dayofweek
df['Month'] = df['Time'].dt.month

# Drop the original 'Time' column
df = df.drop(columns=['Time'])

# Display the first few rows of the updated dataset
print(df.head())


   Outdoor Temp  Outdoor Humidity  Solar Radiation  Indoor Temp  Facade Temp  \
0     15.496714         56.996777              0.0    19.136506    11.681099   
1     14.961834         54.723268              0.0    20.018846    13.241064   
2     15.847875         50.498349              0.0    20.118110    15.020664   
3     16.823285         47.065605              0.0    20.622758    20.598660   
4     15.166140         53.891490              0.0    18.833288    16.279246   

   Outdoor Temp(t-1)  Facade Temp(t-1)  Outdoor Temp(t-2)  Facade Temp(t-2)  \
0          10.132372         10.853667           9.072872         12.359629   
1          15.496714         11.681099          10.132372         10.853667   
2          14.961834         13.241064          15.496714         11.681099   
3          15.847875         15.020664          14.961834         13.241064   
4          16.823285         20.598660          15.847875         15.020664   

   Outdoor Temp(t-3)  Facade Temp(t-3)  Hour

### 3. Normalize and Split the Data

In [None]:
# Define input features and output target
X = df[['Outdoor Temp', 'Outdoor Humidity', 'Solar Radiation', 'Hour', 'DayOfWeek', 'Month', 'Facade Temp']]
y = df['Indoor Temp']

# Normalize the input features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


### 4. Normal ANN Training and Evaluation

In [None]:
# Define and train the normal ANN model
ann_model = MLPRegressor(hidden_layer_sizes=(40,80,100), max_iter=500, random_state=42)
ann_model.fit(X_train, y_train)

# Predict and evaluate the normal ANN model
y_pred = ann_model.predict(X_test)
mse_ann = mean_squared_error(y_test, y_pred)
r2_ann = r2_score(y_test, y_pred)

print(f"MSE of the normal ANN: {mse_ann}")
print(f"R2 Score of the normal ANN: {r2_ann}")

MSE of the normal ANN: 1.2443911344803982
R2 Score of the normal ANN: 0.8795281409510027


### 5. Create and Train the Autoregressive ANN Model

In [None]:
# Define input features and output target for the autoregressive model
X_ar = df[['Outdoor Temp', 'Outdoor Humidity', 'Solar Radiation', 'Hour', 'DayOfWeek', 'Month',
           'Facade Temp', 'Outdoor Temp(t-1)', 'Outdoor Temp(t-2)', 'Outdoor Temp(t-3)',
           'Facade Temp(t-1)', 'Facade Temp(t-2)', 'Facade Temp(t-3)']]
y_ar = df['Indoor Temp']

# Normalize the input features
X_ar_scaled = scaler.fit_transform(X_ar)

# Split the dataset into training and testing sets
X_train_ar, X_test_ar, y_train_ar, y_test_ar = train_test_split(X_ar_scaled, y_ar, test_size=0.2, random_state=42)

# Define and train the autoregressive ANN model
ar_ann_model = MLPRegressor(hidden_layer_sizes=(50,), max_iter=500, random_state=42)
ar_ann_model.fit(X_train_ar, y_train_ar)

# Predict and evaluate the autoregressive ANN model
y_pred_ar = ar_ann_model.predict(X_test_ar)
mse_ar_ann = mean_squared_error(y_test_ar, y_pred_ar)
r2_ar_ann = r2_score(y_test_ar, y_pred_ar)

print(f"MSE of the autoregressive ANN: {mse_ar_ann}")
print(f"R2 Score of the autoregressive ANN: {r2_ar_ann}")

MSE of the autoregressive ANN: 3.141745501847138
R2 Score of the autoregressive ANN: 0.6958416764802886




### 7. Compare the Models

In [None]:
print("Comparison of Model Performances:")
print(f"MSE of the normal ANN: {mse_ann}")
print(f"R2 Score of the normal ANN: {r2_ann}")
print(f"MSE of the autoregressive ANN: {mse_ar_ann}")
print(f"R2 Score of the autoregressive ANN: {r2_ar_ann}")


Comparison of Model Performances:
MSE of the normal ANN: 1.7464550475143452
R2 Score of the normal ANN: 0.8309223840561906
MSE of the autoregressive ANN: 3.141745501847138
R2 Score of the autoregressive ANN: 0.6958416764802886


### 8. Preparing Data for RNN and LSTM Models

We need to reshape the data into a 3D array for RNN and LSTM models.

In [None]:
# Define input features and output target for autoregressive model
X_rnn = df[['Outdoor Temp', 'Outdoor Humidity', 'Solar Radiation', 'Hour', 'DayOfWeek', 'Month',
           'Facade Temp', 'Outdoor Temp(t-1)', 'Outdoor Temp(t-2)', 'Outdoor Temp(t-3)',
           'Facade Temp(t-1)', 'Facade Temp(t-2)', 'Facade Temp(t-3)']]
y_rnn = df['Indoor Temp']

# Normalize the input features
scaler = MinMaxScaler()
X_rnn_scaled = scaler.fit_transform(X_rnn)

# Reshape input data to be 3D [samples, time steps, features]
# Assuming lag_hours as time steps
X_rnn_reshaped = X_rnn_scaled.reshape((X_rnn_scaled.shape[0], 1, X_rnn_scaled.shape[1]))

# Split the dataset into training and testing sets
X_train_rnn, X_test_rnn, y_train_rnn, y_test_rnn = train_test_split(X_rnn_reshaped, y_rnn, test_size=0.2, random_state=42)


### 9. Define, Train, and Evaluate the RNN Model

In [None]:
# Define the RNN model
rnn_model = Sequential()
rnn_model.add(SimpleRNN(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
rnn_model.add(Dense(1))
rnn_model.compile(optimizer='adam', loss='mse')

# Train the RNN model
rnn_model.fit(X_train_rnn, y_train_rnn, epochs=50, batch_size=32, verbose=1)

# Predict and evaluate the RNN model
y_pred_rnn = rnn_model.predict(X_test_rnn)
mse_rnn = mean_squared_error(y_test_rnn, y_pred_rnn)
r2_rnn = r2_score(y_test_rnn, y_pred_rnn)

print(f"MSE of the RNN model: {mse_rnn}")
print(f"R2 Score of the RNN model: {r2_rnn}")


### 10. Define, Train, and Evaluate the LSTM Model

In [None]:
# Define the LSTM model
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')

# Train the LSTM model
lstm_model.fit(X_train_rnn, y_train_rnn, epochs=50, batch_size=32, verbose=1)

# Predict and evaluate the LSTM model
y_pred_lstm = lstm_model.predict(X_test_rnn)
mse_lstm = mean_squared_error(y_test_rnn, y_pred_lstm)
r2_lstm = r2_score(y_test_rnn, y_pred_lstm)

print(f"MSE of the LSTM model: {mse_lstm}")
print(f"R2 Score of the LSTM model: {r2_lstm}")


In [None]:
print("Comparison of Model Performances:")
print(f"MSE of the normal ANN: {mse_ann}")
print(f"R2 Score of the normal ANN: {r2_ann}")
print(f"MSE of the autoregressive ANN: {mse_ar_ann}")
print(f"R2 Score of the autoregressive ANN: {r2_ar_ann}")
print(f"MSE of the RNN model: {mse_rnn}")
print(f"R2 Score of the RNN model: {r2_rnn}")
print(f"MSE of the LSTM model: {mse_lstm}")
print(f"R2 Score of the LSTM model: {r2_lstm}")

Comparison of Model Performances:
MSE of the normal ANN: 1.7464550475143452
R2 Score of the normal ANN: 0.8309223840561906
MSE of the autoregressive ANN: 3.141745501847138
R2 Score of the autoregressive ANN: 0.6958416764802886
MSE of the RNN model: 4.499734869912052
R2 Score of the RNN model: 0.5643721576076193
MSE of the LSTM model: 6.74285472870434
R2 Score of the LSTM model: 0.3472114820204628


## Using another dataset

In [27]:
df1 = pd.read_csv("SolarPrediction.csv")

In [28]:
df1.head()

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,1475229326,9/29/2016 12:00:00 AM,23:55:26,1.21,48,30.46,59,177.39,5.62,06:13:00,18:13:00
1,1475229023,9/29/2016 12:00:00 AM,23:50:23,1.21,48,30.46,58,176.78,3.37,06:13:00,18:13:00
2,1475228726,9/29/2016 12:00:00 AM,23:45:26,1.23,48,30.46,57,158.75,3.37,06:13:00,18:13:00
3,1475228421,9/29/2016 12:00:00 AM,23:40:21,1.21,48,30.46,60,137.71,3.37,06:13:00,18:13:00
4,1475228124,9/29/2016 12:00:00 AM,23:35:24,1.17,48,30.46,62,104.95,5.62,06:13:00,18:13:00


In [29]:
df1.describe()

Unnamed: 0,UNIXTime,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed
count,11617.0,11617.0,11617.0,11617.0,11617.0,11616.0,11616.0
mean,1475226000.0,231.541504,52.608419,30.431425,81.452699,130.166401,5.701398
std,1727303.0,341.749195,5.885177,0.033114,20.397341,80.378015,2.870814
min,1472724000.0,1.13,41.0,30.34,8.0,0.09,0.0
25%,1473769000.0,1.24,48.0,30.41,69.0,67.66,3.37
50%,1474733000.0,3.45,51.0,30.43,89.0,134.57,5.62
75%,1477120000.0,399.39,57.0,30.46,99.0,171.41,7.87
max,1477994000.0,1601.26,71.0,30.53,103.0,359.93,20.25


In [30]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11617 entries, 0 to 11616
Data columns (total 11 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   UNIXTime                11617 non-null  int64  
 1   Data                    11617 non-null  object 
 2   Time                    11617 non-null  object 
 3   Radiation               11617 non-null  float64
 4   Temperature             11617 non-null  int64  
 5   Pressure                11617 non-null  float64
 6   Humidity                11617 non-null  int64  
 7   WindDirection(Degrees)  11616 non-null  float64
 8   Speed                   11616 non-null  float64
 9   TimeSunRise             11616 non-null  object 
 10  TimeSunSet              11616 non-null  object 
dtypes: float64(4), int64(3), object(4)
memory usage: 998.5+ KB


In [31]:
df1.isnull()

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
11612,False,False,False,False,False,False,False,False,False,False,False
11613,False,False,False,False,False,False,False,False,False,False,False
11614,False,False,False,False,False,False,False,False,False,False,False
11615,False,False,False,False,False,False,False,False,False,False,False


In [32]:
df1.columns

Index(['UNIXTime', 'Data', 'Time', 'Radiation', 'Temperature', 'Pressure',
       'Humidity', 'WindDirection(Degrees)', 'Speed', 'TimeSunRise',
       'TimeSunSet'],
      dtype='object')

## Adapting the random dataset with the dataset of the real values, from kaggle

In [36]:
# Set a random seed for reproducibility
np.random.seed(42)

# Generate synthetic indoor temperature based on outdoor temperature and some noise
df1['IndoorTemp'] = df1['Temperature'] - 0.5 * np.sin(np.linspace(0, 10, len(df1))) + np.random.normal(0, 0.5, len(df1))

# Display the first few rows of the dataset with the new column
print(df1.head())


     UNIXTime                   Data      Time  Radiation  Temperature  \
0  1475229326  9/29/2016 12:00:00 AM  23:55:26       1.21           48   
1  1475229023  9/29/2016 12:00:00 AM  23:50:23       1.21           48   
2  1475228726  9/29/2016 12:00:00 AM  23:45:26       1.23           48   
3  1475228421  9/29/2016 12:00:00 AM  23:40:21       1.21           48   
4  1475228124  9/29/2016 12:00:00 AM  23:35:24       1.17           48   

   Pressure  Humidity  WindDirection(Degrees)  Speed TimeSunRise TimeSunSet  \
0     30.46        59                  177.39   5.62    06:13:00   18:13:00   
1     30.46        58                  176.78   3.37    06:13:00   18:13:00   
2     30.46        57                  158.75   3.37    06:13:00   18:13:00   
3     30.46        60                  137.71   3.37    06:13:00   18:13:00   
4     30.46        62                  104.95   5.62    06:13:00   18:13:00   

   IndoorTemp  
0   48.248357  
1   47.930437  
2   48.322983  
3   48.760224  


## Creating new columns, and creating lags for times series

In [38]:
# Convert UNIX time to datetime
df1['Datetime'] = pd.to_datetime(df1['UNIXTime'], unit='s')
df1['Hour'] = df1['Datetime'].dt.hour
df1['DayOfWeek'] = df1['Datetime'].dt.dayofweek
df1['Month'] = df1['Datetime'].dt.month

# Define relevant features
features = ['Temperature', 'Humidity', 'Radiation', 'Hour', 'DayOfWeek', 'Month']
target = 'IndoorTemp'

# Create lagged features for the autoregressive model
n_lags = 3
for lag in range(1, n_lags + 1):
    df1[f'Temperature_lag{lag}'] = df1['Temperature'].shift(lag)
    df1[f'IndoorTemp_lag{lag}'] = df1['IndoorTemp'].shift(lag)

# Drop rows with NaN values due to lagging
df1 = df1.dropna().reset_index(drop=True)

# Display the first few rows of the preprocessed dataset
df1.head()


Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,...,Datetime,Hour,DayOfWeek,Month,Temperature_lag1,IndoorTemp_lag1,Temperature_lag2,IndoorTemp_lag2,Temperature_lag3,IndoorTemp_lag3
0,1475227519,9/29/2016 12:00:00 AM,23:25:19,1.2,49,30.46,72,112.45,6.75,06:13:00,...,2016-09-30 09:25:19,9,4,9,48.0,47.880779,48.0,47.881202,48.0,48.760224
1,1475227222,9/29/2016 12:00:00 AM,23:20:22,1.24,49,30.46,71,122.97,5.62,06:13:00,...,2016-09-30 09:20:22,9,4,9,49.0,49.787024,48.0,47.880779,48.0,47.881202
2,1475226922,9/29/2016 12:00:00 AM,23:15:22,1.23,49,30.46,80,101.18,4.5,06:13:00,...,2016-09-30 09:15:22,9,4,9,49.0,49.380704,49.0,49.787024,48.0,47.880779
3,1475226622,9/29/2016 12:00:00 AM,23:10:22,1.21,49,30.46,85,141.87,4.5,06:13:00,...,2016-09-30 09:10:22,9,4,9,49.0,48.761819,49.0,49.380704,49.0,49.787024
4,1475226323,9/29/2016 12:00:00 AM,23:05:23,1.23,49,30.47,93,120.55,2.25,06:13:00,...,2016-09-30 09:05:23,9,4,9,49.0,49.267406,49.0,48.761819,49.0,49.380704


## Preparing data and fit data

In [40]:
# Define input and output for the normal ANN
X = df1[features]
y = df1[target]

# Normalize the input features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


# Normal ANN

In [50]:
# Define and train the normal ANN model
ann_model = MLPRegressor(hidden_layer_sizes=(4,), max_iter=500, random_state=42)
ann_model.fit(X_train, y_train)

# Predict and evaluate the normal ANN model
y_pred = ann_model.predict(X_test)
mse_ann = mean_squared_error(y_test, y_pred)
r2_ann = r2_score(y_test, y_pred)

print(f"MSE of the normal ANN: {mse_ann}")
print(f"R2 Score of the normal ANN: {r2_ann}")

MSE of the normal ANN: 0.3471805599918465
R2 Score of the normal ANN: 0.9902600811693808


# Gnerate the new dataset from the arranged dataframe

In [48]:
# Define the file path for the exported CSV file
export_path = 'adapted_weather_dataset.csv'

# Save the adapted dataset to a CSV file
df1.to_csv(export_path, index=False)

print(f"Adapted dataset exported to {export_path}")

Adapted dataset exported to adapted_weather_dataset.csv


In [46]:
from google.colab import files

# Download the file
files.download('adapted_weather_dataset.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Auto-regressive ANN


In [54]:
# Define input and output for the autoregressive ANN
lagged_features = features + [f'Temperature_lag{i}' for i in range(1, n_lags + 1)] + [f'IndoorTemp_lag{i}' for i in range(1, n_lags + 1)]
X_ar = df1[lagged_features]
y_ar = df1[target]

# Normalize the input features
X_ar_scaled = scaler.fit_transform(X_ar)

# Split the dataset into training and testing sets
X_train_ar, X_test_ar, y_train_ar, y_test_ar = train_test_split(X_ar_scaled, y_ar, test_size=0.2, random_state=42)


In [55]:
# Define and train the autoregressive ANN model
ar_ann_model = MLPRegressor(hidden_layer_sizes=(40,80,100,130), max_iter=500, random_state=42)
ar_ann_model.fit(X_train_ar, y_train_ar)

# Predict and evaluate the autoregressive ANN model
y_pred_ar = ar_ann_model.predict(X_test_ar)
mse_ar_ann = mean_squared_error(y_test_ar, y_pred_ar)
r2_ar_ann = r2_score(y_test_ar, y_pred_ar)

print(f"MSE of the autoregressive ANN: {mse_ar_ann}")
print(f"R2 Score of the autoregressive ANN: {r2_ar_ann}")


MSE of the autoregressive ANN: 0.5063429105671633
R2 Score of the autoregressive ANN: 0.9857948876817888


## RNN & LSTM

In [56]:
# Reshape input data to be 3D [samples, time steps, features] for RNN and LSTM
X_ar_reshaped = X_ar_scaled.reshape((X_ar_scaled.shape[0], 1, X_ar_scaled.shape[1]))

# Split the dataset into training and testing sets
X_train_rnn, X_test_rnn, y_train_rnn, y_test_rnn = train_test_split(X_ar_reshaped, y_ar, test_size=0.2, random_state=42)


In [57]:
# Define the RNN model
rnn_model = Sequential()
rnn_model.add(SimpleRNN(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
rnn_model.add(Dense(1))
rnn_model.compile(optimizer='adam', loss='mse')

# Train the RNN model
rnn_model.fit(X_train_rnn, y_train_rnn, epochs=50, batch_size=32, verbose=1)

# Predict and evaluate the RNN model
y_pred_rnn = rnn_model.predict(X_test_rnn)
mse_rnn = mean_squared_error(y_test_rnn, y_pred_rnn)
r2_rnn = r2_score(y_test_rnn, y_pred_rnn)

print(f"MSE of the RNN model: {mse_rnn}")
print(f"R2 Score of the RNN model: {r2_rnn}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
MSE of the RNN model: 0.3414201256856055
R2 Score of the RNN model: 0.9904216863081398


In [58]:
# Define the LSTM model
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])))
lstm_model.add(Dense(1))
lstm_model.compile(optimizer='adam', loss='mse')

# Train the LSTM model
lstm_model.fit(X_train_rnn, y_train_rnn, epochs=50, batch_size=32, verbose=1)

# Predict and evaluate the LSTM model
y_pred_lstm = lstm_model.predict(X_test_rnn)
mse_lstm = mean_squared_error(y_test_rnn, y_pred_lstm)
r2_lstm = r2_score(y_test_rnn, y_pred_lstm)

print(f"MSE of the LSTM model: {mse_lstm}")
print(f"R2 Score of the LSTM model: {r2_lstm}")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
MSE of the LSTM model: 0.36612866408760464
R2 Score of the LSTM model: 0.9897285047588492


# Comparaison

In [63]:
print("Comparison of Model Performances:")

print(f"Normal ANN: \n MSE = {mse_ann}, R2 = {r2_ann}")

print(f"Autoregressive ANN: \n MSE = {mse_ar_ann}, R2 = {r2_ar_ann}")

print(f"RNN model: \n MSE = {mse_rnn}, R2 = {r2_rnn}")

print(f"LSTM model: \n MSE = {mse_lstm}, R2 = {r2_lstm}")

Comparison of Model Performances:
Normal ANN: 
 MSE = 0.3471805599918465, R2 = 0.9902600811693808
Autoregressive ANN: 
 MSE = 0.5063429105671633, R2 = 0.9857948876817888
RNN model: 
 MSE = 0.3414201256856055, R2 = 0.9904216863081398
LSTM model: 
 MSE = 0.36612866408760464, R2 = 0.9897285047588492
