In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Conv1D, MaxPooling1D, Flatten, LSTM, TimeDistributed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.optimizers import Adam


In [2]:
parent_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
main_directory = os.path.dirname(parent_directory)
phase_1 =  pd.read_csv(main_directory + '/data/phase_1_data_with_survey.csv')
phase_1.head()

Unnamed: 0,ID,From,Date,Hour,Participation_Phase,Demand_kWh,Temperature,home_size,electric_car,no_of_people,electrically_heated
0,Exp_62,2020-02-01 00:00:00+00:00,2020-02-01,1,Phase_1,1.925,7.6,120-159 m2,Yes,3,True
1,Exp_62,2020-02-01 01:00:00+00:00,2020-02-01,2,Phase_1,2.002,8.2,120-159 m2,Yes,3,True
2,Exp_62,2020-02-01 02:00:00+00:00,2020-02-01,3,Phase_1,2.019,8.4,120-159 m2,Yes,3,True
3,Exp_62,2020-02-01 03:00:00+00:00,2020-02-01,4,Phase_1,2.205,8.4,120-159 m2,Yes,3,True
4,Exp_62,2020-02-01 04:00:00+00:00,2020-02-01,5,Phase_1,2.834,8.2,120-159 m2,Yes,3,True


In [3]:
# phase_1['From'] = pd.to_datetime(phase_1['From'])
# phase_1['Day'] = phase_1['From'].dt.day
# phase_1['Month'] = phase_1['From'].dt.month
# phase_1['Weekday'] = phase_1['From'].dt.weekday
phase_1.set_index('From',inplace=True)

phase_1.head()

Unnamed: 0_level_0,ID,Date,Hour,Participation_Phase,Demand_kWh,Temperature,home_size,electric_car,no_of_people,electrically_heated
From,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-02-01 00:00:00+00:00,Exp_62,2020-02-01,1,Phase_1,1.925,7.6,120-159 m2,Yes,3,True
2020-02-01 01:00:00+00:00,Exp_62,2020-02-01,2,Phase_1,2.002,8.2,120-159 m2,Yes,3,True
2020-02-01 02:00:00+00:00,Exp_62,2020-02-01,3,Phase_1,2.019,8.4,120-159 m2,Yes,3,True
2020-02-01 03:00:00+00:00,Exp_62,2020-02-01,4,Phase_1,2.205,8.4,120-159 m2,Yes,3,True
2020-02-01 04:00:00+00:00,Exp_62,2020-02-01,5,Phase_1,2.834,8.2,120-159 m2,Yes,3,True


In [4]:
def convert_range_to_avg(range_str):
    try:
        if 'm2' in range_str:
            range_str = range_str.replace(' m2', '')
        if '-' in range_str:
            numbers = range_str.split('-')
            avg_value = (float(numbers[0]) + float(numbers[1])) / 2
        elif 'or larger' in range_str:
            number = range_str.replace(' or larger', '')
            avg_value = float(number)
        else:
            avg_value = float(range_str)
        return avg_value
    except ValueError:
        return np.nan

phase_1['home_size'] = phase_1['home_size'].apply(convert_range_to_avg)

label_encoders = {}
for column in phase_1.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    phase_1[column] = le.fit_transform(phase_1[column])
    label_encoders[column] = le

phase_1.head()

Unnamed: 0_level_0,ID,Date,Hour,Participation_Phase,Demand_kWh,Temperature,home_size,electric_car,no_of_people,electrically_heated
From,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-02-01 00:00:00+00:00,29,26,1,0,1.925,7.6,139.5,1,3,True
2020-02-01 01:00:00+00:00,29,26,2,0,2.002,8.2,139.5,1,3,True
2020-02-01 02:00:00+00:00,29,26,3,0,2.019,8.4,139.5,1,3,True
2020-02-01 03:00:00+00:00,29,26,4,0,2.205,8.4,139.5,1,3,True
2020-02-01 04:00:00+00:00,29,26,5,0,2.834,8.2,139.5,1,3,True


In [5]:
df_features = ['Temperature',"home_size","electric_car","no_of_people","electrically_heated"]
df_target = ["Demand_kWh"]

X= phase_1[df_features].values
y=phase_1[df_target].values

X = X.reshape((X.shape[0], 1, X.shape[1])) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

In [6]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[1])).reshape(X_train.shape)
X_test = scaler_X.transform(X_test.reshape(-1, X_test.shape[1])).reshape(X_test.shape)

y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [7]:
model = Sequential()
model.add(LSTM(units=64, activation='relu', input_shape=(1, len(df_features)))) 
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='mse')

  super().__init__(**kwargs)


In [8]:
history = model.fit(X_train, y_train,epochs=10, batch_size=16, validation_data=(X_test, y_test))


Epoch 1/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.0104 - val_loss: 0.0109
Epoch 2/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0079 - val_loss: 0.0115
Epoch 3/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0077 - val_loss: 0.0117
Epoch 4/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0074 - val_loss: 0.0126
Epoch 5/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0074 - val_loss: 0.0131
Epoch 6/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0074 - val_loss: 0.0130
Epoch 7/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0074 - val_loss: 0.0130
Epoch 8/10
[1m3038/3038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0073 - val_loss: 0.0119
Epoch 9/10
[1m3038/3038

In [9]:
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

forecast = model.predict(X_test)
y_pred = scaler_y.inverse_transform(forecast)

forecast_df = pd.DataFrame(data=y_pred, columns=['forecast_data'], index=phase_1.index[:len(y_pred)])

forecast_df = phase_1.join(forecast_df, how="left")

print(forecast_df)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 588us/step - loss: 0.0175
Test Loss: 0.013041052967309952
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 785us/step
                           ID  Date  Hour  Participation_Phase  Demand_kWh  \
From                                                                         
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
...                        ..   ...   ...                  ...         ...   
2020-03-20 23:00:00+00:00  19    74    24                    0       0.626   
2020-03-20 23:00:00+00:00  19    74    24                    0       0.626   
2020-03-20 23:00:00

In [10]:
df = forecast_df[forecast_df["forecast_data"].notna()]
df = df.drop(columns=["Temperature","Hour","Participation_Phase"])

df.to_csv(parent_directory+"/Neural_Network/predictions/phase_1_with_survey_data/cnn_lstm_phase_1_with_survey_method_2_75_25.csv",index=True)

80/20

In [11]:
df_features = ['Temperature',"home_size","electric_car","no_of_people","electrically_heated"]
df_target = ["Demand_kWh"]

X= phase_1[df_features].values
y=phase_1[df_target].values

X = X.reshape((X.shape[0], 1, X.shape[1])) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [12]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[1])).reshape(X_train.shape)
X_test = scaler_X.transform(X_test.reshape(-1, X_test.shape[1])).reshape(X_test.shape)

y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [13]:
model = Sequential()
model.add(LSTM(units=64, activation='relu', input_shape=(1, len(df_features)))) 
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='mse')

  super().__init__(**kwargs)


In [14]:
history = model.fit(X_train, y_train,epochs=10, batch_size=16, validation_data=(X_test, y_test))


Epoch 1/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0102 - val_loss: 0.0096
Epoch 2/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 967us/step - loss: 0.0082 - val_loss: 0.0078
Epoch 3/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 935us/step - loss: 0.0080 - val_loss: 0.0101
Epoch 4/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 935us/step - loss: 0.0079 - val_loss: 0.0102
Epoch 5/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 913us/step - loss: 0.0077 - val_loss: 0.0071
Epoch 6/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 933us/step - loss: 0.0078 - val_loss: 0.0169
Epoch 7/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 928us/step - loss: 0.0074 - val_loss: 0.0137
Epoch 8/10
[1m3240/3240[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 901us/step - loss: 0.0074 - val_loss: 0.0082
Epoch 9/10

In [15]:
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

forecast = model.predict(X_test)
y_pred = scaler_y.inverse_transform(forecast)

forecast_df = pd.DataFrame(data=y_pred, columns=['forecast_data'], index=phase_1.index[:len(y_pred)])

forecast_df = phase_1.join(forecast_df, how="left")

print(forecast_df)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 663us/step - loss: 0.0165
Test Loss: 0.0119638005271554
[1m405/405[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 571us/step
                           ID  Date  Hour  Participation_Phase  Demand_kWh  \
From                                                                         
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
...                        ..   ...   ...                  ...         ...   
2020-03-20 23:00:00+00:00  19    74    24                    0       0.626   
2020-03-20 23:00:00+00:00  19    74    24                    0       0.626   
2020-03-20 23:00:00+0

In [16]:
df = forecast_df[forecast_df["forecast_data"].notna()]
df = df.drop(columns=["Temperature","Hour","Participation_Phase"])

df.to_csv(parent_directory+"/Neural_Network/predictions/phase_1_with_survey_data/cnn_lstm_phase_1_with_survey_method_2_80_20.csv",index=True)

50/50

In [17]:
df_features = ['Temperature',"home_size","electric_car","no_of_people","electrically_heated"]
df_target = ["Demand_kWh"]

X= phase_1[df_features].values
y=phase_1[df_target].values

X = X.reshape((X.shape[0], 1, X.shape[1])) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)

In [18]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[1])).reshape(X_train.shape)
X_test = scaler_X.transform(X_test.reshape(-1, X_test.shape[1])).reshape(X_test.shape)

y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [19]:
model = Sequential()
model.add(LSTM(units=64, activation='relu', input_shape=(1, len(df_features)))) 
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='mse')

  super().__init__(**kwargs)


In [20]:
history = model.fit(X_train, y_train,epochs=10, batch_size=16, validation_data=(X_test, y_test))


Epoch 1/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 0.0129 - val_loss: 0.0086
Epoch 2/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0081
Epoch 3/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0092 - val_loss: 0.0087
Epoch 4/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0090 - val_loss: 0.0083
Epoch 5/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0090 - val_loss: 0.0087
Epoch 6/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0089 - val_loss: 0.0100
Epoch 7/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0086 - val_loss: 0.0088
Epoch 8/10
[1m2025/2025[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 0.0086 - val_loss: 0.0115
Epoch 9/10
[1m2025/2025

In [21]:
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

forecast = model.predict(X_test)
y_pred = scaler_y.inverse_transform(forecast)

forecast_df = pd.DataFrame(data=y_pred, columns=['forecast_data'], index=phase_1.index[:len(y_pred)])

forecast_df = phase_1.join(forecast_df, how="left")

print(forecast_df)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

[1m1013/1013[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 628us/step - loss: 0.0065
Test Loss: 0.010523049160838127
[1m1013/1013[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 729us/step
                           ID  Date  Hour  Participation_Phase  Demand_kWh  \
From                                                                         
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
2020-02-01 00:00:00+00:00  29    26     1                    0       1.925   
...                        ..   ...   ...                  ...         ...   
2020-03-20 23:00:00+00:00  19    74    24                    0       0.626   
2020-03-20 23:00:00+00:00  19    74    24                    0       0.626   
2020-03-20 23:0

In [22]:
df = forecast_df[forecast_df["forecast_data"].notna()]
df = df.drop(columns=["Temperature","Hour","Participation_Phase"])

df.to_csv(parent_directory+"/Neural_Network/predictions/phase_1_with_survey_data/cnn_lstm_phase_1_with_survey_method_2_50_50.csv",index=True)