In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, LSTM, TimeDistributed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.optimizers import Adam


In [2]:
parent_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
main_directory = os.path.dirname(parent_directory)
phase_2 =  pd.read_csv(main_directory + '/data/phase_2_data_with_survey.csv')
phase_2.head()

Unnamed: 0,ID,From,Date,Hour,Participation_Phase,Demand_kWh,Temperature,home_size,electric_car,no_of_people,electrically_heated
0,Exp_3928,2020-12-01 00:00:00+00:00,2020-12-01,1,Phase_2,4.43,4.2,120-159 m2,No,4,True
1,Exp_3928,2020-12-01 01:00:00+00:00,2020-12-01,2,Phase_2,4.5,4.2,120-159 m2,No,4,True
2,Exp_3928,2020-12-01 02:00:00+00:00,2020-12-01,3,Phase_2,5.44,3.9,120-159 m2,No,4,True
3,Exp_3928,2020-12-01 03:00:00+00:00,2020-12-01,4,Phase_2,4.5,3.1,120-159 m2,No,4,True
4,Exp_3928,2020-12-01 04:00:00+00:00,2020-12-01,5,Phase_2,4.48,2.4,120-159 m2,No,4,True


In [3]:
unique_ids = phase_2["ID"].unique()[:500]
phase_2 = phase_2[phase_2["ID"].isin(unique_ids)]

In [4]:
# phase_1['From'] = pd.to_datetime(phase_1['From'])
# phase_1['Day'] = phase_1['From'].dt.day
# phase_1['Month'] = phase_1['From'].dt.month
# phase_1['Weekday'] = phase_1['From'].dt.weekday
phase_2.set_index('From',inplace=True)

phase_2.head()

Unnamed: 0_level_0,ID,Date,Hour,Participation_Phase,Demand_kWh,Temperature,home_size,electric_car,no_of_people,electrically_heated
From,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-12-01 00:00:00+00:00,Exp_3928,2020-12-01,1,Phase_2,4.43,4.2,120-159 m2,No,4,True
2020-12-01 01:00:00+00:00,Exp_3928,2020-12-01,2,Phase_2,4.5,4.2,120-159 m2,No,4,True
2020-12-01 02:00:00+00:00,Exp_3928,2020-12-01,3,Phase_2,5.44,3.9,120-159 m2,No,4,True
2020-12-01 03:00:00+00:00,Exp_3928,2020-12-01,4,Phase_2,4.5,3.1,120-159 m2,No,4,True
2020-12-01 04:00:00+00:00,Exp_3928,2020-12-01,5,Phase_2,4.48,2.4,120-159 m2,No,4,True


In [4]:
def convert_range_to_avg(range_str):
    try:
        if 'm2' in range_str:
            range_str = range_str.replace(' m2', '')
        if '-' in range_str:
            numbers = range_str.split('-')
            avg_value = (float(numbers[0]) + float(numbers[1])) / 2
        elif 'or larger' in range_str:
            number = range_str.replace(' or larger', '')
            avg_value = float(number)
        else:
            avg_value = float(range_str)
        return avg_value
    except ValueError:
        return np.nan

phase_2['home_size'] = phase_2['home_size'].apply(convert_range_to_avg)

label_encoders = {}
for column in phase_2.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    phase_2[column] = le.fit_transform(phase_2[column])
    label_encoders[column] = le

phase_2.head()

Unnamed: 0,ID,From,Date,Hour,Participation_Phase,Demand_kWh,Temperature,home_size,electric_car,no_of_people,electrically_heated
0,406,0,0,1,0,4.43,4.2,139.5,0,4,True
1,406,1,0,2,0,4.5,4.2,139.5,0,4,True
2,406,2,0,3,0,5.44,3.9,139.5,0,4,True
3,406,3,0,4,0,4.5,3.1,139.5,0,4,True
4,406,4,0,5,0,4.48,2.4,139.5,0,4,True


75/25

In [6]:
df_features = ['Temperature']
df_target = ["Demand_kWh"]

X= phase_2[df_features].values
y=phase_2[df_target].values

X = X.reshape((X.shape[0], 1, X.shape[1])) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

In [7]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[1])).reshape(X_train.shape)
X_test = scaler_X.transform(X_test.reshape(-1, X_test.shape[1])).reshape(X_test.shape)

y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [8]:
model = Sequential()
model.add(LSTM(units=64, activation='relu', input_shape=(1, len(df_features)))) 
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='mse')

  super().__init__(**kwargs)


In [9]:
history = model.fit(X_train, y_train,epochs=10, batch_size=16, validation_data=(X_test, y_test))


Epoch 1/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 1ms/step - loss: 0.0096 - val_loss: 0.0099
Epoch 2/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0101
Epoch 3/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0102
Epoch 4/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0099
Epoch 5/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0103
Epoch 6/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0105
Epoch 7/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0102
Epoch 8/10
[1m65250/65250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 1ms/step - loss: 0.0096 - val_loss: 0.0096


In [10]:
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

forecast = model.predict(X_test)
y_pred = scaler_y.inverse_transform(forecast)

forecast_df = pd.DataFrame(data=y_pred, columns=['forecast_data'], index=phase_2.index[:len(y_pred)])

forecast_df = phase_2.join(forecast_df, how="left")

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

[1m10875/10875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 790us/step - loss: 0.0094
Test Loss: 0.010143236257135868
[1m10875/10875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 748us/step
MSE: 9.87396081034679
MAE: 3.0979304513199084
RMSE: 3.1422859211642074


In [11]:
df = forecast_df[forecast_df["forecast_data"].notna()]
df = df.drop(columns=["Temperature","Hour","Participation_Phase"])

df.to_csv(parent_directory+"/Neural_Network/predictions/phase_2_with_survey_data/cnn_lstm_phase_2_with_survey_method_2_75_25.csv",index=True)

80/20

In [12]:
df_features = ['Temperature']
df_target = ["Demand_kWh"]

X= phase_2[df_features].values
y=phase_2[df_target].values

X = X.reshape((X.shape[0], 1, X.shape[1])) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [13]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[1])).reshape(X_train.shape)
X_test = scaler_X.transform(X_test.reshape(-1, X_test.shape[1])).reshape(X_test.shape)

y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [14]:
model = Sequential()
model.add(LSTM(units=64, activation='relu', input_shape=(1, len(df_features)))) 
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='mse')

  super().__init__(**kwargs)


In [15]:
history = model.fit(X_train, y_train,epochs=10, batch_size=16, validation_data=(X_test, y_test))


Epoch 1/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1ms/step - loss: 0.0095 - val_loss: 0.0103
Epoch 2/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0103
Epoch 3/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0101
Epoch 4/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0102
Epoch 5/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0104
Epoch 6/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0108
Epoch 7/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0103
Epoch 8/10
[1m69600/69600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 1ms/step - loss: 0.0094 - val_loss: 0.0104


In [16]:
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

forecast = model.predict(X_test)
y_pred = scaler_y.inverse_transform(forecast)

forecast_df = pd.DataFrame(data=y_pred, columns=['forecast_data'], index=phase_2.index[:len(y_pred)])

forecast_df = phase_2.join(forecast_df, how="left")

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

[1m8700/8700[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 790us/step - loss: 0.0098
Test Loss: 0.010551730170845985
[1m8700/8700[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 815us/step
MSE: 9.745334487288934
MAE: 3.0777564092818896
RMSE: 3.1217518298687574


In [17]:
df = forecast_df[forecast_df["forecast_data"].notna()]
df = df.drop(columns=["Temperature","Hour","Participation_Phase"])

df.to_csv(parent_directory+"/Neural_Network/predictions/phase_2_with_survey_data/cnn_lstm_phase_2_with_survey_method_2_80_20.csv",index=True)

50/50

In [5]:
df_features = ['Temperature']
df_target = ["Demand_kWh"]

X= phase_2[df_features].values
y=phase_2[df_target].values

X = X.reshape((X.shape[0], 1, X.shape[1])) 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, shuffle=False)

In [6]:
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[1])).reshape(X_train.shape)
X_test = scaler_X.transform(X_test.reshape(-1, X_test.shape[1])).reshape(X_test.shape)

y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [7]:
model = Sequential()
model.add(LSTM(units=64, activation='relu', input_shape=(1, len(df_features)))) 
model.add(Dense(50, activation='relu'))
model.add(Dense(1))
learning_rate = 0.001
optimizer = Adam(learning_rate=learning_rate)

model.compile(optimizer=optimizer, loss='mse')

  super().__init__(**kwargs)


In [8]:
history = model.fit(X_train, y_train,epochs=10, batch_size=16, validation_data=(X_test, y_test))


Epoch 1/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 2ms/step - loss: 0.0104 - val_loss: 0.0113
Epoch 2/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 2ms/step - loss: 0.0102 - val_loss: 0.0114
Epoch 3/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 2ms/step - loss: 0.0102 - val_loss: 0.0112
Epoch 4/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 2ms/step - loss: 0.0103 - val_loss: 0.0113
Epoch 5/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 2ms/step - loss: 0.0102 - val_loss: 0.0114
Epoch 6/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 2ms/step - loss: 0.0103 - val_loss: 0.0113
Epoch 7/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 2ms/step - loss: 0.0102 - val_loss: 0.0112
Epoch 8/10
[1m43500/43500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 2ms/step - loss: 0.0102 - val_loss: 0.0113


In [9]:
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')

forecast = model.predict(X_test)
y_pred = scaler_y.inverse_transform(forecast)

forecast_df = pd.DataFrame(data=y_pred, columns=['forecast_data'], index=phase_2.index[:len(y_pred)])

forecast_df = phase_2.join(forecast_df, how="left")

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

[1m21750/21750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 749us/step - loss: 0.0110
Test Loss: 0.011358583346009254
[1m21750/21750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 758us/step
MSE: 9.302427122634509
MAE: 3.0065416981051096
RMSE: 3.049988052867504


In [10]:
df = forecast_df[forecast_df["forecast_data"].notna()]
df = df.drop(columns=["Temperature","Hour","Participation_Phase"])

df.to_csv(parent_directory+"/Neural_Network/predictions/phase_2_with_survey_data/cnn_lstm_phase_2_with_survey_method_2_50_50.csv",index=True)