# LSTM PREDICTION

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import os

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error,r2_score
from tensorflow.keras.callbacks import TensorBoard

In [None]:
# Load the pre-processed dataset that lives under <cwd>/pre_processing and
# parse its "DateTime" column as timestamps.
dataset_path = f"{os.getcwd()}/pre_processing/dataset_traite.csv"
df = pd.read_csv(dataset_path, parse_dates=["DateTime"], sep=',')

In [None]:
# Build the hourly modelling frame: index by timestamp, average to one row per
# hour, then attach a temperature feature looked up by calendar day.
df_lstm = df.set_index('DateTime')
df_lstm = df_lstm.resample('h').mean()
df_lstm['Date'] = pd.to_datetime(df_lstm.index.date)  # join key: calendar day

temperatures = pd.read_csv("pre_processing/temperatures.csv", parse_dates=['Date'], index_col='Date')
# Mean of the two daily extremes. The earlier `(max_t - min_t) / 2` computed
# half of the daily range, which is not an average temperature.
temperatures['avg_t'] = (temperatures['max_t'] + temperatures['min_t']) / 2

df_lstm = df_lstm.join(temperatures, how="left", on='Date')
# The join key and the raw extremes are not used as model inputs.
df_lstm = df_lstm.drop(columns=['Date', 'max_t', 'min_t'])
# Disabled experiment: one-hot encoded day-of-week features.
#df_lstm["Day"] = df_lstm.index.dayofweek+1
#y = pd.get_dummies(df_lstm.Day, prefix='Day')
#df_lstm = df_lstm.join(y, how="left")
#df_lstm = df_lstm.drop(columns=["Day"])
df_lstm = df_lstm.drop(columns=["Global_reactive_power"])
df_lstm.head()

In [None]:
# Rescale every column of the frame into [-1, 1]. The fitted `scaler` stays in
# scope so predictions can be mapped back to original units later.
# NOTE(review): the scaler is fitted on the whole frame, before any train/test
# split -- confirm that this leakage is acceptable.
scaler = MinMaxScaler(feature_range=(-1, 1))
hourly_cols = df_lstm.columns
df_lstm[hourly_cols] = scaler.fit_transform(df_lstm[hourly_cols])
df_lstm.describe()

In [None]:
# Preview the first three scaled rows.
df_lstm.iloc[:3]

In [None]:
# def data_to_supervised(df, steps_ahead = 1, col_to_predict="Global_active_power"):
#     data = pd.DataFrame(df)
#     n_vars = data.shape[1]
#     columns = []
#     columns.append(data.shift(0))
#     # temp = data.shift(0)
#     columns.append(data.shift(-steps_ahead)[col_to_predict])
#     df_labeled = pd.concat(columns, axis=1)
#     names = [col + "(t-1)" for col in df.columns]
#     names.append(f"{col_to_predict}(t)")
#     print(names)
#     df_labeled.columns = names
#     df_labeled.dropna(inplace=True)
    
#     return df_labeled

def data_to_supervised(df, col_to_predict="Global_active_power"):
    """Reframe a time-ordered table as a one-step-ahead supervised dataset.

    Each output row pairs every column's value from the previous row (the
    inputs, suffixed ``(t-1)``) with the current row's value of
    ``col_to_predict`` (the target, suffixed ``(t)``).

    Args:
        df: DataFrame, or anything ``pd.DataFrame`` accepts, with rows in
            chronological order.
        col_to_predict: Label of the column to use as the target.

    Returns:
        A new DataFrame with one ``<col>(t-1)`` column per input column
        followed by ``<col_to_predict>(t)``. Rows containing any NaN are
        dropped; for non-empty input that includes the first row, which has
        no predecessor.

    Raises:
        KeyError: If ``col_to_predict`` is not a column of the data.
    """
    data = pd.DataFrame(df)
    lagged = data.shift(1)
    target = data[col_to_predict]
    df_labeled = pd.concat([lagged, target], axis=1)

    # Labels are formatted rather than concatenated so that non-string column
    # labels (e.g. the integer columns of an array input) are accepted too.
    names = [f"{col}(t-1)" for col in data.columns]
    names.append(f"{col_to_predict}(t)")
    print(names)
    df_labeled.columns = names

    return df_labeled.dropna()

In [None]:
# Turn the scaled frame into (previous-row inputs, current-row target) rows
# and preview the first five.
df_lstm = data_to_supervised(df_lstm, col_to_predict="Global_active_power")
df_lstm.iloc[:5]

In [None]:
# Chronological split of the supervised frame: first 80% of the rows for
# training, the remainder for testing (no shuffling).
values = df_lstm.to_numpy()

trainsize = 0.8
n_rows = round(trainsize * len(values))
print(f"Taille du trainset : {n_rows}")

train, test = values[:n_rows, :], values[n_rows:, :]

# The last column is the target; every column before it is an input.
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]

# LSTM layers take [samples, timesteps, features]; insert a single timestep axis.
train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

In [None]:
# Single-layer LSTM regressor: 125 units feeding one linear output neuron,
# trained with mean absolute error and the adam optimizer.
# Alternatives left over from earlier experiments (author notes, translated):
#   - recurrent_activation='selu' (noted "1st place"), 'softsign' ("2nd place")
#   - stacked LSTM(2*24) layers and Dropout(0.2)
#   - mean squared error with adam, or SGD(learning_rate=0.001) with an 'mse' metric
n_timesteps, n_features = train_X.shape[1], train_X.shape[2]
model = Sequential([
    LSTM(
        125,
        input_shape=(n_timesteps, n_features),
        activation='tanh',
        recurrent_activation='sigmoid',
        return_sequences=False,
    ),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_absolute_error')
model.summary()

In [None]:
# Train the network; shuffle=False keeps the samples in their original order.
# NOTE(review): the test split is also passed as validation data.
history = model.fit(
    train_X,
    train_y,
    validation_data=(test_X, test_y),
    epochs=75,
    batch_size=70,
    shuffle=False,
    verbose=2,
)

In [None]:
# Learning curves: training loss and the loss on the validation data given to fit().
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

# Feature count is read from the model input itself rather than from a global
# frame, so it cannot drift from what `test_X` actually contains.
size = test_X.shape[-1]
# make a prediction
yhat = model.predict(test_X)
# Flatten [samples, 1, features] back to [samples, features].
test_X = test_X.reshape((test_X.shape[0], size))

# `scaler.inverse_transform` needs a matrix as wide as the one it was fitted
# on, so the value to invert goes in column 0 and the remaining feature
# columns pad the rest. `1:` (rather than `1-size:`) stays correct when there
# is only a single feature.
# NOTE(review): assumes the predicted column is column 0 of the scaled frame -- confirm.
other_features = test_X[:, 1:]
# invert scaling for forecast
inv_yhat = scaler.inverse_transform(np.concatenate((yhat, other_features), axis=1))[:, 0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = scaler.inverse_transform(np.concatenate((test_y, other_features), axis=1))[:, 0]

In [None]:
# Root-mean-squared error between the un-scaled actual and predicted values.
mse = mean_squared_error(inv_y, inv_yhat)
rmse = np.sqrt(mse)
print('Test RMSE: %.3f' % rmse)

In [None]:
# print("test")

In [None]:
# Coefficient of determination on the un-scaled values, then adjusted for the
# number of input features. `r2_score` is already imported at the top of the
# file, so the package does not need to be re-imported in this cell.
r2 = r2_score(inv_y, inv_yhat)
print(r2)
n_obs = len(test_y)
# Expects `test_X` already flattened to (samples, features) by the
# inverse-scaling cell, so shape[1] is the feature count.
n_feat = test_X.shape[1]
r_adjusted = 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_feat - 1)
print(r_adjusted)

In [None]:
# Actual vs. predicted values for the first test samples (at most 100).
# The axis label is derived from the number of points really plotted; it used
# to claim 500 hours while only 100 were drawn.
n_points = min(100, len(inv_y))
steps = range(n_points)
plt.figure(figsize=(40, 10))
plt.plot(steps, inv_y[:n_points], marker='.', label="actual")
plt.plot(steps, inv_yhat[:n_points], 'r', label="prediction")
# NOTE(review): presumably df.columns[1] is the predicted column -- confirm column order.
plt.ylabel(df.columns[1], size=15)
plt.xlabel(f'Time step for first {n_points} hours', size=15)
plt.legend(fontsize=15)
plt.show()

# LSTM by Day

In [None]:
# Rebuild the hourly frame from the raw data (timestamp index, hourly means,
# temperature feature joined by calendar day), then average it to one row per day.
df_lstm = df.set_index('DateTime')
df_lstm = df_lstm.resample('h').mean()
df_lstm['Date'] = pd.to_datetime(df_lstm.index.date)  # join key: calendar day

temperatures = pd.read_csv("pre_processing/temperatures.csv", parse_dates=['Date'], index_col='Date')
# Mean of the two daily extremes. The earlier `(max_t - min_t) / 2` computed
# half of the daily range, which is not an average temperature.
temperatures['avg_t'] = (temperatures['max_t'] + temperatures['min_t']) / 2

df_lstm = df_lstm.join(temperatures, how="left", on='Date')
# The join key and the raw extremes are not used as model inputs.
df_lstm = df_lstm.drop(columns=['Date', 'max_t', 'min_t'])
# Disabled experiment: one-hot encoded day-of-week features.
#df_lstm["Day"] = df_lstm.index.dayofweek+1
#y = pd.get_dummies(df_lstm.Day, prefix='Day')
#df_lstm = df_lstm.join(y, how="left")
#df_lstm = df_lstm.drop(columns=["Day"])
df_lstm = df_lstm.drop(columns=["Global_reactive_power"])
df_lstm2 = df_lstm.resample('d').mean()
df_lstm2.head(5)

In [None]:
# Dedicated scaler for the daily frame. Previously `scaler2` was created but
# `scaler` was refitted instead, leaving `scaler2` unused.
scaler2 = MinMaxScaler(feature_range=(-1, 1))
df_lstm2[df_lstm2.columns] = scaler2.fit_transform(df_lstm2[df_lstm2.columns])
# Later cells of this section invert predictions through the name `scaler`;
# bind it to the daily scaler so they keep using the daily fit.
scaler = scaler2
df_lstm2.describe()

In [None]:
# Turn the scaled daily frame into (previous-row inputs, current-row target)
# rows and preview the first three.
df_lstm2 = data_to_supervised(df_lstm2, col_to_predict="Global_active_power")
df_lstm2.iloc[:3]

In [None]:
# Chronological split of the daily supervised frame: first 80% of the rows for
# training, the remainder for testing (no shuffling).
values = df_lstm2.to_numpy()

trainsize = 0.8
n_rows = round(trainsize * len(values))
print(f"Taille du trainset : {n_rows}")

train, test = values[:n_rows, :], values[n_rows:, :]

# The last column is the target; every column before it is an input.
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]

# LSTM layers take [samples, timesteps, features]; insert a single timestep axis.
train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

In [None]:
# Small LSTM regressor for the daily series: 7 units feeding one linear output
# neuron, trained with mean squared error and the adam optimizer.
n_timesteps, n_features = train_X.shape[1], train_X.shape[2]
model = Sequential([
    LSTM(7, input_shape=(n_timesteps, n_features)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

In [None]:
# Train the daily model. With this call disabled, the cells that follow plot
# whatever `history` an earlier fit left behind and evaluate an untrained network.
# NOTE(review): batch_size=1 makes each epoch slow; the test split is also
# passed as validation data.
history = model.fit(
    train_X,
    train_y,
    epochs=75,
    batch_size=1,
    validation_data=(test_X, test_y),
    verbose=2,
    shuffle=False,
)

In [None]:
# Learning curves. NOTE(review): `history` is whatever the most recent
# `model.fit` call returned -- verify it belongs to the daily model before
# trusting these curves.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

# Feature count is read from the model input itself. It used to be taken from
# `df_lstm`, which in this section is the hourly, non-supervised frame and is
# one column short, so the reshape below raised.
size = test_X.shape[-1]
# make a prediction
yhat = model.predict(test_X)
# Flatten [samples, 1, features] back to [samples, features].
test_X = test_X.reshape((test_X.shape[0], size))

# `scaler.inverse_transform` needs a matrix as wide as the one it was fitted
# on, so the value to invert goes in column 0 and the remaining feature
# columns pad the rest. `1:` (rather than `1-size:`) stays correct when there
# is only a single feature.
# NOTE(review): `scaler` must be the scaler fitted on the daily frame, and the
# predicted column is assumed to be column 0 -- confirm both.
other_features = test_X[:, 1:]
# invert scaling for forecast
inv_yhat = scaler.inverse_transform(np.concatenate((yhat, other_features), axis=1))[:, 0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = scaler.inverse_transform(np.concatenate((test_y, other_features), axis=1))[:, 0]

In [None]:
# Root-mean-squared error between the un-scaled actual and predicted values.
mse = mean_squared_error(inv_y, inv_yhat)
rmse = np.sqrt(mse)
print('Test RMSE: %.3f' % rmse)

In [None]:
# Coefficient of determination on the un-scaled values, then adjusted for the
# number of input features. `r2_score` is already imported at the top of the
# file, so the package does not need to be re-imported in this cell.
r2 = r2_score(inv_y, inv_yhat)
print(r2)
n_obs = len(test_y)
# Expects `test_X` already flattened to (samples, features) by the
# inverse-scaling cell, so shape[1] is the feature count.
n_feat = test_X.shape[1]
r_adjusted = 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_feat - 1)
print(r_adjusted)

In [None]:
# Actual vs. predicted values for the first test samples (at most 100).
# The axis label is derived from the number of points really plotted; it used
# to claim 500 days while only 100 were drawn.
n_points = min(100, len(inv_y))
steps = range(n_points)
plt.figure(figsize=(40, 10))
plt.plot(steps, inv_y[:n_points], marker='.', label="actual")
plt.plot(steps, inv_yhat[:n_points], 'r', label="prediction")
# NOTE(review): presumably df.columns[1] is the predicted column -- confirm column order.
plt.ylabel(df.columns[1], size=15)
plt.xlabel(f'Time step for first {n_points} days', size=15)
plt.legend(fontsize=15)
plt.show()

# LSTM by minute

In [None]:
# Build the modelling frame at the dataset's own time resolution (the hourly
# resampling step is disabled here), with a temperature feature joined by
# calendar day.
df_lstm = df.set_index('DateTime')
# df_lstm = df_lstm.resample('h').mean()
df_lstm['Date'] = pd.to_datetime(df_lstm.index.date)  # join key: calendar day

temperatures = pd.read_csv("pre_processing/temperatures.csv", parse_dates=['Date'], index_col='Date')
# Mean of the two daily extremes. The earlier `(max_t - min_t) / 2` computed
# half of the daily range, which is not an average temperature.
temperatures['avg_t'] = (temperatures['max_t'] + temperatures['min_t']) / 2

df_lstm = df_lstm.join(temperatures, how="left", on='Date')
# The join key and the raw extremes are not used as model inputs.
df_lstm = df_lstm.drop(columns=['Date', 'max_t', 'min_t'])
# Disabled experiment: one-hot encoded day-of-week features.
#df_lstm["Day"] = df_lstm.index.dayofweek+1
#y = pd.get_dummies(df_lstm.Day, prefix='Day')
#df_lstm = df_lstm.join(y, how="left")
#df_lstm = df_lstm.drop(columns=["Day"])
df_lstm = df_lstm.drop(columns=["Global_reactive_power"])
# df_lstm2 = df_lstm.resample('d').mean()
# df_lstm2.head(5)

In [None]:
# Fresh scaler for this section: rescale every column into [-1, 1] and keep
# the fitted `scaler` for mapping predictions back to original units.
# NOTE(review): fitted on the whole frame, before any train/test split.
scaler = MinMaxScaler(feature_range=(-1, 1))
minute_cols = df_lstm.columns
df_lstm[minute_cols] = scaler.fit_transform(df_lstm[minute_cols])
df_lstm.describe()

In [None]:
# Turn the scaled frame into (previous-row inputs, current-row target) rows
# and preview the first three.
df_lstm = data_to_supervised(df_lstm, col_to_predict="Global_active_power")
df_lstm.iloc[:3]

In [None]:
# Chronological split of the supervised frame: first 80% of the rows for
# training, the remainder for testing (no shuffling).
values = df_lstm.to_numpy()

trainsize = 0.80
n_rows = round(trainsize * len(values))
print(f"Taille du trainset : {n_rows}")

train, test = values[:n_rows, :], values[n_rows:, :]

# The last column is the target; every column before it is an input.
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]

# LSTM layers take [samples, timesteps, features]; insert a single timestep axis.
train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

In [None]:
# LSTM regressor for this section: 3*60 units feeding one linear output
# neuron, trained with mean squared error and the adam optimizer.
n_timesteps, n_features = train_X.shape[1], train_X.shape[2]
model = Sequential([
    LSTM(3 * 60, input_shape=(n_timesteps, n_features)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

In [None]:
# Train for a few epochs; shuffle=False keeps the samples in their original order.
# NOTE(review): the test split is also passed as validation data.
history = model.fit(
    train_X,
    train_y,
    validation_data=(test_X, test_y),
    epochs=5,
    batch_size=100,
    shuffle=False,
    verbose=2,
)

In [None]:
# Learning curves: training loss and the loss on the validation data given to fit().
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper right')
plt.show()

In [None]:
# Feature count is read from the model input itself rather than from a global
# frame, so it cannot drift from what `test_X` actually contains.
size = test_X.shape[-1]
# make a prediction
yhat = model.predict(test_X)
# Flatten [samples, 1, features] back to [samples, features].
test_X = test_X.reshape((test_X.shape[0], size))

# `scaler.inverse_transform` needs a matrix as wide as the one it was fitted
# on, so the value to invert goes in column 0 and the remaining feature
# columns pad the rest. `1:` (rather than `1-size:`) stays correct when there
# is only a single feature.
# NOTE(review): assumes the predicted column is column 0 of the scaled frame -- confirm.
other_features = test_X[:, 1:]
# invert scaling for forecast
inv_yhat = scaler.inverse_transform(np.concatenate((yhat, other_features), axis=1))[:, 0]
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = scaler.inverse_transform(np.concatenate((test_y, other_features), axis=1))[:, 0]

In [None]:
# Root-mean-squared error between the un-scaled actual and predicted values.
mse = mean_squared_error(inv_y, inv_yhat)
rmse = np.sqrt(mse)
print('Test RMSE: %.3f' % rmse)

In [None]:
# Coefficient of determination on the un-scaled values, then adjusted for the
# number of input features. `r2_score` is already imported at the top of the
# file, so the package does not need to be re-imported in this cell.
r2 = r2_score(inv_y, inv_yhat)
print(r2)
n_obs = len(test_y)
# Expects `test_X` already flattened to (samples, features) by the
# inverse-scaling cell, so shape[1] is the feature count.
n_feat = test_X.shape[1]
r_adjusted = 1 - (1 - r2) * (n_obs - 1) / (n_obs - n_feat - 1)
print(r_adjusted)

In [None]:
# Actual vs. predicted values for the first `nbr_step` test samples.
nbr_step = 100
aa = list(range(nbr_step))
plt.figure(figsize=(40, 10))
plt.plot(aa, inv_y[:nbr_step], label="actual", marker='.')
plt.plot(aa, inv_yhat[:nbr_step], 'r', label="prediction")
plt.xlabel(f'Time step for first {nbr_step} time units', size=15)
# NOTE(review): presumably df.columns[1] is the predicted column -- confirm column order.
plt.ylabel(df.columns[1], size=15)
plt.legend(fontsize=15)
plt.show()