[34m______  ___     ______                        _________
___   |/  /________  /_______ _______ ______________  /
__  /|_/ /_  __ \_  __ \  __ `/_  __ `__ \  _ \  __  / 
_  /  / / / /_/ /  / / / /_/ /_  / / / / /  __/ /_/ /  
/_/  /_/  \____//_/ /_/\__,_/ /_/ /_/ /_/\___/\__,_/   
                                                       
_____________ _________      ___________        _________            
___    |__  /_______  /_____ ___  /__  /_______ ______  /____________
__  /| |_  __ \  __  /_  __ `/_  /__  //_/  __ `/  __  /_  _ \_  ___/
_  ___ |  /_/ / /_/ / / /_/ /_  / _  ,<  / /_/ // /_/ / /  __/  /    
/_/  |_/_.___/\__,_/  \__,_/ /_/  /_/|_| \__,_/ \__,_/  \___//_/     
                                                                     
[0m


#### 

# Evaluation Metrics.

### Evaluation metrics are used to evaluate our models by calculating their accuracy or their loss in different ways. I'll go over a few of them.

### 1- Mean absolute error (MAE) -> This is a loss function 
#### Calculated by ∑ |output - predicted value| / samples_number

### 2- Mean absolute percentage error (MAPE) -> This is a loss function 
#### Calculated by ∑ |100*(output - predicted value)/output| / samples_number

### 3- Mean squared error (MSE)  -> This is a loss function
#### Calculated by ∑ (output - predicted value)^2 / samples_number

### 4- Root mean squared error (RMSE)  -> This is a loss function
#### Calculated by sqrt( ∑ (output - predicted value)^2 / samples_number )

### 5- Root mean squared logarithmic error (RMSLE) -> This is a loss function
#### Calculated by sqrt( ∑ (log(1 + predicted value) - log(1 + output))^2 / samples_number )

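### 6- Coefficient of determination (R2) -> This is a goodness-of-fit score (higher is better), not a loss function
#### Calculated by 1 - ∑ (output - predicted value)^2 / ∑ (output - mean(output))^2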
### Other functions ...
#### 7- Symmetric mean absolute percentage error (sMAPE)
#### 8- Mean absolute scaled error (MASE) 
#### 9- Mean squared prediction error (MSPE)
#### 10- Mean directional accuracy (MDA) 
#### 11- Median absolute deviation (MAD)

#### I have not found functions for metrics 7 to 11 in sklearn, so I'm not sure whether they are built in or have to be calculated manually.


##### ref : https://www.analyticsvidhya.com/blog/2021/05/know-the-best-evaluation-metrics-for-your-regression-model/
##### ref : https://en.wikipedia.org/wiki/Mean_absolute_percentage_error


### Import libraries

In [2]:
import pandas as pd
import numpy as np

from pandas import DataFrame
from pandas import concat

from sklearn.preprocessing import MinMaxScaler

from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import TimeDistributed

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from math import sqrt
from math import log
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

### Data preprocessing

In [3]:
# Load the dataset, parsing the 'datetime' column as dates and using it as the index.
# The file name contains irregular spaces; it has to match the file on disk exactly.
df = pd.read_csv('household_power_consumption  new  .csv',parse_dates=['datetime'], index_col= 'datetime')

In [4]:
# Preview the first rows to check the columns and the datetime index.
df.head()

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,sub_metering_4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2006-12-17 00:00:00,1.044,0.152,242.73,4.4,0.0,2.0,0.0,15.4
2006-12-17 00:01:00,1.52,0.22,242.2,7.4,0.0,1.0,0.0,24.333334
2006-12-17 00:02:00,3.038,0.194,240.14,12.6,0.0,2.0,0.0,48.633335
2006-12-17 00:03:00,2.974,0.194,239.97,12.4,0.0,1.0,0.0,48.566666
2006-12-17 00:04:00,2.846,0.198,240.39,11.8,0.0,2.0,0.0,45.433334


### data preparation

In [5]:
def train_test_split(df, train_pct=70):
    """Split a frame positionally into a leading train part and a trailing test part.

    No shuffling is done: the first ``train_pct`` percent of the rows
    (rounded down) become the training set and the remaining rows become the
    test set, so the original row order is preserved in both parts.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to split; sliced by position with ``iloc``.
    train_pct : int or float, default 70
        Percentage of rows (between 0 and 100) assigned to the training set.
        The default reproduces the original fixed 70/30 split.

    Returns
    -------
    tuple
        ``(train_data, test_data)``.

    Raises
    ------
    ValueError
        If ``train_pct`` is outside the range 0..100.
    """
    if not 0 <= train_pct <= 100:
        raise ValueError(f"train_pct must be between 0 and 100, got {train_pct!r}")

    # compute split point; int() keeps it a valid iloc position even if a
    # float percentage was passed
    end_idx = int(df.shape[0] * train_pct // 100)

    train_data = df.iloc[:end_idx, :]
    test_data = df.iloc[end_idx:, :]

    return train_data, test_data

# Only the first 1000 rows of the dataset are used for this experiment.
train, test = train_test_split(df.iloc[:1000,:])

In [6]:
def scale_data(train, test):
    """Fit a MinMaxScaler on the training data and apply it to both splits.

    The scaler is fitted on ``train`` only and then used to transform
    ``train`` and ``test``.

    Returns
    -------
    tuple
        ``(scaled_train, scaled_test, scaler)`` where ``scaler`` is the fitted
        ``MinMaxScaler`` instance.
    """
    scaler = MinMaxScaler()
    scaler.fit(train)
    scaled_train = scaler.transform(train)
    scaled_test = scaler.transform(test)
    return scaled_train, scaled_test, scaler

# NOTE: this rebinds `train` and `test` to their scaled versions, so re-running
# this cell without re-running the split cell would scale already-scaled data.
train, test, scaler = scale_data(train, test)

In [7]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of lagged columns.

    Each output row holds the ``n_in`` previous observations followed by the
    current observation and the next ``n_out - 1`` observations.

    Parameters
    ----------
    data : list, 1-D or 2-D array, Series or DataFrame
        Observations in time order; anything ``DataFrame(data)`` accepts.
        A flat sequence is treated as a single variable.
    n_in : int, default 1
        Number of lag steps (t-n_in ... t-1) used as input columns.
    n_out : int, default 1
        Number of forecast steps (t ... t+n_out-1) used as output columns.
    dropnan : bool, default True
        Drop the rows that contain NaN values introduced by the shifting.

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself instead of inspecting the
    # input type: this also covers 1-D arrays / Series (one column) and nested
    # lists (several columns), which the type check got wrong.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values (the first n_in and last n_out-1 rows)
    if dropnan:
        agg = agg.dropna()
    return agg

In [8]:
# Build the supervised table from the first scaled column only:
# columns 0-5 are the six lagged inputs, column 6 is the value to predict.
data_train = series_to_supervised(list(DataFrame(train).iloc[:, 0]), n_in=6)
X_train = data_train.iloc[:, :6].to_numpy()
y_train = data_train.iloc[:, 6].to_numpy()

In [9]:
# Sanity check: (number of samples, number of lagged input columns).
X_train.shape

(694, 6)

## Time Distributed

### 1- One to one

In [10]:
# prepare sequence
length = X_train.shape[0]

# Use the measured sample count instead of a hard-coded 694, so the cell keeps
# working when the data slice or the split ratio changes.
# X: (samples, 6 time steps, 1 feature); y: (samples, 1)
X = X_train.reshape(length, 6, 1)
y = y_train.reshape(length, 1)
# define LSTM configuration
n_neurons = 200
n_batch = 200
n_epoch = 800
# create LSTM
model = Sequential()
model.add(LSTM(n_neurons, input_shape=(6,1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()
# train LSTM
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=2)
# evaluate (predictions are made on the training inputs, so the scores below are in-sample)
result = model.predict(X, batch_size=n_batch, verbose=0)

print(50 * "-")

for x_value , y_value in zip(X,y):
    print(f"X -> \n{x_value}\ny -> \n{y_value}" ) 

print(50 * "-")

print("Real value | Predicted value")
# Pair every prediction with its target y; pairing with X would print the
# oldest input lag instead of the value the model is trying to predict.
for real_value , predicted_value in zip(y,result):
    print(f"{str(round(real_value.flatten()[0],5)).center(12)}|{str(round(predicted_value.flatten()[0],5)).center(12)}")

print("\n",50 * "-","\n")

# Flatten once and reuse for every metric.
y_true = y.flatten()
y_pred = result.flatten()
mse = mean_squared_error(y_true, y_pred)
# RMSLE = sqrt(mean((log(1 + pred) - log(1 + true))^2)), not log(RMSE).
# log1p needs values > -1; targets are presumably in [0, 1] after min-max
# scaling, but a prediction below -1 would yield NaN here.
rmsle = sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))

print("Mean absolute error ----------->", mean_absolute_error(y_true, y_pred))
print("Mean squared error ------------>", mse)
print("Root mean squared error ------->", sqrt(mse))
print("Root mean squared log error --->", rmsle)
print("R squared accuracy ------------>", r2_score(y_true, y_pred) , " |  Note! This is the accuracy not the loss.")

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 200)               161600    
                                                                 
 dense (Dense)               (None, 1)                 201       
                                                                 
Total params: 161,801
Trainable params: 161,801
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/800
4/4 - 2s - loss: 0.0830 - 2s/epoch - 534ms/step
Epoch 2/800
4/4 - 0s - loss: 0.0276 - 60ms/epoch - 15ms/step
Epoch 3/800
4/4 - 0s - loss: 0.0320 - 59ms/epoch - 15ms/step
Epoch 4/800
4/4 - 0s - loss: 0.0251 - 60ms/epoch - 15ms/step
Epoch 5/800
4/4 - 0s - loss: 0.0237 - 57ms/epoch - 14ms/step
Epoch 6/800
4/4 - 0s - loss: 0.0253 - 55ms/epoch - 14ms/step
Epoch 7/800
4/4 - 0s - loss: 0.0235 - 57ms/epoch - 14ms/step
Epoch 8/800
4/4

#### 

### 2- Many to one

#### - Mean absolute error

In [11]:
length = X_train.shape[0]

# Treat the whole training set as ONE sample made of `length` time steps with
# 6 features each; the target is the vector of all `length` values.
# The sample count is taken from `length` instead of a hard-coded 694.
X = X_train.reshape(1, length, 6)
y = y_train.reshape(1, length)
# define LSTM configuration
n_neurons = 200
n_batch = 200
n_epoch = 800

model = Sequential()
model.add(LSTM(n_neurons, input_shape=(length, 6)))
# One dense unit per time step: the final LSTM state is mapped to all outputs at once.
model.add(Dense(length))
model.compile(loss='mean_squared_error', optimizer='adam')
# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()
# train LSTM
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=2)
# evaluate (predictions are made on the training inputs, so the scores below are in-sample)
result = model.predict(X, batch_size=n_batch, verbose=0)

print(50 * "-")

for x_value , y_value in zip(X,y):
    print(f"X -> \n{x_value} \ny -> \n{y_value}" ) 

print("\n",50 * "-","\n")

# Flatten once and reuse for every metric.
y_true = y.flatten()
y_pred = result.flatten()
mse = mean_squared_error(y_true, y_pred)
# RMSLE = sqrt(mean((log(1 + pred) - log(1 + true))^2)), not log(RMSE).
# log1p needs values > -1; targets are presumably in [0, 1] after min-max
# scaling, but a prediction below -1 would yield NaN here.
rmsle = sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))

print("Mean absolute error ----------->", mean_absolute_error(y_true, y_pred))
print("Mean squared error ------------>", mse)
print("Root mean squared error ------->", sqrt(mse))
print("Root mean squared log error --->", rmsle)
print("R squared accuracy ------------>", r2_score(y_true, y_pred) , " |  Note! This is the accuracy not the loss.")

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 200)               165600    
                                                                 
 dense_1 (Dense)             (None, 694)               139494    
                                                                 
Total params: 305,094
Trainable params: 305,094
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/800
1/1 - 2s - loss: 0.1213 - 2s/epoch - 2s/step
Epoch 2/800
1/1 - 0s - loss: 0.1136 - 169ms/epoch - 169ms/step
Epoch 3/800
1/1 - 0s - loss: 0.1060 - 169ms/epoch - 169ms/step
Epoch 4/800
1/1 - 0s - loss: 0.0968 - 170ms/epoch - 170ms/step
Epoch 5/800
1/1 - 0s - loss: 0.0843 - 174ms/epoch - 174ms/step
Epoch 6/800
1/1 - 0s - loss: 0.0658 - 169ms/epoch - 169ms/step
Epoch 7/800
1/1 - 0s - loss: 0.0424 - 169ms/epoch - 169ms/step
Epoc

### 3- Many to many

In [12]:
length = X_train.shape[0]

# Treat the whole training set as ONE sample made of `length` time steps with
# 6 features each; the target holds one value per time step.
# The sample count is taken from `length` instead of a hard-coded 694.
X = X_train.reshape(1, length, 6)
y = y_train.reshape(1, length, 1)
# define LSTM configuration
n_neurons = 200
n_batch = 200
n_epoch = 800

model = Sequential()
# return_sequences=True keeps the LSTM output for every time step, so the
# TimeDistributed Dense layer can emit one prediction per step.
model.add(LSTM(n_neurons, input_shape=(length, 6), return_sequences=True))
model.add(TimeDistributed(Dense(1)))
model.compile(loss='mean_squared_error', optimizer='adam')
# summary() prints the table itself and returns None; wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()
# train LSTM
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=2)
# evaluate (predictions are made on the training inputs, so the scores below are in-sample)
result = model.predict(X, batch_size=n_batch, verbose=0)

print(50 * "-")

for x_value , y_value in zip(X,y):
    print(f"X -> \n{x_value} \ny -> \n{y_value}" ) 

print("\n",50 * "-","\n")

# Flatten once and reuse for every metric.
y_true = y.flatten()
y_pred = result.flatten()
mse = mean_squared_error(y_true, y_pred)
# RMSLE = sqrt(mean((log(1 + pred) - log(1 + true))^2)), not log(RMSE).
# log1p needs values > -1; targets are presumably in [0, 1] after min-max
# scaling, but a prediction below -1 would yield NaN here.
rmsle = sqrt(np.mean((np.log1p(y_pred) - np.log1p(y_true)) ** 2))

print("Mean absolute error ----------->", mean_absolute_error(y_true, y_pred))
print("Mean squared error ------------>", mse)
print("Root mean squared error ------->", sqrt(mse))
print("Root mean squared log error --->", rmsle)
print("R squared accuracy ------------>", r2_score(y_true, y_pred) , " |  Note! This is the accuracy not the loss.")

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 694, 200)          165600    
                                                                 
 time_distributed (TimeDistr  (None, 694, 1)           201       
 ibuted)                                                         
                                                                 
Total params: 165,801
Trainable params: 165,801
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/800
1/1 - 2s - loss: 0.1572 - 2s/epoch - 2s/step
Epoch 2/800
1/1 - 0s - loss: 0.0574 - 175ms/epoch - 175ms/step
Epoch 3/800
1/1 - 0s - loss: 0.0300 - 173ms/epoch - 173ms/step
Epoch 4/800
1/1 - 0s - loss: 0.0558 - 174ms/epoch - 174ms/step
Epoch 5/800
1/1 - 0s - loss: 0.0487 - 175ms/epoch - 175ms/step
Epoch 6/800
1/1 - 0s - loss: 0.0343 - 171ms/epoch - 171ms/step
E

#### 

[34m __  
 \ \ 
(_) |
 _| |
(_) |
 /_/ 
[0m
