In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import warnings
import tensorflow as tf
from Energy_Models import ConvLstm as CL
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('D:\College\Final\Gpr\Datasets\household_power_consumption.txt',sep=';', 
                 parse_dates={'date_time' : ['Date', 'Time']}, infer_datetime_format=True, 
                 low_memory=False, na_values=['nan','?'], index_col='date_time')

In [3]:
df.head()

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2075259 entries, 2006-12-16 17:24:00 to 2010-11-26 21:02:00
Data columns (total 7 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   Global_active_power    float64
 1   Global_reactive_power  float64
 2   Voltage                float64
 3   Global_intensity       float64
 4   Sub_metering_1         float64
 5   Sub_metering_2         float64
 6   Sub_metering_3         float64
dtypes: float64(7)
memory usage: 126.7 MB


In [5]:
df.shape

(2075259, 7)

In [6]:
df.isna().sum()

Global_active_power      25979
Global_reactive_power    25979
Voltage                  25979
Global_intensity         25979
Sub_metering_1           25979
Sub_metering_2           25979
Sub_metering_3           25979
dtype: int64

In [7]:
# filling missing values by the value of one day before 
def fill_missing(data):
    one_day = 24*60
    for row in range(data.shape[0]):
        for col in range(data.shape[1]):
            if np.isnan(data[row,col]):
                data[row,col] = data[row-one_day,col]

In [8]:
fill_missing(df.values)

In [9]:
df.isna().sum()

Global_active_power      0
Global_reactive_power    0
Voltage                  0
Global_intensity         0
Sub_metering_1           0
Sub_metering_2           0
Sub_metering_3           0
dtype: int64

In [10]:
df.to_csv('new_household_power_consumption.csv')

In [11]:
df = pd.read_csv('new_household_power_consumption.csv',parse_dates=['date_time'], index_col= 'date_time')

In [12]:
df['sub_metering_remaining'] = (df.Global_active_power * 1000  / 60 ) - (df.Sub_metering_1 + df.Sub_metering_2 + df.Sub_metering_3)
df.describe()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,sub_metering_remaining
count,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0
mean,1.089418,0.1236871,240.8364,4.618401,1.118474,1.291131,6.448635,9.298722
std,1.054678,0.1125933,3.240051,4.433165,6.14146,5.796922,8.433584,9.561278
min,0.076,0.0,223.2,0.2,0.0,0.0,0.0,-2.4
25%,0.308,0.048,238.99,1.4,0.0,0.0,0.0,3.8
50%,0.602,0.1,241.0,2.6,0.0,0.0,1.0,5.5
75%,1.526,0.194,242.87,6.4,0.0,1.0,17.0,10.36667
max,11.122,1.39,254.15,48.4,88.0,80.0,31.0,124.8333


In [13]:
# resample the data on daily basis
df = df.resample('D').mean()
df.shape

(1442, 8)

In [14]:
def train_test_split(df):
    
    # compute split point
    end_idx = df.shape[0]* 70 // 100
    
    train_data = df.iloc[:end_idx, : ]
    test_data = df.iloc[end_idx:, :]
    
    return train_data, test_data

In [406]:
# Split the data into train and test
X_train, X_test = train_test_split(df)

In [407]:
X_test.shape
X_train.shape

(1009, 8)

In [408]:
def scale_data(train, test):
    scaler = MinMaxScaler().fit(train)
    return scaler.transform(train), scaler.transform(test), scaler

In [409]:
X_train, X_test, scaler = scale_data(X_train, X_test)

In [410]:
def convert_to_supervised(df):

    input_features = []
    ouput_feature = []
    
    len_df = df.shape[0]
    
    for i in range(len_df):
        
        end_idx = i + 1 
        
        if end_idx > len_df-1:
            break
            
        input_x , output_y = df[i:end_idx, 1:], df[end_idx: end_idx+1, 0]
        
        input_features.append(input_x)
        ouput_feature.append(output_y)
    
    return np.array(input_features), np.mean(np.array(ouput_feature), axis=1)

In [411]:
# Split the training data into input features and out feature
X_train, Y_train = convert_to_supervised(X_train)
print('Shape of (training data) input features : %s and ouput feature %s' % (X_train.shape, Y_train.shape))

Shape of (training data) input features : (1008, 1, 7) and ouput feature (1008,)


In [412]:
# Split the testing data into input features and out feature
X_test, Y_test = convert_to_supervised(X_test)
print('Shape of (testing data) input features : %s and ouput feature %s' % (X_test.shape, Y_test.shape))

Shape of (testing data) input features : (432, 1, 7) and ouput feature (432,)


In [415]:
n_steps, n_features = X_train.shape[1], X_train.shape[2]
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
convlstm_model = CL.ConvLstm(n_steps,n_features).getModel()

In [416]:
X_train=X_train.reshape((X_train.shape[0],1,1,n_steps,X_train.shape[2]))
X_test=X_test.reshape((X_test.shape[0],1,1,n_steps,X_test.shape[2]))

In [417]:
convlstm_model.compile(optimizer='adam', loss ='mse')
history = convlstm_model.fit(X_train , Y_train, epochs=200, batch_size=256, verbose= 1,validation_split=0.3)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 15

Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [418]:
y_pred_train = convlstm_model.predict(X_train)
y_pred_test  = convlstm_model.predict(X_test) 



In [420]:
convlstm_model.evaluate(X_train,Y_train)



0.010141022503376007

In [29]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_squared_log_error, mean_absolute_percentage_error

def root_mean_squared_error(y_true, y_pred):    
    return np.sqrt(mean_squared_error(y_true, y_pred))

In [421]:
print('Train RMSE value for LSTM Model  : %.3f ' % root_mean_squared_error(Y_train, y_pred_train))
print('Train MSE value for LSTM Model  : %.3f ' % mean_squared_error(Y_train, y_pred_train))
print('Train R2 value for LSTM Model  : %.3f ' % r2_score(Y_train, y_pred_train))
print('Train MAPE value for LSTM Model  : %.3f ' % mean_absolute_percentage_error(Y_train, y_pred_train))
print('Train RMLSE value for LSTM Model  : %.3f ' % mean_squared_log_error(Y_train, y_pred_train))
print('Train MAE value for LSTM Model  : %.3f ' % mean_absolute_error(Y_train, y_pred_train))
print('---------------------------------------------')
print('Test RMSE value for LSTM Model  : %.3f ' % root_mean_squared_error(Y_test, y_pred_test))
print('Test MSE value for LSTM Model  : %.3f ' % mean_squared_error(Y_test, y_pred_test))
print('Test R2 value for LSTM Model  : %.3f ' % r2_score(Y_test, y_pred_test))
print('Test MAPE value for LSTM Model  : %.3f ' % mean_absolute_percentage_error(Y_test, y_pred_test))
print('Test RMLSE value for LSTM Model  : %.3f ' % mean_squared_log_error(Y_test, y_pred_test))
print('Test MAE value for LSTM Model  : %.3f ' % mean_absolute_error(Y_test, y_pred_test))

Train RMSE value for LSTM Model  : 0.101 
Train MSE value for LSTM Model  : 0.010 
Train R2 value for LSTM Model  : 0.498 
Train MAPE value for LSTM Model  : 146783796208.287 
Train RMLSE value for LSTM Model  : 0.006 
Train MAE value for LSTM Model  : 0.076 
---------------------------------------------
Test RMSE value for LSTM Model  : 0.079 
Test MSE value for LSTM Model  : 0.006 
Test R2 value for LSTM Model  : 0.429 
Test MAPE value for LSTM Model  : 0.257 
Test RMLSE value for LSTM Model  : 0.004 
Test MAE value for LSTM Model  : 0.061 


In [422]:
X_train, X_test = train_test_split(df)

In [423]:
X_train, X_test, scaler = scale_data(X_train, X_test)

In [424]:
def convert_to_supervised(df):

    input_features = []
    ouput_feature = []
    
    len_df = df.shape[0]
    
    for i in range(len_df):
        
        end_idx = i + 7
        
        if end_idx > len_df-7:
            break
            
        input_x , output_y = df[i:end_idx, 1:], df[end_idx: end_idx+7, 0]
        
        input_features.append(input_x)
        ouput_feature.append(output_y)
    
    return np.array(input_features), np.array(ouput_feature)

In [425]:
X_train, Y_train = convert_to_supervised(X_train)
print('Shape of (training data) input features : %s and ouput feature %s' % (X_train.shape, Y_train.shape))

Shape of (training data) input features : (996, 7, 7) and ouput feature (996, 7)


In [426]:
X_test, Y_test = convert_to_supervised(X_test)
print('Shape of (testing data) input features : %s and ouput feature %s' % (X_test.shape, Y_test.shape))

Shape of (testing data) input features : (420, 7, 7) and ouput feature (420, 7)


In [427]:
n_steps, n_features = X_train.shape[1], X_train.shape[2]
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
convlstm_model = CL.ConvLstm(n_steps,n_features,7).getModel()

In [428]:
X_train=X_train.reshape((X_train.shape[0],1,1,n_steps,X_train.shape[2]))
X_test=X_test.reshape((X_test.shape[0],1,1,n_steps,X_test.shape[2]))

In [429]:
convlstm_model.compile(optimizer='adam', loss ='mae')
history = convlstm_model.fit(X_train , Y_train, epochs=200, batch_size=256, verbose= 1,validation_split=0.3)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 15

In [430]:
y_pred_train = convlstm_model.predict(X_train)
y_pred_test  = convlstm_model.predict(X_test) 



In [432]:
print('Train RMSE value for LSTM Model  : %.3f ' % root_mean_squared_error(Y_train, y_pred_train))
print('Train MSE value for LSTM Model  : %.3f ' % mean_squared_error(Y_train, y_pred_train))
print('Train R2 value for LSTM Model  : %.3f ' % r2_score(Y_train, y_pred_train))
print('Train MAPE value for LSTM Model  : %.3f ' % mean_absolute_percentage_error(Y_train, y_pred_train))
#print('Train RMLSE value for LSTM Model  : %.3f ' % mean_squared_log_error(Y_train, y_pred_train))
print('Train MAE value for LSTM Model  : %.3f ' % mean_absolute_error(Y_train, y_pred_train))
print('---------------------------------------------')
print('Test RMSE value for LSTM Model  : %.3f ' % root_mean_squared_error(Y_test, y_pred_test))
print('Test MSE value for LSTM Model  : %.3f ' % mean_squared_error(Y_test, y_pred_test))
print('Test R2 value for LSTM Model  : %.3f ' % r2_score(Y_test, y_pred_test))
print('Test MAPE value for LSTM Model  : %.3f ' % mean_absolute_percentage_error(Y_test, y_pred_test))
#print('Test RMLSE value for LSTM Model  : %.3f ' % mean_squared_log_error(Y_test, y_pred_test))
print('Test MAE value for LSTM Model  : %.3f ' % mean_absolute_error(Y_test, y_pred_test))

Train RMSE value for LSTM Model  : 0.103 
Train MSE value for LSTM Model  : 0.011 
Train R2 value for LSTM Model  : 0.455 
Train MAPE value for LSTM Model  : 29462041405.988 
Train MAE value for LSTM Model  : 0.074 
---------------------------------------------
Test RMSE value for LSTM Model  : 0.081 
Test MSE value for LSTM Model  : 0.007 
Test R2 value for LSTM Model  : 0.408 
Test MAPE value for LSTM Model  : 0.264 
Test MAE value for LSTM Model  : 0.062 


In [433]:
convlstm_model.evaluate(X_train,Y_train)



0.07443315535783768

In [434]:
convlstm_model.evaluate(X_test,Y_test)



0.062288351356983185