In [1]:
import warnings
import numpy as np 
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt 
# warnings.filterwarnings('ignore')

from Energy_Models import CNN_LSTM_V2
from Energy_Models import Evaluation_Metrix

from tensorflow.keras import Sequential
from sklearn.metrics import mean_squared_error
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, TimeDistributed




# Data Reading

In [2]:
# Load the preprocessed readings; the `date_time` column is parsed as datetimes
# and becomes the index of the frame.
df = pd.read_csv(
    r'household_power_consumption Preprocessed.csv',
    index_col='date_time',
    parse_dates=['date_time'],
)

# Data Analysis

In [3]:
# Aggregate the readings into one row per day by summing every column.
# The result is stored separately in `data`; `df` itself is not modified here.
data = df.resample('D').sum() 

In [4]:
# Preview the first rows of the daily totals
data.head()

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16,1209.176,34.922,93552.53,5180.8,0.0,546.0,4926.0
2006-12-17,3390.46,226.006,345725.32,14398.6,2033.0,4187.0,13341.0
2006-12-18,2203.826,161.792,347373.64,9247.2,1063.0,2621.0,14018.0
2006-12-19,1666.194,150.942,348479.01,7094.0,839.0,7602.0,6197.0
2006-12-20,2225.748,160.998,348923.61,9313.0,0.0,2648.0,14063.0


# Data Preprocessing

In [5]:
# Derive the part of the active power that none of the three sub-meters accounts for.
# NOTE(review): the `* 1000 / 60` factor presumably converts the power reading to the
# same per-minute energy unit as the sub-meter columns — confirm against the dataset docs.
df['sub_metering_remaining'] = (
    df['Global_active_power'] * 1000 / 60
    - (df['Sub_metering_1'] + df['Sub_metering_2'] + df['Sub_metering_3'])
)

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column of `df`
df.describe()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,sub_metering_remaining
count,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0,2075259.0
mean,1.089418,0.1236871,240.8364,4.618401,1.118474,1.291131,6.448635,9.298722
std,1.054678,0.1125933,3.240051,4.433165,6.14146,5.796922,8.433584,9.561278
min,0.076,0.0,223.2,0.2,0.0,0.0,0.0,-2.4
25%,0.308,0.048,238.99,1.4,0.0,0.0,0.0,3.8
50%,0.602,0.1,241.0,2.6,0.0,0.0,1.0,5.5
75%,1.526,0.194,242.87,6.4,0.0,1.0,17.0,10.36667
max,11.122,1.39,254.15,48.4,88.0,80.0,31.0,124.8333


In [7]:
# Downsample to one row per day, taking the mean of each column.
# NOTE: this rebinds `df`, so the higher-resolution frame loaded earlier is no
# longer reachable under this name once the cell has run.
df = df.resample('D').mean()
# Show (rows, columns) of the daily frame
df.shape

(1442, 8)

In [8]:
def train_test_split(df, train_pct=70):
    """Split a frame by position into a leading train part and a trailing test part.

    Rows are never shuffled: the first ``train_pct`` percent of rows (rounded
    down) form the training part and the remaining rows form the test part,
    so the original row order is preserved in both.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; rows are selected positionally with ``iloc``.
    train_pct : int or float, default 70
        Percentage of rows, between 0 and 100, assigned to the training part.

    Returns
    -------
    tuple
        ``(train_data, test_data)`` as two slices of ``df``.

    Raises
    ------
    ValueError
        If ``train_pct`` is outside the range 0-100.
    """
    if not 0 <= train_pct <= 100:
        raise ValueError("train_pct must be between 0 and 100, got %r" % (train_pct,))

    # Floor division rounds the boundary down; int() keeps it a valid iloc
    # position even when train_pct is given as a float.
    end_idx = int(df.shape[0] * train_pct // 100)

    train_data = df.iloc[:end_idx, :]
    test_data = df.iloc[end_idx:, :]

    return train_data, test_data

In [9]:
# Positional split of the daily frame into a leading train part and a trailing test part.
# Despite the X_ prefix, both frames still hold every column at this point,
# including column 0, which convert_to_supervised later uses as the target.
X_train, X_test = train_test_split(df)

In [10]:
# Only a cell's final expression is rendered, so both split shapes are
# returned together as one (train_shape, test_shape) tuple.
X_train.shape, X_test.shape

(1009, 8)

In [11]:
from sklearn.preprocessing import MinMaxScaler
def scale_data(train, test):
    """Min-max scale two data sets using statistics learned from the first one.

    The scaler is fitted on ``train`` only and then applied to both ``train``
    and ``test``, so the test data never influences the scaling parameters.

    Returns
    -------
    tuple
        ``(scaled_train, scaled_test, fitted_scaler)``.
    """
    fitted_scaler = MinMaxScaler()
    fitted_scaler.fit(train)

    scaled_train = fitted_scaler.transform(train)
    scaled_test = fitted_scaler.transform(test)
    return scaled_train, scaled_test, fitted_scaler

In [12]:
# Min-max scale both splits with a scaler fitted on the training split only.
# X_train / X_test are rebound to the scaled outputs, so re-running this cell
# alone would refit `scaler` on already-scaled data and lose the mapping back
# to the original units.
X_train, X_test, scaler = scale_data(X_train, X_test)

In [13]:
def convert_to_supervised(df):
    """Turn a 2-D series into one-step-ahead (input, target) pairs.

    For every row ``i`` except the last one, the input is columns ``1:`` of
    row ``i`` and the target is column ``0`` of row ``i + 1``.

    Parameters
    ----------
    df : array-like of shape (n_rows, n_columns)
        Two-dimensional data; it is converted with ``np.asarray`` and indexed
        positionally.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, targets)`` where ``inputs`` has shape
        ``(n_rows - 1, 1, n_columns - 1)`` and ``targets`` has shape
        ``(n_rows - 1,)``. With fewer than two rows both arrays are empty but
        keep those ranks, instead of failing inside ``np.mean``.
    """
    values = np.asarray(df)

    # Rows 0..n-2 supply the inputs; the inserted axis is the length-1 window
    # dimension. copy() returns an independent array, as the list-built result was.
    input_features = values[:-1, 1:][:, np.newaxis, :].copy()

    # Rows 1..n-1 supply the targets. The length-1 trailing axis is averaged
    # away so the result dtype follows np.mean's promotion rules.
    output_feature = np.mean(values[1:, 0:1], axis=1)

    return input_features, output_feature

In [14]:
# Build one-step-ahead (input, target) pairs from the scaled training data.
# X_train is rebound to the function's 3-D output, so re-running this cell would
# feed the already-windowed array back into convert_to_supervised.
X_train, Y_train = convert_to_supervised(X_train)
print('Shape of (training data) input features : %s and ouput feature %s' % (X_train.shape, Y_train.shape))

Shape of (training data) input features : (1008, 1, 7) and ouput feature (1008,)


In [15]:
# Build one-step-ahead (input, target) pairs from the scaled test data.
# X_test is rebound to the function's 3-D output, so re-running this cell would
# feed the already-windowed array back into convert_to_supervised.
X_test, Y_test = convert_to_supervised(X_test)
print('Shape of (testing data) input features : %s and ouput feature %s' % (X_test.shape, Y_test.shape))

Shape of (testing data) input features : (432, 1, 7) and ouput feature (432,)


In [16]:
import tensorflow as tf
# Early stopping watches the quantity named 'loss' and allows 5 epochs of
# patience before ending training; no validation metric is monitored here.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

## CNN-LSTM

In [18]:
# Imported here because nothing earlier in the notebook brings `CNN_LSTM` into
# scope (the first cell only imports `CNN_LSTM_V2`), which makes this cell fail
# with NameError on a fresh kernel.
# NOTE(review): the module path is assumed from the sibling import — confirm that
# Energy_Models provides `CNN_LSTM`, or switch this cell to `CNN_LSTM_V2`.
from Energy_Models import CNN_LSTM

# Build and compile the network with the Adam optimizer and MSE loss.
# NOTE(review): the arguments (1, 7, 1) presumably mirror the (1, 7) input window
# produced by convert_to_supervised plus a single output — confirm against the class.
cnn_lstm_model = CNN_LSTM.CNN_LSTM(1, 7, 1).getModel()
cnn_lstm_model.compile(optimizer="adam", loss="mse")

In [19]:
# Train for at most 100 epochs; the early-stopping callback may end the run sooner.
# `callbacks` is passed as a list, which is the form documented for Model.fit.
cnn_lstm_model.fit(
    X_train,
    Y_train,
    epochs=100,
    verbose=1,
    callbacks=[callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100


<keras.callbacks.History at 0x2313b53edf0>

In [20]:
# Predict on both splits first, then report train and test metrics in one call.
# The targets come from the min-max-scaled arrays, so the reported errors are in
# scaled units rather than the original measurement units.
cnn_lstm_train_pred = cnn_lstm_model.predict(X_train)
cnn_lstm_test_pred = cnn_lstm_model.predict(X_test)
Evaluation_Metrix.print_metrics(Y_train, cnn_lstm_train_pred, Y_test, cnn_lstm_test_pred)

Train RMSE value   : 0.103 
Train MSE value    : 0.011 
Train R2 value     : 0.478 
Train MAE value    : 0.077 
---------------------------
Test RMSE value    : 0.080 
Test MSE value     : 0.006 
Test R2 value      : 0.416 
Test MAE value     : 0.062 
