# Power Consumption Prediction

### Load Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

In [None]:
# Load the raw readings. The file is ';'-separated, 'nan' and '?' are both read
# as missing values, and the 'Date' and 'Time' columns are parsed together into
# a single datetime column named 'dt', which becomes the index.
df = pd.read_csv(
    'household_power_consumption.txt',
    sep=';',
    parse_dates={'dt': ['Date', 'Time']},
    infer_datetime_format=True,
    low_memory=False,
    na_values=['nan', '?'],
    index_col='dt',
)
df.head()

In [None]:
# Print a concise summary of the frame (index, columns and dtypes).
df.info()

In [None]:
# Descriptive statistics for each column.
df.describe()

### Visualization

In [None]:
# Daily mean and standard deviation of Global_intensity, one subplot per statistic.
daily_intensity = df['Global_intensity'].resample('D').agg(['mean', 'std'])
daily_intensity.plot(
    subplots=True,
    kind='line',
    title='Global_intensity resampled over day',
)
plt.show()

In [None]:
# Daily mean and standard deviation of Global_active_power, one subplot per statistic.
daily_active = df['Global_active_power'].resample('D').agg(['mean', 'std'])
daily_active.plot(
    subplots=True,
    kind='line',
    title='Global_active_power resampled over day',
)
plt.show()

In [None]:
# Daily mean and standard deviation of Global_reactive_power, one subplot per statistic.
daily_reactive = df['Global_reactive_power'].resample('D').agg(['mean', 'std'])
daily_reactive.plot(
    subplots=True,
    kind='line',
    title='Global_reactive_power resampled over day',
)
plt.show()

In [None]:
# Bar chart of the monthly mean of Global_active_power.
monthly_power = df['Global_active_power'].resample('M').mean()

plt.figure(figsize=(16, 8))
monthly_power.plot(kind='bar')
plt.xticks(rotation=60)
plt.title('Global_active_power per month')
plt.ylabel('Global_active_power')
plt.show()

In [None]:
# Bar chart of the quarterly mean of Global_active_power.
quarterly_power = df['Global_active_power'].resample('Q').mean()

plt.figure(figsize=(16, 8))
quarterly_power.plot(kind='bar')
plt.xticks(rotation=60)
plt.title('Global_active_power per quarter')
plt.ylabel('Global_active_power')
plt.show()

In [None]:
# Bar chart of the monthly mean of Voltage.
monthly_voltage = df['Voltage'].resample('M').mean()

plt.figure(figsize=(16, 8))
monthly_voltage.plot(kind='bar')
plt.xticks(rotation=60)
plt.title('Voltage per month')
plt.ylabel('Voltage')
plt.show()

In [None]:
def plot_features(v):
    """Plot the resampled mean of selected columns of the global ``df``.

    ``v`` is the resampling rule handed to ``df.resample``. The columns at
    positions 0, 1, 2, 3, 5 and 6 are drawn as vertically stacked subplots in
    a single figure; position 4 is not plotted.
    """
    column_positions = [0, 1, 2, 3, 5, 6]
    resampled = df.resample(v).mean().values
    plt.figure(figsize=(15, 10))
    for row, position in enumerate(column_positions, start=1):
        plt.subplot(len(column_positions), 1, row)
        plt.plot(resampled[:, position])
        # Column name placed inside the subplot, towards the top-right corner.
        plt.title(df.columns[position], y=0.75, loc='right')
    plt.show()

In [None]:
# Selected columns resampled with rule 'D' (daily means).
plot_features('D')

In [None]:
# Selected columns resampled with rule 'M' (monthly means).
plot_features('M')

In [None]:
# Selected columns resampled with rule 'Q' (quarterly means).
plot_features('Q')

In [None]:
# Overlay the weekly means of four columns on one set of axes, one colour each.
weekly_lines = (
    ('Global_active_power', 'r'),
    ('Global_reactive_power', 'b'),
    ('Global_intensity', 'g'),
    ('Sub_metering_1', 'y'),
)
for column, colour in weekly_lines:
    df[column].resample('W').mean().plot(color=colour, legend=True)
plt.title("Weekly Mean")
plt.show()


In [None]:
# Overlaid histograms of the weekly means of four columns, one colour each.
weekly_histograms = (
    ('Global_active_power', 'r'),
    ('Global_reactive_power', 'b'),
    ('Global_intensity', 'g'),
    ('Sub_metering_1', 'y'),
)
for column, colour in weekly_histograms:
    df[column].resample('W').mean().plot(kind='hist', color=colour, legend=True)
# The title matches the 'W' (weekly) resampling rule used for every series.
plt.title("Weekly Mean")
plt.show()

In [None]:
# Annotated heatmap of the pairwise column correlations of the raw readings.
raw_corr = df.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(raw_corr, annot=True, fmt='.2f')

In [None]:
# Annotated heatmap of the pairwise column correlations of the monthly means.
monthly_corr = df.resample('M').mean().corr()
plt.figure(figsize=(8, 6))
sns.heatmap(monthly_corr, annot=True, fmt='.2f')

In [None]:
# Annotated heatmap of the pairwise column correlations of the annual means.
annual_corr = df.resample('A').mean().corr()
plt.figure(figsize=(8, 6))
sns.heatmap(annual_corr, annot=True, fmt='.2f')

### Preprocess

In [None]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Parameters
    ----------
    data : list, array-like or DataFrame
        Observations in time order, one row per step. A flat list or 1-D
        array is treated as a single variable.
    n_in : int, default 1
        Number of lagged steps, ``t-n_in`` through ``t-1``, used as inputs.
    n_out : int, default 1
        Number of steps, ``t`` through ``t+n_out-1``, emitted as outputs.
    dropnan : bool, default True
        Drop every row that contains a NaN. This removes the edge rows that
        shifting leaves incomplete, and also any row whose source values were
        already missing.

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<j>(t-<i>)`` for lags, ``var<j>(t)`` for the
        current step and ``var<j>(t+<i>)`` for later steps, with ``j``
        counting variables from 1.
    """
    frame = pd.DataFrame(data)
    # Count variables from the frame itself so flat lists, nested lists,
    # 1-D/2-D arrays and DataFrames all get the right number of names.
    n_vars = frame.shape[1]
    shifted, names = [], []
    # Input columns, oldest lag first: t-n_in, ..., t-1.
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # Output columns: t, t+1, ..., t+n_out-1.
    for lead in range(n_out):
        shifted.append(frame.shift(-lead))
        if lead == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, lead) for j in range(n_vars)]
    supervised = pd.concat(shifted, axis=1)
    supervised.columns = names
    if dropnan:
        supervised.dropna(inplace=True)
    return supervised

In [None]:
# Aggregate the readings to hourly means; the shape shows the resulting row/column counts.
df_h = df.resample('h').mean() 
df_h.shape

In [None]:
# Scale every column of the hourly frame to the [0, 1] range.
# NOTE(review): the scaler is fitted on the full hourly frame, before the
# train/test split made later in the notebook, so the held-out rows influence
# the scaling of the training inputs — confirm this is intended.
scaler = MinMaxScaler(feature_range=(0,1))
data = scaler.fit_transform(df_h.values)

# Pair each row's values (t) with the previous row's values (t-1).
new_df = series_to_supervised(data)

In [None]:
# Preview the lagged (t-1) and current (t) columns.
new_df.head()

In [None]:
# Keep 'var1(t)' as the only current-step column. The other '(t)' columns are
# selected by name rather than by position, so the cell does not depend on the
# number of features and does nothing if it is run a second time.
new_df.drop(
    columns=[name for name in new_df.columns
             if name.endswith('(t)') and name != 'var1(t)'],
    inplace=True,
)

In [None]:
# Preview the frame after the column drop above.
new_df.head()

In [None]:
# The target is the current-step column 'var1(t)'; every other column is a feature.
y = new_df['var1(t)']
X = new_df.drop(columns=['var1(t)'])

# The first 365*24 rows are used for training; the remaining rows are held out.
split_time = 24 * 365
X_train, X_test = X[:split_time], X[split_time:]
y_train, y_test = y[:split_time], y[split_time:]

In [None]:
# Convert to arrays and append a trailing axis of length 1:
# (rows, columns) -> (rows, columns, 1).
X_train = np.expand_dims(np.array(X_train), axis=-1)
X_test = np.expand_dims(np.array(X_test), axis=-1)

### Training

In [None]:
# Three stacked LSTM layers (100, 50 and 50 units), each followed by dropout at
# rate 0.2, feeding a single-unit dense output.
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# The held-out rows are passed as validation data, so 'val_loss' in the
# returned history is measured on X_test / y_test.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=16,
)

### Evaluation

In [None]:
# Training loss and validation loss per epoch, drawn in that order so they
# line up with the legend entries.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

In [None]:
# inverse_transform is given arrays of 1 + 6 columns: the single column of
# interest is padded with the last six feature columns of X_test, and only
# column 0 of the un-scaled result is kept.
filler = X_test[:, -6:, 0]

pred = np.hstack((model.predict(X_test), filler))
pred = scaler.inverse_transform(pred)[:, 0]

y_test = np.column_stack((np.array(y_test), filler))
y_test = scaler.inverse_transform(y_test)[:, 0]

In [None]:
# Root-mean-square error between the predictions and the actual values.
rms = np.sqrt(np.mean((pred - y_test) ** 2))
print(rms)

In [None]:
# Compare the first 500 held-out points: actual values against predictions.
aa = list(range(500))
plt.figure(figsize=(16, 6))
plt.plot(aa, y_test[:500], marker='.', label="actual")
plt.plot(aa, pred[:500], 'r', label="prediction")
plt.xlabel('Time')
plt.ylabel('Global_active_power')
plt.legend()
plt.show()

In [None]:
# Credit for the material that inspired this notebook, kept as a bare string literal.
'''
Inspiration
1. https://www.kaggle.com/amirrezaeian/time-series-data-analysis-using-lstm-tutorial
'''