# Energy Dataset Prediction Algorithms

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

In [None]:
# Read the transformed dataset written by the feature-engineering step.
# `df` and `df_loaded` are two names for the same DataFrame object (no copy is made).
df = df_loaded = pd.read_csv(
    "../Part3_Feature_Engineering/energydata_complete_transformed.csv"
)

# Empty results table: one row per (metric, split) pair and no columns yet.
# Row order is RSquared, RMS, MAE, MAPE, each as train followed by test.
metrics_df = pd.DataFrame(
    index=[
        "{0}_{1}".format(metric_name, split_name)
        for metric_name in ("RSquared", "RMS", "MAE", "MAPE")
        for split_name in ("train", "test")
    ]
)

## Dividing Data into Training and Testing

In [None]:
# Features are every column except the target; the target is the `Appliances` column.
X = df.drop(['Appliances'],axis=1)
y = df['Appliances']

# Hold out 20% of the rows for testing. The fixed random_state keeps the split,
# and therefore every metric computed from it, identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [None]:
# Function to print metrics
def print_training_metrics(**kwargs):
    """Print every keyword metric for the training dataset.

    Each keyword name is used as the metric label. Its value is formatted to
    two decimal places, converted back to a float, and printed on its own line.
    Returns None.
    """
    line_template = '{0} for Training Dataset is: {1}'
    for label in kwargs:
        # Round-trip through a two-decimal string so the printed float is rounded.
        rounded = float(format(kwargs[label], '.2f'))
        print(line_template.format(label, rounded))
def print_testing_metrics(**kwargs):
    """Print every keyword metric for the testing dataset.

    Each keyword name is used as the metric label. Its value is formatted to
    two decimal places, converted back to a float, and printed on its own line.
    Returns None.
    """
    line_template = '{0} for Testing Dataset is: {1}'
    for label in kwargs:
        # Round-trip through a two-decimal string so the printed float is rounded.
        rounded = float(format(kwargs[label], '.2f'))
        print(line_template.format(label, rounded))

# Function to print and add metrics to dataframe
def print_metrics(df, model, r2_train, rms_train, mae_train, mape_train, r2_test, rms_test, mae_test, mape_test):
    """Store one model's train/test metrics as a new column of ``df``.

    Despite its name, this function prints nothing. Each metric is formatted
    to two decimal places, converted back to a float, and written under the
    column ``model`` in this row order: R2 (train, test), RMS (train, test),
    MAE (train, test), MAPE (train, test).

    Parameters
    ----------
    df : DataFrame that receives the new column; it is modified in place.
    model : column label to store the metrics under.
    r2_train, rms_train, mae_train, mape_train : training-set metric values.
    r2_test, rms_test, mae_test, mape_test : testing-set metric values.

    Returns
    -------
    The same ``df`` object that was passed in.
    """
    # Interleave train/test so the values line up with the row order above.
    ordered_values = (
        r2_train, r2_test,
        rms_train, rms_test,
        mae_train, mae_test,
        mape_train, mape_test,
    )
    df[model] = [float(format(metric, ".2f")) for metric in ordered_values]
    return df

### Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from math import sqrt
from sklearn.metrics import mean_absolute_error

# Fit a linear regression on the training split.
lr = LinearRegression().fit(X_train, y_train)

# Predictions for both splits.
prediction_train_lr = lr.predict(X_train)
prediction_test_lr = lr.predict(X_test)

# Training-set metrics: R squared, root-mean-square error, mean absolute
# error and mean absolute percentage error (in percent).
r2_train_lr = r2_score(y_train, prediction_train_lr)
rms_train_lr = sqrt(mean_squared_error(y_train, prediction_train_lr))
mae_train_lr = mean_absolute_error(y_train, prediction_train_lr)
mape_train_lr = 100 * np.mean(np.abs((y_train - prediction_train_lr) / y_train))

# The same four metrics on the held-out testing set.
r2_test_lr = r2_score(y_test, prediction_test_lr)
rms_test_lr = sqrt(mean_squared_error(y_test, prediction_test_lr))
mae_test_lr = mean_absolute_error(y_test, prediction_test_lr)
mape_test_lr = 100 * np.mean(np.abs((y_test - prediction_test_lr) / y_test))

# Add the rounded metrics as the 'LR_Model' column and display the table.
metrics_df = print_metrics(
    metrics_df, 'LR_Model',
    r2_train_lr, rms_train_lr, mae_train_lr, mape_train_lr,
    r2_test_lr, rms_test_lr, mae_test_lr, mape_test_lr,
)
metrics_df

### Random Forest Model

In [None]:
from sklearn.ensemble import RandomForestRegressor

# 500-tree random forest. The fixed random_state makes the fitted forest,
# and therefore the reported metrics, repeatable across re-runs.
rf = RandomForestRegressor(n_estimators = 500, random_state = 42)
rf.fit(X_train, y_train)

# Predicting and calculating the metrics for the testing dataset:
# R squared, root-mean-square error, mean absolute error and mean absolute
# percentage error (in percent).
prediction_test_rf = rf.predict(X_test)
r2_test_rf = r2_score(y_test, prediction_test_rf)
rms_test_rf = sqrt(mean_squared_error(y_test, prediction_test_rf))
mae_test_rf = mean_absolute_error(y_test,prediction_test_rf)
mape_test_rf = np.mean(np.abs((y_test - prediction_test_rf) / y_test)) * 100

# Predicting and calculating the same metrics for the training dataset.
prediction_train_rf = rf.predict(X_train)
r2_train_rf = r2_score(y_train, prediction_train_rf)
rms_train_rf = sqrt(mean_squared_error(y_train, prediction_train_rf))
mae_train_rf = mean_absolute_error(y_train,prediction_train_rf)
mape_train_rf = np.mean(np.abs((y_train - prediction_train_rf) / y_train)) * 100

# Add the rounded metrics as the 'RF_Model' column and display the table.
print('Random Forest Model\n')
metrics_df = print_metrics(metrics_df, 'RF_Model', r2_train_rf, rms_train_rf, mae_train_rf, mape_train_rf, r2_test_rf, rms_test_rf, mae_test_rf, mape_test_rf)
metrics_df

### Neural Network Model

In [None]:
# Import the Multi-Layer Perceptron models. The target is scored below with
# regression metrics (R squared, RMS, MAE, MAPE) and the other two models are
# regressors, so the regressor variant is the one fitted here. A classifier
# would treat every distinct target value as a separate class.
# MLPClassifier stays imported in case cells outside this view still use it.
from sklearn.neural_network import MLPClassifier, MLPRegressor

# Three hidden layers of 37 units each; the fixed random_state makes the
# weight initialisation, and therefore the metrics, repeatable across re-runs.
mlp = MLPRegressor(hidden_layer_sizes=(37,37,37), random_state=42)
mlp.fit(X_train,y_train)

# Predicting and calculating the metrics for the testing dataset.
prediction_test_nn = mlp.predict(X_test)
r2_test_nn = r2_score(y_test, prediction_test_nn)
rms_test_nn = sqrt(mean_squared_error(y_test, prediction_test_nn))
mae_test_nn = mean_absolute_error(y_test,prediction_test_nn)
mape_test_nn = np.mean(np.abs((y_test - prediction_test_nn) / y_test)) * 100

# Predicting and calculating the same metrics for the training dataset.
prediction_train_nn = mlp.predict(X_train)
r2_train_nn = r2_score(y_train, prediction_train_nn)
rms_train_nn = sqrt(mean_squared_error(y_train, prediction_train_nn))
mae_train_nn = mean_absolute_error(y_train,prediction_train_nn)
mape_train_nn = np.mean(np.abs((y_train - prediction_train_nn) / y_train)) * 100

# Add the rounded metrics as the 'NN_Model' column and display the table.
print('Neural Network Model')
metrics_df = print_metrics(metrics_df, 'NN_Model', r2_train_nn, rms_train_nn, mae_train_nn, mape_train_nn, r2_test_nn, rms_test_nn, mae_test_nn, mape_test_nn)
metrics_df

**Chosen Model Fits Worse**
   * $R^2$ compares the fit of the chosen model with that of a horizontal straight line at the mean of the target (the null model, which always predicts the mean). If the chosen model fits worse than this horizontal line, $R^2$ is negative. A negative $R^2$ for the Neural Network therefore means that, as configured here, it is not a suitable predictive model.