In [36]:
import numpy as np
import pandas as pd

In [37]:
import os

In [38]:
df = pd.read_csv('/content/ElectricityDeptDataset.csv')

In [39]:
df

Unnamed: 0,AT,V,AP,RH,PE
0,8.34,40.77,1010.84,90.01,480.48
1,23.64,58.49,1011.40,74.20,445.75
2,29.74,56.90,1007.15,41.91,438.76
3,19.07,49.69,1007.22,76.79,453.09
4,11.80,40.66,1017.13,97.20,464.43
...,...,...,...,...,...
9563,15.12,48.92,1011.80,72.93,462.59
9564,33.41,77.95,1010.30,59.72,432.90
9565,15.99,43.34,1014.20,78.66,465.96
9566,17.65,59.87,1018.58,94.65,450.93


In [40]:
df.rename(columns={'AT': 'Average Temperature', 'V': 'Exhaust Vacuum','AP': 'Ambient Pressure',
                   'RH': 'Relative Humidity ','PE': 'Net Hourly Electrical Energy Output'}, inplace=True)
df.head()

Unnamed: 0,Average Temperature,Exhaust Vacuum,Ambient Pressure,Relative Humidity,Net Hourly Electrical Energy Output
0,8.34,40.77,1010.84,90.01,480.48
1,23.64,58.49,1011.4,74.2,445.75
2,29.74,56.9,1007.15,41.91,438.76
3,19.07,49.69,1007.22,76.79,453.09
4,11.8,40.66,1017.13,97.2,464.43


In [41]:
df.corr()["Net Hourly Electrical Energy Output"].sort_values(ascending=False)

Net Hourly Electrical Energy Output    1.000000
Ambient Pressure                       0.518429
Relative Humidity                      0.389794
Exhaust Vacuum                        -0.869780
Average Temperature                   -0.948128
Name: Net Hourly Electrical Energy Output, dtype: float64

In [42]:
#Preparing and training the model

x = df.drop('Net Hourly Electrical Energy Output',axis = 1).values
y = df['Net Hourly Electrical Energy Output'].values
print(x)
print(y)

[[   8.34   40.77 1010.84   90.01]
 [  23.64   58.49 1011.4    74.2 ]
 [  29.74   56.9  1007.15   41.91]
 ...
 [  15.99   43.34 1014.2    78.66]
 [  17.65   59.87 1018.58   94.65]
 [  23.68   51.3  1011.86   71.24]]
[480.48 445.75 438.76 ... 465.96 450.93 451.67]


In [43]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.25,random_state=42)
print(xtrain.shape)
print(xtest.shape)
print(ytrain.shape)
print(ytest.shape)


(7176, 4)
(2392, 4)
(7176,)
(2392,)


In [44]:
# Building and training neural network models

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

In [45]:
# Iniialising ANN and adding layers

ann = Sequential()
ann.add(Dense(units=6, activation="relu"))  #Adding First Hidden Layer
ann.add(Dense(units=6, activation="relu"))  # Adding Second Hidden Layer
ann.add(Dense(units=1))   # Adding Output Layer

In [46]:
# compiling and training the ANN
ann.compile(optimizer='Adam', loss='mean_squared_error')

In [47]:
ann.fit(x=xtrain,y=ytrain, epochs=100, batch_size=32,validation_data=(xtest,ytest), callbacks=EarlyStopping(monitor='val_loss',patience=4))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100


<keras.callbacks.History at 0x7cbf9117b9a0>

In [49]:
# Evaluating the model and making predictions

ann.evaluate(xtrain, ytrain)



29.183244705200195

In [50]:
ann.evaluate(xtest, ytest)



27.030235290527344

In [51]:
predictions = ann.predict(xtest)
predictions_df = pd.DataFrame(np.ravel(predictions),columns=['Predictions'])
comparison_df = pd.concat([pd.DataFrame(ytest,columns=['Real values']), predictions_df],axis=1)
comparison_df



Unnamed: 0,Real values,Predictions
0,433.27,434.290405
1,438.16,434.809967
2,458.42,465.119415
3,480.82,478.350067
4,441.41,437.322296
...,...,...
2387,446.70,451.706665
2388,430.82,425.970062
2389,440.10,435.413574
2390,439.39,435.862457


As we can see in abouve comparison dataframe, our model's predictions pretty well closer to actual value.

In [52]:
# Let's evaluate the model performance

from sklearn import metrics
print("MAE:",metrics.mean_absolute_error(ytest,predictions))
print ("MSE:",metrics.mean_squared_error(ytest,predictions))
print("RMSE:",np.sqrt(metrics.mean_squared_error(ytest,predictions)))

MAE: 4.099064737275293
MSE: 27.03023186733069
RMSE: 5.199060671633934


The Mean Absolute Error is just 3.6 which is so small which shows that the model is almost as the actual values.

In [53]:
# R^2
metrics.explained_variance_score(ytest,predictions)

0.9132570186452051

This shows our model predict 93% of the target correctly.