In [None]:
#First step :- Load libraries
from keras.models import Sequential
from keras.layers import Dense      #neural net layers
import pandas as pd                  
from sklearn.model_selection import train_test_split   #to spit dataset into train,validation,test 
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import tensorflow as tf

!pip install -q xlrd           #library to read from excel sheet
import io                      #input-output = io
from google.colab import files #important to locally save files
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth    #to use Google Cloud SDK
from pydrive.drive import GoogleDrive  
from google.colab import auth           #to authenticate gmail ID
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user, then hand the resulting application-default
# credentials to PyDrive so later cells can fetch files through `drive`.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()   #to authenticate gmail ID in Google Cloud SDK
drive = GoogleDrive(gauth)

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
import numpy as np
from sklearn import metrics

# to find MSE of overall data
def mse(actual, pred):
    """Return the mean squared error between `actual` and `pred`.

    Both arguments are converted to NumPy arrays. When the two shapes differ
    only by length-1 axes (for example a flat ``(n,)`` vector against a
    ``(n, 1)`` column), those axes are dropped first so the values are paired
    element by element.

    Parameters
    ----------
    actual : array-like
        Observed values.
    pred : array-like
        Predicted values; same layout as `actual`, or broadcastable to it.

    Returns
    -------
    float
        Mean of the squared element-wise differences.
    """
    actual, pred = np.asarray(actual), np.asarray(pred)
    # Without this, (n,) minus (n, 1) broadcasts to an (n, n) matrix and the
    # mean is taken over every cross pair, silently giving a wrong score.
    if actual.shape != pred.shape and np.squeeze(actual).shape == np.squeeze(pred).shape:
        actual, pred = np.squeeze(actual), np.squeeze(pred)
    return np.square(np.subtract(actual, pred)).mean()


def rmse(actual, pred):
    """Return the root mean squared error between `actual` and `pred`.

    Both arguments are converted to NumPy arrays. When the two shapes differ
    only by length-1 axes (for example a flat ``(n,)`` vector against a
    ``(n, 1)`` column), those axes are dropped first so the values are paired
    element by element.

    Parameters
    ----------
    actual : array-like
        Observed values.
    pred : array-like
        Predicted values; same layout as `actual`, or broadcastable to it.

    Returns
    -------
    float
        Square root of the mean squared element-wise difference.
    """
    actual, pred = np.asarray(actual), np.asarray(pred)
    # Without this, (n,) minus (n, 1) broadcasts to an (n, n) matrix and the
    # mean is taken over every cross pair, silently giving a wrong score.
    if actual.shape != pred.shape and np.squeeze(actual).shape == np.squeeze(pred).shape:
        actual, pred = np.squeeze(actual), np.squeeze(pred)
    return np.sqrt(np.square(np.subtract(actual, pred)).mean())

In [None]:
#Store the data set
# Fetch the spreadsheet from Google Drive through the `drive` client, save it
# locally as data.xlsx, and slice it into model inputs and targets.
file_id = '1oQw9vdiTsmsvOayjpskpVj_ZvBLPCV' #file key of google sheet

downloaded = drive.CreateFile({'id': file_id}) 
downloaded.GetContentFile('data.xlsx')   

df = pd.read_excel('data.xlsx')

# Rows 1..211 of the array are used; columns 0-3 are the four input features
# and column 4 is the target (sliced as 4:5 so it stays 2-D, shape (rows, 1)).
# NOTE(review): row 0 of df.values is skipped. read_excel already consumes the
# header row by default, so this drops the first data row unless the sheet
# carries a second header/units row -- confirm against the spreadsheet.
# NOTE(review): if the sheet has fewer than 212 rows the slice silently
# returns fewer samples rather than raising.
dataset = df.values                        #Convert the data into an array
inputs  = dataset[1:212,0:4]               #set input data 
targets = dataset[1:212,4:5]               #set output data

In [None]:
# Split the data into train / validation / test subsets and standardize them.
#
# Step 1: hold out 20% of all rows as the test set.
x_trainval, x_test, y_trainval, y_test = train_test_split(inputs, targets, test_size=0.2, random_state=4)
# Step 2: split the remaining 80% into train and validation.
# test_size=0.25 because 0.25 x 0.8 = 0.2, so the final proportions are
# 60% train, 20% validation, 20% test.
x_train, x_val, y_train, y_val = train_test_split(x_trainval, y_trainval, test_size=0.25, random_state=1)

from sklearn.preprocessing import StandardScaler

# One scaler for the features and one for the target, each fitted on the
# TRAINING subset only. Re-fitting a single scaler on every subset would give
# each subset its own mean/std: the model would then see test and validation
# data in a different feature space than it was trained on, and
# inverse_transform would use whichever subset happened to be fitted last.
sc_x = StandardScaler()
# The target scaler keeps the name `sc` because later cells call
# sc.inverse_transform(...) on model predictions.
sc = StandardScaler()

# Lower-case names hold raw values; upper-case names hold standardized values.
X_train = sc_x.fit_transform(x_train)   # learn feature mean/std from training rows
X_val = sc_x.transform(x_val)
X_test = sc_x.transform(x_test)
INPUTS = sc_x.transform(inputs)

Y_train = sc.fit_transform(y_train)     # learn target mean/std from training rows
Y_val = sc.transform(y_val)
Y_test = sc.transform(y_test)
TARGETS = sc.transform(targets)

In [None]:
# Reset the global state Keras keeps between model builds, so re-running the
# cells below starts from a clean slate rather than on top of the previous run.
tf.keras.backend.clear_session() #close the previous model variable before starting a fresh run

In [None]:
# Build the neural network layer by layer. In a Sequential model the output of
# each layer feeds the next one, in the order the layers are added.
# Architecture: 4 input features -> 16 (sigmoid) -> 48 (relu) -> 16 (tanh) -> 1 output.
model = Sequential()
# Only the first layer needs input_shape (the number of input attributes).
model.add(Dense(16, activation='sigmoid', input_shape=(4,)))
model.add(Dense(48, activation='relu'))
model.add(Dense(16, activation='tanh'))
# Output layer: a single unit with no activation argument.
model.add(Dense(1))


In [None]:
# Choose the loss function and optimizer, then train the network.
from keras import optimizers
from keras.optimizers import adam_v2, adagrad_v2, adamax_v2
import random

#opt = tf.keras.optimizers.RMSprop(learning_rate=0.01)
# Mean-squared-error loss, Adam optimizer, mean absolute error as an extra metric.
adam = adam_v2.Adam(learning_rate=0.01)
model.compile(optimizer=adam, loss='mse', metrics=['mae'])

# Train on the standardized training data; the validation set is passed
# explicitly via validation_data, with validation_split left at 0.
fit_options = dict(
    epochs=80,
    batch_size=2,
    validation_split=0,
    validation_data=(X_val, Y_val),
)
hist = model.fit(X_train, Y_train, **fit_options)

In [None]:
# Pull the weight arrays out of the trained network and print them.
layer_weights = model.get_weights()
print(layer_weights)

In [None]:
# Learning curves: training vs. validation loss per epoch. A validation curve
# that rises while the training curve keeps falling points to overfitting.
plt.plot(hist.history['loss'], 'k-')
plt.plot(hist.history['val_loss'], 'b--')
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# Exactly two curves are drawn, so exactly two legend labels are supplied.
plt.legend(['Train', 'Val'], loc='upper right')
plt.show()

# Same comparison for the mean absolute error metric requested in compile().
# MAE is an error measure (lower is better), not an accuracy.
plt.plot(hist.history['mae'], 'k-')
plt.plot(hist.history['val_mae'], 'b--')
plt.title('Model MAE', fontsize = 16)
plt.ylabel(' mae')
plt.xlabel('Epoch')
plt.legend(['Train', 'Val'], loc='upper right')
plt.show()

In [None]:
# Predict on every subset and map the predictions back to original target units.
#
# The network was trained on Y_train, i.e. y_train standardized with the
# training targets' mean/std, so its outputs live in that space. They must be
# un-scaled with those same statistics. A dedicated scaler is fitted on
# y_train here so the result does not depend on whatever data a shared scaler
# object was last fitted on.
target_scaler = StandardScaler().fit(y_train)

# Whole dataset
y_pred = model.predict(INPUTS)
y_pred_inv = target_scaler.inverse_transform(y_pred)

# Training subset
y_predtrain = model.predict(X_train)
y_predtrain_inv = target_scaler.inverse_transform(y_predtrain)

# Validation subset
y_predval = model.predict(X_val)
y_predval_inv = target_scaler.inverse_transform(y_predval)

# Test subset
y_predtest = model.predict(X_test)
y_predtest_inv = target_scaler.inverse_transform(y_predtest)

In [None]:
# Overall error of the un-scaled predictions against the raw targets:
# MSE is printed first, then RMSE.
for metric in (mse, rmse):
    print(metric(targets, y_pred_inv))

In [None]:
# R-squared on the original target scale, computed with scikit-learn:
# raw targets vs. inverse-transformed predictions, one score per subset.
from sklearn.metrics import r2_score

r2_train = r2_score(y_train, y_predtrain_inv)
r2_test = r2_score(y_test, y_predtest_inv)
r2_val = r2_score(y_val, y_predval_inv)
r2_all = r2_score(targets, y_pred_inv)

report = (
    ('r2 Train', r2_train),
    ('r2 Test', r2_test),
    ('r2 validation', r2_val),
    ('r2 overall', r2_all),
)
for label, value in report:
    print(label, value)

In [None]:
# R-squared in standardized space: scaled targets vs. the network's raw
# (still standardized) outputs. Original author's note: these results came out
# slightly better than the ones on the original scale.
from sklearn.metrics import r2_score

r2_train1 = r2_score(Y_train, y_predtrain)
r2_test1 = r2_score(Y_test, y_predtest)
r2_val1 = r2_score(Y_val, y_predval)
r2_all1 = r2_score(TARGETS, y_pred)

print('r2 Train', r2_train1)
print('r2 Test', r2_test1)
print('r2 validation', r2_val1)
print('r2 overall', r2_all1)

In [None]:
# plotting overall fit
# Actual vs. predicted target values (original units) over the whole dataset,
# with a least-squares trend line and the overall R-squared.
count = len(y_pred)

# z/a/e and r/n/t are the names this cell has always left in the notebook
# namespace; they stay defined in case another cell reads them.
z = list(inputs[:count, 1])        # second input column (not plotted)
a = list(targets[:count, 0])       # actual target values
e = list(y_pred_inv[:count, 0])    # predictions in original units
r = np.array(z)
n = np.array(a)
t = np.array(e)

# Explicit float arrays so the line fit works even if the source array was
# loaded with a generic object dtype.
actual_vals = n.astype(float)
predicted_vals = t.astype(float)

fig, ax = plt.subplots()
ax.scatter(actual_vals, predicted_vals)
ax.set_title('Overall fit')
ax.set_xlabel('Actual values')
ax.set_ylabel('Predicted values')

# Degree-1 least-squares fit, drawn at the distinct actual values.
trend = np.poly1d(np.polyfit(actual_vals, predicted_vals, 1))
xs = np.unique(actual_vals)
ax.plot(xs, trend(xs))

# Axes-fraction coordinates keep the annotation inside the plot whatever the
# range of the target values; data coordinates could land it off-screen.
ax.text(0.05, 0.9, 'R-squared = %0.2f' % r2_all, transform=ax.transAxes)
plt.show()

In [None]:
# plotting training fit
# Actual vs. predicted target values (original units) on the training subset,
# with a least-squares trend line and the training R-squared.
count = len(Y_train)

# z/a/e and r/n/t are the names this cell has always left in the notebook
# namespace; they stay defined in case another cell reads them.
z = list(x_train[:count, 1])           # second input column (not plotted)
a = list(y_train[:count, 0])           # actual target values
e = list(y_predtrain_inv[:count, 0])   # predictions in original units
r = np.array(z)
n = np.array(a)
t = np.array(e)

# Explicit float arrays so the line fit works even if the source array was
# loaded with a generic object dtype.
actual_vals = n.astype(float)
predicted_vals = t.astype(float)

fig, ax = plt.subplots()
ax.scatter(actual_vals, predicted_vals)
ax.set_title('Training fit')
ax.set_xlabel('Actual values')
ax.set_ylabel('Predicted values')

# Degree-1 least-squares fit, drawn at the distinct actual values.
trend = np.poly1d(np.polyfit(actual_vals, predicted_vals, 1))
xs = np.unique(actual_vals)
ax.plot(xs, trend(xs))

# Axes-fraction coordinates keep the annotation inside the plot whatever the
# range of the target values; data coordinates could land it off-screen.
ax.text(0.05, 0.9, 'R-squared = %0.2f' % r2_train, transform=ax.transAxes)
plt.show()

In [None]:
# plotting testing fit
# Actual vs. predicted target values (original units) on the test subset,
# with a least-squares trend line and the test R-squared.
count = len(Y_test)

# z/a/e and r/n/t are the names this cell has always left in the notebook
# namespace; they stay defined in case another cell reads them.
z = list(x_test[:count, 1])           # second input column (not plotted)
a = list(y_test[:count, 0])           # actual target values
e = list(y_predtest_inv[:count, 0])   # predictions in original units
r = np.array(z)
n = np.array(a)
t = np.array(e)

# Explicit float arrays so the line fit works even if the source array was
# loaded with a generic object dtype.
actual_vals = n.astype(float)
predicted_vals = t.astype(float)

fig, ax = plt.subplots()
ax.scatter(actual_vals, predicted_vals)
ax.set_title('Testing fit')
ax.set_xlabel('Actual values')
ax.set_ylabel('Predicted values')

# Degree-1 least-squares fit, drawn at the distinct actual values.
trend = np.poly1d(np.polyfit(actual_vals, predicted_vals, 1))
xs = np.unique(actual_vals)
ax.plot(xs, trend(xs))

# Axes-fraction coordinates keep the annotation inside the plot whatever the
# range of the target values; data coordinates could land it off-screen.
ax.text(0.05, 0.9, 'R-squared = %0.2f' % r2_test, transform=ax.transAxes)
plt.show()


In [None]:
# plotting validation fit
# Actual vs. predicted target values (original units) on the validation
# subset, with a least-squares trend line and the validation R-squared.
count = len(Y_val)

# z/a/e and r/n/t are the names this cell has always left in the notebook
# namespace; they stay defined in case another cell reads them.
z = list(x_val[:count, 1])           # second input column (not plotted)
a = list(y_val[:count, 0])           # actual target values
e = list(y_predval_inv[:count, 0])   # predictions in original units
r = np.array(z)
n = np.array(a)
t = np.array(e)

# Explicit float arrays so the line fit works even if the source array was
# loaded with a generic object dtype.
actual_vals = n.astype(float)
predicted_vals = t.astype(float)

fig, ax = plt.subplots()
ax.scatter(actual_vals, predicted_vals)
ax.set_title('Validation fit')
ax.set_xlabel('Actual values')
ax.set_ylabel('Predicted values')

# Degree-1 least-squares fit, drawn at the distinct actual values.
trend = np.poly1d(np.polyfit(actual_vals, predicted_vals, 1))
xs = np.unique(actual_vals)
ax.plot(xs, trend(xs))

# Axes-fraction coordinates keep the annotation inside the plot whatever the
# range of the target values; data coordinates could land it off-screen.
ax.text(0.05, 0.9, 'R-squared = %0.2f' % r2_val, transform=ax.transAxes)
plt.show()
