In [None]:
#First step :- Load libraries
from keras.models import Sequential
from keras.layers import Dense      #neural net layers
import pandas as pd                  
from sklearn.model_selection import train_test_split   #to spit dataset into train,validation,test 
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import tensorflow as tf

!pip install -q xlrd           #library to read from excel sheet
import io                      #input-output = io
from google.colab import files #important to locally save files
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth    #to use Google Cloud SDK
from pydrive.drive import GoogleDrive  
from google.colab import auth           #to authenticate gmail ID
from oauth2client.client import GoogleCredentials
# Authenticate the Colab user, then build a PyDrive client from the
# application-default Google credentials. `drive` is used further down to
# download the dataset spreadsheet.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()   # application-default credentials handed to PyDrive
drive = GoogleDrive(gauth)

import random
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
import numpy as np
from sklearn import metrics

In [None]:
# Sentinel score that is worse than any score the swarm is expected to see.
BIG_SCORE = 1.e6  # type: float


class Particle:
    """A single particle of a particle-swarm optimiser.

    The particle's position is the weight list of ``model`` (anything exposing
    ``get_weights()``, ``set_weights()`` and ``evaluate(x, y, verbose=0)``);
    its velocity is a list of arrays with the same shapes as those weights.

    :param model: model whose weights this particle moves
    :param params: dict with the keys ``'acc'`` (factor applied to the previous
        velocity), ``'local_acc'`` (pull towards this particle's best weights)
        and ``'global_acc'`` (pull towards the swarm's best weights)
    """

    def __init__(self, model, params):
        self.model = model
        self.params = params
        self.init_weights = model.get_weights()
        self.length = len(self.init_weights)
        # Initial velocities are drawn uniformly from [-0.10, 0.10).
        self.velocities = [
            np.random.rand(*layer.shape) / 5 - 0.10 for layer in self.init_weights
        ]

        # Start the personal best at the initial position so step() can be
        # called before get_score() has recorded an improvement (an empty list
        # here made _update_velocities fail with an IndexError).
        self.best_weights = [np.array(layer, copy=True) for layer in self.init_weights]
        self.best_score = BIG_SCORE

    def get_score(self, x, y, update=True):
        """Evaluate the model on ``(x, y)`` and return its loss.

        :param x: inputs passed to ``model.evaluate``
        :param y: targets passed to ``model.evaluate``
        :param update: when true, remember the current weights as the personal
            best if the loss improved on ``best_score``
        :return: the loss reported by ``model.evaluate``
        """
        local_score = self.model.evaluate(x, y, verbose=0)
        # Keras returns [loss, metric, ...] when the model was compiled with
        # metrics; only the loss takes part in the optimisation.
        if isinstance(local_score, (list, tuple)):
            local_score = local_score[0]

        if update and local_score < self.best_score:
            self.best_score = local_score
            self.best_weights = self.model.get_weights()

        return local_score

    def _update_velocities(self, global_best_weights, depth):
        """Recompute the velocity of the first ``depth`` weight arrays.

        One random factor for the personal-best pull and one for the
        global-best pull are drawn per call and shared by all layers.
        Layers at index ``depth`` and beyond keep their previous velocity.
        """
        weights = self.model.get_weights()
        local_rand, global_rand = random.random(), random.random()

        new_velocities = [None] * len(self.init_weights)
        for i, layer in enumerate(weights):
            if i >= depth:
                new_velocities[i] = self.velocities[i]
                continue
            new_v = self.params['acc'] * self.velocities[i]
            new_v = new_v + self.params['local_acc'] * local_rand * (self.best_weights[i] - layer)
            new_v = new_v + self.params['global_acc'] * global_rand * (global_best_weights[i] - layer)
            new_velocities[i] = new_v

        self.velocities = new_velocities

    def _update_weights(self, depth):
        """Move the first ``depth`` weight arrays by their velocity; leave the rest untouched."""
        old_weights = self.model.get_weights()
        new_weights = [
            layer if i >= depth else layer + self.velocities[i]
            for i, layer in enumerate(old_weights)
        ]
        self.model.set_weights(new_weights)

    def step(self, x, y, global_best_weights, depth=None):
        """Advance the particle by one PSO step and return its new loss on ``(x, y)``.

        :param global_best_weights: best weight list found by the swarm so far
        :param depth: number of leading weight arrays to move; ``None`` moves all
        """
        if depth is None:
            depth = self.length
        self._update_velocities(global_best_weights, depth)
        self._update_weights(depth)
        return self.get_score(x, y)

    def get_best_weights(self):
        """Return the weight list stored as this particle's personal best."""
        return self.best_weights
        

In [None]:
class ProgressBar:
    """Minimal text progress bar written to stdout with ``print``.

    :param steps: total number of iterations the caller is going to run
    :param updates: width of the bar in characters (number of '=' cells)
    """

    def __init__(self, steps, updates=1):
        # At least one cell; updates == 0 would make the division below fail.
        self.updates = max(1, updates)
        self.total_steps = steps
        # Number of iterations represented by one cell of the bar.
        self.step_size = (steps // self.updates) + 1
        # Number of filled cells. Starts at 0 so the first bar is empty
        # (it used to start at 100, which drew a completely filled bar).
        self.step = 0
        self._show(end=' ')

    def update(self, i):
        """Redraw the bar for iteration ``i``; only every ``step_size``-th iteration redraws."""
        if i % self.step_size > 0:
            return

        self.step = i // self.step_size
        self._show(end=' ')

    def done(self):
        """Draw the completely filled bar and end the line."""
        # Fill every cell, also when fewer steps than cells were run.
        self.step = self.updates
        self._show(end='\n')

    def _make_bar(self, x):
        """Return the bar as a string such as ``[===   ]``.

        ``x`` is unused and only kept so existing calls keep working; the
        fill level is taken from ``self.step``.
        """
        cells = "".join("=" if cell < self.step else " " for cell in range(self.updates))
        return "[" + cells + "]"

    def _show(self, end):
        """Print the current bar, preceded by a carriage return."""
        print("\r" + self._make_bar(self.step), end=end)

# tqdm

In [None]:
import keras
class Optimizer:
    """Particle-swarm optimiser for the weights of a Keras model.

    Every particle owns an independent clone of ``model`` that starts from
    ``model``'s current weights; the model passed in is never modified.

    :param model: built Keras model that defines the architecture and the
        starting weights of every particle
    :param loss: loss passed to ``compile`` for every particle model
    :param n: number of particles (at least 1)
    :param acceleration: factor applied to a particle's previous velocity
    :param local_rate: pull towards each particle's own best weights
    :param global_rate: pull towards the swarm's best weights
    :raises ValueError: if ``n`` is smaller than 1
    """

    # A particle with any velocity component at or beyond +/- this value is
    # treated as diverged and is no longer moved.
    VELOCITY_LIMIT = 1000

    def __init__(self, model, loss,
                 n = 150,
                 acceleration=3,
                 local_rate=2.5,      #1.2
                 global_rate=3):  #1.6
        if n < 1:
            raise ValueError("n must be at least 1")

        self.n_particles = n
        self.structure = model
        self.loss = loss
        self.length = len(model.get_weights())
        self.history = []

        params = {'acc': acceleration, 'local_acc': local_rate, 'global_acc': global_rate}

        # One model per particle. Sharing a single model object between the
        # particles would make them all read and write the same weights.
        self.particles = [Particle(self._clone_structure(), params) for _ in range(n)]

        self.global_best_weights = None
        self.global_best_score = BIG_SCORE

    def _clone_structure(self):
        """Return a compiled copy of ``self.structure`` carrying the same weights."""
        clone = keras.models.clone_model(self.structure)
        # clone_model() re-initialises the weights, so copy them explicitly.
        clone.set_weights(self.structure.get_weights())
        clone.compile(loss=self.loss, optimizer='Adam')
        return clone

    def _velocities_in_bounds(self, particle):
        """Return True when every velocity component of ``particle`` lies strictly inside +/- VELOCITY_LIMIT."""
        limit = self.VELOCITY_LIMIT
        return all(np.all(np.abs(v) < limit) for v in particle.velocities)

    def _refresh_global_best(self, x, y):
        """Score every particle on ``(x, y)`` and update the global best if one improved on it."""
        for p in self.particles:
            local_score = p.get_score(x, y)

            if local_score < self.global_best_score:
                self.global_best_score = local_score
                self.global_best_weights = p.get_best_weights()

    def fit(self, x, y, steps=100, batch_size=4):
        """Run the swarm for ``steps`` sweeps over the data in mini-batches.

        After every mini-batch the current global best score is appended to
        ``self.history``. A trailing batch smaller than ``batch_size`` is
        processed as well.

        NOTE(review): scores measured on single mini-batches are compared with
        scores measured on the full data set, so the reported best score can
        be a mini-batch score -- confirm this is intended.

        :param x: 2-D array of inputs
        :param y: targets aligned with ``x``
        :param steps: number of sweeps over all mini-batches
        :param batch_size: number of samples per mini-batch
        :raises RuntimeError: if no particle scores below ``BIG_SCORE`` initially
        """
        self._refresh_global_best(x, y)
        if self.global_best_weights is None:
            raise RuntimeError("no particle scored below BIG_SCORE; cannot start the swarm")

        print("PSO -- Initial best score {:0.4f}".format(self.global_best_score))

        bar = ProgressBar(steps, updates=20)
        n_samples = x.shape[0]

        for step_idx in range(steps):
            for start in range(0, n_samples, batch_size):
                x_ = x[start:start + batch_size, :]
                y_ = y[start:start + batch_size]

                for p in self.particles:
                    # Checked per particle and over all of its layers.
                    if not self._velocities_in_bounds(p):
                        continue

                    local_score = p.step(x_, y_, self.global_best_weights)

                    if local_score < self.global_best_score:
                        self.global_best_score = local_score
                        self.global_best_weights = p.get_best_weights()

                self.history.append(self.global_best_score)

            # step_idx is the sweep counter; no inner loop reuses the name.
            bar.update(step_idx)

        self._refresh_global_best(x, y)

        print("PSO -- Mean square error {:0.4f}".format(self.global_best_score))

        bar.done()

    def get_best_model(self):
        """Return a new compiled model carrying the globally best weights.

        :raises RuntimeError: if no global best has been recorded yet
        """
        if self.global_best_weights is None:
            raise RuntimeError("fit() must be called before get_best_model()")

        # Built from the stored structure rather than from a notebook global.
        best_model = keras.models.clone_model(self.structure)
        best_model.set_weights(self.global_best_weights)
        best_model.compile(loss=self.loss, optimizer='Adam')
        return best_model

In [None]:
#Store the data set
# Download the spreadsheet from Google Drive by its file id, save it locally
# as data.xlsx and read it into a DataFrame.
file_id = '1oQw9vdiTsmsvOayjpskpVj_ZvBLPCVII' # file key of the relevant google sheet

downloaded = drive.CreateFile({'id': file_id}) 
downloaded.GetContentFile('data.xlsx')   

df = pd.read_excel('data.xlsx')

dataset = df.values                       #Convert the dataframe into an array
# NOTE(review): both slices start at row 1, so row 0 of `dataset` is never used -- confirm this is intended.
inputs  = dataset[1:212,0:4]                # columns 0-3 are the four input features
targets = dataset[1:212,4:5]               # column 4 is the target, kept 2-D by the 4:5 slice

# Two-stage split: first hold out 20% as the test set, then take 25% of the
# remaining 80% as the validation set (60/20/20 train/val/test overall).
# X_train / Y_train hold the 80% intermediate set here; the scaling cell reassigns both names.
X_train, x_test, Y_train, y_test = train_test_split(inputs, targets, test_size=0.2, random_state = 4) 
x_train, x_val, y_train, y_val = train_test_split(X_train, Y_train, test_size=0.25, random_state=1)

In [None]:
# Standardise features and targets to a common scale.
# The scalers are fitted on the training split only and then applied
# unchanged to every other split, so validation/test data are scaled with the
# same statistics the model is trained on. Features and targets get separate
# scalers because they have different shapes and statistics.
from sklearn.preprocessing import StandardScaler

x_scaler = StandardScaler()
y_scaler = StandardScaler()

X_train = x_scaler.fit_transform(x_train)
Y_train = y_scaler.fit_transform(y_train)

X_val = x_scaler.transform(x_val)
Y_val = y_scaler.transform(y_val)
X_test = x_scaler.transform(x_test)
Y_test = y_scaler.transform(y_test)
INPUTS = x_scaler.transform(inputs)
TARGETS = y_scaler.transform(targets)

# Later cells call sc.inverse_transform() on model predictions, so `sc` has to
# be the target scaler.
sc = y_scaler

In [None]:
# Run configuration used by the PSO cells below.
N = 150     # number of particles
STEPS = 15  # number of steps passed to Optimizer.fit (each step sweeps all mini-batches)
LOSS = 'mse'# Loss function
BATCH_SIZE = 2  # Size of batches to train on


In [None]:
# Reset the global state Keras has accumulated so far before new models are built.
tf.keras.backend.clear_session()

In [None]:
from keras import optimizers
from keras.optimizers import adam_v2, adagrad_v2, adamax_v2
def build_model():
    """
    Build and compile the dense regression network used in this notebook.

    Layout: 4 inputs -> Dense(16, sigmoid) -> Dense(48, relu) -> Dense(16, tanh)
    -> Dense(1) with no activation argument.
    The model is compiled with MSE loss, Adam (learning rate 0.01) and MAE as
    an additional metric.

    :return: compiled Keras Sequential model
    """
    hidden_layers = [
        Dense(16, activation='sigmoid', input_shape=(4,)),
        Dense(48, activation='relu'),
        Dense(16, activation='tanh'),
    ]
    network = Sequential(hidden_layers)
    # Single-unit output layer for the one regression target.
    network.add(Dense(1))

    adam = adam_v2.Adam(learning_rate=0.01)
    network.compile(loss='mse', optimizer=adam, metrics=['mae'])
    return network


In [None]:
def vanilla_backpropagation(X_train, Y_train, X_val, Y_val,
                            n_runs=150, epochs=60, batch_size=2):
    """
    Train ``n_runs`` freshly built models with backpropagation and keep the best one.

    "Best" means the lowest training loss, i.e. the first value returned by
    ``model.evaluate`` (MSE for the models made by ``build_model``). The
    additional metric values are not used for the selection.

    :param X_train: inputs to train on
    :param Y_train: targets to train with
    :param X_val: validation inputs passed to ``fit`` as validation data
    :param Y_val: validation targets passed to ``fit`` as validation data
    :param n_runs: number of independent training runs
    :param epochs: epochs per training run
    :param batch_size: batch size used for training and for scoring
    :return: the model with the lowest training loss, or None if no run
        produced a comparable (non-NaN) loss
    """
    best_model = None
    best_score = float('inf')
    for _ in range(n_runs):
        model_s = build_model()
        model_s.fit(X_train, Y_train, validation_data=(X_val, Y_val),
                    epochs=epochs,
                    batch_size=batch_size,
                    verbose=0)
        train_score = model_s.evaluate(X_train, Y_train, batch_size=batch_size, verbose=0)
        # evaluate() returns [loss, metric, ...] when metrics are compiled in.
        # Compare runs on the loss only; mixing loss and metric values would
        # compare numbers measured on different scales.
        if isinstance(train_score, (list, tuple)):
            train_loss = train_score[0]
        else:
            train_loss = train_score
        if train_loss < best_score:
            best_model = model_s
            best_score = train_loss
    print("Best training loss = ", best_score)
    return best_model


In [None]:
#Main function part 1
# Train the backpropagation baseline on the standardised training/validation
# arrays; the returned model is handed to the PSO optimizer in the next cell.
# LOSS and BATCH_SIZE repeat the values already set in the configuration cell.
LOSS = 'mse'
BATCH_SIZE = 2
model_s = vanilla_backpropagation(X_train, Y_train, X_val, Y_val)
# The optimizer is instantiated in the next cell with this model, the loss function, and hyperparameters

In [None]:
#Main function part 2
# Build the particle swarm around the backpropagation model.
# NOTE(review): Particle._update_velocities multiplies the previous velocity by
# `acceleration` on every step, so a value of 3 makes velocities grow from step
# to step -- confirm this is intended.
pso = Optimizer(model=model_s,
loss=LOSS,
n=N,                # Number of particles
acceleration=3,     # Contribution of recursive particle velocity (inertia)
local_rate=2.5,     # Contribution of locally best weights to new velocity
global_rate=3)      # Contribution of globally best weights to new velocity

# Train model on provided data (the standardised training arrays)
pso.fit(X_train, Y_train, steps=STEPS, batch_size=BATCH_SIZE)

In [None]:
#Main function part 3
# Get the model carrying the globally best weights found by the swarm
model_p = pso.get_best_model()

# get_best_model() compiles with a loss and no metrics, so each score below is
# formatted directly as one number. All three are measured on standardised data.
p_train_score = model_p.evaluate(X_train, Y_train, batch_size=BATCH_SIZE, verbose=0)
p_test_score = model_p.evaluate(X_test, Y_test, batch_size=BATCH_SIZE, verbose=0)
p_val_score = model_p.evaluate(X_val, Y_val, batch_size=BATCH_SIZE, verbose=0)
print("PSO -- train: {:.4f}  test: {:.4f}  val:  {:.4f}".format(p_train_score, p_test_score, p_val_score))


PSO -- train: 0.0033  test: 0.0451  val:  0.0693


In [None]:
#way to extract weights from the Neural network
#for lay in model_p.layers:
 #   print(lay.name)
  #  print(lay.get_weights())

In [None]:
# inverse transform
# Predict on the standardised inputs of each split and map the predictions
# back with sc.inverse_transform.
# NOTE(review): inverse_transform uses whatever statistics `sc` was last fitted
# with; for these results to be in target units `sc` must have been fitted on
# the target column -- verify in the scaling cell.
y_pred = model_p.predict(INPUTS)
y_pred_inv = sc.inverse_transform(y_pred)

y_predtrain = model_p.predict(X_train)
y_predtrain_inv = sc.inverse_transform(y_predtrain)

y_predtest = model_p.predict(X_test)
y_predtest_inv = sc.inverse_transform(y_predtest)

y_predval = model_p.predict(X_val)
y_predval_inv = sc.inverse_transform(y_predval)

In [None]:
# to find MSE of overall data
def mse(actual, pred):
    """Return the mean squared error between two array-likes.

    :param actual: observed values
    :param pred: predicted values, same (or broadcastable) shape as ``actual``
    :return: mean of the squared element-wise differences
    """
    residuals = np.array(actual) - np.array(pred)
    return np.mean(residuals * residuals)
print(mse(targets, y_pred_inv))

def rmse(actual, pred):
    """Return the root mean squared error between two array-likes.

    :param actual: observed values
    :param pred: predicted values, same (or broadcastable) shape as ``actual``
    :return: square root of the mean squared element-wise difference
    """
    residuals = np.array(actual) - np.array(pred)
    mean_squared = np.mean(residuals * residuals)
    return np.sqrt(mean_squared)
print(rmse(targets, y_pred_inv))

1.4964567142233682
1.2232974757692294


In [None]:
# calculating R2 values 
# R^2 is computed in standardised space: the standardised targets of each
# split are compared with the raw network outputs for the matching inputs.
# The results are stored as r2_all1 / r2_test1 / r2_train1 / r2_val1.
from sklearn.metrics import r2_score
r2_all1 = r2_score(TARGETS, y_pred)
r2_test1 = r2_score(Y_test, y_predtest)
r2_train1 = r2_score(Y_train, y_predtrain)
r2_val1 = r2_score(Y_val, y_predval)
print ('r2 Train',r2_train1)
print ('r2 Test',r2_test1)
print ('r2 validation',r2_val1)
print ('r2 overall',r2_all1)

r2 Train 0.9967048433785978
r2 Test 0.9549252460475335
r2 validation 0.9306851556751203
r2 overall 0.9941424948082406


In [None]:
# plotting training fit: actual vs. predicted targets on the training split
z = [row[1] for row in x_train]            # second input feature; not used by this plot
a = [row[0] for row in y_train]            # actual (unscaled) targets
e = [row[0] for row in y_predtrain_inv]    # predictions after sc.inverse_transform
r = np.array(z)
n = np.array(a)
t = np.array(e)

plt.scatter(a,e)
plt.xlabel('Actual values')
plt.ylabel('Predicted values')

# Least-squares straight line through the (actual, predicted) points.
plt.plot(np.unique(a), np.poly1d(np.polyfit(a, e, 1))(np.unique(a)))

# The training R^2 is stored as r2_train1; no variable named r2_train exists.
plt.text(0.6, 0.5, 'R-squared = %0.2f' % r2_train1)


In [None]:
# plotting validation fit: actual vs. predicted targets on the validation split
z = [row[1] for row in x_val]            # second input feature; not used by this plot
a = [row[0] for row in y_val]            # actual (unscaled) targets
e = [row[0] for row in y_predval_inv]    # predictions after sc.inverse_transform
r = np.array(z)
n = np.array(a)
t = np.array(e)

plt.scatter(a,e)
plt.xlabel('Actual values')
plt.ylabel('Predicted values')

# Least-squares straight line through the (actual, predicted) points.
plt.plot(np.unique(a), np.poly1d(np.polyfit(a, e, 1))(np.unique(a)))

# Show the validation R^2 (stored as r2_val1), not the training value.
plt.text(0.6, 0.5, 'R-squared = %0.2f' % r2_val1)

In [None]:
# plotting testing fit: actual vs. predicted targets on the test split
z = [row[1] for row in x_test]            # second input feature; not used by this plot
a = [row[0] for row in y_test]            # actual (unscaled) targets
e = [row[0] for row in y_predtest_inv]    # predictions after sc.inverse_transform
r = np.array(z)
n = np.array(a)
t = np.array(e)

plt.scatter(a,e)
plt.xlabel('Actual values')
plt.ylabel('Predicted values')

# Least-squares straight line through the (actual, predicted) points.
plt.plot(np.unique(a), np.poly1d(np.polyfit(a, e, 1))(np.unique(a)))
# Show the test R^2 (stored as r2_test1), not the training value.
plt.text(0.6, 0.5, 'R-squared = %0.2f' % r2_test1)