In [None]:
#Necessary imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
import random
warnings.filterwarnings('ignore')
import time
import math
import operator
from sklearn.metrics import accuracy_score,precision_score
from sklearn.metrics import confusion_matrix
from sklearn.datasets import make_blobs,make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import plot_confusion_matrix
from sklearn.decomposition import PCA

In [None]:
def train_test_split(df , train_split):
    """Shuffle ``df`` and cut it into a training frame and a test frame.

    Despite its name, ``train_split`` is the fraction of rows that is held
    out: the training frame receives ``ceil((1 - train_split) * len(df))``
    rows and the test frame receives the remainder.

    Args:
        df: pandas DataFrame to split.
        train_split: fraction (0..1) of rows that goes to the test frame.

    Returns:
        ``(df_train, df_test)``. Both keep the row labels of the shuffled,
        re-indexed frame, so the test frame's index does not start at 0.
    """
    # Shuffle every row, then renumber the index 0..len-1.
    shuffled = df.sample(frac=1).reset_index(drop=True)
    # Number of rows that go to the training frame.
    train_rows = math.ceil((1-train_split)*len(shuffled))
    training_part = shuffled.iloc[:train_rows]
    test_part = shuffled.tail(len(shuffled) - train_rows)
    return training_part, test_part

## HW-1 Perceptron

## References:

### Dataset: Synthetic data generated with sklearn `make_classification` (Source: https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html)


### Unified Learning Algorithm : Lecture - 2 , Instructor: Basabdatta Sen Bhattacharya , Course:  Neural Networks and Fuzzy Logic (BITS F312) ,  BITS PILANI, GOA CAMPUS

### Stochastic Gradient Algorithm : Aishwarya  V Srinivasan , Stochastic Gradient Descent - Clearly Explained (Source:  https://towardsdatascience.com/stochastic-gradient-descent-clearly-explained-53d239905d31 )

In [None]:
# Generating Clean data: two balanced, well separated classes without label noise
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=1,
    class_sep=2,
    flip_y=0,
    weights=[0.5, 0.5],
    random_state=17,
)
# Feature columns first, the class label last in the "Output" column.
df = pd.DataFrame(data=X).assign(Output=y)
# Persist the dataset so the preprocessing step can reload it from disk.
df.to_csv('dataset.csv', index=False)

In [None]:
# Reduce the data to two columns for plotting: when there are more than two
# features, project onto the first two principal components; otherwise plot
# the features directly.
if X.shape[1]>2:
    pca = PCA(n_components=2)
    plotX=pca.fit_transform(X)
else:
    plotX=X
f,ax1 = plt.subplots(nrows=1, ncols=1,figsize=(10,8))
# x and y are passed by keyword: seaborn >= 0.12 made them keyword-only, so the
# positional form raises a TypeError there.
sns.scatterplot(x=plotX[:,0],y=plotX[:,1],hue=y,ax=ax1);
ax1.set_title("First two Principal Componenets of the Data-set",fontsize=20)
ax1.set_xlabel("First Dimension of the data", fontsize = 18)
ax1.set_ylabel("Second Dimension of the data", fontsize=16)

In [None]:
def activation_function(z):
    """Step activation: return 1 when ``z`` is non-negative, otherwise 0."""
    return 1 if z >= 0 else 0
    
def activation_function_sgd(z):
    """Logistic sigmoid ``1 / (1 + e**(-z))`` evaluated without overflow.

    Args:
        z: scalar pre-activation value.

    Returns:
        The sigmoid of ``z`` as a float in the range [0, 1].
    """
    if z >= 0:
        # exp(-z) <= 1 here, so the textbook form cannot overflow.
        return 1/(1+math.exp(-z))
    # For negative z, exp(-z) grows without bound and math.exp raises
    # OverflowError once -z exceeds ~709; exp(z) stays in (0, 1) instead.
    exp_z = math.exp(z)
    return exp_z/(1+exp_z)

def get_y(z):
    """Turn an activation into a class label: 1.0 if ``z >= 0.5``, else 0.0."""
    return 1.0 if z >= 0.5 else 0.0
    
def rmse(predictions, targets):
    """Root-mean-square error between two equally long sequences.

    Both inputs are converted to float arrays first, so plain lists, NumPy
    arrays and pandas Series are all accepted and are compared element by
    element in positional order (no index alignment).

    Args:
        predictions: predicted values.
        targets: expected values, same length as ``predictions``.

    Returns:
        The square root of the mean squared difference.
    """
    predictions = np.asarray(predictions, dtype=float)
    targets = np.asarray(targets, dtype=float)
    return np.sqrt(((predictions - targets) ** 2).mean())



In [None]:
class Perceptron:
    """Single-neuron binary (0/1) classifier with two alternative training rules.

    Attributes:
        n: number of input features.
        W: NumPy weight vector of length ``n``.
        bias: scalar bias added to the weighted sum of the inputs.
    """

    def __init__(self,dimension):
        """Create a perceptron for ``dimension`` features with random parameters."""
        self.n = dimension
        self.W = np.random.rand(self.n)#assigning random weights initially
        self.bias = random.random() #assigning random bias value

    @staticmethod
    def _as_arrays(X_train, y_train):
        """Convert features and labels to NumPy arrays paired by row position."""
        samples = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train)
        if len(samples) != len(targets):
            raise ValueError("X_train and y_train must contain the same number of rows")
        return samples, targets

    def _evaluate(self, samples, targets):
        """Return ``(rmse, accuracy)`` of the current parameters on the given arrays."""
        predict_outputs = np.asarray(self.predict(samples))
        return rmse(predict_outputs, targets), accuracy_score(targets, predict_outputs)

    def predict(self, X_test ):
        """Classify every row of ``X_test``.

        Args:
            X_test: 2-D feature table (DataFrame or array), one sample per row.

        Returns:
            A list with one 0.0/1.0 label per row.
        """
        # Converting once avoids a pandas ``.iloc`` lookup for every single row.
        samples = np.asarray(X_test, dtype=float)
        predict_outputs = []
        for row in samples:
            z = np.dot(self.W, row) + self.bias #weighted sum plus bias
            predict_outputs.append(get_y(activation_function(z)))
        return predict_outputs

    def unified_training(self,X_train , y_train , epochs = 100):
        """Train with the perceptron rule (step activation, fixed unit step size).

        Rows and labels are paired by position, so the index labels of
        ``X_train`` and ``y_train`` do not matter. The inputs are not modified.

        Args:
            X_train: 2-D feature table, one sample per row.
            y_train: 0/1 label for each row of ``X_train``.
            epochs: number of full passes over the training data.

        Returns:
            ``(error_list, acc_list, time_taken)``: per-epoch RMSE and accuracy
            on the training data, and the elapsed wall-clock time in seconds
            (which includes the per-epoch evaluation).
        """
        start = time.time()
        error_list = []
        acc_list = []
        samples, targets = self._as_arrays(X_train, y_train)
        for j in range(epochs):
            for X, target in zip(samples, targets):
                z = np.dot(self.W,X) + self.bias #weighted sum plus bias
                predict_ = activation_function(z) #this is our predicted output
                error = target-predict_ #0 when correct, +1/-1 when wrong
                self.W = self.W + error * X  #changing the weights as per the error
                self.bias = self.bias + error #changing bias as per the error

            cal1, cal2 = self._evaluate(samples, targets)
            print("Epoch : " , j , "Error : " , cal1 , "Accuracy : " , cal2)
            error_list.append(cal1)
            acc_list.append(cal2)

        end = time.time()
        time_taken = end-start
        return error_list,acc_list,time_taken

    def stochastic_gradient(self,X_train , y_train , learning_rate = 0.1 , epochs = 100):
        """Train with per-sample updates on a sigmoid unit, reshuffling every epoch.

        Rows and labels are paired by position. The inputs are not modified.

        Args:
            X_train: 2-D feature table, one sample per row.
            y_train: 0/1 label for each row of ``X_train``.
            learning_rate: step size applied to every update.
            epochs: number of full passes over the training data.

        Returns:
            ``(error_list, acc_list, time_taken)``: per-epoch RMSE and accuracy
            on the training data, and the elapsed wall-clock time in seconds
            (which includes the per-epoch evaluation).
        """
        start = time.time()
        error_list = []
        acc_list = []
        samples, targets = self._as_arrays(X_train, y_train)
        #training the neural network using stochastic gradient
        for j in range(epochs):
            # Fresh random visiting order each epoch; indexing both arrays with
            # the same positions keeps every sample attached to its own label.
            for i in np.random.permutation(len(samples)):
                X = samples[i] #input is in X
                target = targets[i]
                z = np.dot(self.W,X) + self.bias #weighted sum plus bias

                sigma = activation_function_sgd(z) #applying activation function on summation
                y = get_y(sigma) #getting the predicted value from the current weights
                # NOTE(review): the error term uses the thresholded output ``y``
                # rather than ``sigma``, so correctly classified samples cause no
                # update - confirm this is the intended rule.
                self.W = self.W - ((learning_rate)*(target - y)*sigma*(sigma-1)*X)
                self.bias = self.bias - ((learning_rate)*(target-y)*sigma*(sigma-1))

            cal1, cal2 = self._evaluate(samples, targets)
            print("Epoch : " , j , "Error :" , cal1 , "Accuracy : " , cal2)
            error_list.append(cal1)
            acc_list.append(cal2)

        end = time.time()
        time_taken = end-start
        return error_list,acc_list,time_taken

In [None]:
#preprocess data
# Seed both random number generators used in this notebook so that a fresh
# "Restart & Run All" reproduces the same shuffles, splits and initial weights:
# NumPy's global generator drives DataFrame.sample() and np.random.rand(),
# the stdlib `random` module drives the random bias value.
SEED = 17 #same value as the random_state used when generating the dataset
np.random.seed(SEED)
random.seed(SEED)
df = pd.read_csv("dataset.csv") #reading the csv file
df = df.sample(frac=1).reset_index(drop=True) #randomizing dataset

In [None]:
#calculate features that we require for training
features = list(df.columns) # start from every column name of the dataset
features.remove("Output") # "Output" holds the label to predict, so it is not an input

In [None]:
### normalizing the dataset given using Standard Scaler ###

# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split, so test rows contribute to the scaling statistics.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[features])
# Write the scaled values back over the feature columns; "Output" is untouched.
df[features] = pd.DataFrame(scaled_data, columns=features)
df.tail()

In [None]:
# Settings shared by the experiments in this notebook
epochs = 10              # epochs used by every training run
new_learning_rate = 0.2  # alternative learning rate for the learning-rate comparison
new_split_ratio = 0.4    # alternative split ratio for the split-ratio comparison

In [None]:
# Baseline split: 20% of the rows are held out for testing.
df_train,df_test = train_test_split(df , 0.2)
# Second, independent split used to study the effect of the split ratio.
df_train_split,df_test_split = train_test_split(df , new_split_ratio)
# Renumber each part from 0. The *_split frames must be re-indexed from
# themselves; re-assigning them from df_train/df_test would silently discard
# the new_split_ratio split.
df_train_split = df_train_split.reset_index(drop=True)
df_test_split = df_test_split.reset_index(drop=True)
df_train = df_train.reset_index(drop=True)
df_test = df_test.reset_index(drop=True)

In [None]:
# Baseline split: feature tables (X_*) and label columns (y_*)
X_train, y_train = df_train[features], df_train["Output"]
X_test, y_test = df_test[features], df_test["Output"]  # y_test is kept to score the models

# Same separation for the split made with the alternative ratio
X_train_split, y_train_split = df_train_split[features], df_train_split["Output"]
X_test_split, y_test_split = df_test_split[features], df_test_split["Output"]

In [None]:
### unified algorithm training ###
perceptron1 = Perceptron(X_train.shape[1])  # one weight per feature column
(error_list_unified,
 acc_list_unified,
 time_taken_unified) = perceptron1.unified_training(X_train, y_train, epochs=epochs)

In [None]:
### training using stochastic gradient descent algorithm with learning rate = 0.005 ###
perceptron2 = Perceptron(X_train.shape[1])  # one weight per feature column
(error_list_sgd,
 acc_list_sgd,
 time_taken_sgd) = perceptron2.stochastic_gradient(X_train, y_train,
                                                   learning_rate=0.005, epochs=epochs)

In [None]:
#print error curve for both type of training
x = list(range(0,epochs))  # epoch numbers for the horizontal axis
for curve, legend in (
    (error_list_unified, 'Unified training error'),
    (error_list_sgd, 'Stochastic gradient error'),
):
    plt.plot(x, curve, label=legend)
plt.xlabel('Epochs')
plt.ylabel('Respective errors')
plt.title('Training comparison in terms of error')
plt.legend()
plt.show()

In [None]:
#print accuracy curve for both type of training
for curve, legend in (
    (acc_list_unified, 'Unified training accuracy'),
    (acc_list_sgd, 'Stochastic gradient accuracy'),
):
    plt.plot(x, curve, label=legend)
plt.xlabel('Epochs')
plt.ylabel('Respective accuracy')
plt.title('Training comparison in terms of accuracy on training dataset')
plt.legend()
plt.show()

In [None]:
#comparing algorithm based on their time stamps with equal parameters
print("Time taken by following algorithms in seconds : ")
for heading, seconds in (
    ("Unified_training    : ", time_taken_unified),
    ("Stochastic gradient : ", time_taken_sgd),
):
    print(heading , seconds)

In [None]:
#seeing how sgd get affected by changing learning rate
perceptron3 = Perceptron(X_train.shape[1])  # fresh model, same data as the lr = 0.005 run
(error_list_sgd_new,
 acc_list_sgd_new,
 time_taken_sgd_new) = perceptron3.stochastic_gradient(
    X_train, y_train, learning_rate=new_learning_rate, epochs=epochs)



In [None]:
# Training error per epoch for the two learning rates
x = list(range(0,epochs))
for curve, legend in (
    (error_list_sgd, 'SGD training error, lr = 0.005'),
    (error_list_sgd_new, 'SGD training error , lr = '+ str(new_learning_rate)),
):
    plt.plot(x, curve, label=legend)
plt.xlabel('Epochs')
plt.ylabel('Respective errors')
plt.title('Training comparison in terms of error')
plt.legend()
plt.show()

In [None]:
#seeing how sgd_acc get affected by changing learning rate
for curve, legend in (
    (acc_list_sgd, 'SGD training accuracy, lr = 0.005'),
    (acc_list_sgd_new, 'SGD training accuracy , lr = '+ str(new_learning_rate)),
):
    plt.plot(x, curve, label=legend)
plt.xlabel('Epochs')
plt.ylabel('Respective accuracy')
plt.title('Training comparison in terms of accuracy')
plt.legend()
plt.show()

In [None]:
# seeing the effect of test train split on varying split ratio
perceptron4 = Perceptron(X_train.shape[1])  # fresh model trained on the *_split data
(error_list_sgd_split,
 acc_list_sgd_split,
 time_taken_sgd_split) = perceptron4.stochastic_gradient(
    X_train_split, y_train_split, learning_rate=0.005, epochs=epochs)

In [None]:
#seeing how sgd_error get affected by changing split ratio
x = list(range(0,epochs))
for curve, legend in (
    (error_list_sgd, 'SGD training error, split = 0.2'),
    (error_list_sgd_split, 'SGD training error , split = '+ str(new_split_ratio)),
):
    plt.plot(x, curve, label=legend)
plt.xlabel('Epochs')
plt.ylabel('Respective errors')
plt.title('Training comparison in terms of error')
plt.legend()
plt.show()

In [None]:
#seeing how sgd_acc get affected by changing split ratio
for curve, legend in (
    (acc_list_sgd, 'SGD training accuracy, split = 0.2'),
    (acc_list_sgd_split, 'SGD training accuracy , split = '+ str(new_split_ratio)),
):
    plt.plot(x, curve, label=legend)
plt.xlabel('Epochs')
plt.ylabel('Respective accuracy')
plt.title('Training comparison in terms of accuracy')
plt.legend()
plt.show()

In [None]:
#predicting values on X_test for each training algorithm
# The first three models share the baseline test set.
predict_xtest_unified, predict_xtest_sgd, predict_xtest_sgd_new = (
    model.predict(X_test) for model in (perceptron1, perceptron2, perceptron3)
)
# The split-ratio model is scored on the test part of its own split.
predict_xtest_sgd_split = perceptron4.predict(X_test_split)

In [None]:
#print confusion matrix for both type of training
# One confusion matrix per trained model; the first three share the baseline
# test set, the last one uses the test part of the alternative split.
cf_matrix_unified, cf_matrix_sgd, cf_matrix_sgd_new = (
    confusion_matrix(y_test, predicted)
    for predicted in (predict_xtest_unified, predict_xtest_sgd, predict_xtest_sgd_new)
)
cf_matrix_sgd_split = confusion_matrix(y_test_split, predict_xtest_sgd_split)

# Each cell is annotated with its name, absolute count and share of all samples.
group_names = ['True Neg','False Pos','False Neg','True Pos']
group_counts = list(map("{0:0.0f}".format, cf_matrix_unified.ravel()))
group_percentages = list(map("{0:.2%}".format,
                             cf_matrix_unified.ravel()/cf_matrix_unified.sum()))
labels = np.asarray([
    f"{v1}\n{v2}\n{v3}"
    for v1, v2, v3 in zip(group_names,group_counts,group_percentages)
]).reshape(2,2)

sns.heatmap(cf_matrix_unified, annot=labels, fmt='', cmap='Blues')
plt.xlabel("Predicted values")
plt.ylabel("Actual values")
plt.title("Heatmap for unified learning")


In [None]:
# Annotated confusion-matrix heatmap for the SGD model trained with lr = 0.005
group_names = ['True Neg','False Pos','False Neg','True Pos']
group_counts = list(map("{0:0.0f}".format, cf_matrix_sgd.ravel()))
group_percentages = list(map("{0:.2%}".format,
                             cf_matrix_sgd.ravel()/cf_matrix_sgd.sum()))
labels = np.asarray([
    f"{v1}\n{v2}\n{v3}"
    for v1, v2, v3 in zip(group_names,group_counts,group_percentages)
]).reshape(2,2)

sns.heatmap(cf_matrix_sgd, annot=labels, fmt='', cmap='Blues')
plt.xlabel("Predicted values")
plt.ylabel("Actual values")
plt.title("Heatmap for stochastic gradient learning with lr = 0.005")

In [None]:
# Annotated confusion-matrix heatmap for the SGD model trained with new_learning_rate
group_names = ['True Neg','False Pos','False Neg','True Pos']
group_counts = list(map("{0:0.0f}".format, cf_matrix_sgd_new.ravel()))
group_percentages = list(map("{0:.2%}".format,
                             cf_matrix_sgd_new.ravel()/cf_matrix_sgd_new.sum()))
labels = np.asarray([
    f"{v1}\n{v2}\n{v3}"
    for v1, v2, v3 in zip(group_names,group_counts,group_percentages)
]).reshape(2,2)

sns.heatmap(cf_matrix_sgd_new, annot=labels, fmt='', cmap='Blues')
plt.xlabel("Predicted values")
plt.ylabel("Actual values")
plt.title("Heatmap for stochastic gradient learning with lr =" + str(new_learning_rate))

In [None]:
# Annotated confusion-matrix heatmap for the SGD model of the split-ratio experiment
group_names = ['True Neg','False Pos','False Neg','True Pos']
group_counts = list(map("{0:0.0f}".format, cf_matrix_sgd_split.ravel()))
group_percentages = list(map("{0:.2%}".format,
                             cf_matrix_sgd_split.ravel()/cf_matrix_sgd_split.sum()))
labels = np.asarray([
    f"{v1}\n{v2}\n{v3}"
    for v1, v2, v3 in zip(group_names,group_counts,group_percentages)
]).reshape(2,2)

sns.heatmap(cf_matrix_sgd_split, annot=labels, fmt='', cmap='Blues')
plt.xlabel("Predicted values")
plt.ylabel("Actual values")
plt.title("Heatmap for stochastic gradient learning with split ratio =" + str(new_split_ratio))

In [None]:
# Test-set metrics for the four trained models.
# scikit-learn metrics take (y_true, y_pred) in that order. Precision is not
# symmetric in its arguments: with the two swapped the value is TP / (TP + FN),
# i.e. recall, so the true labels must come first.
precision_unified = precision_score(y_test , predict_xtest_unified)
precision_sgd = precision_score(y_test , predict_xtest_sgd)
precision_sgd_new = precision_score(y_test , predict_xtest_sgd_new)
precision_sgd_split = precision_score(y_test_split , predict_xtest_sgd_split)

# Accuracy is symmetric, but the same (y_true, y_pred) order is used for consistency.
accuracy_unified = accuracy_score(y_test , predict_xtest_unified)
accuracy_sgd = accuracy_score(y_test , predict_xtest_sgd)
accuracy_sgd_new = accuracy_score(y_test , predict_xtest_sgd_new)
accuracy_sgd_split = accuracy_score(y_test_split , predict_xtest_sgd_split)

print("TEST DATASET RESULTS : ")
print()

print("Following are precisions in different cases :")
print()

print("Precision in unified learning                   : " , precision_unified)
print("Precision in sgd learning with lr 0.005         : " , precision_sgd)
print("Precision in sgd learning with lr ",new_learning_rate,"         : " , precision_sgd_new)
print("Precision in sgd learning with split ratio ",new_split_ratio,": " , precision_sgd_split)
print()

print("Following are accuracies in different cases :")
print()
print("Accuracy in unified learning                   : " , accuracy_unified)
print("Accuracy in sgd learning with lr 0.005         : " , accuracy_sgd)
print("Accuracy in sgd learning with lr",new_learning_rate,"          : " , accuracy_sgd_new)
print("Accuracy in sgd learning with split ratio ",new_split_ratio,": " , accuracy_sgd_split)