# BITS F312 - Neural Network and Fuzzy Logic



## Assignment 1

In [None]:
# importing libraries required
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# connecting gdrive to access the datasets
# (google.colab is supplied by the Colab runtime; the drive is mounted at
# /content/drive, which the later directory change relies on)
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# finding out current working directory of the runtime (shell escape)
!pwd

/content


In [None]:
# changing directory to - 'drive/MyDrive/NNFL/Data_A1/'
%cd drive/MyDrive/NNFL/Data_A1/

/content/drive/MyDrive/NNFL/Data_A1


In [None]:
# plotting defaults: ggplot theme with a 14x14 figure canvas
plt.style.use('ggplot')
plt.rcParams.update({"figure.figsize": (14, 14)})

# Q5
Repeat Q4 using 5-fold cross-validation (CV) to select the training and test instances for each model. Evaluate the accuracy, sensitivity, and specificity of the LoR, LoR + L2-norm regularization, and LoR + L1-norm regularization models using the BGD, SGD, and MBGD algorithms. You must use the dataset data_q4_q5.xlsx for this question.

In [None]:
def fiveFoldCV(filename, random_state=None):
  """Run 5-fold cross-validation of L1-regularised logistic regression.

  The spreadsheet is read, its rows are shuffled, a leading bias column of
  ones is inserted and the 'diagnosis' labels are encoded ('B' -> 0,
  'M' -> 1). The rows are then split into five folds; every fold serves once
  as the test set while the remaining four are stacked into the training set.
  The procedure is repeated for the BGD, MBGD and SGD trainers, each of which
  prints its own metrics.

  Parameters
  ----------
  filename : str
    Path of the Excel file to load. It must contain a 'diagnosis' column; the
    last column of the sheet is used as the target vector.
  random_state : int, optional
    Seed forwarded to DataFrame.sample for the row shuffle. It does not seed
    the weight initialisation or the sampling performed inside the trainers.

  Returns
  -------
  None
    Results are only printed.
  """
  cv_val = 5
  separator = '--------------------------------------------------------------------------------'

  # obtaining the data from the file and shuffling the rows
  df = pd.read_excel(filename)
  df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)
  df.insert(0, 'ones', 1)

  # encoding the data in one pass (no chained assignment); any value other
  # than 'B'/'M' is left untouched
  label_codes = {'B': 0, 'M': 1}
  df['diagnosis'] = df['diagnosis'].map(lambda label: label_codes.get(label, label))

  # preparing the data for 5 fold CV; array_split spreads the remainder rows
  # over the first folds instead of silently dropping them
  folds = np.array_split(df.to_numpy(), cv_val)

  # the trainers are looked up here, at call time, so they may be defined in
  # later notebook cells
  trainers = (
    ('BGD', fiveFoldBGD),
    ('MBGD', fiveFoldMBGD),
    ('SGD', fiveFoldSGD),
  )

  for optimizer_name, train_and_report in trainers:
    print(separator)
    print('Logistic Regression using L1-norm regularization approach & ' + optimizer_name)
    print(separator)

    for fold_idx in range(cv_val):
      # the current fold is held out; only the OTHER folds form the training
      # set, so the model is never fitted on the rows it is evaluated on
      test_data = folds[fold_idx]
      train_data = np.vstack([fold for k, fold in enumerate(folds) if k != fold_idx])

      print('Fold: ', fold_idx)
      print(separator)

      # normalizing data (last column is the label, everything before it is a feature)
      # NOTE(review): normalize is defined elsewhere in the notebook and is
      # applied to the test fold independently of the training folds; confirm
      # this is intended rather than reusing the training statistics.
      XVec_train = normalize(train_data[:, :-1])
      YVec_train = train_data[:, -1]

      XVec_test = normalize(test_data[:, :-1])
      YVec_test = test_data[:, -1]

      train_and_report(XVec_train, YVec_train, XVec_test, YVec_test)


In [None]:
def fiveFoldBGD(XVec_train, YVec_train, XVec_test, YVec_test, epoch = 200, alpha = 0.1, lambd = 0.0005):
  """Fit L1-regularised logistic regression with batch gradient descent and print metrics.

  Parameters
  ----------
  XVec_train, XVec_test : 2-D arrays
    Feature matrices (rows are samples). Both are used exactly as passed, so
    the caller must supply them already normalised.
  YVec_train, YVec_test : 1-D arrays
    Numeric 0/1 targets for the corresponding feature matrices.
  epoch : int
    Number of full-batch weight updates.
  alpha : float
    Learning rate.
  lambd : float
    Weight of the L1 penalty; its sub-gradient lambd*sign(W) is added to
    every update.

  Returns
  -------
  None
    Metrics for the training and the test split are printed via `metrics`.
  """
  n_samples, n_features = XVec_train.shape
  # numeric copy of the targets so the residual stays a float array even if
  # the labels arrive in an object-dtype column
  targets = np.asarray(YVec_train, dtype=float)

  W = np.random.randn(n_features)

  for _ in range(epoch):
    Y_prob = sigmoid(XVec_train.dot(W))

    # gradient of the mean log-loss over the whole training set: X^T (p - y) / N
    grad = XVec_train.T.dot(Y_prob - targets) / n_samples
    W = W - alpha*grad - alpha*lambd*np.sign(W) # using L1 norm

  Y_pred_train = predictLORL1(XVec_train, W)
  # the test features get the same treatment as the training features:
  # no additional normalisation is applied inside the trainer
  Y_pred_test = predictLORL1(XVec_test, W)

  print("Metrics measured for training data")
  metrics(YVec_train, Y_pred_train)
  print("Metrics measured for testing data")
  metrics(YVec_test, Y_pred_test)



In [None]:
def fiveFoldMBGD(XVec_train, YVec_train, XVec_test, YVec_test, epoch = 300, alpha = 0.075, lambd = 0.0005, batch_size = 50):
  """Fit L1-regularised logistic regression with mini-batch gradient descent and print metrics.

  Each of the `epoch` iterations performs ONE weight update on a single
  mini-batch of `batch_size` rows drawn at random (with replacement) from the
  training set.

  Parameters
  ----------
  XVec_train, XVec_test : 2-D arrays
    Feature matrices (rows are samples). Both are used exactly as passed, so
    the caller must supply them already normalised.
  YVec_train, YVec_test : 1-D arrays
    Numeric 0/1 targets for the corresponding feature matrices.
  epoch : int
    Number of mini-batch updates.
  alpha : float
    Learning rate.
  lambd : float
    Weight of the L1 penalty; its sub-gradient lambd*sign(W) is added to
    every update.
  batch_size : int
    Number of rows sampled for each update.

  Returns
  -------
  None
    Metrics for the training and the test split are printed via `metrics`.
  """
  n_samples, n_features = XVec_train.shape
  # numeric copy of the targets so the residual stays a float array even if
  # the labels arrive in an object-dtype column
  targets = np.asarray(YVec_train, dtype=float)

  W = np.random.randn(n_features)

  for _ in range(epoch):
    idx = np.random.randint(0, n_samples, size=batch_size)
    X_batch = XVec_train[idx]
    residual = sigmoid(X_batch.dot(W)) - targets[idx]

    # per-feature gradient averaged over the batch, shape (n_features,);
    # averaging over the feature axis instead would collapse it to one scalar
    # and push every weight by the same amount
    grad = X_batch.T.dot(residual) / batch_size
    W = W - alpha*grad - alpha*lambd*np.sign(W)

  Y_pred_train = predictLORL1(XVec_train, W)
  # the test features get the same treatment as the training features:
  # no additional normalisation is applied inside the trainer
  Y_pred_test = predictLORL1(XVec_test, W)

  print("Metrics measured for training data")
  metrics(YVec_train, Y_pred_train)
  print("Metrics measured for testing data")
  metrics(YVec_test, Y_pred_test)



In [None]:
def fiveFoldSGD(XVec_train, YVec_train, XVec_test, YVec_test, epoch = 300, alpha = 0.075, lambd = 0.0005):
  """Fit L1-regularised logistic regression with stochastic gradient descent and print metrics.

  Each of the `epoch` iterations performs ONE weight update using a single
  training row chosen uniformly at random.

  Parameters
  ----------
  XVec_train, XVec_test : 2-D arrays
    Feature matrices (rows are samples). Both are used exactly as passed, so
    the caller must supply them already normalised.
  YVec_train, YVec_test : 1-D arrays
    Numeric 0/1 targets for the corresponding feature matrices.
  epoch : int
    Number of single-sample updates.
  alpha : float
    Learning rate.
  lambd : float
    Weight of the L1 penalty; its sub-gradient lambd*sign(W) is added to
    every update.

  Returns
  -------
  None
    Metrics for the training and the test split are printed via `metrics`.
  """
  n_samples, n_features = XVec_train.shape
  # numeric copy of the targets so the residual stays a float array even if
  # the labels arrive in an object-dtype column
  targets = np.asarray(YVec_train, dtype=float)

  W = np.random.randn(n_features)

  for _ in range(epoch):
    idx = np.random.randint(0, n_samples)
    # one-row slice keeps the sample 2-D, so sigmoid still receives an array
    X_row = XVec_train[idx:idx+1]
    residual = sigmoid(X_row.dot(W)) - targets[idx:idx+1]

    # per-feature gradient (p - y) * x of the sampled row, shape (n_features,);
    # it must not be averaged over the feature axis
    grad = X_row.T.dot(residual)
    W = W - alpha*grad - alpha*lambd*np.sign(W)

  Y_pred_train = predictLORL1(XVec_train, W)
  # the test features get the same treatment as the training features:
  # no additional normalisation is applied inside the trainer
  Y_pred_test = predictLORL1(XVec_test, W)

  print("Metrics measured for training data")
  metrics(YVec_train, Y_pred_train)
  print("Metrics measured for testing data")
  metrics(YVec_test, Y_pred_test)



In [None]:
# running the 5-fold CV experiment on the Q4/Q5 dataset (file is resolved
# relative to the current working directory)
fiveFoldCV('data_q4_q5.xlsx')

--------------------------------------------------------------------------------
Logistic Regression using L1-norm regularization approach & BGD
--------------------------------------------------------------------------------
Fold:  0
--------------------------------------------------------------------------------
Metrics measured for training data
--------------------------------------------------------------------------------
False Positives :  12
False Negatives :  8
True Negatives  :  312
True Positives  :  120
--------------------------------------------------------------------------------
Sensitivity :  0.9375
Specificity :  0.9629629629629629
Accuracy ((TN+TP)/(TN+TP+FN+FP)) :  0.9557522123893806
--------------------------------------------------------------------------------
Metrics measured for testing data
--------------------------------------------------------------------------------
False Positives :  3
False Negatives :  2
True Negatives  :  78
True Positives  :  30
-----