# BITS F312 - Neural Network and Fuzzy Logic



In [None]:
# importing libraries required
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive at /content/drive so the datasets stored there can be read.
# Uses the google.colab helper, so this cell is only expected to work inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Print the current working directory (shell escape via `!`) to check where
# relative paths will be resolved from.
!pwd

/content/drive/MyDrive/NNFL/Data_A1


In [None]:
# changing directory to - 'drive/MyDrive/NNFL/Data_A1/'
%cd drive/MyDrive/NNFL/Data_A1/

/content/drive/MyDrive/NNFL/Data_A1


In [None]:
# Global matplotlib configuration for the notebook: select the 'ggplot' style sheet
# first, then set the default figure size to 14x14 inches. Keep this order, since
# applying a style sheet updates rcParams and could otherwise override the size.
plt.style.use('ggplot')
plt.rcParams["figure.figsize"] = (14, 14)

In [None]:
def metrics(Y_true, Y_pred):
    """Print confusion-matrix counts and derived scores for binary labels.

    A sample counts as positive when its true label equals 1; every other
    true label is treated as negative. For positives, a prediction equal to 1
    is a true positive and anything else a false negative. For negatives, a
    prediction equal to 0 is a true negative and anything else a false positive.

    Parameters
    ----------
    Y_true : indexable sequence
        Ground-truth labels; indexed with 0..len(Y_true)-1.
    Y_pred : indexable sequence
        Predicted labels, indexed with the same positions as ``Y_true``.

    Returns
    -------
    None
        The counts, sensitivity, specificity and accuracy are printed only.
        A score whose denominator is zero (e.g. no positive samples, or empty
        input) is printed as ``nan`` instead of raising ZeroDivisionError.
    """
    FP = 0  # For counting the False Positives
    FN = 0  # For counting the False Negatives
    TN = 0  # For counting the True Negatives
    TP = 0  # For counting the True Positives

    for i in range(len(Y_true)):
        if Y_true[i] == 1:
            if Y_pred[i] == 1:
                TP += 1
            else:
                FN += 1
        else:
            if Y_pred[i] == 0:
                TN += 1
            else:
                FP += 1

    def ratio(numerator, denominator):
        # A fold may contain only one class; report nan rather than crash.
        return numerator / denominator if denominator else float('nan')

    print('--------------------------------------------------------------------------------')

    print("False Positives : ", FP)
    print("False Negatives : ", FN)
    print("True Negatives  : ", TN)
    print("True Positives  : ", TP)

    print('--------------------------------------------------------------------------------')

    sens = ratio(TP, TP + FN)
    spes = ratio(TN, TN + FP)

    print("Sensitivity : ", sens)
    print("Specificity : ", spes)
    print("Accuracy ((TN+TP)/(TN+TP+FN+FP)) : ", ratio(TP + TN, TN + FN + TP + FP))
    print('--------------------------------------------------------------------------------')



In [None]:
def normalize(data):
  """Return a z-score standardised copy of ``data`` (population std, ddof=0).

  * 1-D input: the whole vector is standardised.
  * Otherwise: every column from index 1 onwards is standardised independently;
    column 0 is returned unchanged.
    NOTE(review): column 0 is presumably skipped because other parts of the
    notebook put a bias ('ones') column there - confirm for callers that do not.

  The input is never modified: the work is done on a copy, and integer/bool
  input is promoted to float so the scaled values are not truncated. A column
  (or vector) with zero standard deviation is only mean-centred, which avoids
  the 0/0 division.

  Parameters
  ----------
  data : array-like
      Values to standardise.

  Returns
  -------
  numpy.ndarray
      New array with the same shape as ``data``.
  """
  data = np.asarray(data)
  if data.dtype.kind in 'biu':
    normalized_vec = data.astype(float)
  else:
    normalized_vec = data.copy()

  def zscore(values):
    mean = np.mean(values)
    std_dev = np.std(values)
    centred = values - mean
    # Constant input has std 0; dividing would turn every entry into nan.
    return centred if std_dev == 0 else centred / std_dev

  if len(normalized_vec.shape) == 1:
    normalized_vec[:] = zscore(normalized_vec[:])
  else:
    for i in range(1, normalized_vec.shape[1]):
      normalized_vec[:, i] = zscore(normalized_vec[:, i])

  return normalized_vec

# Q8
Use the likelihood ratio test (LRT) for binary classification on the dataset
(“data_q4_q5.xlsx”). Use 5-fold cross-validation (CV) to select the training and test
instances when evaluating the LRT classifier. Report the accuracy, sensitivity, and
specificity values of the binary classifier.

In [None]:
def fiveCVLRT(filename, random_state=None):
  """Evaluate the LRT classifier with 5-fold cross-validation.

  The Excel sheet is read, its rows are shuffled, the 'diagnosis' column is
  encoded ('B' -> 0, 'M' -> 1, any other value is left as it is) and the rows
  are split into 5 folds. Each fold is used exactly once as the test set while
  the remaining four folds form the training set; ``LRT`` prints the metrics
  for every fold.

  Assumes the label column is the LAST column of the sheet and that all other
  columns are numeric - TODO confirm against the data file.

  Parameters
  ----------
  filename : str
      Path of the Excel file to read.
  random_state : int or None, optional
      Seed forwarded to ``DataFrame.sample`` so the shuffle (and therefore the
      fold assignment) can be reproduced. ``None`` keeps the shuffle random.

  Returns
  -------
  None
  """
  # obtaining the data from the file and shuffling the rows
  df = pd.read_excel(filename)
  df = df.sample(frac=1, random_state=random_state).reset_index(drop=True)

  # encoding the labels in one pass; element-wise chained assignment
  # (df['diagnosis'][i] = ...) is not guaranteed to write back into df
  label_codes = {'B': 0, 'M': 1}
  df['diagnosis'] = df['diagnosis'].map(lambda value: label_codes.get(value, value))

  # preparing the data for 5 fold CV; array_split keeps every row
  # (fold sizes differ by at most one) instead of dropping the remainder
  cv_val = 5
  fold_np = np.array_split(df.to_numpy(), cv_val)

  for iteration in range(cv_val):
    # the fold with index `iteration` is held out, all other folds are training data
    test_data = fold_np[iteration]
    train_data = np.vstack([fold_np[j] for j in range(cv_val) if j != iteration])

    print('Fold: ', iteration)
    print('--------------------------------------------------------------------------------')
    # normalizing data; astype() hands normalize a fresh float copy so the
    # stored folds are never altered between iterations
    # NOTE(review): as before, the test fold is standardised with its own
    # statistics rather than the training fold's - confirm this is intended.
    XVec_train = normalize(train_data[:, :-1].astype(float))
    YVec_train = train_data[:, -1]

    XVec_test = normalize(test_data[:, :-1].astype(float))
    YVec_test = test_data[:, -1]
    LRT(XVec_train, YVec_train, XVec_test, YVec_test)
    print('--------------------------------------------------------------------------------')


In [None]:
def LRT(XVec_train, YVec_train, XVec_test, YVec_test):
  """Classify test samples with a Gaussian likelihood-ratio test and print metrics.

  For each class c in {0, 1} a multivariate normal is fitted to the training
  rows whose label equals c (sample mean, sample covariance) and the prior
  P(c) is the fraction of training labels equal to c. A test row x is assigned
  to class 1 when

      log P(1) - 0.5*log|S1| - 0.5*(x-m1)' S1^-1 (x-m1)
        >  log P(0) - 0.5*log|S0| - 0.5*(x-m0)' S0^-1 (x-m0)

  and to class 0 otherwise (ties go to class 0). The comparison is done in the
  log domain so that large distances do not underflow to 0, and it includes the
  log-determinant term that the Gaussian density requires when the two class
  covariances differ.

  Parameters
  ----------
  XVec_train : array-like, shape (n_train, n_features)
      Training features; must be convertible to float.
  YVec_train : sequence of length n_train
      Training labels (0 or 1).
  XVec_test : array-like, shape (n_test, n_features)
      Test features; must be convertible to float.
  YVec_test : sequence of length n_test
      Test labels; a label equal to 1 is positive, anything else negative.

  Returns
  -------
  None
      TP/FP/TN/FN, sensitivity, specificity and accuracy are printed. A score
      with a zero denominator is printed as ``nan``.

  Raises
  ------
  ValueError
      If a class has fewer than two training samples (its covariance cannot
      be estimated).
  numpy.linalg.LinAlgError
      If a class covariance matrix is singular.
  """
  X_train = np.asarray(XVec_train, dtype=float)
  X_test = np.asarray(XVec_test, dtype=float)
  # going through list() turns object arrays of Python ints into plain numeric arrays
  y_train = np.asarray(list(YVec_train))
  y_test = np.asarray(list(YVec_test))

  log_scores = []
  for cls in range(2):
    class_X = X_train[y_train == cls]
    if class_X.shape[0] < 2:
      raise ValueError("LRT needs at least two training samples of class %d" % cls)

    prior = class_X.shape[0] / len(y_train)
    mean_val = np.mean(class_X, axis=0)
    # atleast_2d: np.cov returns a 0-d array when there is a single feature
    covariance_val = np.atleast_2d(np.cov(class_X.T))

    # computed once per class instead of once per test row
    covariance_inv = np.linalg.inv(covariance_val)
    _, log_det = np.linalg.slogdet(covariance_val)

    diff = X_test - mean_val
    mahalanobis = np.sum((diff @ covariance_inv) * diff, axis=1)
    log_scores.append(np.log(prior) - 0.5 * log_det - 0.5 * mahalanobis)

  Y_pred = (log_scores[1] > log_scores[0]).astype(int)

  positive = (y_test == 1)
  TP = int(np.sum(positive & (Y_pred == 1)))
  FN = int(np.sum(positive & (Y_pred != 1)))
  TN = int(np.sum(~positive & (Y_pred == 0)))
  FP = int(np.sum(~positive & (Y_pred != 0)))

  def ratio(numerator, denominator):
    # a test fold may lack one of the classes; report nan rather than crash
    return numerator / denominator if denominator else float('nan')

  sens = ratio(TP, TP + FN)
  specs = ratio(TN, TN + FP)
  accuracy = ratio(TP + TN, TP + TN + FP + FN)

  print("Metrics: ")
  print('--------------------------------------------------------------------------------')
  print("True Positives: ", TP)
  print("False Positives: ", FP)
  print("True Negatives: ", TN)
  print("False Negatives: ", FN)
  print("Sensitivity: ", sens)
  print("Specificity: ", specs)
  print("Accuracy: ", accuracy)
  print('--------------------------------------------------------------------------------')

In [None]:
# Q8: run the 5-fold cross-validated LRT evaluation on the assignment dataset
fiveCVLRT('data_q4_q5.xlsx')

Fold:  0
--------------------------------------------------------------------------------
Metrics: 
--------------------------------------------------------------------------------
True Positives:  41
False Positives:  2
True Negatives:  70
False Negatives:  0
Sensitivity:  1.0
Specificity:  0.9722222222222222
Accuracy:  0.9823008849557522
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Fold:  1
--------------------------------------------------------------------------------
Metrics: 
--------------------------------------------------------------------------------
True Positives:  53
False Positives:  0
True Negatives:  60
False Negatives:  0
Sensitivity:  1.0
Specificity:  1.0
Accuracy:  1.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Fold:  2
-------------