# BITS F312 - Neural Networks and Fuzzy Logic



# NNFL Assignment 2

In [None]:
# Mount Google Drive at /content/drive so that files stored on Drive can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Changing directory to the directory containing dataset
%cd drive/MyDrive/NNFL/Data_A2/

/content/drive/MyDrive/NNFL/Data_A2


In [None]:
# List the contents of the dataset directory (long format) to confirm the input files are present.
%ls -l

total 1234719
-rw------- 1 root root     637638 Oct 31 04:55 Assignment2.pdf
-rw------- 1 root root        259 Oct 31 04:57 class_label.mat
-rw------- 1 root root      40295 Oct 31 04:57 data55.xlsx
-rw------- 1 root root      21269 Oct 31 04:55 data5.xlsx
-rw------- 1 root root 1263647365 Oct 31 04:58 input.mat
drwx------ 2 root root       4096 Nov 18 05:58 [0m[01;34mlogs[0m/


In [None]:
# libraries required
import os
import random
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Suppressing warnings: install an 'ignore' filter so warnings are not shown in cell outputs.
import warnings
warnings.filterwarnings('ignore')

# Q8
Implement a support vector machine (SVM) classifier for the multi-class classification task. You can use the
one-vs-one and one-vs-all multiclass coding methods to create binary SVM models. Implement the SMO
algorithm to estimate the training parameters of the SVM, such as the Lagrange multipliers. You can use a
holdout approach (70% training, 10% validation, 20% testing) for evaluating the performance of the classifier.
The dataset (data5.xlsx) contains 7 features, and the last column is the output (class labels). Evaluate the
individual accuracy and the overall accuracy. You can use RBF and polynomial kernels. Evaluate the classification
performance of the multiclass SVM for each kernel function. (Packages such as scikit-learn, Keras, TensorFlow,
PyTorch, etc. are not allowed.)

In [None]:

# Load the Q8 dataset. The sheet has no header row, so pandas labels the
# columns 0..col-1; the last column holds the class label.
dataset = pd.read_excel('data5.xlsx', header=None)

row, col = dataset.shape
feats = col - 1  # number of feature columns; also the column label of the class column

# normalization: z-score every feature column, leaving the label column untouched
feature_cols = dataset.columns[dataset.columns != feats]
feature_values = dataset[feature_cols].astype(float)
dataset[feature_cols] = (feature_values - feature_values.mean(axis=0)) / feature_values.std(axis=0)

# splitting dataset into train, validation and test (70% / 10% / 20%).
# The shuffle is seeded so the split is reproducible across kernel restarts, and the
# frame is converted to a NumPy array before np.split (splitting a DataFrame directly
# is deprecated in recent pandas releases).
SPLIT_SEED = 42
shuffled = dataset.sample(frac=1, random_state=SPLIT_SEED).to_numpy()
n_samples = len(shuffled)
training_data, validation_data, testing_data = np.split(
    shuffled, [int(0.7 * n_samples), int(0.8 * n_samples)]
)

# Separate the feature matrix (first `feats` columns) from the label vector (last column).
training_data_X = training_data[:, :feats]
training_data_y = training_data[:, feats]
validation_data_X = validation_data[:, :feats]
validation_data_y = validation_data[:, feats]
testing_data_X = testing_data[:, :feats]
testing_data_y = testing_data[:, feats]

train_row, train_col = training_data_X.shape


class SupportVec():
    """Binary SVM trained with a simplified SMO (sequential minimal optimization) loop.

    Each pass visits every sample ``j``, pairs it with a randomly chosen sample
    ``i != j`` and updates the two Lagrange multipliers jointly. Prediction uses
    the explicit weight vector ``w`` and bias ``b``, so only the linear kernel
    is registered.

    Parameters
    ----------
    max_iter : int
        Maximum number of passes over the training set.
    kernel_type : str
        Key into ``self.kernels``; only ``'linear'`` is available.
    C : float
        Upper bound of the box constraint on every multiplier.
    epsilon : float
        Training stops once the L2 norm of the change in the multipliers over
        one full pass drops below this value.
    """

    def __init__(self, max_iter=10000, kernel_type='linear', C=1.0, epsilon=0.001):
        # Registry of kernel functions, looked up by name in ``fit``.
        self.kernels = {
            'linear' : self.linearKernel
        }
        self.max_iter = max_iter
        self.kernel_type = kernel_type
        self.C = C
        self.epsilon = epsilon

    def fit(self, X, y):
        """Estimate the multipliers, then store the model in ``self.w`` / ``self.b``.

        Parameters
        ----------
        X : array-like, indexed as ``X[i, :]`` (one row per sample)
        y : array-like of labels, indexed as ``y[i]``
            NOTE(review): ``predict`` thresholds with ``np.sign``, so labels are
            presumably expected to be -1 / +1 -- confirm against the dataset.

        Returns
        -------
        (support_vectors, count)
            Rows of ``X`` whose multiplier is strictly positive, and the number
            of passes performed. The same tuple is returned when ``max_iter``
            is reached (a message is printed in that case).
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n = X.shape[0]
        alpha = np.zeros((n))
        kernel = self.kernels[self.kernel_type]
        count = 0
        while True:
            count += 1
            alpha_prev = np.copy(alpha)

            for j in range(0, n):
                i = self.initRandomize(0, n-1, j) # Get random int i~=j
                x_i, x_j, Yi, Yj = X[i,:], X[j,:], y[i], y[j]
                # Squared distance between the two samples in kernel space;
                # a zero value would make the update below divide by zero.
                k_ij = kernel(x_i, x_i) + kernel(x_j, x_j) - 2 * kernel(x_i, x_j)
                if k_ij == 0:
                    continue
                jPrimeAlpha, iPrimeAlpha = alpha[j], alpha[i]
                (L, H) = self.computerLH(self.C, jPrimeAlpha, iPrimeAlpha, Yj, Yi)

                # Model implied by the current multipliers, used to evaluate the errors.
                self.w = self.computeWeights(alpha, y, X)
                self.b = self.computeBias(X, y, self.w)
                E_i = self.E(x_i, Yi, self.w, self.b)
                E_j = self.E(x_j, Yj, self.w, self.b)

                # Unconstrained step for alpha[j], then clip into the feasible segment [L, H].
                alpha[j] = jPrimeAlpha + float(Yj * (E_i - E_j))/k_ij
                alpha[j] = max(alpha[j], L)
                alpha[j] = min(alpha[j], H)

                # Move alpha[i] by the matching amount so that sum(alpha * y) is unchanged.
                alpha[i] = iPrimeAlpha + Yi*Yj * (jPrimeAlpha - alpha[j])

            diff = np.linalg.norm(alpha - alpha_prev)
            if diff < self.epsilon:
                break
            if(count >= self.max_iter):
                print("Iteration number exceeded the max of %d iterations" % (self.max_iter))
                # Fall through to the finalisation below so the model is still
                # usable and the return type stays consistent.
                break

        # Final model: refresh the weights first, then derive the bias from them.
        self.w = self.computeWeights(alpha, y, X)
        self.b = self.computeBias(X, y, self.w)

        alpha_idx = np.where(alpha > 0)[0]
        support_vectors = X[alpha_idx, :]
        return support_vectors, count

    def predict(self, X):
        """Return the integer sign of ``w . x + b`` for the sample(s) in ``X``."""
        return self.h(X, self.w, self.b)

    def computeBias(self, X, y, w):
        """Return the mean of ``y - w . x`` taken over all rows of ``X``."""
        biasVar = y - np.dot(w.T, X.T)
        return np.mean(biasVar)

    def computeWeights(self, alpha, Y, X):
        """Return the weight vector ``sum_k alpha[k] * Y[k] * X[k]``."""
        return np.dot(X.T, np.multiply(alpha, Y))

    def h(self, X, weight, bias):
        """Decision function: ``sign(weight . x + bias)`` cast to int."""
        return np.sign(np.dot(weight.T, X.T) + bias).astype(int)

    def E(self, Xk, Yk, weight, bias):
        """Error of sample ``Xk``: thresholded prediction minus the label ``Yk``.

        NOTE(review): the error is computed from the sign-thresholded output,
        not the raw decision value -- kept as in the original implementation.
        """
        return self.h(Xk, weight, bias) - Yk

    def computerLH(self, C, jPrimeAlpha, iPrimeAlpha, Yj, Yi):
        """Return the bounds ``(L, H)`` that keep the updated ``alpha[j]`` feasible.

        The bounds depend on whether the two labels differ or agree.
        """
        if(Yi != Yj):
            return (max (0, jPrimeAlpha - iPrimeAlpha), min(C, C - iPrimeAlpha + jPrimeAlpha))
        else:
            return (max (0, iPrimeAlpha + jPrimeAlpha - C), min(C, iPrimeAlpha + jPrimeAlpha))

    def initRandomize(self, a, b, count):
        """Draw a random integer in ``[a, b]`` (inclusive) that differs from ``count``.

        Sampling is retried at most 1000 times; if every draw equals ``count``
        (e.g. when ``a == b == count``) then ``count`` itself is returned.
        """
        candidate = count
        attempts = 0
        while candidate == count and attempts < 1000:
            candidate = random.randint(a, b)
            attempts += 1
        return candidate

    def linearKernel(self, x1, x2):
        """Linear kernel: the dot product of ``x1`` and ``x2``."""
        return np.dot(x1, x2.T)

# Train a linear-kernel SVM on the training split and evaluate it on the held-out test split.
model = SupportVec(max_iter=1000, epsilon=0.01)
model.fit(training_data_X, training_data_y)
test_pred = model.predict(testing_data_X)
print('Testing data')
# NOTE(review): `metrics` is not defined in this cell; it is assumed to be defined
# elsewhere in the notebook -- confirm it exists before this cell runs.
metrics(testing_data_y, test_pred)

Testing data
---------------------------------------------------------------------------
Sensitivity :  0.8653846153846154
Specificity :  0.8043478260869565
Accuracy ((TN+TP)/(TN+TP+FN+FP)) :  0.8367346938775511
