<a href="https://colab.research.google.com/github/Param-Bhatt/NNFL-Assignment/blob/master/2/Q4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h3> Ignore warnings

In [3]:
import warnings
warnings.filterwarnings("ignore")

<h3>Mounting drive

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<h3> Navigating to respective directory

In [5]:
%cd "/content/drive/My Drive/NNFL-Assignments/2"
!ls

/content/drive/My Drive/NNFL-Assignments/2
assignment2.gdoc  class2_images    class_label.mat  data5.mat	  input.mat
assignment2.pdf   class3_images    data55.xlsx	    input_a2.mat  label.mat
class1_images	  class_label.csv  data5.csv	    input.gsheet  Q8.ipynb


<h3>Importing all libraries required

In [6]:
import numpy as np
from scipy.io import loadmat
import random
from sklearn.metrics import confusion_matrix as cm

<h3>Loading the data

In [7]:
f = loadmat('data5.mat')
D = f['x']
np.random.shuffle(D)

def init_data(data):
    X = np.array(data[:2148, :-1], dtype = float)
    y = np.array(data[:2148, -1], dtype = int)
    X = (X - X.mean(axis = 0))/X.std(axis = 0)
    return X, y

X_tot, y_tot = init_data(D)

train_X = X_tot[ : 1600]
train_y = y_tot[ : 1600]
test_X = X_tot[1600 : 2148]
test_y = y_tot[1600 : 2148]


<h3>Kernel functions

In [8]:
def gaussian(x,center,sigma,beta):
    #Gaussian Kernel
    return np.exp(-beta * (np.linalg.norm(x - center)) ** 2)

def multi_quadric(x, center, sigma, beta):
    #Multi Quadratic Kernel
    return ((np.linalg.norm(x - center)) ** 2 + sigma ** 2) ** 0.5

def linear(x, center, sigma, beta):
    #Linear Kernel
    return np.linalg.norm(x - center)

def fit_rbf(train_X, train_y, test_X, test_y):
    centers, labels = kmeans(train_X,K=550)

    sigma = np.zeros((len(centers), 1))
    beta = np.zeros((len(centers), 1))
    cluster_size = np.zeros((len(centers), 1))

    for i in range(len(train_X)):
        sigma[labels[i]] += np.linalg.norm(train_X[i] - centers[labels[i]])
        cluster_size[labels[i]] += 1

    sigma /= cluster_size
    beta = 1 / 2 * (sigma * sigma + 1e-6)

    H = np.zeros((len(train_X), len(centers)))

    for i in range(len(train_X)):
        for j in range(len(centers)):
            H[i, j] = linear(train_X[i], centers[j], sigma[j], beta[j])

    W = np.dot(np.linalg.pinv(H), train_y)

    #Test run
    H_test = np.zeros([len(test_X), len(centers)])
    for i in range(len(test_X)):
        for j in range(len(centers)):
            H_test[i, j] = linear(test_X[i], centers[j], sigma[j], beta[j])

    y_pred = np.dot(H_test, W)
    for i in range(len(y_pred)):
        y_pred[i] = 1 if y_pred[i]>=0.5 else 0
        
    accuracy = 0    
    for i in range(len(y_pred)):
        if y_pred[i] == test_y[i]:
            accuracy +=1
    accuracy /= len(y_pred)
    print(accuracy)
    return y_pred, accuracy

<h3>KMeans 

In [9]:
def kmeans(X,K=3,max_iter=5000):
    '''
    K= number of clusters
    '''
    m,n = np.shape(X)
    def create_dict(n):
        a = {}
        for i in range(n):
            a[f'{i}'] = []
        return a 
    def calc_distance(A,B):
       return np.linalg.norm(A-B,2)
    
    def return_cluster(C_ind,ind):
        '''
        Returns the cluster that the index belongs to
        '''
        for i in C_ind.keys():
            if ind in C_ind[i]:
                return int(i)


    #random centers
    index_random = np.random.randint(0,m,K)
    C = []
    #Initializing random cluster centers
    for i in range(K):
        C.append(X[index_random[i]])
    #New Centers
    NC = np.zeros((K,n))

    #converged state bool
    st = False
    iter = 0
    while True:
        if iter == max_iter or st:
            break
            
        C_ind = create_dict(K)
        for i in range(m):
            d = []
            for j in range(K):
                d.append(calc_distance(X[i],C[j]))
            index = np.argmin(d)
            C_ind[f'{index}'].append(i) 

        for i in range(K):
            c1_ind = C_ind[f'{i}']
            if len(c1_ind):
                for j in range(len(c1_ind)):
                    NC[i] += X[c1_ind[j]]
                NC[i] /= len(c1_ind)

        if (C==NC).all():
            st = True
        
        C = NC
        iter += 1
    
    # return np.array(C),(C_ind)
    labels = []
    for i in range(m):
        labels.append(return_cluster(C_ind,i))
    return C,labels

<h3>Accuracy and K-fold cross validation

In [11]:
%%time

y_pred, _ = fit_rbf(train_X, train_y, test_X, test_y)
for i in range(len(y_pred)):
    y_pred[i] = 1 if y_pred[i] > 0.5 else 0

confmat = cm(test_y, y_pred)

Accuracy = (confmat[0][0]+confmat[1][1])/len(y_pred)
Sensitivity = (confmat[1][1])/(confmat[1][0] + confmat[1][1])
Specificity = (confmat[0][0])/(confmat[0][0] + confmat[0][1])
print("Confusion Matrix:")
print(confmat)
print("\n")
print(f"Accuracy: {Accuracy}\nSensitivity: {Sensitivity}\nSpecificity: {Specificity}\n")

avg_acc = 0
# K - fold cross validation



for k in range(5):
    X = X_tot[0 : 1718]
    y = y_tot[0 : 1718]
    X_val = X_tot[1718 :]
    y_val = y_tot[1718 :]
    print(f'Fold {k+1}:')
    print("Accuracy: ", end="")
    _, acc = fit_rbf(X, y, X_val, y_val)
    print("===================")
    avg_acc += acc
    X_tot[0 : 430] = X_val
    X_tot[430 : ] = X
    y_tot[0 : 430] = y_val
    y_tot[430 : ] = y

avg_acc /= 5
print(f'\nAverage Accuracy: {avg_acc*100}%')

0.9635036496350365
Confusion Matrix:
[[251  13]
 [  7 277]]


Accuracy: 0.9635036496350365
Sensitivity: 0.9753521126760564
Specificity: 0.9507575757575758

Fold 1:
Accuracy: 0.9558139534883721
Fold 2:
Accuracy: 0.9418604651162791
Fold 3:
Accuracy: 0.9534883720930233
Fold 4:
Accuracy: 0.9697674418604652
Fold 5:
Accuracy: 0.958139534883721

Average Accuracy: 95.58139534883722%
CPU times: user 2min 3s, sys: 2.06 s, total: 2min 5s
Wall time: 2min 1s
