In [None]:
import math 
import numpy as np
import pandas as pd

def get_column(data,c):
    """Return a list holding element ``c`` of every row of ``data``.

    Rows are fetched as ``data[i]`` for ``i`` in ``0 .. len(data) - 1``, so
    ``data`` may be any container that supports ``len`` and integer indexing
    (for example a list of lists or a 2-D NumPy array).
    """
    return [data[row_idx][c] for row_idx in range(len(data))]

def set(a):
    """Threshold the entries of ``a`` at 0.5, in place, and return ``a``.

    Every entry that is ``>= 0.5`` is overwritten with 1 and every other entry
    with 0. The same object that was passed in is returned.

    NOTE: this function shadows the builtin ``set`` for the rest of the file.
    """
    for pos in range(len(a)):
        a[pos] = 1 if a[pos] >= 0.5 else 0
    return a

def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of the scalar ``x``.

    The expression is evaluated in a numerically stable way: ``math.exp`` is
    only ever called with a non-positive argument, so very negative inputs
    underflow to 0.0 instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For x < 0, exp(-x) can overflow; use the equivalent form e**x / (1 + e**x).
    e = math.exp(x)
    return e / (1 + e)

def poly_kernel(X1, X2, degree=3, coef0=1):
    """Return the polynomial kernel matrix ``(X1 @ X2.T + coef0) ** degree``.

    Parameters
    ----------
    X1, X2 : array-like
        Inputs passed to ``np.matmul(X1, np.transpose(X2))``; for 2-D inputs
        each row is one sample and both must have the same number of columns.
    degree : int or float, optional
        Exponent applied element-wise to the shifted Gram matrix. Defaults to
        3, the value that was previously hard-coded.
    coef0 : int or float, optional
        Constant added to every inner product before exponentiation. Defaults
        to 1, the value that was previously hard-coded.

    Returns
    -------
    The element-wise powered matrix; for 2-D inputs entry ``[i, j]`` is the
    kernel value between row ``i`` of ``X1`` and row ``j`` of ``X2``.
    """
    gram = np.matmul(X1, np.transpose(X2))
    return np.power(gram + coef0, degree)

def kernel_perceptron_pred(w, y, K, i):
    """Return the sign prediction (+1 or -1) for sample ``i``.

    The score is the sum over ``j`` of ``w[j] * y[j] * K[i, j]``. A strictly
    positive score gives 1; a zero or negative score gives -1.
    """
    weighted_labels = np.multiply(w, y)
    score = sum(np.multiply(weighted_labels, K[i, :]))
    return 1 if score > 0 else -1

def kernel_perceptron(x_tr, y_tr, iterations):
    """Train a kernel perceptron on ``x_tr``/``y_tr`` and return a weight vector.

    Parameters
    ----------
    x_tr : 2-D NumPy array
        Training samples, one per row (``x_tr.shape`` is read for both sizes).
    y_tr : 1-D array
        Training labels, indexed like the rows of ``x_tr``.
        ``kernel_perceptron_pred`` only ever returns +1 or -1, so a label with
        any other value is treated as a mistake on every pass.
    iterations : int
        Number of full passes over the training set.

    Returns
    -------
    1-D NumPy array of length ``x_tr.shape[1]``: the sum over the training
    samples of ``alpha[i] * y_tr[i] * x_tr[i]``, where ``alpha[i]`` is the
    number of mistakes made on sample ``i``.

    Side effects: prints the mistake counts and the resulting weights.

    NOTE(review): the mistake counts are learned with the Gram matrix from
    ``poly_kernel``, but the returned vector combines the *raw* input rows.
    For a non-linear kernel that vector does not reproduce the kernelised
    decision function - confirm this is what callers intend.
    """
    m = x_tr.shape[0]
    alpha = np.zeros(m)
    K = poly_kernel(x_tr, x_tr)
    for _ in range(iterations):
        for i in range(m):
            # Perceptron rule in dual form: count one more mistake for a
            # sample whenever the current model misclassifies it.
            if kernel_perceptron_pred(alpha, y_tr, K, i) != y_tr[i]:
                alpha[i] = alpha[i] + 1
    print('alpha: ' + str(alpha) + '\n')
    w = np.zeros(x_tr.shape[1])
    for i in range(m):
        # Use the training matrix passed in; the previous code read a
        # module-level variable ``x`` here instead of ``x_tr``.
        w += (alpha[i] * y_tr[i]) * x_tr[i, :]
    print('weights: ' +str(w))
    return w

#Normalization
def normalize(data, num_columns=5):
    """Standardise the first ``num_columns`` columns of ``data`` in place.

    Each of those columns is shifted by its mean and divided by its
    population standard deviation (``np.std`` with its default ``ddof``).

    Parameters
    ----------
    data : indexable 2-D container (list of lists or 2-D NumPy array)
        Read and written through ``data[row][col]``. The container is modified
        in place; no copy is made.
    num_columns : int, optional
        How many leading columns to standardise. Defaults to 5, the value that
        was previously hard-coded.

    Returns
    -------
    The same ``data`` object that was passed in, after modification.
    """
    n_rows = len(data)
    for col in range(num_columns):
        values = [data[row][col] for row in range(n_rows)]
        mean = np.mean(values)
        sd = np.std(values)
        # A constant column has zero spread; dividing by it would produce
        # nan/inf, so such a column is only centred (every entry becomes 0).
        scale = sd if sd != 0 else 1.0
        for row in range(n_rows):
            data[row][col] = (data[row][col] - mean) / scale
    return data

# Data loading and preparation.
# The spreadsheet is read without a header row, its rows are shuffled
# (sample(frac=1)) and the index is renumbered from 0.
data = pd.read_excel('data55.xlsx',header=None)
data = data.sample(frac=1).reset_index(drop=True)
#print(data)
#print(type(data))
data = data.to_numpy()
# Labels are taken from the last column. This is a basic slice, i.e. a view
# that shares memory with `data`.
y = data[:,-1]
# normalize() writes into its argument in place, so `y` changes too if the
# last column is among the columns it standardises.
# NOTE(review): confirm that standardising the label column is intended.
data = normalize(data)
# Features are every column except the last, with a leading column of ones
# prepended as the bias term.
x = data[:,:-1]
x1 = np.ones(len(x))
x = np.column_stack((x1, x))

# Number of passes over the training set handed to kernel_perceptron.
iterations = 100
# NOTE(review): this value is not read by any code visible in this file.
alpha = 0.01

# Split by row position: the first 70% for training, the next 10% for
# validation and the remaining 20% for testing.
train_size = int(0.7 * len(x))
val_limit = int(0.8*len(x))
x_tr = x[:train_size]
x_val = x[train_size:val_limit]
x_ts = x[val_limit:]
y_tr = y[:train_size]
y_val = y[train_size:val_limit]
y_ts = y[val_limit:]

# NOTE(review): `yp` is reassigned before it is next read, and `m` is not read
# again at module level in the visible code.
yp = np.zeros(len(x_tr))
m = x_tr.shape[0]

# Train the perceptron; `w` is the weight vector used for the evaluations below.
w = kernel_perceptron(x_tr,y_tr, iterations)
# NOTE(review): `K` is not read again in the visible code.
K = poly_kernel(x_tr, x_tr)

# Validation-set evaluation with the weight vector `w`.
x = x_val
# Score every row as sigmoid(row . w).
yp = [sigmoid(np.matmul(row, np.transpose(w))) for row in x]
# `set` is this file's threshold helper (it shadows the builtin): it rewrites
# its argument in place, mapping entries >= 0.5 to 1 and all others to 0.
y_val = set(y_val)
yp = set(yp)
print('predicted: ' +str(yp) + '\n')
y_actual = pd.Series(y_val, name='Actual')
y_pred = pd.Series(yp, name='Predicted')
confmat = pd.crosstab(y_actual, y_pred)
print(confmat)
# crosstab only emits rows/columns for classes that actually occur, so a split
# with a missing class would make the [0]/[1] lookups below fail. Reindex to
# the full 2x2 layout (rows = actual 0/1, columns = predicted 0/1) and fill
# the absent cells with 0.
confmat = np.asarray(confmat.reindex(index=[0, 1], columns=[0, 1], fill_value=0))
tp1 = confmat[1][1]
tn1 = confmat[0][0]
fp1 = confmat[0][1]
fn1 = confmat[1][0]

validation_accuracy = (tp1+tn1)/(tp1+tn1+fp1+fn1)
print('Validation Accuracy : ' + str(validation_accuracy) + '\n')

# Test-set evaluation with the weight vector `w`.
x = x_ts
# Score every row as sigmoid(row . w).
yp = [sigmoid(np.matmul(row, np.transpose(w))) for row in x]
# `set` is this file's threshold helper (it shadows the builtin): it rewrites
# its argument in place, mapping entries >= 0.5 to 1 and all others to 0.
y_ts = set(y_ts)
yp = set(yp)
print('predicted: ' +str(yp) + '\n')
y_actual = pd.Series(y_ts, name='Actual')
y_pred = pd.Series(yp, name='Predicted')
confmat = pd.crosstab(y_actual, y_pred)
print(confmat)
# crosstab only emits rows/columns for classes that actually occur, so a split
# with a missing class would make the [0]/[1] lookups below fail. Reindex to
# the full 2x2 layout (rows = actual 0/1, columns = predicted 0/1) and fill
# the absent cells with 0.
confmat = np.asarray(confmat.reindex(index=[0, 1], columns=[0, 1], fill_value=0))
tp = confmat[1][1]
tn = confmat[0][0]
fp = confmat[0][1]
fn = confmat[1][0]

test_accuracy = (tp+tn)/(tp+tn+fp+fn)
sensitivity = tp/(tp+fn)
specificity = tn/(tn+fp)

print('\nTest Accuracy : ' + str(test_accuracy))
print('sensitivity : ' + str(sensitivity))
print('specificity : ' + str(specificity))


# NOTE: with only 100 instances (10 for validation, 20 for testing) a split can
# end up with a single class among the actual or predicted labels. The test
# confusion matrix above is reindexed to the full 2x2 layout, so its lookups
# stay valid in that case and the absent cells simply count as 0.

alpha: [0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

weights: [ 0.          0.78310769 -1.68893681  1.52639599  1.5973046 ]
predicted: [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]

Predicted  0  1
Actual         
0.0        6  0
1.0        0  4
Validation Accuracy : 1.0

predicted: [1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1]

Predicted   0   1
Actual           
0.0        10   0
1.0         0  10

Test Accuracy : 1.0
sensitivity : 1.0
specificity : 1.0
