In [2]:
import math 
import numpy as np
import pandas as pd

def get_column(data,c):
    column = []
    for i in range(len(data)):
        column.append(data[i][c])
    return column

def set(y):
    for i in range(len(y)):
        if (y[i]>=0.5):
            y[i] = 1
        if (y[i]<0.5):
            y[i] = 0
    return y

def sigmoid(x):
    return 1 / (1 + math.exp(-x))

def nlp(x_tr, y_tr, iterations, alpha):
    m = x_tr.shape[0]
    a = np.random.normal(0, 1, size=(5))
    w1 = a[0]
    w2 = a[1]
    w3 = a[2]
    w4 = a[3]
    b = a[4]
    hyp = 0
    for iter in range(iterations):
      for i in range(m):
        hyp += (w1*x_tr[i,0]) + (w2*x_tr[i,1]) + (w3*x_tr[i,2]) + (w4*x_tr[i,3]) + b
        hyp = sigmoid(hyp)
        if (hyp >= 0.5):
          hyp = 1
        else:
          hyp = 0
        if (hyp != y_tr[i]):
          w1 += (alpha * y_tr[i] * x_tr[i,0])
          w2 += (alpha * y_tr[i] * x_tr[i,1])
          w3 += (alpha * y_tr[i] * x_tr[i,2])
          w4 += (alpha * y_tr[i] * x_tr[i,3])
          b += (alpha * y_tr[i])
    return w1, w2, w3, w4, b

#Normalization
def normalize(data):
    normalized_data = data
    for i in range(5):
        maxval = max(get_column(data,i))
        minval = min(get_column(data,i))
        for j in range(len(data)):
            normalized_data[j][i] = (data[j][i]-minval)/(maxval-minval)
    return normalized_data

#Data
data = pd.read_excel('data55.xlsx',header=None)
data = data.sample(frac=1).reset_index(drop=True)
#print(data)
#print(type(data))
data = data.to_numpy()
print(data.shape)
y = data[:,-1]
data = normalize(data)
x = data[:,:-1]

iterations = 1000
alpha = 0.01

#Split into testing and training sets
train_size = int(0.7 * len(x))
val_limit = int(0.8*len(x))
x_tr = x[:train_size]
x_val = x[train_size:val_limit]
x_ts = x[val_limit:]
y_tr = y[:train_size]
y_val = y[train_size:val_limit]
y_ts = y[val_limit:]

w1,w2,w3,w4, b = nlp(x_tr,y_tr, iterations, alpha)
print(w1,w2,w3,w4)

x = x_val
yp = [0 for i in range(len(x))]
for i in range(len(x)):
    yp[i] = (w1*x[i,0]) + (w2*x[i,1]) + (w3*x[i,2]) + (w4*x[i,3]) + b
    yp[i] = sigmoid(yp[i])
y_val = set(y_val)
yp = set(yp)
print(yp)
y_actual = pd.Series(y_val, name='Actual')
y_pred = pd.Series(yp, name='Predicted')
confmat = pd.crosstab(y_actual, y_pred)
print(confmat)
confmat = np.asarray(confmat)
tp1 = confmat[1][1]
tn1 = confmat[0][0]
fp1 = confmat[0][1]
fn1 = confmat[1][0]

validation_accuracy = (tp1+tn1)/(tp1+tn1+fp1+fn1)
print('Validation Accuracy : ' + str(validation_accuracy) + '\n')

x = x_ts
yp = [0 for i in range(len(x))]
for i in range(len(x)):
    yp[i] = (w1*x[i,0]) + (w2*x[i,1]) + (w3*x[i,2]) + (w4*x[i,3]) + b
    yp[i] = sigmoid(yp[i])
y_ts = set(y_ts)
yp = set(yp)
print(yp)
y_actual = pd.Series(y_ts, name='Actual')
y_pred = pd.Series(yp, name='Predicted')
confmat = pd.crosstab(y_actual, y_pred)
print(confmat)
confmat = np.asarray(confmat)
tp = confmat[1][1]
tn = confmat[0][0]
fp = confmat[0][1]
fn = confmat[1][0]

test_accuracy = (tp+tn)/(tp+tn+fp+fn)
sensitivity = tp/(tp+fn)
specificity = tn/(tn+fp)

print('\nTest Accuracy : ' + str(test_accuracy))
print('sensitivity : ' + str(sensitivity))
print('specificity : ' + str(specificity))


#Since there are only 100 instances(10 instances for val and 20 for test) 
#and data.sample sometimes doesn't shuffle the data properly,
#it is possible to get an error in confmat index, please run till the shuffling is acceptable

(100, 5)
1.1238075613816427 -1.9073457989392595 -0.6912392335192649 1.3406880806726353
[0, 1, 0, 0, 1, 0, 1, 0, 0, 0]
Predicted  0  1
Actual         
0.0        7  0
1.0        0  3
Validation Accuracy : 1.0

[0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0]
Predicted  0   1
Actual          
0.0        9   0
1.0        1  10

Test Accuracy : 0.95
sensitivity : 0.9090909090909091
specificity : 1.0
