In [1]:
import pandas as pd
import numpy as np
from math import log

In [2]:
data = pd.read_csv('datasets/Classification/xrayfull.csv')

In [3]:
data

Unnamed: 0,Image Index,fold,Cardiomegaly,Emphysema,Effusion,Hernia,Infiltration,Mass,Nodule,Atelectasis,Pneumothorax,Pleural_Thickening,Pneumonia,Fibrosis,Edema,Consolidation
0,00000001_000.png,train,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,00000001_001.png,train,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,00000001_002.png,train,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,00000002_000.png,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,00000003_000.png,train,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112115,00030801_001.png,train,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
112116,00030802_000.png,val,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112117,00030803_000.png,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112118,00030804_000.png,train,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
data.drop(columns=['Image Index'], inplace=True)

In [5]:
data.rename(columns = {"fold": "Class"}, inplace=True)

In [6]:
clss = data["Class"].unique()
print(clss)

['train' 'test' 'val']


In [7]:
cols = {}
cols['Class'] = {val : index for index, val in enumerate(data['Class'].unique())}
data['Class'] = [cols['Class'][val] for val in data['Class']]
cols

{'Class': {'train': 0, 'test': 1, 'val': 2}}

In [8]:
trainData = data.sample(frac=0.8, random_state=0)

In [9]:
testData = data.drop(trainData.index)

In [10]:
y = trainData['Class']
x = trainData.drop(columns = ['Class'])

In [11]:
def backpropagation(x,y,learning_rate,n1,n2,n3,iterations=1000):
    w1 = np.ones(n1*n2)
    w1 = w1.reshape(n1,n2)
    w2 = np.ones(n2*n3)
    w2 = w2.reshape(n2,n3)
    for i in range(len(w1)):
        for j in range(len(w1[0])):
            w1[i][j] = 0.5
    x = x.to_numpy()
    y = y.to_numpy()
    n = len(x)
    for i in range(iterations):
        for d in range(n):
            #Forward Pass
            hnet = np.matmul(x[d],w1)
            hout = 1/(1 + np.exp((-1)*hnet)).astype(np.longdouble)
            onet = np.matmul(hout,w2)
            oout = 1/(1 + np.exp((-1)*onet)).astype(np.longdouble)
            #Backward pass
            delta_o = np.multiply(oout,np.multiply(np.subtract(1,oout),np.subtract(y[d],oout)))
            inter = np.matmul(w2,delta_o)
            delta_h = np.multiply(hout,np.multiply(np.subtract(1,hout),inter))
            #Update weights
            for i in range(len(w1)):
                for j in range(len(w1[0])):
                    delta = learning_rate*delta_h[j]*x[d][i]
                    w1[i][j] += delta
            for i in range(len(w2)):
                for j in range(len(w2[0])):
                    delta = learning_rate*delta_o[j]*hout[j]
                    w2[i][j] += delta
    return w1, w2

In [12]:
w1,w2 = backpropagation(x,y,0.1,14,3,1)

In [13]:
def confusion():
    classes = {}
    clss = trainData["Class"].unique()
    i = 0
    for clas in clss:
        classes[clas] = i
        i += 1
    n = len(classes)
    confmat = np.zeros(n*n)
    confmat = confmat.reshape(n,n)
    for _, d in testData.iterrows():
        y = d['Class']
        x = d.drop('Class')
        hnet = np.matmul(x,w1)
        hout = 1/(1 + np.exp((-1)*hnet))
        onet = np.matmul(hnet,w2)
        oout = 1/(1 + np.exp((-1)*onet))
        if oout > 0.7:
            predy = 2
        elif oout < 0.4:
            predy = 0
        else:
            predy = 1
        confmat[classes[predy]][classes[y]] += 1
    return confmat 

In [14]:
def accuracy(mat):
    n = len(mat)
    num = 0
    den = 0
    for i in range(n):
        num += mat[i][i]
        den += sum(mat[i])
    return num/den

In [15]:
def precision(mat, key):
    n = len(mat)
    num = mat[key][key]
    den = 0
    for i in range(n):
        den += mat[i][key]
    return num/den

In [16]:
def recall(mat, key):
    n = len(mat)
    num = mat[key][key]
    den = 0
    for i in range(n):
        den += mat[key][i]
    return num/den

In [17]:
def F1Score(p,r):
    return (2*(p*r))/(p + r)

In [18]:
mat = confusion() 
acc = accuracy(mat)
pre = precision(mat, 0)
rec = recall(mat, 0)
fscore = F1Score(pre, rec)
print(acc, pre, rec, fscore)

0.2009900107028184 0.002291242362525458 0.782608695652174 0.004569107754791218


In [19]:
print(mat)

[[3.6000e+01 4.0000e+00 6.0000e+00]
 [1.5637e+04 4.4610e+03 2.2210e+03]
 [3.9000e+01 1.0000e+01 1.0000e+01]]
