# Classification of Liver Patients

Liver patients are identified using the ILPD (Indian Liver Patient Dataset) dataset.

Importing Libraries

In [76]:
from copy import deepcopy
from sklearn.pipeline import Pipeline
from sklearn import svm
from sklearn.feature_selection import SelectFromModel
import numpy as np
import csv
import pandas as pd
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [77]:
def read_file(filename):
    """Load a header-less CSV and integer-encode its second column.

    The file is read with ``header=None``, so columns are labelled by
    position (0, 1, 2, ...). Column 1 is replaced by the integer codes
    produced by ``pd.factorize`` (codes are assigned in order of first
    appearance); the mapping back to the original values is discarded.

    Args:
        filename: Path (or any source ``pd.read_csv`` accepts) of the CSV.

    Returns:
        The loaded DataFrame with column 1 integer-encoded.
    """
    frame = pd.read_csv(filename, header=None)
    codes, _uniques = pd.factorize(frame[1])
    frame[1] = codes
    return frame

# Load the ILPD CSV (path is relative to the working directory) and preview
# the first five rows. `df` is kept at module level; later cells read it.
df = read_file('ILPD.csv')
print(df.head(5))


   0   1     2    3    4   5    6    7    8     9   10
0  65   0   0.7  0.1  187  16   18  6.8  3.3  0.90   1
1  62   1  10.9  5.5  699  64  100  7.5  3.2  0.74   1
2  62   1   7.3  4.1  490  60   68  7.0  3.3  0.89   1
3  58   1   1.0  0.4  182  14   20  6.8  3.4  1.00   1
4  72   1   3.9  2.0  195  27   59  7.3  2.4  0.40   1


In [78]:
def stratified_sampling(df, class1_p, class2_p):
    """Randomly split ``df`` into train/test sets, one class at a time.

    Rows are separated by the label in column 10 (1 or 2). Each row of a
    class is independently assigned to the training set with probability
    ``class_p / 10``; the remaining rows go to the test set. Because the
    assignment is random, the resulting sizes vary from call to call.

    Args:
        df: DataFrame whose column 10 holds the class label (1 or 2).
        class1_p: Training share for label 1, on a 0-10 scale.
        class2_p: Training share for label 2, on a 0-10 scale.

    Returns:
        ``(train, test)`` DataFrames. Both carry an extra boolean column 11
        recording the assignment (True = train), as the original did.
    """
    # Work on copies so the marker column is never written onto a view of
    # the caller's frame.
    df_yes = df[df[10] == 1].copy()
    df_no = df[df[10] == 2].copy()

    # Draw order (class 1 first, then class 2) is kept so seeded runs
    # reproduce the same split.
    df_yes[11] = np.random.uniform(0, 1, len(df_yes)) <= class1_p / 10
    df_no[11] = np.random.uniform(0, 1, len(df_no)) <= class2_p / 10

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # supported equivalent and keeps the original row labels.
    train = pd.concat([df_yes[df_yes[11]], df_no[df_no[11]]])
    test = pd.concat([df_yes[~df_yes[11]], df_no[~df_no[11]]])
    return train, test

# Build one split: the arguments are divided by 10 inside
# stratified_sampling, so label-1 rows go to training with probability 0.6
# and label-2 rows with probability 0.9. Then print the two set sizes.
train, test = stratified_sampling(df, 6, 9)
print(len(train), len(test))
    

382 197


In [79]:
def calc_metrics(pred, test):
    """Compute accuracy, sensitivity, specificity and precision.

    Label 1 is treated as the positive class and label 2 as the negative
    class. Predictions are paired with the true labels in ``test[10]`` by
    position.

    Args:
        pred: Sequence of predicted labels, in the same order as ``test``.
        test: Mapping/DataFrame whose key ``10`` yields the true labels.

    Returns:
        ``(accuracy, sensitivity, specificity, precision)`` as floats. A
        metric whose denominator is zero (e.g. no negative samples in the
        test set) is returned as NaN instead of raising ZeroDivisionError.
    """
    tp = tn = fp = fn = 0
    for actual, predicted in zip(test[10], pred):
        if actual == predicted:
            if actual == 1:
                tp += 1
            else:
                tn += 1
        elif actual == 1 and predicted == 2:
            fn += 1
        elif actual == 2 and predicted == 1:
            fp += 1
        # Any other mismatching pair (labels outside {1, 2}) is not counted,
        # matching the original behaviour.

    def _ratio(numerator, denominator):
        # An undefined metric is reported as NaN rather than crashing.
        return numerator / denominator if denominator else float("nan")

    accuracy = _ratio(tp + tn, tp + tn + fp + fn)
    sensitivity = _ratio(tp, tp + fn)
    specificity = _ratio(tn, fp + tn)
    precision = _ratio(tp, tp + fp)

    return accuracy, sensitivity, specificity, precision

In [80]:
def get_pred(train, test, kernel, C=1.0, gamma=0.7):
    """Fit an SVM on ``train`` and predict labels for ``test``.

    The first ten columns of ``train`` are used as features and column 10
    as the target. The feature columns are taken from ``train`` itself
    rather than from a module-level DataFrame, so the function works on any
    frame with that layout.

    Args:
        train: Training DataFrame (features in its first ten columns,
            label in column 10).
        test: DataFrame containing the same feature columns.
        kernel: Kernel name passed to ``svm.SVC`` (e.g. ``'rbf'``).
        C: Regularisation parameter passed to ``svm.SVC``.
        gamma: Kernel coefficient passed to ``svm.SVC``.

    Returns:
        The array of predicted labels for the rows of ``test``.
    """
    features = train.columns[0:10]
    clf = svm.SVC(kernel=kernel, gamma=gamma, C=C)
    clf.fit(train[features], train[10])
    return clf.predict(test[features])

In [84]:
# Fit an RBF-kernel SVM on the current train/test split and print
# accuracy, sensitivity, specificity and precision (in that order).
pred_rbf = get_pred(train, test, 'rbf')
acc, sens, spec, prec = calc_metrics(pred_rbf, test)
print(acc, sens, spec, prec)

0.8934010152284264 1.0 0.045454545454545456 0.8928571428571429


In [83]:
def get_all_metrics(kernel):
    """Evaluate an SVM kernel over a grid of sampling proportions.

    For every pair ``(i, j)`` with ``i`` and ``j`` in 4..9, the module-level
    ``df`` is split twice with ``stratified_sampling(df, i, j)``, a model is
    trained and scored on each split, and the two scores are averaged.

    Args:
        kernel: Kernel name forwarded to ``get_pred``.

    Returns:
        Four 6x6 nested lists ``(accuracy, sensitivity, specificity,
        precision)``; the outer index follows ``i`` and the inner index
        follows ``j``.
    """
    proportions = range(4, 10)
    repeats = 2

    # One table per metric, in the order calc_metrics returns them.
    tables = ([], [], [], [])
    for class1_p in proportions:
        rows = ([], [], [], [])
        for class2_p in proportions:
            runs = []
            for _ in range(repeats):
                train_part, test_part = stratified_sampling(df, class1_p, class2_p)
                predicted = get_pred(train_part, test_part, kernel)
                runs.append(calc_metrics(predicted, test_part))
            # Transpose the runs so each metric is averaged across repeats.
            for row, values in zip(rows, zip(*runs)):
                row.append(np.mean(values))
        for table, row in zip(tables, rows):
            table.append(row)
    return tables

# Sweep the sampling grid with the RBF kernel. All four metric tables are
# kept, but only the accuracy table is printed.
a, s, sp, pr = get_all_metrics('rbf')
print(a)         

[[0.72003380577223841, 0.75690406976744184, 0.80373831775700932, 0.86095943165227795, 0.8929906660901854, 0.94164560396407204], [0.70221013654246711, 0.71474453451240483, 0.73833802854157571, 0.79441062093586634, 0.86467853134519801, 0.91988889334563395], [0.63730410047230568, 0.67685680829708939, 0.69670219853431048, 0.74328210555376617, 0.84289617486338797, 0.9019084779954345], [0.5193721719457014, 0.58980724910957472, 0.67089678510998307, 0.69041057169104647, 0.7693812668134018, 0.89999053388867856], [0.50522875816993462, 0.54406520631686195, 0.61086247086247081, 0.61302095596891926, 0.69476282842532178, 0.83436284603065658], [0.28654060066740827, 0.32697671598350714, 0.39298245614035088, 0.45665286812536965, 0.60247747747747749, 0.7142857142857143]]
