In [59]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [60]:
# Data Collection
# Read the dataset from CKD.csv (path is relative to the notebook's working directory).

raw_data = pd.read_csv("CKD.csv")
raw_data

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,2.000000,76.459948,c,3.0,0.0,normal,abnormal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,yes,no,yes
1,3.000000,76.459948,c,2.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,34.000000,12300.000000,4.705597,no,no,no,yes,poor,no,yes
2,4.000000,76.459948,a,1.0,0.0,normal,normal,notpresent,notpresent,99.000000,...,34.000000,8408.191126,4.705597,no,no,no,yes,poor,no,yes
3,5.000000,76.459948,d,1.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,poor,yes,yes
4,5.000000,50.000000,c,0.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,36.000000,12400.000000,4.705597,no,no,no,yes,poor,no,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,219.000000,...,37.000000,9800.000000,4.400000,no,no,no,yes,poor,no,yes
395,51.492308,70.000000,c,0.0,2.0,normal,normal,notpresent,notpresent,220.000000,...,27.000000,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes
396,51.492308,70.000000,c,3.0,0.0,normal,normal,notpresent,notpresent,110.000000,...,26.000000,9200.000000,3.400000,yes,yes,no,poor,poor,no,yes
397,51.492308,90.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,207.000000,...,38.868902,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes


In [61]:
# Working name for the raw frame. This is an alias of the same object, not a copy.
df = raw_data

In [62]:
# One-hot encode the non-numeric columns as 0/1 integers. drop_first=True drops
# the first level of every encoded column, so k categories become k-1 columns.
df = pd.get_dummies(df, dtype=int, drop_first=True)
df


Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.000000,76.459948,2.0,0.0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,1,0,0,0,0,0,1,0,0,1
2,4.000000,76.459948,1.0,0.0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,1,0,0,0,0,0,1,0,0,1
3,5.000000,76.459948,1.0,0.0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,1,0,0,0,0,0,1,0,1,1
4,5.000000,50.000000,0.0,0.0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,1,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0.0,0.0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,1,0,0,0,0,0,1,0,0,1
395,51.492308,70.000000,0.0,2.0,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,1,0,0,1,1,0,1,0,1,1
396,51.492308,70.000000,3.0,0.0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,1,0,0,1,1,0,0,0,0,1
397,51.492308,90.000000,0.0,0.0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,1,0,0,1,1,0,1,0,1,1


In [63]:
# Input / output split: every column except the target is a feature.
indep_x = df.drop(columns=['classification_yes'])
dep_y = df.loc[:, 'classification_yes']

In [64]:
# PCA Model Creation
def pca(x_train,x_test,n):
    """Fit a PCA with ``n`` components on the training features and project both splits.

    The decomposition is fitted on ``x_train`` only; ``x_test`` is projected
    with the already-fitted components.

    Returns a 4-tuple: projected training data, projected test data, the
    explained-variance ratio of each component, and the fitted PCA object.
    """
    reducer = PCA(n_components=n)
    train_projected = reducer.fit_transform(x_train)
    test_projected = reducer.transform(x_test)
    return train_projected, test_projected, reducer.explained_variance_ratio_, reducer

In [65]:
def split_scaler(indep_x,dep_y,test_size=0.2,random_state=0):
    """Split the data into train/test parts and standardise the features.

    The scaler is fitted on the training part only and then applied to the
    test part, so no statistics from the test rows reach the fitted scaler.

    Parameters
    ----------
    indep_x : feature table accepted by ``train_test_split``.
    dep_y : target values aligned with ``indep_x``.
    test_size : forwarded to ``train_test_split``; defaults to 0.2, the value
        that was previously hard-coded.
    random_state : seed forwarded to ``train_test_split``; defaults to 0, the
        value that was previously hard-coded.

    Returns
    -------
    x_train, x_test : the standardised feature arrays.
    y_train, y_test : the matching target splits.
    sc : the fitted ``StandardScaler``, for reuse on new samples.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        indep_x, dep_y, test_size=test_size, random_state=random_state
    )
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)
    return x_train,x_test,y_train,y_test,sc

def cm_prediction(classifier,x_test_pca,y_test):
    """Score a fitted classifier on the test split.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    x_test_pca : test features in the space the classifier was trained on.
    y_test : true labels for ``x_test_pca``.

    Returns
    -------
    (classifier, accuracy) : the same classifier object and its accuracy on
    the test split as computed by ``accuracy_score``.
    """
    y_pred = classifier.predict(x_test_pca)
    # The confusion matrix and classification report used to be computed here
    # and then thrown away; only the accuracy is part of the return value.
    return classifier, accuracy_score(y_test, y_pred)

def svm_nonlinear(x_train_pca,y_train,x_test_pca,y_test):
    """Train an RBF-kernel SVC on the training data and score it on the test data.

    Returns whatever ``cm_prediction`` returns for the fitted model.
    """
    # fit() returns the estimator itself, so construction and training chain.
    rbf_svm = SVC(kernel='rbf', random_state=0).fit(x_train_pca, y_train)
    return cm_prediction(rbf_svm, x_test_pca, y_test)

In [66]:
def pca_classification(accsvmnl):
    """Wrap the SVM accuracy in a one-row summary table.

    The result has a single row labelled ``'PCA'`` and a single column
    ``'SVM_nl'`` holding ``accsvmnl``.
    """
    return pd.DataFrame({'SVM_nl': accsvmnl}, index=['PCA'])


In [67]:
# Split into train/test sets and standardise the features; `sc` is the fitted scaler.
x_train, x_test, y_train, y_test, sc = split_scaler(indep_x,dep_y)

In [68]:
# Reduce the scaled features to 2 principal components.
# NOTE(review): the 4th return value rebinds the name `pca` from the helper
# function to the fitted PCA object, so this cell cannot be re-run unless the
# cell defining `pca()` is run again first. Later cells read `pca` as the
# fitted object, so any rename has to be made in all of them together.
x_train_pca, x_test_pca, explained_variance, pca = pca(x_train,x_test,2)

# Holds the SVM accuracy score(s) that feed the summary table.
accsvmnl = []

In [69]:
# Inspect the PCA-projected training features (one row per sample, one column per component).
x_train_pca

array([[ 1.47950784e-01, -1.04119496e-01],
       [-7.00871111e-01,  1.42439047e-01],
       [-3.64349034e+00,  3.09707247e-02],
       [-3.39701907e+00, -5.52073039e-02],
       [ 2.33571451e+00, -3.29000637e+00],
       [ 1.55184689e+00,  1.88639630e+00],
       [-2.26339867e+00,  1.93363072e-01],
       [ 2.83145000e-01,  4.12373114e-01],
       [-1.15526165e+00, -1.19218551e-01],
       [ 7.30094330e+00, -2.11855474e+00],
       [ 1.20334925e+00,  9.93297657e-01],
       [ 1.51113108e+00,  1.05871412e-01],
       [-2.29806655e+00,  6.93723872e-01],
       [-6.76216762e-01, -3.50031731e-01],
       [ 4.59980144e+00, -1.05026679e+00],
       [-9.13568410e-01,  8.72460424e-02],
       [ 8.62974675e-01,  5.66447295e-01],
       [ 4.39178132e-01,  6.30679245e-01],
       [-6.69549236e-01,  3.83858792e-01],
       [-8.76132482e-01, -2.40500109e-01],
       [-2.66814210e-01,  4.40545399e-01],
       [-1.35011099e+00,  3.98249283e-01],
       [-2.57396803e+00,  2.16275848e-01],
       [ 3.

In [70]:
# Inspect the PCA-projected test features.
x_test_pca

array([[-3.43515358,  0.07647829],
       [ 0.89216391, -0.85508939],
       [-2.58884046, -0.15352282],
       [ 2.04411909, -1.91092985],
       [ 0.67030972, -0.35523541],
       [ 0.96124506, -1.70527183],
       [ 5.90664484,  1.42289215],
       [ 0.34943433, -2.32580669],
       [-2.28570473, -0.35926769],
       [-2.82124887,  0.19740738],
       [-1.46860399, -2.00569775],
       [-0.10854403,  1.12171311],
       [-3.03778925, -0.0323031 ],
       [ 2.87590497, -2.53241368],
       [-2.92723678,  0.32160794],
       [-2.56816136,  0.12319635],
       [ 0.30183909, -0.82968924],
       [-2.63617433,  0.30687203],
       [ 2.71518556,  0.63057752],
       [ 4.11333966, -2.90912757],
       [-2.24528684,  0.3707568 ],
       [ 1.88294673, -0.64478053],
       [ 1.30054292, -0.19291415],
       [-2.42375126, -0.21903753],
       [ 6.22066392,  2.94734095],
       [-2.29169709, -0.13651568],
       [-3.47825337,  0.32850474],
       [-3.45704229,  0.4192994 ],
       [ 2.80932007,

In [35]:
# Fraction of the total variance captured by each retained component.
# NOTE(review): this cell's execution count (35) is out of sequence with its
# neighbours; re-run it so the displayed output matches the current PCA fit.
explained_variance

array([0.25872433, 0.07258329])

In [36]:
# Display the fitted PCA object (the name `pca` was rebound to it by the PCA call above).
# NOTE(review): execution count (36) is out of sequence; re-run on a fresh kernel.
pca

In [71]:
# Train the RBF SVM on the PCA features and score it on the test split.
classifier,Accuracy = svm_nonlinear(x_train_pca,y_train,x_test_pca,y_test)
# Assign instead of append so re-running this cell is idempotent: the one-row
# summary table built from this list raises if it holds more than one value.
accsvmnl = [Accuracy]


In [73]:
# Collect the accuracy into a one-row summary table.
result = pca_classification(accsvmnl)

In [74]:
# Display the summary table.
result

Unnamed: 0,SVM_nl
PCA,1.0


In [75]:
import pickle

### Save Model file

In [76]:
# Path of the pickled classifier (relative to the working directory).
filename = 'Finalized_SVMnl_Model.sav'

In [77]:
# Persist the trained classifier. The context manager closes (and flushes) the
# file handle even if pickling fails; the bare open() call never closed it.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

### Save StandardScaler (sc) file

In [78]:
# Path of the pickled StandardScaler (relative to the working directory).
filenamesc = 'sc.pkl'

In [79]:
# Persist the fitted scaler; the context manager guarantees the handle is closed.
with open(filenamesc, 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)

In [80]:
# Reload the scaler from disk, reusing the path variable instead of repeating
# the literal, and close the handle deterministically.
# Only unpickle files you produced yourself: pickle.load can run arbitrary code.
with open(filenamesc, 'rb') as scaler_file:
    sc = pickle.load(scaler_file)

### Save PCA file

In [81]:
# Path of the pickled PCA transformer (relative to the working directory).
filenamepca ='pca.pkl'

In [82]:
# Persist the fitted PCA object; the context manager guarantees the handle is closed.
with open(filenamepca, 'wb') as pca_file:
    pickle.dump(pca, pca_file)

In [83]:
# Reload the PCA object from disk, reusing the path variable instead of
# repeating the literal, and close the handle deterministically.
# Only unpickle files you produced yourself: pickle.load can run arbitrary code.
with open(filenamepca, 'rb') as pca_file:
    pca = pickle.load(pca_file)

In [84]:
# Column order of the encoded frame: the feature columns (everything except
# 'classification_yes') appear in the order the scaler was fitted on.
df.columns

Index(['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes'],
      dtype='object')

In [85]:
# Build the sample as a one-row DataFrame labelled with the training feature
# names. The scaler was fitted on a DataFrame, so passing labelled data lets
# scikit-learn validate the column names, and the constructor raises if the
# number of values does not match the number of features.
# NOTE(review): read against indep_x.columns, these values put 0 in `rc` and
# set both `sg_b` and `sg_d` to 1 (dummies of the same original column). It
# looks like the `rc` value was omitted and the flags shifted by one; confirm
# the intended sample. The values are kept unchanged here.
user_input = pd.DataFrame(
    [[5.0,76.58,3.0,0.0,148.11,57.48,6.25,154.3,5.25,12.56,52.0,7700,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0]],
    columns=indep_x.columns,
)

In [86]:
# Standardise the sample with the scaler reloaded from disk (fitted on the training split).
scaled_input = sc.transform(user_input)
scaled_input



array([[-2.89939466e+00, -3.59418284e-03,  1.70813011e+00,
        -3.65008855e-01, -1.53315608e-02, -5.08082167e-02,
         5.52477536e-01,  1.70735356e+00,  3.37552164e-01,
         4.29417851e-02,  1.61244852e+00, -2.40918855e-01,
        -5.61639544e+00,  2.05657682e+00, -5.15628758e-01,
         2.12132034e+00, -1.26188616e-01,  3.73210014e-01,
        -2.05657682e+00,  3.04800305e+00, -2.37258068e-01,
         1.31396184e+00, -7.40637353e-01,  3.28858857e+00,
        -1.97678838e+00,  1.93937981e+00, -4.36232172e-01]])

In [87]:
# Project the scaled sample onto the principal components learned from the training split.
pca_input = pca.transform(scaled_input)
pca_input

array([[ 2.92060825, -0.87717892]])

In [88]:
# Reload the saved classifier, reusing the path variable and closing the file
# handle deterministically, then classify the PCA-projected sample.
# Only unpickle files you produced yourself: pickle.load can run arbitrary code.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
Prediction = loaded_model.predict(pca_input)

In [89]:
# Predicted label for the sample, encoded like the 'classification_yes' target column.
Prediction

array([1])