## Kernel PCA - Best Model

In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.decomposition import KernelPCA
from sklearn.svm import SVC
import pickle

In [14]:
# Data Collection
# Read the dataset from "CKD.csv"; the path is relative, so the file must be
# in the notebook's working directory.

raw_data = pd.read_csv("CKD.csv")
# Bare last expression renders the frame as the cell output.
raw_data

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,2.000000,76.459948,c,3.0,0.0,normal,abnormal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,yes,no,yes
1,3.000000,76.459948,c,2.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,34.000000,12300.000000,4.705597,no,no,no,yes,poor,no,yes
2,4.000000,76.459948,a,1.0,0.0,normal,normal,notpresent,notpresent,99.000000,...,34.000000,8408.191126,4.705597,no,no,no,yes,poor,no,yes
3,5.000000,76.459948,d,1.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,poor,yes,yes
4,5.000000,50.000000,c,0.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,36.000000,12400.000000,4.705597,no,no,no,yes,poor,no,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,219.000000,...,37.000000,9800.000000,4.400000,no,no,no,yes,poor,no,yes
395,51.492308,70.000000,c,0.0,2.0,normal,normal,notpresent,notpresent,220.000000,...,27.000000,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes
396,51.492308,70.000000,c,3.0,0.0,normal,normal,notpresent,notpresent,110.000000,...,26.000000,9200.000000,3.400000,yes,yes,no,poor,poor,no,yes
397,51.492308,90.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,207.000000,...,38.868902,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes


In [26]:
# One-hot encode every categorical column into 0/1 integer indicators.
# drop_first=True drops the first level of each category, so a two-level
# column such as `classification` collapses to one indicator
# (`classification_yes`). `raw_data` itself is left untouched.
df = pd.get_dummies(raw_data, drop_first=True, dtype=int)
df


Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.000000,76.459948,2.0,0.0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,1,0,0,0,0,0,1,0,0,1
2,4.000000,76.459948,1.0,0.0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,1,0,0,0,0,0,1,0,0,1
3,5.000000,76.459948,1.0,0.0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,1,0,0,0,0,0,1,0,1,1
4,5.000000,50.000000,0.0,0.0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,1,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0.0,0.0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,1,0,0,0,0,0,1,0,0,1
395,51.492308,70.000000,0.0,2.0,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,1,0,0,1,1,0,1,0,1,1
396,51.492308,70.000000,3.0,0.0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,1,0,0,1,1,0,0,0,0,1
397,51.492308,90.000000,0.0,0.0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,1,0,0,1,1,0,1,0,1,1


In [38]:
# Input & Output Split
# Target: the encoded `classification_yes` indicator.
dep_y = df['classification_yes']
# Features: every remaining column of the encoded frame.
indep_x = df.drop(columns=['classification_yes'])

In [39]:
# Train & Test split
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    indep_x,
    dep_y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Feature Scaling
# Fit the scaler on the training split only, then apply that same fitted
# standardization to both splits.
# NOTE: x_train / x_test are overwritten here, so re-run the train/test split
# cell before re-running this one; otherwise the scaler would be refitted on
# data that is already scaled.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [None]:
# Apply Kernel PCA
# Reduce the features to 2 components using an RBF kernel.
# x_train / x_test are expected to be the outputs of the feature-scaling cell.
# The transformer is fitted on the training split only; the test split is
# projected with that same fitted transformer.
kpca = KernelPCA(n_components = 2, kernel = 'rbf')
x_train_kpca = kpca.fit_transform(x_train)
x_test_kpca = kpca.transform(x_test)

In [41]:
# Model Creation with best one
# Fit a support-vector classifier with a linear kernel on the 2-component
# Kernel PCA projection of the training data.
# NOTE(review): the accuracy printout ("SVC_nonLinear") and the saved file
# name ('Finalized_SVMnl_Model.sav') describe this model as non-linear, while
# kernel='linear' is used here — confirm which one is intended.
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(x_train_kpca, y_train)

In [42]:
# Make Predictions
# Predict labels for the held-out test split, using its Kernel PCA projection
# (the same feature space the classifier was fitted on).
y_pred = classifier.predict(x_test_kpca)

In [43]:
# Evaluate Performance
# Score the held-out predictions three ways: overall accuracy, the confusion
# matrix, and the per-class text report. Only `accuracy` is printed below;
# `cm` and `report` are kept for inspection.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
report = classification_report(y_true=y_test, y_pred=y_pred)

In [50]:
# Report the test-set accuracy computed in the evaluation cell.
# NOTE(review): the label says "SVC_nonLinear" but the classifier above is
# created with kernel='linear' — confirm the wording.
print(f"Best Accuracy for KernelPCA is SVC_nonLinear: {accuracy}")

Best Accuracy for KernelPCA is SVC_nonLinear: 1.0


### Save the Model

In [68]:
# Persist the fitted classifier so it can be reloaded for inference without
# retraining.
filename = 'Finalized_SVMnl_Model.sav'
# The context manager guarantees the file is flushed and closed, even if
# pickling raises; the original left the handle open.
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

### Save the Preprocessor (StandardScaler)

In [69]:
# Persist the fitted StandardScaler; inference must apply exactly the same
# scaling that was learned from the training split.
filenameSC = 'sc.pkl'
# The context manager guarantees the file is flushed and closed, even if
# pickling raises; the original left the handle open.
with open(filenameSC, 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)


### Save the Kernel PCA Transformer

In [70]:
# Persist the fitted KernelPCA transformer; new inputs must be projected with
# the same fitted transformer before they reach the classifier.
filenameKPCA = 'KPCA.pkl'
# The context manager guarantees the file is flushed and closed, even if
# pickling raises; the original left the handle open.
with open(filenameKPCA, 'wb') as kpca_file:
    pickle.dump(kpca, kpca_file)


### Load the files

In [71]:
# Reload the fitted scaler and Kernel PCA transformer from disk.
# Security: pickle.load can execute arbitrary code, so only unpickle files
# that this notebook (or another trusted source) produced.
# `with` closes each file handle deterministically; the original left both open.
with open('sc.pkl', 'rb') as scaler_file:
    sc = pickle.load(scaler_file)
with open('KPCA.pkl', 'rb') as kpca_file:
    kpca = pickle.load(kpca_file)

In [72]:
# A single sample to classify, shaped as one row of 27 feature values.
# NOTE(review): values are presumably in the same column order as `indep_x`
# (continuous measurements first, then the 0/1 indicator columns) — verify
# against the training frame before relying on the prediction.
User_input = [[
    5.0, 76.58, 3.0, 0.0, 148.11, 57.48,
    6.25, 154.3, 5.25, 12.56, 52.0, 7700,
    0, 1, 0, 1, 0,
    1, 0, 1, 0, 1,
    0, 1, 0, 1, 0,
]]

In [73]:
# Standardize the raw sample with the reloaded scaler (the one fitted on the
# training split), then display the scaled row.
scaled_input = sc.transform(User_input)
scaled_input



array([[-2.89939466e+00, -3.59418284e-03,  1.70813011e+00,
        -3.65008855e-01, -1.53315608e-02, -5.08082167e-02,
         5.52477536e-01,  1.70735356e+00,  3.37552164e-01,
         4.29417851e-02,  1.61244852e+00, -2.40918855e-01,
        -5.61639544e+00,  2.05657682e+00, -5.15628758e-01,
         2.12132034e+00, -1.26188616e-01,  3.73210014e-01,
        -2.05657682e+00,  3.04800305e+00, -2.37258068e-01,
         1.31396184e+00, -7.40637353e-01,  3.28858857e+00,
        -1.97678838e+00,  1.93937981e+00, -4.36232172e-01]])

In [74]:
# Project the scaled sample with the reloaded Kernel PCA transformer; the
# result has the 2 components the classifier was fitted on.
kpca_input = kpca.transform(scaled_input)
kpca_input

array([[-0.35589373,  0.2167403 ]])

In [75]:
# Reload the persisted classifier and predict the class of the prepared sample.
# Security: pickle.load can execute arbitrary code, so only unpickle files
# that this notebook (or another trusted source) produced.
# `with` closes the file handle deterministically; the original left it open.
with open('Finalized_SVMnl_Model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
Prediction = loaded_model.predict(kpca_input)

In [76]:
# Display the predicted label; the target is the `classification_yes`
# indicator, so 1 means classification == "yes".
Prediction

array([1])