In [61]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions

In [62]:
dataset = pd.read_csv('diabetes_prediction_dataset.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values

In [63]:
print(X)
print(Y)

[['Female' 80.0 0 ... 25.19 6.6 140]
 ['Female' 54.0 0 ... 27.32 6.6 80]
 ['Male' 28.0 0 ... 27.32 5.7 158]
 ...
 ['Male' 66.0 0 ... 27.83 5.7 155]
 ['Female' 24.0 0 ... 35.42 4.0 100]
 ['Female' 57.0 0 ... 22.43 6.6 90]]
[0 0 0 ... 0 0 0]


In [64]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:,0] = le.fit_transform(X[:, 0])
print(X)

[[0 80.0 0 ... 25.19 6.6 140]
 [0 54.0 0 ... 27.32 6.6 80]
 [1 28.0 0 ... 27.32 5.7 158]
 ...
 [1 66.0 0 ... 27.83 5.7 155]
 [0 24.0 0 ... 35.42 4.0 100]
 [0 57.0 0 ... 22.43 6.6 90]]


In [65]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(),[4])], remainder = 'passthrough')
X = np.array(ct.fit_transform(X))
print(X)

[[0.0 0.0 0.0 ... 25.19 6.6 140]
 [1.0 0.0 0.0 ... 27.32 6.6 80]
 [0.0 0.0 0.0 ... 27.32 5.7 158]
 ...
 [0.0 0.0 0.0 ... 27.83 5.7 155]
 [0.0 0.0 0.0 ... 35.42 4.0 100]
 [0.0 1.0 0.0 ... 22.43 6.6 90]]


In [66]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = .50, random_state = 1)

In [67]:
print(X_train)
print(X_test)

[[0.0 1.0 0.0 ... 29.55 6.2 85]
 [1.0 0.0 0.0 ... 27.32 6.2 155]
 [1.0 0.0 0.0 ... 23.24 4.0 85]
 ...
 [0.0 0.0 0.0 ... 28.36 5.8 90]
 [0.0 0.0 0.0 ... 57.51 6.2 126]
 [0.0 0.0 0.0 ... 27.32 6.0 90]]
[[1.0 0.0 0.0 ... 26.62 5.0 155]
 [0.0 0.0 0.0 ... 27.32 4.5 130]
 [1.0 0.0 0.0 ... 27.32 6.2 130]
 ...
 [0.0 0.0 0.0 ... 29.7 5.7 126]
 [0.0 0.0 0.0 ... 27.43 4.0 100]
 [0.0 0.0 0.0 ... 23.96 6.1 126]]


In [68]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

<h1>Logistical Regression</h1>

In [69]:
from sklearn.linear_model import LogisticRegression
LGC = LogisticRegression(random_state = 0)
LGC.fit(X_train,Y_train)

In [70]:
LGCY_pred = LGC.predict(X_test)
print(np.concatenate((LGCY_pred.reshape(len(LGCY_pred),1),Y_test.reshape(len(Y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [71]:
from sklearn.metrics import confusion_matrix, accuracy_score
LGCM = confusion_matrix(Y_test, LGCY_pred)
print(LGCM)

[[45312   392]
 [ 1606  2690]]


In [73]:
accuracy_score(Y_test,LGCY_pred)

0.96004

In [74]:
print(LGC.predict(sc.transform([[1,0,0,28,0,0,0,0,0,0,27.32,5.7,158]])))

[0]


<h1>KNN</h1>

In [75]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10, metric = 'minkowski', p = 2)
knn.fit(X_train, Y_train)

In [76]:
KNNY_pred = knn.predict(X_test)

In [77]:
from sklearn.metrics import confusion_matrix, accuracy_score
KNNCM = confusion_matrix(Y_test, KNNY_pred)
print(KNNCM)

[[45591   113]
 [ 1986  2310]]


In [79]:
accuracy_score(Y_test,KNNY_pred)

0.95802

In [80]:
print(knn.predict(sc.transform([[1,0,0,28,0,0,0,0,0,0,27.32,5.7,158]])))

[0]


<h1>SVM RBF</h1>

In [81]:
from sklearn.svm import SVC
SVCC = SVC(kernel = "rbf", random_state = 0)
SVCC.fit(X_train, Y_train)

In [82]:
SVCY_pred = SVCC.predict(X_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [83]:
from sklearn.metrics import confusion_matrix, accuracy_score
SVCCM = confusion_matrix(Y_test, SVCY_pred)
print(SVCCM)

[[45608    96]
 [ 1828  2468]]


In [85]:
accuracy_score(Y_test,SVCY_pred)

0.96152

In [86]:
print(SVCC.predict(sc.transform([[1,0,0,28,0,0,0,0,0,0,27.32,5.7,158]])))

[1]


<h1>SVM with poly kernel</h1>

In [87]:
SVCpoly = SVC(kernel = "poly", random_state = 0)
SVCpoly.fit(X_train, Y_train)
SVCY_pred = SVCpoly.predict(X_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))
SVCpolyCM = confusion_matrix(Y_test, SVCY_pred)
print(SVCpolyCM)
accuracy_score(Y_test,SVCY_pred)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]
[[45639    65]
 [ 1764  2532]]


0.96342

In [89]:
print(SVCpoly.predict(sc.transform([[1,0,0,28,0,0,0,0,0,0,27.32,5.7,158]])))

[1]


<h1>SVM with linear kernel</h1>

In [90]:
Slinear = SVC(kernel = "linear", random_state = 0)
Slinear.fit(X_train, Y_train)
SVCY_pred = Slinear.predict(X_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))
SlinearCM = confusion_matrix(Y_test, SVCY_pred)
print(SlinearCM)
accuracy_score(Y_test,SVCY_pred)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]
[[45499   205]
 [ 1779  2517]]


0.96032

In [92]:
print(Slinear.predict(sc.transform([[1,0,0,28,0,0,0,0,0,0,27.32,5.7,158]])))

[0]


<h1>SVM with Sigmoid kernel</h1>

In [93]:
SVsig = SVC(kernel = "sigmoid", random_state = 0)
SVsig.fit(X_train, Y_train)
SVCY_pred = SVsig.predict(X_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))
SVsigCM = confusion_matrix(Y_test, SVCY_pred)
print(SVsigCM)
accuracy_score(Y_test,SVCY_pred)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]
[[43755  1949]
 [ 2067  2229]]


0.91968

In [95]:
print(SVsig.predict(sc.transform([[1,0,0,28,0,0,0,0,0,0,27.32,5.7,158]])))

[0]


<h1>PCA</h1>

In [96]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
PCAX_train = pca.fit_transform(X_train)
PCAX_test = pca.transform(X_test)

<h1>PCA SVM Linear</h1>

In [97]:
Slinear = SVC(kernel = "linear", random_state = 0)
Slinear.fit(PCAX_train, Y_train)
SVCY_pred = Slinear.predict(PCAX_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))
SlinearCM = confusion_matrix(Y_test, SVCY_pred)
print(SlinearCM)
accuracy_score(Y_test,SVCY_pred)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]
[[45704     0]
 [ 4296     0]]


0.91408

<h1>PCA SVM Sigmoid</h1>

In [99]:
SVsig = SVC(kernel = "sigmoid", random_state = 0)
SVsig.fit(PCAX_train, Y_train)
SVCY_pred = SVsig.predict(PCAX_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))
SVsigCM = confusion_matrix(Y_test, SVCY_pred)
print(SVsigCM)
accuracy_score(Y_test,SVCY_pred)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]
[[42887  2817]
 [ 2983  1313]]


0.884

<h1>PCA SVM Poly</h1>

In [101]:
SVCpoly = SVC(kernel = "poly", random_state = 0)
SVCpoly.fit(PCAX_train, Y_train)
SVCY_pred = SVCpoly.predict(PCAX_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))
SVCpolyCM = confusion_matrix(Y_test, SVCY_pred)
print(SVCpolyCM)
accuracy_score(Y_test,SVCY_pred)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]
[[45531   173]
 [ 3921   375]]


0.91812

<h1>PCA SVM RBF</h1>

In [103]:
SVCC = SVC(kernel = "rbf", random_state = 0)
SVCC.fit(PCAX_train, Y_train)
SVCY_pred = SVCC.predict(PCAX_test)
print(np.concatenate((SVCY_pred.reshape(len(SVCY_pred),1),Y_test.reshape(len(Y_test),1)),1))
SVCCM = confusion_matrix(Y_test, SVCY_pred)
print(SVCCM)
accuracy_score(Y_test,SVCY_pred)

[[0 0]
 [0 0]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]
[[45392   312]
 [ 3657   639]]


0.92062