<a href="https://colab.research.google.com/github/Tafreed/CP/blob/master/Diabetes_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing modules 

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score, precision_score, f1_score,confusion_matrix
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Dataset

In [None]:
# Load the dataset from `diabetes.csv`, resolved relative to the current
# working directory. Later cells expect it to contain an 'Outcome' column.
diabetes_dataset = pd.read_csv('diabetes.csv')

In [None]:
# Preview the first few rows to sanity-check that the file loaded as expected.
diabetes_dataset.head()

In [None]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
diabetes_dataset.describe()

In [None]:
# Separate the feature columns from the target label.
# `columns=` already identifies the axis, so no extra `axis` argument is needed.
diabetes_parameters = diabetes_dataset.drop(columns='Outcome')
diabetes_outcomes = diabetes_dataset['Outcome']

# Standardization


In [None]:
# Scaler that rescales each feature to zero mean and unit variance.
scaler = StandardScaler()

In [None]:
# Learn the per-column mean and standard deviation from every row of the
# feature table.
scaler.fit(diabetes_parameters)

In [None]:
# Apply the fitted scaler; the result is a plain NumPy array without labels.
standardized_data = scaler.transform(diabetes_parameters)

# Re-attach column labels for display. They are taken from the feature table
# that was actually scaled, so they stay correct wherever 'Outcome' sits in
# the original file.
standardized_data_feat = pd.DataFrame(standardized_data, columns=diabetes_parameters.columns)
standardized_data_feat.head()

# Splitting into Train and Test

In [None]:
# Split the raw features first, then standardize using statistics learned from
# the training rows only. Scaling the whole table before splitting would let
# the test rows' mean and variance leak into the training features.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    diabetes_parameters,
    diabetes_outcomes,
    test_size=0.33,
    stratify=diabetes_outcomes,  # keep the class ratio equal in both splits
    random_state=42,  # fixed seed so the split is reproducible
)
split_scaler = StandardScaler().fit(X_train_raw)
X_train = split_scaler.transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

# Classifier: Support Vector Machine

In [None]:
# Support vector classifier with a radial-basis-function kernel; every other
# hyperparameter is left at the library default.
classifier = svm.SVC(kernel='rbf')

In [None]:
# Train the SVM on the training split.
classifier.fit(X_train, Y_train)

In [None]:
# Predicted labels from the SVM for both splits. Despite the `X_` prefix these
# variables hold predicted outcomes, not feature matrices.
X_train_prediction = classifier.predict(X_train)
X_test_prediction = classifier.predict(X_test)

# Evaluation on test data (SVM)

In [None]:
# Fraction of test rows whose predicted label matches the true label.
test_data_accuracy = accuracy_score(Y_test,X_test_prediction)
print('Accuracy score of the test data : ', test_data_accuracy)

In [None]:
# F1 score (harmonic mean of precision and recall); `average='binary'` reports
# it for the positive class only.
test_data_f1_score = f1_score(Y_test, X_test_prediction, average='binary')
print('F1 score of the test data : ', test_data_f1_score)

In [None]:
# Precision: of the rows predicted positive, the share that truly are positive.
test_data_precision_score = precision_score(Y_test, X_test_prediction)
print('Precision score of the Test data : ', test_data_precision_score)

In [None]:
# Confusion matrix for the test split: rows are true labels, columns are
# predicted labels.
print(confusion_matrix(Y_test,X_test_prediction))

# K Nearest Neighbours Classifier

In [None]:
# K-nearest-neighbours classifier that votes over the 29 closest training rows.
# NOTE(review): 29 is presumably taken from the K sweep at the end of the
# notebook, which only runs after this cell — confirm and state the reason here.
classifier2 = KNeighborsClassifier(n_neighbors=29)

In [None]:
# Train the KNN model on the same training split used for the SVM.
classifier2.fit(X_train, Y_train)

In [None]:
# Predicted labels from the KNN model. This reuses the names from the SVM
# section, so the SVM predictions are overwritten from here on.
X_train_prediction = classifier2.predict(X_train)
X_test_prediction = classifier2.predict(X_test)

# Evaluation on test data (KNN)

In [None]:
# Test accuracy of the KNN model (replaces the SVM value held in the same name).
test_data_accuracy = accuracy_score(Y_test,X_test_prediction)
print('Accuracy score of the test data : ', test_data_accuracy)

In [None]:
# Positive-class F1 score of the KNN model on the test split.
test_data_f1_score = f1_score(Y_test,X_test_prediction, average='binary')
print('F1 score of the test data : ', test_data_f1_score)

In [None]:
# Precision of the KNN model on the test split.
test_data_precision_score = precision_score(Y_test,X_test_prediction)
print('Precision score of the Test data : ', test_data_precision_score)

In [None]:
# Mean accuracy from the estimator's own `score` method; a cross-check on the
# accuracy_score value computed from the KNN test predictions.
classifier2.score(X_test,Y_test)

In [None]:
# Confusion matrix for the KNN test predictions: rows are true labels, columns
# are predicted labels.
print(confusion_matrix(Y_test,X_test_prediction))

# Finding the best value of K

In [None]:
# Sweep K from 1 to 39 and record the mean 10-fold cross-validated accuracy
# for each value.
#
# The scaler sits inside the pipeline so it is re-fitted on the training folds
# of every split. Cross-validating data that was standardized up front would
# let each validation fold's statistics leak into the features it is scored on.
accuracy_rate = []
for i in range(1, 40):
    knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=i))
    score = cross_val_score(knn, diabetes_parameters, diabetes_outcomes, cv=10)
    accuracy_rate.append(score.mean())

In [None]:
# Plot mean cross-validated accuracy against K (same 1..39 range as the sweep).
# The explicit figure/axes interface keeps the plot self-contained, and the
# closing plt.show() stops the last call's Text(...) repr from appearing in the
# cell output.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, 40), accuracy_rate, color='blue', linestyle='dashed', marker='o',
        markerfacecolor='red', markersize=10)
ax.set_title('Accuracy vs. K Value')
ax.set_xlabel('K')
ax.set_ylabel('Accuracy')
plt.show()