In [1]:
# Colab-only setup: mount Google Drive at /content/drive so the dataset
# stored under MyDrive can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import scale, StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score, roc_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPRegressor

In [4]:
# Read the diabetes dataset from the mounted Drive folder into a DataFrame.
csv_path = "/content/drive/MyDrive/Colab Notebooks/ml/Diabetes.csv"
df = pd.read_csv(csv_path)

In [5]:
# Separate the label column from the predictors.
target_col = "diabetes"
y = df[target_col]                  # class labels
X = df.drop(columns=[target_col])   # every remaining column is a feature

In [6]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

**Model**

In [7]:
# Baseline k-NN classifier using scikit-learn's default hyperparameters.
# NOTE(review): the features are passed in unscaled; k-NN is distance-based,
# so standardising them first (e.g. StandardScaler) may change the results —
# confirm whether that is intended.
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

In [8]:
# Show the hyperparameters the baseline model was built with (all defaults).
knn_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [9]:
# Predict class labels for the held-out test set with the baseline model.
y_pred = knn_model.predict(X_test)

In [10]:
# Share of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6883116883116883

In [11]:
# Per-class precision / recall / F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.77      0.75      0.76       151
           1       0.55      0.56      0.56        80

    accuracy                           0.69       231
   macro avg       0.66      0.66      0.66       231
weighted avg       0.69      0.69      0.69       231



**Model Tuning**

In [12]:
# Unfitted estimator that serves as the template for the grid search below.
knn = KNeighborsClassifier()

In [13]:
# Search space: candidate neighbour counts k = 1 .. 49 (upper bound exclusive).
knn_params = dict(n_neighbors=np.arange(1, 50))

In [17]:
# Exhaustive search over the k grid with 10-fold cross-validation on the
# training data only. error_score='raise' makes a failing fit raise
# immediately instead of being recorded as a NaN score.
knn_cv_model = GridSearchCV(
    estimator=knn,
    param_grid=knn_params,
    cv=10,
    error_score='raise',
)
knn_cv_model.fit(X_train, y_train)

In [18]:
# Mean cross-validated score of the best candidate found by the grid search.
knn_cv_model.best_score_

0.748637316561845

In [19]:
# Hyperparameter setting that achieved the best cross-validated score.
knn_cv_model.best_params_

{'n_neighbors': 11}

**Tuned Model**

In [20]:
# Refit a fresh k-NN model on the full training set using the hyperparameters
# selected by the grid search. Reading them from `best_params_` (instead of
# hard-coding the value copied from the previous output) keeps this cell
# correct if the data, the split, or the search grid ever changes.
knn_tuned = KNeighborsClassifier(**knn_cv_model.best_params_).fit(X_train, y_train)

In [21]:
# Predict test-set labels with the tuned model.
# Note: this overwrites the baseline predictions previously held in y_pred.
y_pred = knn_tuned.predict(X_test)

In [22]:
# Test-set accuracy of the predictions currently stored in y_pred.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7316017316017316

In [23]:
knn_tuned.score(X_test, y_test) # shortcut: score() predicts internally and returns accuracy, so no separate predict step is needed

0.7316017316017316