### Importing Libraries

In [32]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import StandardScaler

### Loading the Dataset

In [2]:
# Read the diabetes CSV (path is relative to the notebook's working directory)
# and preview the first rows as the cell's output.
csv_path = "diabetes_prediction_dataset.csv"
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,Female,80.0,0,1,never,25.19,6.6,140,0
1,Female,54.0,0,0,No Info,27.32,6.6,80,0
2,Male,28.0,0,0,never,27.32,5.7,158,0
3,Female,36.0,0,0,current,23.45,5.0,155,0
4,Male,76.0,1,1,current,20.14,4.8,155,0


In [11]:
# Single predictor (BMI) and the binary target column.
# X is a 1-D Series at this point; it is reshaped to 2-D after the split.
X, y = data["bmi"], data["diabetes"]

### Splitting the data into training and testing

In [12]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

### Reshaping X_train and X_test

In [16]:
# scikit-learn estimators expect a 2-D feature matrix of shape (n_samples, n_features).
# np.asarray accepts both a pandas Series and an ndarray, so this cell can be
# re-run safely (the previous `.values` call raised AttributeError on a second
# run, because X_train was already an ndarray by then).
X_train = np.asarray(X_train).reshape(-1, 1)
X_test = np.asarray(X_test).reshape(-1, 1)

### Standardize the features

In [17]:
# Learn the scaling statistics (mean / standard deviation) from the training
# split only, then apply that same fitted transformation to the test split.
# Calling fit_transform on X_test would re-fit the scaler on test data, so the
# two splits would be standardized with different parameters.
scalar = StandardScaler()
X_train = scalar.fit_transform(X_train)
X_test = scalar.transform(X_test)

### Train Random Forest Classifier

In [20]:
# Random forests are stochastic (bootstrap sampling / feature selection), so
# seed the estimator to make the reported accuracy reproducible across runs.
# The seed matches the one used for the train/test split.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))

Random Forest Accuracy: 0.9095666666666666


### Train K-Nearest Neighbors Classifier

In [21]:
# Fit a k-nearest-neighbours classifier with default settings and score it
# on the held-out split.
knn = KNeighborsClassifier().fit(X_train, y_train)
knn_pred = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_pred)
print("KNN Accuracy:", knn_accuracy)

KNN Accuracy: 0.9072666666666667


### Train AdaBoost Classifier

In [23]:
# Seed the boosted ensemble so repeated runs give the same model and accuracy.
# The seed matches the one used for the train/test split.
ada = AdaBoostClassifier(random_state=42)
ada.fit(X_train, y_train)
ada_pred = ada.predict(X_test)
print("AdaBoost Accuracy:", accuracy_score(y_test, ada_pred))

AdaBoost Accuracy: 0.9151


### Train Decision Tree Classifier

In [24]:
# Seed the tree so split tie-breaking is deterministic and the reported
# accuracy is reproducible. The seed matches the one used for the split.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)
print("Decision Tree Accuracy:", accuracy_score(y_test, dt_pred))

Decision Tree Accuracy: 0.9111333333333334


### Train Linear Regression (predictions thresholded at 0.5 to obtain class labels)

In [27]:
# Fit ordinary least squares on the 0/1 target, then turn the continuous
# predictions into class labels: values >= 0.5 become 1, everything else 0.
lr = LinearRegression().fit(X_train, y_train)
lr_pred = lr.predict(X_test)
lr_pred_class = [int(pred >= 0.5) for pred in lr_pred]
lr_accuracy = accuracy_score(y_test, lr_pred_class)
print("Linear Regression Accuracy:", lr_accuracy)

Linear Regression Accuracy: 0.9150666666666667


In [34]:
# Print per-class precision / recall / F1 for each classifier.
# The header now ends with a real newline ("\n"); the previous "/n" was printed
# literally and pushed the report's column header onto the title line.
for model_name, predictions in (
    ("Random Forest", rf_pred),
    ("KNN", knn_pred),
    ("AdaBoost", ada_pred),
    ("Decision Tree", dt_pred),
):
    print(f"{model_name} Classification Report:\n", classification_report(y_test, predictions))

Random Forest Classification Report:/n               precision    recall  f1-score   support

           0       0.92      0.99      0.95     27453
           1       0.27      0.04      0.07      2547

    accuracy                           0.91     30000
   macro avg       0.59      0.51      0.51     30000
weighted avg       0.86      0.91      0.88     30000

KNN Classification Report:/n               precision    recall  f1-score   support

           0       0.92      0.99      0.95     27453
           1       0.22      0.04      0.06      2547

    accuracy                           0.91     30000
   macro avg       0.57      0.51      0.51     30000
weighted avg       0.86      0.91      0.88     30000

AdaBoost Classification Report:/n               precision    recall  f1-score   support

           0       0.92      1.00      0.96     27453
           1       0.00      0.00      0.00      2547

    accuracy                           0.92     30000
   macro avg       0.46   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
