In [2]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier

# Load the diabetes dataset
diabetes_file_path = '/content/diabetes_prediction_dataset.csv'
diabetes_df = pd.read_csv(diabetes_file_path)

# Encoding categorical variables (gender and smoking_history)
label_encoder = LabelEncoder()

diabetes_df['gender'] = label_encoder.fit_transform(diabetes_df['gender'])
diabetes_df['smoking_history'] = label_encoder.fit_transform(diabetes_df['smoking_history'])

# Splitting the data into features (X) and labels (y)
X = diabetes_df.drop('diabetes', axis=1)
y = diabetes_df['diabetes']

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 1. Logistic Regression
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)
y_pred_logreg = logreg.predict(X_test)
print("Logistic Regression Report:\n", classification_report(y_test, y_pred_logreg))

# 2. Random Forest Classifier
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Classifier Report:\n", classification_report(y_test, y_pred_rf))

# 3. AdaBoost Classifier
ada = AdaBoostClassifier()
ada.fit(X_train, y_train)
y_pred_ada = ada.predict(X_test)
print("AdaBoost Classifier Report:\n", classification_report(y_test, y_pred_ada))

# 4. K-Nearest Neighbors (KNN) Classifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print("KNN Classifier Report:\n", classification_report(y_test, y_pred_knn))


Logistic Regression Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.98     18292
           1       0.86      0.61      0.72      1708

    accuracy                           0.96     20000
   macro avg       0.91      0.80      0.85     20000
weighted avg       0.96      0.96      0.96     20000

Random Forest Classifier Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.98     18292
           1       0.95      0.69      0.80      1708

    accuracy                           0.97     20000
   macro avg       0.96      0.84      0.89     20000
weighted avg       0.97      0.97      0.97     20000

AdaBoost Classifier Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.98     18292
           1       0.97      0.70      0.81      1708

    accuracy                           0.97     20000
   macro avg       0.97      0.85   