## Importing necessary libraries


In [43]:
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import warnings

# Silence every warning for the rest of the session (no category or module filter is given).
# NOTE(review): this also hides any convergence or deprecation warnings the libraries
# below may emit — consider narrowing the filter.
warnings.filterwarnings('ignore')

## Load the diabetes dataset from sklearn

In [44]:
# Load the features in their natural units (scaled=False, scikit-learn >= 1.1).
# By default load_diabetes returns columns that are already mean-centred and normalised,
# so values typed in by hand (e.g. an age in years) would lie far outside the range the
# models were trained on. StandardScaler is applied later for the models that need it.
diabetes = load_diabetes(as_frame=True, scaled=False)

# Work on a copy so the frame stored on the returned Bunch keeps its continuous target.
data = diabetes.frame.copy()

# Binarize the target for classification: 1 when the disease-progression score is above
# the dataset median, 0 otherwise.
data['target'] = (data['target'] > data['target'].median()).astype(int)

## Split the data into features and target

In [45]:
# Separate the binary label column from the predictor columns.
y = data['target']
X = data.drop(columns='target')

## Train-test split

In [46]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

## Standardize the features

In [47]:
# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Initialize Models

In [48]:
# Models that are later fit on the standardized features.
# probability=True is needed so the SVC exposes predict_proba.
log_reg, svc = LogisticRegression(), SVC(probability=True, random_state=42)

# Tree-based models, later fit on the unscaled features; all share the same seed.
dec_tree, rand_forest, grad_boost = (
    Estimator(random_state=42)
    for Estimator in (
        DecisionTreeClassifier,
        RandomForestClassifier,
        GradientBoostingClassifier,
    )
)

## Train Models

In [50]:
# Logistic regression and the SVM are trained on the standardized features.
for estimator in (log_reg, svc):
    estimator.fit(X_train_scaled, y_train)

# The tree-based models are trained on the unscaled feature frame.
for estimator in (dec_tree, rand_forest):
    estimator.fit(X_train, y_train)

# Kept as the final expression so the cell's displayed value is unchanged.
grad_boost.fit(X_train, y_train)

## Predict on the test data

In [51]:
# Hard class predictions on the held-out rows; each model receives the same feature
# representation (standardized or unscaled) that it was trained on.
y_pred_log_reg, y_pred_svc = (
    clf.predict(X_test_scaled) for clf in (log_reg, svc)
)
y_pred_dec_tree, y_pred_rand_forest, y_pred_grad_boost = (
    clf.predict(X_test) for clf in (dec_tree, rand_forest, grad_boost)
)

## Evaluate models

In [52]:
def evaluate_model(y_true, y_pred, y_proba):
    """Score one classifier's predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted class labels, compared against ``y_true`` for accuracy,
            precision, recall and F1.
        y_proba: Scores passed to ``roc_auc_score`` together with ``y_true``.

    Returns:
        A 5-tuple ``(accuracy, precision, recall, f1, roc_auc)``.
    """
    label_metrics = (accuracy_score, precision_score, recall_score, f1_score)
    label_scores = tuple(metric(y_true, y_pred) for metric in label_metrics)
    return label_scores + (roc_auc_score(y_true, y_proba),)

## Test the Models

In [53]:
# Logistic Regression
# Positive-class probability (column 1 of predict_proba) for every model, using the
# same feature representation each model was trained on.
log_reg_proba, svc_proba = (
    clf.predict_proba(X_test_scaled)[:, 1] for clf in (log_reg, svc)
)
dec_tree_proba, rand_forest_proba, grad_boost_proba = (
    clf.predict_proba(X_test)[:, 1] for clf in (dec_tree, rand_forest, grad_boost)
)

# Score each model from its hard predictions and its positive-class probabilities.
(
    log_reg_metrics,
    dec_tree_metrics,
    rand_forest_metrics,
    svc_metrics,
    grad_boost_metrics,
) = (
    evaluate_model(y_test, labels, proba)
    for labels, proba in (
        (y_pred_log_reg, log_reg_proba),        # Logistic Regression
        (y_pred_dec_tree, dec_tree_proba),      # Decision Tree
        (y_pred_rand_forest, rand_forest_proba),  # Random Forest
        (y_pred_svc, svc_proba),                # SVM
        (y_pred_grad_boost, grad_boost_proba),  # Gradient Boosting
    )
)

## Display Performance Measures

In [54]:
# One row per model; each metrics tuple is ordered
# (accuracy, precision, recall, f1, roc_auc) as returned by evaluate_model.
model_performance = pd.DataFrame(
    [
        log_reg_metrics,
        dec_tree_metrics,
        rand_forest_metrics,
        svc_metrics,
        grad_boost_metrics,
    ],
    columns=['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC'],
)

# Put the model names in front so the column order is Model, then the metrics.
model_performance.insert(
    0,
    'Model',
    ['Logistic Regression', 'Decision Tree', 'Random Forest', 'SVM', 'Gradient Boosting'],
)

print(model_performance)

                 Model  Accuracy  Precision    Recall  F1-Score   ROC-AUC
0  Logistic Regression  0.774436   0.746032  0.770492  0.758065  0.868169
1        Decision Tree  0.706767   0.661765  0.737705  0.697674  0.709130
2        Random Forest  0.751880   0.705882  0.786885  0.744186  0.838684
3                  SVM  0.759398   0.693333  0.852459  0.764706  0.872040
4    Gradient Boosting  0.766917   0.720588  0.803279  0.759690  0.834927


## Take user-input data

In [55]:
def get_user_input():
    """Prompt for one value per dataset feature and return them standardized.

    Every feature named in ``diabetes.feature_names`` is requested on stdin. An entry
    that is not a finite number is rejected and the same feature is asked again, so a
    single typo no longer raises ``ValueError`` and discards the values already entered.

    Returns:
        The result of ``scaler.transform`` applied to a one-row DataFrame whose
        columns are the feature names, in dataset order.
    """
    user_data = {}
    for feature in diabetes.feature_names:
        while True:
            raw_entry = input(f"Enter {feature}: ")
            try:
                value = float(raw_entry)
            except ValueError:
                print(f"'{raw_entry}' is not a number; please try again.")
                continue
            # float() also parses 'nan' and 'inf', which are not usable feature values;
            # the chained comparison is False for NaN and for either infinity.
            if float('-inf') < value < float('inf'):
                break
            print(f"'{raw_entry}' is not a finite number; please try again.")
        user_data[feature] = [value]

    user_df = pd.DataFrame(user_data)
    user_df_scaled = scaler.transform(user_df)

    return user_df_scaled

## Predict the chance of above-median disease progression (the binarized target) for new patient data

In [56]:
def predict_diabetes():
    """Ask for one patient's features and print each model's positive-class probability.

    The positive class is the binarized target (progression score above the median).
    Logistic regression and the SVM were fit on standardized features, while the
    decision tree, random forest and gradient boosting models were fit on the
    unscaled features, so each model is given the representation it was trained on.
    """
    user_scaled = get_user_input()

    # get_user_input only returns the standardized row. Undo the scaling to recover the
    # values in the units the tree-based models were trained on; feeding them the
    # standardized row would compare it against thresholds learned on unscaled data.
    user_raw = pd.DataFrame(
        scaler.inverse_transform(user_scaled),
        columns=diabetes.feature_names,
    )

    model_inputs = [
        ("Logistic Regression", log_reg, user_scaled),
        ("Decision Tree", dec_tree, user_raw),
        ("Random Forest", rand_forest, user_raw),
        ("SVM", svc, user_scaled),
        ("Gradient Boosting", grad_boost, user_raw),
    ]

    # Compute every probability before printing so a failure in one model does not
    # leave a partial report on screen.
    probabilities = [
        (label, model.predict_proba(features)[:, 1][0])
        for label, model, features in model_inputs
    ]

    for label, positive_proba in probabilities:
        print(f"Chances of having diabetes based on {label}: {positive_proba:.2f}")

## Call the prediction function

In [57]:
predict_diabetes()

Chances of having diabetes based on Logistic Regression: 0.00
Chances of having diabetes based on Decision Tree: 1.00
Chances of having diabetes based on Random Forest: 0.75
Chances of having diabetes based on SVM: 0.55
Chances of having diabetes based on Gradient Boosting: 0.99
