# Diabetes Prediction Model - Model Training and Evaluation

## Import Libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score


## Load Preprocessed Data

In [None]:
# Read the preprocessed dataset from disk.
file_path = '/path/to/your/preprocessed_data.csv'
data = pd.read_csv(file_path)

# Separate the 'diabetes' target column from the remaining feature columns.
y = data['diabetes']
X = data.drop(columns='diabetes')

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Training and Evaluation

In [None]:
# Initialize models (a fixed random_state keeps each run reproducible)
logreg = LogisticRegression(random_state=42)
rf = RandomForestClassifier(random_state=42)
gb = GradientBoostingClassifier(random_state=42)

# Dictionary to hold models and their performance metrics
models = {'Logistic Regression': logreg, 'Random Forest': rf, 'Gradient Boosting': gb}
model_performance = {}

# Training and evaluating each model on the same train/test split
for model_name, model in models.items():
    model.fit(X_train, y_train)

    # Hard class labels drive the threshold-dependent metrics.
    y_pred = model.predict(X_test)
    # ROC AUC is a ranking metric: it needs a continuous score, not 0/1 labels.
    # Column 1 of predict_proba is the probability of the greater class label,
    # which is the class roc_auc_score treats as positive for a binary target.
    y_score = model.predict_proba(X_test)[:, 1]

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_score)
    model_performance[model_name] = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'ROC AUC': roc_auc}


## Model Performance Comparison

In [None]:
# One row per model, one column per metric.
model_performance_df = pd.DataFrame.from_dict(model_performance, orient='index')

## Conclusion

This notebook contains the steps for training and evaluating different models for the diabetes prediction task. It includes training logistic regression, random forest, and gradient boosting models, and comparing their performance based on accuracy, precision, recall, and ROC AUC.