# Diabetes Detection Model - Interactive Demo

This notebook demonstrates the advanced diabetes detection model with interactive predictions and risk assessment.

## Features Included
- Multi-algorithm ensemble predictions
- Risk stratification analysis
- Personalized health recommendations
- Feature importance visualization
- Model performance metrics

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

## Step 1: Load and Prepare Data

In [None]:
# Read the raw diabetes table from CSV (placeholder path - point it at your own file)
data = pd.read_csv('your_diabetes_data.csv')

# Quick look at the data: dimensions, a preview of the first rows, and summary statistics.
# Each heading and its table are emitted by a single print call, separated by a newline.
print('Dataset shape:', data.shape)
print('\nFirst few rows:', data.head(), sep='\n')
print('\nBasic statistics:', data.describe(), sep='\n')

## Step 2: Data Preprocessing

In [None]:
# Resolve the target column once: use 'Outcome' when present, otherwise the last column
target_col = 'Outcome' if 'Outcome' in data.columns else data.columns[-1]

# Handle missing values
# - Rows with a missing label are dropped: filling the target with its mean would
#   create fractional labels that are not valid classes.
# - Missing feature values are filled with the column mean. numeric_only=True keeps
#   this working when the frame contains non-numeric columns (pandas >= 2.0 raises otherwise).
data_labeled = data.dropna(subset=[target_col])
feature_means = data_labeled.drop(columns=target_col).mean(numeric_only=True)
data_clean = data_labeled.fillna(feature_means)

# Separate features and target
X = data_clean.drop(columns=target_col)
y = data_clean[target_col]

# Scale features (zero mean / unit variance per column, fit on every row of X)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print('Features shape:', X_scaled.shape)
print('Target distribution:', y.value_counts())

## Step 3: Model Training (All Algorithms)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# Train-test split on the UNSCALED features.
# stratify=y keeps the class ratio the same in the train and test splits.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply it to the test rows.
# Fitting on the full dataset would leak test-set statistics into training and
# make the evaluation metrics optimistic. `scaler` is rebound here so that later
# cells scale new patients with the same statistics the models were trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Train multiple models
models = {
    # random_state makes the internal probability calibration reproducible
    'SVM': SVC(kernel='rbf', probability=True, random_state=42),
    'Random Forest': RandomForestClassifier(n_estimators=200, random_state=42),
    # XGBoost's logging parameter is `verbosity`; `verbose` is not a constructor argument
    'XGBoost': XGBClassifier(n_estimators=100, random_state=42, verbosity=0),
    'Logistic Regression': LogisticRegression(random_state=42),
    'Neural Network': MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
}

# Fit every model on the same training split and keep the fitted estimators by name
trained_models = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    trained_models[name] = model
    print(f'{name} trained successfully')

## Step 4: Model Evaluation

In [None]:
# Evaluate all models on the held-out test split
results = {}
y_true = np.asarray(y_test)
for name, model in trained_models.items():
    # Predict once and derive accuracy from those predictions
    # (model.score would run a second, redundant predict pass).
    y_pred = model.predict(X_test)
    accuracy = np.mean(y_pred == y_true)

    # AUC is computed from the probability in column 1 of predict_proba
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)

    results[name] = {'Accuracy': accuracy, 'AUC': auc}
    print(f'{name}: Accuracy={accuracy:.4f}, AUC={auc:.4f}')

# Create comparison dataframe (one row per model, one column per metric)
results_df = pd.DataFrame(results).T
print('\nModel Comparison:')
print(results_df)

## Step 5: Risk Assessment & Predictions

In [None]:
# Function to make ensemble predictions and assess risk
def assess_diabetes_risk(patient_data, low_threshold=0.3, high_threshold=0.7):
    """
    Assess diabetes risk for a single patient with the trained model ensemble.

    The feature vector is scaled with the notebook-level ``scaler`` and scored by
    every model in ``trained_models``. The probabilities taken from column 1 of
    each model's ``predict_proba`` output are averaged (soft voting) into one
    risk score, which is then bucketed into a risk level.

    Parameters
    ----------
    patient_data : array or list of features
        One patient's raw (unscaled) feature values, in the column order the
        scaler was fit on.
    low_threshold : float, default 0.3
        Scores strictly below this value are 'LOW RISK'.
    high_threshold : float, default 0.7
        Scores from ``low_threshold`` up to (but excluding) this value are
        'MEDIUM RISK'; scores at or above it are 'HIGH RISK'.

    Returns
    -------
    dict
        ``'risk_score'`` (float, mean probability), ``'risk_level'`` (str) and
        ``'recommendations'`` (list of str).

    Raises
    ------
    ValueError
        If ``trained_models`` is empty, or the thresholds are not ordered as
        ``0 <= low_threshold <= high_threshold <= 1``.
    """
    # Without models the mean would be NaN, which fails both '<' checks below
    # and would silently be reported as HIGH RISK.
    if not trained_models:
        raise ValueError('No trained models available; train the models before assessing risk')
    if not 0.0 <= low_threshold <= high_threshold <= 1.0:
        raise ValueError('Thresholds must satisfy 0 <= low_threshold <= high_threshold <= 1')

    # The scaler expects a 2-D input, so wrap the single patient in a list
    patient_scaled = scaler.transform([patient_data])

    # Ensemble prediction: average the per-model probabilities (soft voting)
    probabilities = [
        model.predict_proba(patient_scaled)[0][1]
        for model in trained_models.values()
    ]
    avg_probability = float(np.mean(probabilities))

    # Risk assessment
    if avg_probability < low_threshold:
        risk_level = 'LOW RISK'
        recommendations = ['Maintain current lifestyle', 'Regular exercise (150 min/week)', 'Annual check-ups']
    elif avg_probability < high_threshold:
        risk_level = 'MEDIUM RISK'
        recommendations = ['Increase physical activity', 'Dietary modifications', 'Quarterly monitoring']
    else:
        risk_level = 'HIGH RISK'
        recommendations = ['Consult healthcare provider', 'Medication consideration', 'Monthly monitoring']

    return {
        'risk_score': avg_probability,
        'risk_level': risk_level,
        'recommendations': recommendations
    }

print('Risk Assessment Function Defined')

## Step 6: Example Prediction

In [None]:
# One illustrative patient: eight raw feature values
# NOTE(review): assumes the dataset has exactly eight feature columns in this order - confirm against your CSV
patient_example = [6, 148, 72, 35, 0, 33.6, 0.627, 50]  # Sample feature values

# Score the patient with the ensemble
risk_assessment = assess_diabetes_risk(patient_example)

# Build the report line by line, then emit it with one print call (one line per entry)
report_lines = [
    'Patient Risk Assessment:',
    f"Risk Score: {risk_assessment['risk_score']:.4f}",
    f"Risk Level: {risk_assessment['risk_level']}",
    f"Recommendations: {risk_assessment['recommendations']}",
    '\n--- Assessment Complete ---',
]
print(*report_lines, sep='\n')