<a href="https://colab.research.google.com/github/KamalAsh0ur/NTI/blob/main/Diabetes_Prediction_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Advanced Predictive Analytics Solution with Forecasting**

In [6]:
!pip install scikit-learn # Install scikit-learn if not already installed



In [7]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score # Import precision_score

In [8]:
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import shap
import xgboost as xgb
from prophet import Prophet
import plotly.graph_objects as go

# Load and preprocess data
# Reads `diabetes.csv` from the current working directory.
df = pd.read_csv('diabetes.csv')

# Handle zero values in key features:
# a 0 in these columns is treated as "missing" and replaced by the column median
# (the median is computed after the zeros were turned into NaN, so they do not bias it).
for col in ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']:
    df[col] = df[col].replace(0, np.nan)
    df[col] = df[col].fillna(df[col].median())

# Feature engineering
# `right=False` makes every bin left-closed / right-open, e.g. [20, 30), so the
# intervals agree with their labels ('20-29' no longer contains age 30, and a BMI
# of exactly 25 falls in 'Overweight' rather than 'Normal').
df['BMI_Category'] = pd.cut(df['BMI'], bins=[0, 18.5, 25, 30, 100],
                            labels=['Underweight', 'Normal', 'Overweight', 'Obese'],
                            right=False)
df['Age_Group'] = pd.cut(df['Age'], bins=[20, 30, 40, 50, 60, 100],
                         labels=['20-29', '30-39', '40-49', '50-59', '60+'],
                         right=False)

# Prepare data for ML
# The two engineered categorical columns are for display only and are excluded
# from the model's feature matrix.
X = df.drop(['Outcome', 'BMI_Category', 'Age_Group'], axis=1)
y = df['Outcome']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
# The scaler is fitted on the training split only and then re-used for the test
# split and for every prediction made by the dashboard callbacks.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# `use_label_encoder` is no longer accepted by current XGBoost releases (it only
# produces a "Parameters ... are not used" warning), so it is not passed.
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train_scaled, y_train)

# SHAP explainer
explainer = shap.TreeExplainer(model)
# Initialize Dash app
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.DARKLY])

# Score the held-out split once; the confusion matrix, accuracy and precision
# shown in the layout are all derived from these same predictions.
y_test_pred = model.predict(X_test_scaled)

# Static figure: confusion matrix of the held-out split.
confusion_fig = px.imshow(confusion_matrix(y_test, y_test_pred),
                          labels=dict(x="Predicted", y="Actual"),
                          x=['Non-Diabetic', 'Diabetic'],
                          y=['Non-Diabetic', 'Diabetic'],
                          text_auto=True,
                          title="Confusion Matrix")

# Static figure: the model's built-in feature importances, largest first.
importance_fig = px.bar(pd.DataFrame({
                            'features': X.columns,
                            'importance': model.feature_importances_
                        }).sort_values('importance', ascending=False),
                        x='importance', y='features',
                        title="XGBoost Feature Importance")

app.layout = dbc.Container([
    dbc.Row([
        dbc.Col(html.H1("Advanced Diabetes Prediction Dashboard",
                       className="text-center mb-4 text-warning"),
                width=12)
    ]),

    # Prediction Interface
    dbc.Row([
        dbc.Col([
            html.Div([
                html.H3("Real-time Prediction", className="mb-3"),
                dbc.Row([
                    dbc.Col(dbc.Input(id='preg', type='number', placeholder='Pregnancies')),
                    dbc.Col(dbc.Input(id='gluc', type='number', placeholder='Glucose')),
                    dbc.Col(dbc.Input(id='bp', type='number', placeholder='Blood Pressure')),
                ], className="mb-2"),

                dbc.Row([
                    dbc.Col(dbc.Input(id='skin', type='number', placeholder='Skin Thickness')),
                    dbc.Col(dbc.Input(id='insulin', type='number', placeholder='Insulin')),
                    dbc.Col(dbc.Input(id='bmi', type='number', placeholder='BMI')),
                ], className="mb-2"),

                dbc.Row([
                    dbc.Col(dbc.Input(id='dpf', type='number', step=0.01, placeholder='Diabetes Pedigree')),
                    dbc.Col(dbc.Input(id='age', type='number', placeholder='Age')),
                    dbc.Col(dbc.Button('Predict', id='predict-btn', color='warning')),
                ]),

                # Filled in by the `predict_diabetes` callback.
                html.Div(id='prediction-output', className="mt-3 h4"),
                dcc.Graph(id='shap-waterfall')
            ], className="p-3 border rounded")
        ], md=6),

        # Model Performance
        dbc.Col([
            html.Div([
                html.H3("Model Performance Metrics", className="mb-3"),
                dcc.Graph(figure=confusion_fig),
                dbc.Row([
                    dbc.Col(html.Div(f"Accuracy: {accuracy_score(y_test, y_test_pred):.2%}",
                                    className="h5 text-center p-2 bg-primary rounded")),
                    dbc.Col(html.Div(f"Precision: {precision_score(y_test, y_test_pred):.2%}",
                                    className="h5 text-center p-2 bg-success rounded")),
                ], className="mb-2")
            ], className="p-3 border rounded")
        ], md=6)
    ], className="mb-4"),

    # Forecasting and Trends
    dbc.Row([
        dbc.Col([
            html.Div([
                html.H3("Diabetes Risk Forecast", className="mb-3"),
                dcc.Graph(id='forecast-plot'),
                # The slider drives both the forecast plot and the age-risk plot.
                dcc.Slider(id='age-slider',
                          min=20,
                          max=80,
                          value=30,
                          marks={i: str(i) for i in range(20, 81, 10)})
            ], className="p-3 border rounded")
        ], md=6),

        # Feature Importance
        dbc.Col([
            html.Div([
                html.H3("Feature Importance Analysis", className="mb-3"),
                dcc.Graph(figure=importance_fig)
            ], className="p-3 border rounded")
        ], md=6)
    ]),

    # Time-based Analysis
    dbc.Row([
        dbc.Col([
            html.Div([
                html.H3("Age-based Risk Progression", className="mb-3"),
                dcc.Graph(id='age-risk-plot')
            ], className="p-3 border rounded")
        ], width=12)
    ])
], fluid=True)

# Callbacks
@app.callback(
    [Output('prediction-output', 'children'),
     Output('shap-waterfall', 'figure')],
    [Input('predict-btn', 'n_clicks')],
    [State('preg', 'value'),
     State('gluc', 'value'),
     State('bp', 'value'),
     State('skin', 'value'),
     State('insulin', 'value'),
     State('bmi', 'value'),
     State('dpf', 'value'),
     State('age', 'value')]
)
def predict_diabetes(n_clicks, preg, gluc, bp, skin, insulin, bmi, dpf, age):
    """Score the values typed into the form and explain the result with SHAP.

    Triggered by the 'Predict' button; the eight form fields are read as State,
    so editing them does not re-run the callback on its own.

    Args:
        n_clicks: Click count of the button; None until it has been pressed.
        preg, gluc, bp, skin, insulin, bmi, dpf, age: Form values, given in the
            same order as the columns of the training frame ``X``.

    Returns:
        A ``(message, figure)`` pair: the text for 'prediction-output' and the
        waterfall figure for 'shap-waterfall'. Before the first click, or when a
        field is missing, the figure is empty.
    """
    if n_clicks is None:
        return "", go.Figure()

    feature_values = [preg, gluc, bp, skin, insulin, bmi, dpf, age]

    # A number input that is blank reaches the callback as None. Scoring such a
    # row would silently treat the field as a missing value, so ask for it instead.
    if any(value is None for value in feature_values):
        return "Please enter a value for every field.", go.Figure()

    input_data = pd.DataFrame([feature_values], columns=X.columns)
    # Apply the scaling that was fitted on the training split.
    input_scaled = scaler.transform(input_data)

    # Prediction: probability of the positive class (column 1).
    proba = model.predict_proba(input_scaled)[0][1]

    # SHAP explanation
    shap_values = explainer.shap_values(input_scaled)
    # `expected_value` may be a scalar or a one-element array depending on the
    # SHAP version (assumption: single-output model); Waterfall needs a number.
    base_value = float(np.ravel(explainer.expected_value)[0])

    fig = go.Figure(go.Waterfall(
        name="Feature Impact",
        orientation="h",
        measure=["relative"]*len(X.columns),
        x=shap_values[0],  # contributions for the single input row
        y=X.columns,
        base=base_value
    ))

    fig.update_layout(title="SHAP Value Explanation",
                    xaxis_title="Impact on Prediction",
                    plot_bgcolor='rgba(0,0,0,0)')

    return f"Diabetes Risk: {proba:.1%}", fig

@app.callback(
    Output('forecast-plot', 'figure'),
    [Input('age-slider', 'value')]
)
def update_forecast(age):
    """Plot the model's predicted risk along a synthetic ten-step trajectory.

    Starting at the slider's age, ten consecutive ages are paired with a
    hard-coded profile: some features are held constant while others rise
    linearly between two fixed values. The rows are scaled with the fitted
    scaler and scored by the model; nothing here is derived from the dataset.

    Args:
        age: Current value of 'age-slider'.

    Returns:
        A line figure of positive-class probability against age.
    """
    n_steps = 10
    ages = np.arange(age, age+10)

    def ramp(start, stop):
        # Evenly spaced values from start to stop, one per projected step.
        return np.linspace(start, stop, n_steps)

    # Column order mirrors the feature order used to fit the scaler and model.
    trajectory = pd.DataFrame({
        'Pregnancies': [3] * n_steps,
        'Glucose': ramp(100, 160),
        'BloodPressure': ramp(70, 85),
        'SkinThickness': ramp(20, 35),
        'Insulin': ramp(100, 200),
        'BMI': ramp(25, 35),
        'DiabetesPedigreeFunction': [0.5] * n_steps,
        'Age': ages
    })

    # Probability of the positive class for each synthetic row.
    risk = model.predict_proba(scaler.transform(trajectory))[:, 1]

    figure = px.line(
        x=ages,
        y=risk,
        labels={'x': 'Age', 'y': 'Diabetes Risk'},
        title="10-Year Diabetes Risk Projection",
    )
    figure.update_traces(line=dict(color='red', width=3))
    return figure

@app.callback(
    Output('age-risk-plot', 'figure'),
    [Input('age-slider', 'value')]
)
def update_age_risk(selected_age):
    """Plot the observed outcome rate per age, up to the selected age.

    Rows of ``df`` with ``Age <= selected_age`` are grouped by exact age and the
    mean of ``Outcome`` is taken per group. Note that the plotted value is this
    per-age mean, not a running total, even though the title says "Cumulative".

    Args:
        selected_age: Current value of 'age-slider'.

    Returns:
        An area figure with a dotted red vertical marker at ``selected_age``.
    """
    at_or_below = df.loc[df['Age'] <= selected_age]
    outcome_rate = (
        at_or_below
        .groupby('Age')['Outcome']
        .mean()
        .reset_index()
    )

    figure = px.area(
        outcome_rate,
        x='Age',
        y='Outcome',
        title="Cumulative Diabetes Risk by Age",
        labels={'Outcome': 'Probability'},
    )
    # Mark the age currently chosen on the slider.
    figure.add_vline(
        x=selected_age,
        line_dash="dot",
        annotation_text="Current Age",
        line_color="red",
    )
    return figure

if __name__ == '__main__':
    # `app.run` is the current entry point; `run_server` is its deprecated
    # predecessor and is rejected by recent Dash releases. Fall back to it only
    # when running on an older Dash that does not provide `run` yet.
    start_server = app.run if hasattr(app, 'run') else app.run_server
    start_server(debug=True)


Parameters: { "use_label_encoder" } are not used.




<IPython.core.display.Javascript object>