<a href="https://colab.research.google.com/github/Aman-coder-debug/Diabetic_Detector_AI_ML/blob/main/Diabetic_detector_using_AI_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Import libraries
import pandas as pd
import numpy as np
import joblib
import os
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
from xgboost import XGBClassifier
import gradio as gr
import csv
from datetime import datetime

# 2. Load dataset
# Column names are passed explicitly, so every line of the CSV is read as data.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
           'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
data = pd.read_csv(url, names=columns)

# 3. Clean data
# A zero in these columns is treated as a missing measurement: it is turned
# into NaN and then replaced by the column mean.
# NOTE(review): the means are computed over the whole dataset, i.e. before the
# train/test split below, so test rows influence the imputed values. Consider
# imputing from the training rows only.
cols_with_zeros = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
data[cols_with_zeros] = data[cols_with_zeros].replace(0, np.nan)
data = data.fillna(data.mean())

# 4. Feature engineering
# Interaction term; predict_diabetes() recomputes the same product for user input.
data["BMI_Age"] = data["BMI"] * data["Age"]

# 5. Prepare features and labels
X = data.drop("Outcome", axis=1)
y = data["Outcome"]

# 6. Split and scale
# The scaler is fitted on the training split only and then applied to the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


# 7. Train or load models
def _load_or_train(model_path, build_model):
    """Return the model cached at ``model_path``, or fit and cache a new one.

    Args:
        model_path: File used as the on-disk cache for the fitted model.
        build_model: Zero-argument callable returning an unfitted estimator.

    Returns:
        The estimator loaded from ``model_path`` when that file exists;
        otherwise a new estimator fitted on ``X_train_scaled`` / ``y_train``
        and written to ``model_path``.
    """
    if os.path.exists(model_path):
        # NOTE(review): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files produced by this notebook.
        # A cached file is used as-is, even if the training code above changed.
        return joblib.load(model_path)
    model = build_model()
    model.fit(X_train_scaled, y_train)
    joblib.dump(model, model_path)
    return model


log_model = _load_or_train("log_model.pkl", lambda: LogisticRegression(max_iter=1000))

# `use_label_encoder` is no longer passed: the installed XGBoost reports it as
# an unused parameter (see the warning in this cell's recorded output).
xgb_model = _load_or_train(
    "xgb_model.pkl",
    lambda: XGBClassifier(eval_metric='logloss', random_state=42),
)

joblib.dump(scaler, "scaler.pkl")

# 8. Plotting functions
def plot_confusion_matrix(y_true, y_pred, model_name, output_path="conf_matrix.png"):
    """Draw a confusion-matrix heatmap and save it as an image file.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        model_name: Name shown in the plot title.
        output_path: Where the image is written. Defaults to
            ``"conf_matrix.png"``, the path this function has always used.

    Returns:
        ``output_path``, for convenience.
    """
    cm = confusion_matrix(y_true, y_pred)
    # An explicit figure/axes pair avoids depending on pyplot's implicit
    # "current figure", and lets us close exactly the figure we created.
    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=["No", "Yes"], yticklabels=["No", "Yes"], ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title(f"{model_name} - Confusion Matrix")
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    return output_path

def plot_roc_curve(model, X_test, y_test, model_name, output_path="roc_curve.png"):
    """Plot the ROC curve of ``model`` on the given data and save it as an image.

    Args:
        model: Fitted classifier exposing ``predict_proba``; column 1 of its
            output is used as the score.
        X_test: Feature matrix passed to ``model.predict_proba``.
        y_test: True labels aligned with ``X_test``.
        model_name: Name shown in the plot title.
        output_path: Where the image is written. Defaults to
            ``"roc_curve.png"``, the path this function has always used.

    Returns:
        ``output_path``, for convenience.
    """
    y_probs = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_probs)
    roc_auc = auc(fpr, tpr)
    # An explicit figure/axes pair avoids depending on pyplot's implicit
    # "current figure", and lets us close exactly the figure we created.
    fig, ax = plt.subplots(figsize=(4, 3))
    try:
        ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
        # Diagonal reference line from (0, 0) to (1, 1).
        ax.plot([0, 1], [0, 1], linestyle="--")
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.set_title(f"{model_name} - ROC Curve")
        ax.legend()
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    return output_path

# 9. Logging function
def log_to_csv(inputs, prediction, model, log_path="user_logs.csv"):
    """Append one prediction record to a CSV log file.

    The row written is: every value of ``inputs`` in order, then
    ``prediction``, ``model`` and the current local timestamp.

    Args:
        inputs: Iterable of feature values to record.
        prediction: Prediction label to record.
        model: Name of the model that produced the prediction.
        log_path: CSV file to append to (created if missing). Defaults to
            ``"user_logs.csv"``, the path this function has always used.
    """
    # The explicit encoding keeps the log readable across platforms instead of
    # depending on the locale default; newline='' is what the csv module
    # expects so it can control line endings itself.
    with open(log_path, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([*inputs, prediction, model, datetime.now()])

# 10. Prediction function
def predict_diabetes(Pregnancies, Glucose, BloodPressure, SkinThickness,
                     Insulin, BMI, DiabetesPedigreeFunction, Age,
                     model_choice):
    """Classify one patient record and report test-set diagnostics for the model.

    Args:
        Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI,
        DiabetesPedigreeFunction, Age: Numeric feature values for the patient.
        model_choice: ``"Logistic Regression"`` selects the logistic model;
            any other value selects XGBoost.

    Returns:
        A 4-tuple ``(result, accuracy_text, "conf_matrix.png", "roc_curve.png")``
        where ``result`` is ``"Diabetic"`` or ``"Not Diabetic"`` and
        ``accuracy_text`` is the selected model's accuracy on the held-out
        test split, formatted to two decimals.

    Raises:
        gr.Error: If any numeric input is ``None``.

    Side effects:
        Rewrites ``conf_matrix.png`` and ``roc_curve.png`` and appends one row
        to the CSV log via ``log_to_csv``.
    """
    raw_values = {
        "Pregnancies": Pregnancies,
        "Glucose": Glucose,
        "BloodPressure": BloodPressure,
        "SkinThickness": SkinThickness,
        "Insulin": Insulin,
        "BMI": BMI,
        "DiabetesPedigreeFunction": DiabetesPedigreeFunction,
        "Age": Age,
    }
    # A None value (e.g. a field left empty) would otherwise surface as an
    # opaque TypeError at `BMI * Age`; report which fields are missing instead.
    missing = [name for name, value in raw_values.items() if value is None]
    if missing:
        raise gr.Error("Please provide a value for: " + ", ".join(missing))

    # Same interaction feature that was added to the training data.
    BMI_Age = BMI * Age
    feature_values = [*raw_values.values(), BMI_Age]

    # The scaler was fitted on a DataFrame; handing it a frame with the same
    # column names avoids scikit-learn's "X does not have valid feature names"
    # warning. Fall back to a bare array if the names are unavailable.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None and len(feature_names) == len(feature_values):
        input_data = pd.DataFrame([feature_values], columns=list(feature_names))
    else:
        input_data = np.array([feature_values])
    input_scaled = scaler.transform(input_data)

    if model_choice == "Logistic Regression":
        model, model_name = log_model, "Logistic Regression"
    else:
        model, model_name = xgb_model, "XGBoost"

    prediction = model.predict(input_scaled)

    # Predict on the test split once and reuse the result for both the
    # accuracy figure and the confusion matrix.
    test_predictions = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, test_predictions)
    plot_confusion_matrix(y_test, test_predictions, model_name)
    plot_roc_curve(model, X_test_scaled, y_test, model_name)

    result = "Diabetic" if prediction[0] == 1 else "Not Diabetic"

    # Log the inputs together with the model that actually produced the result.
    log_to_csv(feature_values, result, model_name)

    return result, f"Accuracy: {accuracy:.2f}", "conf_matrix.png", "roc_curve.png"

# 11. Gradio app
# One numeric field per raw feature, listed in the positional order of
# predict_diabetes' parameters; the model selector comes last.
number_labels = [
    "Pregnancies",
    "Glucose",
    "Blood Pressure",
    "Skin Thickness",
    "Insulin",
    "BMI",
    "Diabetes Pedigree Function",
    "Age",
]
inputs = [gr.Number(label=text) for text in number_labels]
inputs.append(gr.Radio(["Logistic Regression", "XGBoost"], label="Choose Model"))

# Output components, in the order of predict_diabetes' return tuple.
outputs = [
    gr.Text(label="Prediction"),
    gr.Text(label="Model Accuracy"),
    gr.Image(label="Confusion Matrix"),
    gr.Image(label="ROC Curve"),
]

demo = gr.Interface(
    fn=predict_diabetes,
    inputs=inputs,
    outputs=outputs,
    title="Diabetes Prediction (Advanced AI Model)",
)
demo.launch()


Parameters: { "use_label_encoder" } are not used.



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ace62ff846fa9fb384.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Colab-only cell: `google.colab` is importable only inside a Colab runtime.
from google.colab import drive

# Directory under which Google Drive is mounted.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.31.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.