<a href="https://colab.research.google.com/github/BASHER81jr/AI-for-Land-Use-Deforestation-Monitoring/blob/main/Diabetes_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_curve, roc_auc_score, confusion_matrix, classification_report
)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

In [4]:
# Load the raw patient table and immediately discard the identifier, the
# admission date and the raw 'Outcome' column; 'Outcome_Binary' (used as the
# target further down) is kept.
df = (
    pd.read_csv("mulago_diabetes_dataset.csv")
    .drop(columns=['Patient_ID', 'Admission_Date', 'Outcome'])
)

FileNotFoundError: [Errno 2] No such file or directory: 'mulago_diabetes_dataset.csv'

In [None]:
# Preview the first five rows of the trimmed table.
df.head(n=5)


In [None]:
# Integer-encode every object-dtype column in place.
# The fitted encoders are kept (keyed by column name) so the integer codes can
# be mapped back to the original categories later with
# `label_encoders[col].inverse_transform(...)`; previously they were discarded.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    label_encoders[col] = encoder

In [None]:
# Correlation of every numeric column with the target, strongest positive first.
target_corr = df.corr(numeric_only=True)["Outcome_Binary"]
print(target_corr.sort_values(ascending=False))

In [None]:
# Target vector: the binary outcome column.
y = df["Outcome_Binary"]
# Feature matrix: every remaining column.
X = df.drop(columns="Outcome_Binary")


In [None]:
# Hold out 20% of the rows for testing with a fixed seed.
# `stratify=y` keeps the proportion of each outcome class the same in the
# train and test partitions, so the reported metrics are not skewed by an
# unlucky draw on an imbalanced target.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)


In [None]:
# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Random forest baseline: 100 trees, fixed seed so the fit is repeatable.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
# Hard class predictions for the held-out rows.
y_pred = model.predict(X_test)

# Per-class probabilities; the second column is kept as the score that feeds
# the ROC / AUC computations below.
class_probs = model.predict_proba(X_test)
y_probs = class_probs[:, 1]


In [None]:
# Perturb two clinical measurements with Gaussian noise, then clamp them back
# into a plausible range.
#
# A dedicated, seeded generator is used so the perturbation is identical on
# every "Restart & Run All"; the previous version drew from the unseeded
# global NumPy RNG and produced different data on each run.
#
# NOTE(review): this cell runs after X / y were extracted from `df` and after
# the random forest was fitted, so the noise does not reach training or
# evaluation as the notebook is currently ordered -- confirm whether it was
# meant to run before the feature/target split.
noise_rng = np.random.default_rng(42)

# Add noise to Glucose_Level (std. dev. 20) and clip to [70, 300]
glucose_noise = noise_rng.normal(0, 20, size=len(df))
df["Glucose_Level"] = (df["Glucose_Level"] + glucose_noise).clip(70, 300)

# Add noise to HbA1c (std. dev. 1.0) and clip to [4.0, 13.0]
hba1c_noise = noise_rng.normal(0, 1.0, size=len(df))
df["HbA1c"] = (df["HbA1c"] + hba1c_noise).clip(4.0, 13.0)

In [None]:
# Summary metrics for the classifier on the held-out split.
# Threshold-based scores use the hard predictions (`y_pred`); AUC uses the
# positive-class probabilities (`y_probs`).
# All scores share the same two-decimal format; the accuracy line previously
# carried a stray thousands separator in its format spec (`:,.2f`).
print("📈 Evaluation Metrics:")
print(f"Accuracy :  {accuracy_score(y_test, y_pred):.2f}")
print(f"Precision:  {precision_score(y_test, y_pred):.2f}")
print(f"Recall   :  {recall_score(y_test, y_pred):.2f}")
print(f"F1 Score :  {f1_score(y_test, y_pred):.2f}")
print(f"AUC Score:  {roc_auc_score(y_test, y_probs):.2f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

In [None]:
# Confusion matrix of actual vs. predicted classes, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

# The same tick labels are used on both axes.
class_labels = ["Critical (0)", "Uncritical (1)"]
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
heatmap_ax.set_title("Confusion Matrix")
heatmap_ax.set_xlabel("Predicted")
heatmap_ax.set_ylabel("Actual")
plt.show()

In [None]:
# ROC curve for the positive-class probabilities, with the diagonal drawn as a
# dashed reference line.
auc_score = roc_auc_score(y_test, y_probs)
fpr, tpr, _ = roc_curve(y_test, y_probs)

roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
roc_ax.plot(fpr, tpr, label=f"ROC Curve (AUC = {auc_score:.2f})", color="darkorange")
roc_ax.plot([0, 1], [0, 1], linestyle="--", color="navy")
roc_ax.set_title("ROC Curve")
roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")
roc_ax.legend(loc="lower right")
roc_ax.grid(True)
plt.show()

In [5]:
# Small feed-forward network: two ReLU hidden layers (32 and 16 units) and a
# single sigmoid output unit for binary classification.
# Note: this rebinds `model`, which previously held the random forest.
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # For binary classification

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=BinaryCrossentropy(),
    metrics=['accuracy'],
)

# Train for 50 epochs and keep the per-epoch history for plotting.
# NOTE(review): the test split is passed as validation data, so the
# "validation" curves are computed on the same rows used for final testing.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    verbose=1,
)


NameError: name 'X_train' is not defined

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
# Each entry: (train key, validation key, train label, validation label,
#              panel title, y-axis label)
panels = [
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
     'Model Accuracy Over Epochs', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss',
     'Model Loss Over Epochs', 'Loss'),
]

history_fig, history_axes = plt.subplots(1, 2, figsize=(12, 5))
for panel_ax, (train_key, val_key, train_label, val_label, title, ylabel) in zip(history_axes, panels):
    panel_ax.plot(history.history[train_key], label=train_label, marker='o')
    panel_ax.plot(history.history[val_key], label=val_label, marker='o')
    panel_ax.set_title(title)
    panel_ax.set_xlabel('Epoch')
    panel_ax.set_ylabel(ylabel)
    panel_ax.legend()
    panel_ax.grid(True)

history_fig.tight_layout()
plt.show()

In [None]:

# Number of patients per outcome class, ordered by class value
outcome_counts = df["Outcome_Binary"].value_counts().sort_index()

# Bar chart using the raw class values (as strings) on the x-axis
dist_fig, dist_ax = plt.subplots(figsize=(6, 4))
dist_ax.bar(
    outcome_counts.index.astype(str),
    outcome_counts.values,
    color=["orange", "blue"],
)
dist_ax.set_title("Outcome_Binary Distribution")
dist_ax.set_xlabel("Outcome (0 or 1)")
dist_ax.set_ylabel("Number of Patients")
dist_ax.grid(axis="y", linestyle="--", alpha=0.6)
dist_fig.tight_layout()
plt.show()