In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import os
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [None]:
# Load the lifestyle dataset and derive the binary screening target.
path = os.path.join("Diabetes Dataset", "Diabetes_and_LifeStyle_Dataset.csv")

df = pd.read_csv(path)

# Drop the clinical / lab measurements and the direct diagnosis flag.
# NOTE(review): presumably removed so the user-oriented model relies only on
# self-reported inputs and cannot leak the diagnosis -- confirm.
df = df.drop(columns=[
    'systolic_bp', 'diastolic_bp', 'heart_rate', 'cholesterol_total',
    'hdl_cholesterol', 'ldl_cholesterol', 'triglycerides',
    'glucose_fasting', 'glucose_postprandial', 'insulin_level',
    'hba1c', 'diagnosed_diabetes',
])

# Collapse the detailed stages into two classes: every stage other than
# 'No Diabetes' is treated as 'At Risk'.
mapping = {
    'No Diabetes': 'Non-Diabetic',
    'Type 1': 'At Risk',
    'Type 2': 'At Risk',
    'Pre-Diabetes': 'At Risk',
    'Gestational': 'At Risk'
}

# Series.map() silently converts any label that is not a key of `mapping`
# into NaN, so fail fast here instead of carrying a corrupted target forward.
unmapped = df.loc[~df['diabetes_stage'].isin(list(mapping)), 'diabetes_stage'].unique()
if len(unmapped) > 0:
    raise ValueError(f"Unmapped diabetes_stage values: {list(unmapped)}")

df['diabetes_stage'] = df['diabetes_stage'].map(mapping)

df.head()

In [None]:
# User Oriented Model Features

features = [
    'Age', 'gender', 'ethnicity', 'smoking_status',
    'alcohol_consumption_per_week', 'physical_activity_minutes_per_week',
    'sleep_hours_per_day', 'screen_time_hours_per_day',
    'family_history_diabetes', 'hypertension_history', 'cardiovascular_history',
    'bmi', 'waist_to_hip_ratio', 'diet_score',
]

# Select features and target variable
X = df[features]
y = df['diabetes_stage']

print(y.value_counts())

# Stratified 85/15 split so both sets keep the same class proportions.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)

# Columns that get standardised.
# NOTE(review): 'diet_score' is selected in `features` but is not listed here
# (nor in the categorical list), so the ColumnTransformer below drops it.
# Confirm that excluding it from the model is intended.
numeric_features = [
    'Age',
    'alcohol_consumption_per_week',
    'physical_activity_minutes_per_week',
    'sleep_hours_per_day',
    'screen_time_hours_per_day',
    'bmi',
    'waist_to_hip_ratio',
]

# Columns that get one-hot encoded.
categorical_features = [
    'gender',
    'ethnicity',
    'smoking_status',
    'family_history_diabetes',
    'hypertension_history',
    'cardiovascular_history',
]

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        # Categories unseen during fit are encoded as all zeros instead of raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ],
    # Spell out the default: columns not listed above are discarded.
    remainder='drop',
    # Always return a dense array. With the default threshold the result can
    # become a scipy sparse matrix when the one-hot block is sparse enough,
    # which the Dense network trained on this output is not meant to receive.
    sparse_threshold=0,
)

# Fit on the training split only, then reuse the fitted statistics for the test split.
x_train_processed = preprocessor.fit_transform(x_train)
x_test_processed = preprocessor.transform(x_test)

# LabelEncoder assigns integers in sorted label order, so with the two labels
# produced by the stage mapping: 'At Risk' -> 0 and 'Non-Diabetic' -> 1.
# The labels stay as a single integer column (no one-hot encoding) because the
# model ends in one sigmoid unit.
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_test_encoded = encoder.transform(y_test)

print(y_train_encoded.shape)
print(y_test_encoded.shape)

In [None]:
# Creating the Model

# Class names indexed by their encoded integer label. LabelEncoder orders
# labels alphabetically ('At Risk' = 0, 'Non-Diabetic' = 1), so take the order
# from the fitted encoder rather than hard-coding a list that can disagree.
classes = encoder.classes_.tolist()

def build_patient_model(input_dim=None, dropout_rate=0.3):
    """Build the (uncompiled) patient-side binary classifier.

    The network is a stack of three Dense/ReLU blocks (128, 64 and 32 units),
    each followed by batch normalisation and dropout, ending in a single
    sigmoid unit.

    Args:
        input_dim: Number of input features. When omitted, the width of the
            notebook-level ``x_train_processed`` matrix is used, which is the
            original behaviour.
        dropout_rate: Dropout fraction applied after every hidden block.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    if input_dim is None:
        input_dim = x_train_processed.shape[1]

    model = models.Sequential([
        # Explicit input spec; replaces the `input_shape=` kwarg on the first
        # Dense layer, which newer Keras versions warn against.
        layers.Input(shape=(input_dim,)),

        # Hidden layers
        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),

        layers.Dense(64, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),

        layers.Dense(32, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(dropout_rate),

        # Output layer: one sigmoid unit for the binary target
        layers.Dense(1, activation='sigmoid')
    ])
    return model

# Seed NumPy and TensorFlow before the model is created so that weight
# initialisation, dropout and shuffling are as repeatable as the backend
# allows, matching the seeded train/test split (random_state=42).
np.random.seed(42)
tf.random.set_seed(42)

# Initialize Patient-side Model
patient_model = build_patient_model()

# Binary cross-entropy pairs with the single sigmoid output unit.
patient_model.compile(
    optimizer=Adam(learning_rate=0.005),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Early Stopping
# Configured here but not passed to fit() below, so training always runs the
# full number of epochs.
# NOTE(review): validation_data below is the held-out test split; enabling
# early stopping with restore_best_weights would select weights on test data.
# Carve out a separate validation split before turning it on.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
history = patient_model.fit(
    x_train_processed, y_train_encoded,
    epochs=50,
    verbose=1,
    validation_data=(x_test_processed, y_test_encoded),
    # callbacks=[early_stopping]
)


In [None]:
# Plot the training curves: accuracy on the left panel, loss on the right.

fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
curves = history.history

# Left panel: accuracy per epoch for the training and validation data.
accuracy_ax.plot(curves['accuracy'], label='Train Accuracy')
accuracy_ax.plot(curves['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Patient-side Model Accuracy')
accuracy_ax.set_xlabel('Epochs')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

# Right panel: loss per epoch for the training and validation data.
loss_ax.plot(curves['loss'], label='Train Loss')
loss_ax.plot(curves['val_loss'], label='Validation Loss')
loss_ax.set_title('Patient-side Model Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss Value')
loss_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Save the model and the preprocessing artifacts needed at inference time.
#
# Versioned model saving is delegated to Utils.versionsys.save_model.
# Original task spec for that utility:
#   Format: ModelName_vMajor.Minor.Revisions.h5
#   Version must increment based on the latest version in the Models/
# NOTE(review): the utility's implementation is not visible from this
# notebook -- confirm it follows the spec above.

from pathlib import Path

import joblib

from Utils.versionsys import save_model

model_name = "patient_side_model"
save_path = Path("./Models")

# Create the output directory BEFORE anything is written into it (previously
# it was only created after save_model had already been called).
save_path.mkdir(parents=True, exist_ok=True)

save_model(model=patient_model, model_name=model_name, save_path=save_path, version_increment_type="revision", file_type=".h5")

# The artifacts below are pickles: only load them back from a trusted location.

# Fitted ColumnTransformer (StandardScaler + OneHotEncoder)
joblib.dump(preprocessor, save_path / 'patient_pipeline.pkl')

# Fitted target LabelEncoder, used to translate the encoded 0/1 labels back to
# 'At Risk' / 'Non-Diabetic'
joblib.dump(encoder, save_path / 'patient_target_enc.pkl')

# Feature lists (so the app knows what inputs to ask for)
feature_meta = {
    'num': numeric_features,
    'cat': categorical_features
}
joblib.dump(feature_meta, save_path / 'patient_features.pkl')