In [1]:
# === 1. Imports ===
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import classification_report

# === 2. Load Dataset ===
df = pd.read_csv('sleep_disease_dataset.csv')

# === 3. Basic Preprocessing ===
# Gender -> 0/1; any value other than 'Male'/'Female' becomes NaN.
gender_codes = {'Male': 0, 'Female': 1}
df['Gender'] = df['Gender'].map(gender_codes)

# Replace each string category with its integer code. The codes follow the
# sorted order of the unique values found in the column.
for categorical_col in ('Occupation', 'BMI Category'):
    df[categorical_col] = pd.Categorical(df[categorical_col]).codes

# Keep only the number before the '/' in each blood pressure reading.
df['Blood Pressure'] = [int(parts[0]) for parts in df['Blood Pressure'].str.split('/')]

# === 4. Feature Engineering ===
# Binary flags: 1 when the source column crosses the listed threshold, else 0.
risk_flags = {
    'Is_Overweight': df['BMI Category'] >= 2,
    'Is_Hypertensive': df['Blood Pressure'] > 130,
    'Is_High_Heart_Rate': df['Heart Rate'] > 80,
    'Is_Short_Sleeper': df['Sleep Duration'] < 6.5,
    'Is_Stressed': df['Stress Level'] > 5,
    'Is_Low_Steps': df['Daily Steps'] < 5000,
}
for flag_name, condition in risk_flags.items():
    df[flag_name] = condition.astype(int)

# Interaction flags: 1 only when both component flags are set.
df['Overweight_and_Hypertensive'] = (
    df[['Is_Overweight', 'Is_Hypertensive']].eq(1).all(axis=1).astype(int)
)

df['HeartStress_and_ShortSleep'] = (
    df[['Is_High_Heart_Rate', 'Is_Short_Sleeper']].eq(1).all(axis=1).astype(int)
)

# === 5. Define Features and Target ===
y = df['Sleep Disorder']
X = df.drop(columns=['Sleep Disorder'])

# === 6. Train/Test Split ===
# 80/20 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# === 7. Standard Scaling ===
# Fit on the training split only, then apply the same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === 8. SelectKBest ===
# Keep the k features with the highest f_classif score on the training split.
k = 11
k_best = SelectKBest(score_func=f_classif, k=k).fit(X_train_scaled, y_train)
X_train_kbest = k_best.transform(X_train_scaled)
X_test_kbest = k_best.transform(X_test_scaled)

# === 9. Model - Stacking Classifier ===
# One random forest as the base model, with XGBoost as the final estimator.
# passthrough=False means the final estimator is trained on the base model's
# predictions only, not on the selected features themselves.
rf_base = RandomForestClassifier(n_estimators=200, random_state=42)
base_models = [('rf', rf_base)]

xgb_params = dict(n_estimators=300, learning_rate=0.05, max_depth=5, random_state=42)
final_estimator = XGBClassifier(**xgb_params)

stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=final_estimator,
    passthrough=False,
)

# Train on the selected training features, then predict the held-out split.
y_pred = stacking_model.fit(X_train_kbest, y_train).predict(X_test_kbest)

# Report
print(classification_report(y_test, y_pred))

# === 10. Save Models ===
# Written in this order: scaler, feature selector, classifier.
artifacts = {
    'scaler.pkl': scaler,
    'k_best_selector.pkl': k_best,
    'stacking_classifier_k_best.pkl': stacking_model,
}
for artifact_path, fitted_object in artifacts.items():
    with open(artifact_path, 'wb') as f:
        pickle.dump(fitted_object, f)

print("✅ Model, scaler, and selector saved successfully!")

                       precision    recall  f1-score   support

             Insomnia       1.00      1.00      1.00      2000
           Narcolepsy       0.99      0.99      0.99      2000
          No Disorder       0.99      0.86      0.92      2000
Restless Leg Syndrome       0.96      0.93      0.94      2000
          Sleep Apnea       0.81      0.94      0.87      2000

             accuracy                           0.94     10000
            macro avg       0.95      0.94      0.95     10000
         weighted avg       0.95      0.94      0.95     10000

✅ Model, scaler, and selector saved successfully!


In [3]:
# === 1. Imports ===
import pandas as pd
import numpy as np
import pickle

# === 2. Load saved objects ===
# NOTE: unpickling can execute code embedded in the file, so only load
# artifacts that come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


scaler = _load_pickle('scaler.pkl')
k_best = _load_pickle('k_best_selector.pkl')
stacking_classifier = _load_pickle('stacking_classifier_k_best.pkl')

# === 3. Example Single Input ===
# One raw record in the pre-encoding format that the preprocessing below
# expects: categoricals as strings, blood pressure as 'systolic/diastolic'.
single_input = {
    'Gender': 'Female',
    'Age': 79,
    'Occupation': 'Engineer',
    'Sleep Duration': 10.3,
    'Quality of Sleep': 6,
    'Physical Activity Level': 1,
    'Stress Level': 3,
    'BMI Category': 'Overweight',
    'Blood Pressure': '175/100',
    'Heart Rate': 73,
    'Daily Steps': 5232
}

input_df = pd.DataFrame([single_input])

# === 4. Basic Preprocessing ===
# Gender -> 0/1. A value outside the mapping would become NaN, so reject it
# here with a clear message instead of letting NaN flow into the model.
input_df['Gender'] = input_df['Gender'].map({'Male': 0, 'Female': 1})
if input_df['Gender'].isna().any():
    raise ValueError("Gender must be 'Male' or 'Female'.")

# Occupation and BMI Category must receive the same integer codes they had
# when the model was trained. Encoding the input row on its own
# (pd.Categorical(input_df[col]).codes) maps every value to code 0, because
# the row's single value is the only category pandas sees. The category order
# is therefore rebuilt from the training data and the input is encoded
# against it. (Persisting these category lists at training time would avoid
# re-reading the CSV here.)
categorical_cols = ['Occupation', 'BMI Category']
training_categoricals = pd.read_csv('sleep_disease_dataset.csv', usecols=categorical_cols)
for categorical_col in categorical_cols:
    training_categories = pd.Categorical(training_categoricals[categorical_col]).categories
    encoded = pd.Categorical(input_df[categorical_col], categories=training_categories).codes
    # Code -1 means "not one of the training categories". A missing value also
    # encodes to -1, exactly as it would have during training, so only
    # non-null unknown values are rejected.
    unseen_mask = input_df[categorical_col].notna() & (encoded == -1)
    if unseen_mask.any():
        unseen_values = input_df.loc[unseen_mask, categorical_col].tolist()
        raise ValueError(
            f"Unknown {categorical_col} value(s) {unseen_values}; "
            f"expected one of {list(training_categories)}"
        )
    input_df[categorical_col] = encoded

# Keep only the number before the '/' in the blood pressure reading.
input_df['Blood Pressure'] = input_df['Blood Pressure'].str.split('/').apply(lambda x: int(x[0]))

# === 5. Feature Engineering ===
# Same thresholds as the training cell, so the engineered flags match.
# NOTE(review): `BMI Category >= 2` relies on the sorted order of the training
# categories -- confirm that codes 2 and above are exactly the overweight/obese ones.
input_df['Is_Overweight'] = (input_df['BMI Category'] >= 2).astype(int)
input_df['Is_Hypertensive'] = (input_df['Blood Pressure'] > 130).astype(int)
input_df['Is_High_Heart_Rate'] = (input_df['Heart Rate'] > 80).astype(int)
input_df['Is_Short_Sleeper'] = (input_df['Sleep Duration'] < 6.5).astype(int)
input_df['Is_Stressed'] = (input_df['Stress Level'] > 5).astype(int)
input_df['Is_Low_Steps'] = (input_df['Daily Steps'] < 5000).astype(int)
# Interaction flags: 1 only when both component flags are set.
input_df['Overweight_and_Hypertensive'] = (
    (input_df['Is_Overweight'] == 1) & (input_df['Is_Hypertensive'] == 1)
).astype(int)
input_df['HeartStress_and_ShortSleep'] = (
    (input_df['Is_High_Heart_Rate'] == 1) & (input_df['Is_Short_Sleeper'] == 1)
).astype(int)

# === 6. Prepare final input ===
# Feature columns in the order they are passed to the scaler.
all_features = [
    'Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep',
    'Physical Activity Level', 'Stress Level', 'BMI Category', 'Blood Pressure',
    'Heart Rate', 'Daily Steps',
    'Is_Overweight', 'Is_Hypertensive', 'Is_High_Heart_Rate',
    'Is_Short_Sleeper', 'Is_Stressed', 'Is_Low_Steps',
    'Overweight_and_Hypertensive', 'HeartStress_and_ShortSleep'
]

# A missing feature is an input error. Filling it with 0 would be scaled and
# scored as if it were a real measurement (e.g. Age = 0), so fail loudly.
missing_features = [col for col in all_features if col not in input_df.columns]
if missing_features:
    raise ValueError(f"Input is missing required feature(s): {missing_features}")

# Drop any extra columns and put the features in the expected order.
input_df = input_df[all_features]

# === 7. Scale, Select K Best, Predict ===
# Apply the fitted transforms in the same order used by the training cell.
input_scaled = scaler.transform(input_df)
input_k_best = k_best.transform(input_scaled)

prediction = stacking_classifier.predict(input_k_best)

# === 8. Map prediction to class names ===
# The classifier's predictions are already class names, so this lookup maps
# each name to itself.
class_labels = {
    name: name
    for name in (
        'No Disorder',
        'Sleep Apnea',
        'Insomnia',
        'Narcolepsy',
        'Restless Leg Syndrome',
    )
}
first_label = prediction[0]  # only one row was submitted
predicted_class = class_labels[first_label]

print(
    "✅ Prediction with enhanced model:",
    f"Prediction: {predicted_class}",
    sep="\n",
)

✅ Prediction with enhanced model:
Prediction: Sleep Apnea
