## Model

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix
import ipywidgets as widgets
from IPython.display import display

# Load the dataset
file = "Sleep_health_and_lifestyle_dataset.csv"
df = pd.read_csv(file)  # whole CSV as a DataFrame

# Preprocessing
# Fold the "Normal Weight" label into "Normal" so both spellings share one category
df['BMI Category'] = df['BMI Category'].replace("Normal Weight", "Normal")

# Missing sleep-disorder entries are labelled with the string 'None'
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')

# Binary 0/1 target: 1 for any recorded disorder, 0 for 'None'
df['Has Sleep DIsorder'] = df['Sleep Disorder'].ne('None').astype('int64')

# "systolic/diastolic" strings -> two integer columns
bp_parts = df['Blood Pressure'].str.split('/', expand=True).astype(int)
df[['Systolic_BP', 'Diastolic_BP']] = bp_parts

# Set Dependent and Independent Variables
# Remove the identifier, both encodings of the target, and the raw blood-pressure
# string (its two components are kept as Systolic_BP / Diastolic_BP).
X = df.drop(columns=['Person ID', 'Sleep Disorder', 'Has Sleep DIsorder', 'Blood Pressure']) # independent
y = df['Has Sleep DIsorder'] # dependent

# Identify categorical and numerical features
# Partition on "numeric vs. everything else" rather than listing exact dtypes, so
# every column of X lands in exactly one group whatever its integer width or
# string dtype; nothing is left to fall through to the transformer's remainder.
numerical_features = X.select_dtypes(include='number').columns
categorical_features = X.select_dtypes(exclude='number').columns

print(f"Categorical features: {list(categorical_features)}")
print(f"Numerical features: {list(numerical_features)}")

# Per-type preprocessing: standardise numeric columns, one-hot encode categorical ones
numerical_transformer = StandardScaler()
# handle_unknown='ignore': a category not seen during fit does not raise at predict time
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# Route each feature group to its transformer
column_steps = [
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(
    transformers=column_steps,
    remainder='passthrough',  # any column in neither group is passed on unchanged
)

# Full pipeline: preprocessing followed by a logistic regression classifier
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
]
model = Pipeline(steps=pipeline_steps)

# Split data into training and testing sets (fixed seed so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.333, random_state=41)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)
# predict_proba returns one column per class; with the 0/1 target, column 1 is the
# probability of an individual having a sleep disorder
y_pred_proba = model.predict_proba(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
# For a binary target roc_auc_score needs a 1-D score for the positive class,
# so pass only column 1 rather than the full two-column array
roc_auc = roc_auc_score(y_test, y_pred_proba[:, 1])

print("\nModel Evaluation:")
print(f"Accuracy: {accuracy:.2%}")
print(f"ROC AUC Score: {roc_auc:.2%}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Categorical features: ['Gender', 'Occupation', 'BMI Category']
Numerical features: ['Age', 'Sleep Duration', 'Quality of Sleep', 'Physical Activity Level', 'Stress Level', 'Heart Rate', 'Daily Steps', 'Systolic_BP', 'Diastolic_BP']

Model Evaluation:
Accuracy: 94.40%

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.93      0.95        76
           1       0.90      0.96      0.93        49

    accuracy                           0.94       125
   macro avg       0.94      0.95      0.94       125
weighted avg       0.95      0.94      0.94       125


Confusion Matrix:
[[71  5]
 [ 2 47]]


In [None]:
def _dropdown_options(column):
    """Return the sorted distinct values of ``df[column]`` followed by "Not Listed"."""
    return sorted(df[column].unique()) + ["Not Listed"]


# Dropdown choices come from the values present in the dataframe
occupations = _dropdown_options('Occupation')
genders = _dropdown_options('Gender')
bmi_categories = _dropdown_options('BMI Category')

style = {'description_width': 'initial'}


def _int_field(label, default, low, high, step=1):
    """Build a bounded integer input that uses the shared description style."""
    return widgets.BoundedIntText(
        value=default, min=low, max=high, step=step, description=label, style=style
    )


# Create UI widgets
gender_widget = widgets.Dropdown(options=genders, description='Gender:', style=style)
age_widget = _int_field('Age:', 30, 1, 120)
occupation_widget = widgets.Dropdown(options=occupations, description='Occupation:', style=style)
sleep_duration_widget = widgets.BoundedFloatText(
    value=7.0,
    min=1.0,
    max=16.0,
    step=0.1,
    description='Sleep Duration (hrs):',
    style=style,
)
quality_sleep_widget = _int_field('Quality of Sleep (1-10):', 7, 1, 10)
physical_activity_widget = _int_field('Physical Activity (min/day):', 60, 0, 300, step=5)
stress_level_widget = _int_field('Stress Level (1-10):', 5, 1, 10)
bmi_category_widget = widgets.Dropdown(options=bmi_categories, description='BMI Category:', style=style)
heart_rate_widget = _int_field('Heart Rate (bpm):', 70, 40, 120)
daily_steps_widget = _int_field('Daily Steps:', 8000, 0, 40000, step=100)
systolic_bp_widget = _int_field('Systolic BP:', 120, 80, 200)
diastolic_bp_widget = _int_field('Diastolic BP:', 80, 50, 130)

# Trigger button and the area that receives the prediction printout
predict_button = widgets.Button(description="Submit")
output_area = widgets.Output()

# Function to handle prediction when button is clicked
def on_submit_button_clicked(b):
    """Predict the sleep-disorder probability for the values currently in the widgets.

    Reads every input widget, builds a one-row DataFrame laid out like the
    training features ``X``, runs ``model.predict_proba`` on it, and prints the
    inputs and the positive-class probability into ``output_area``. Any
    exception is caught and printed (with traceback) in the same output area.

    Parameters
    ----------
    b
        The argument supplied by the click callback; not used.
    """
    with output_area:
        output_area.clear_output()
        try:
            # Collect data from widgets
            input_data = {
                'Gender': gender_widget.value,
                'Age': age_widget.value,
                'Occupation': occupation_widget.value,
                'Sleep Duration': sleep_duration_widget.value,
                'Quality of Sleep': quality_sleep_widget.value,
                'Physical Activity Level': physical_activity_widget.value,
                'Stress Level': stress_level_widget.value,
                'BMI Category': bmi_category_widget.value,
                'Heart Rate': heart_rate_widget.value,
                'Daily Steps': daily_steps_widget.value,
                'Systolic_BP': systolic_bp_widget.value,
                'Diastolic_BP': diastolic_bp_widget.value
            }

            # Build the row with exactly the columns the model was trained on (X),
            # not df.columns: df also holds the ID, the target columns and the raw
            # 'Blood Pressure' string, which would all appear here as NaN.
            input_df = pd.DataFrame([input_data], columns=X.columns)

            # Make prediction
            probability = model.predict_proba(input_df)

            print("--- Prediction Results ---")
            print("Input Data:")
            for key, val in input_data.items():
                print(f"  {key}: {val}")
            # Column 1 is the probability of class 1 (has a sleep disorder)
            print(f"Probability of having a Sleep Disorder: {probability[0, 1]:.2%}")

        except Exception as e:
            # Show the failure inside the output widget instead of losing it
            print(f"An error occurred: {e}")
            import traceback
            traceback.print_exc()


# Run the prediction handler whenever the button is clicked
predict_button.on_click(on_submit_button_clicked)

# Layout: the twelve input fields first, then the button, then the output area
form_fields = [
    gender_widget,
    age_widget,
    occupation_widget,
    sleep_duration_widget,
    quality_sleep_widget,
    physical_activity_widget,
    stress_level_widget,
    bmi_category_widget,
    heart_rate_widget,
    daily_steps_widget,
    systolic_bp_widget,
    diastolic_bp_widget,
]
input_widgets = form_fields + [predict_button, output_area]

# Stack everything vertically
ui_layout = widgets.VBox(children=input_widgets)

# Display the UI
print("Please input your data below and click 'Submit':")
display(ui_layout)

Please input your data below and click 'Submit':


VBox(children=(Dropdown(description='Gender:', options=('Female', 'Male', 'Not Listed'), style=DescriptionStyl…