# Setup

## Load Libraries


In [3]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_absolute_error
import ipywidgets as widgets
from IPython.display import display, HTML
import pandas as pd

## Load Dataset

In [4]:
# Read the cleaned energy dataset from CSV and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='/NguyenEnergyDataCleaned.csv')
print("First 5 rows of the DataFrame:", df.head(), sep="\n")

# Report the (rows, columns) dimensions of the loaded frame.
print("\nShape of the DataFrame:", df.shape, sep="\n")

First 5 rows of the DataFrame:
   Location  sleep_hours  nap_hours  coffee_intake  exercise_binary  \
0         1          6.0        0.0            2.0                0   
1         1          6.0        0.0            3.0                0   
2         2          6.0        0.0            2.0                1   
3         1          7.5        0.0            1.0                0   
4         1          7.0        0.0            2.0                0   

   emotion_score  energy_level  product_score  hour_of_day  
0              3             4              4           11  
1              2             3              2           15  
2              4             4              4           19  
3              3             3              4           11  
4              2             3              3           15  

Shape of the DataFrame:
(164, 9)


# Train and Evaluate the Model
Train and evaluate a Decision Tree Classifier to predict 'energy_level' using the dataset.

## Pre-Process Data
1. Separate features (X) from the target variable (y).
2. Apply one-hot encoding to categorical features.
3. Split the data into training and testing sets.


In [5]:
# Step 1: 'energy_level' is the prediction target; every other column is a feature.
y = df['energy_level']
X = df.drop(columns='energy_level')

print("Original features (X) shape:", X.shape)
print("Target (y) shape:", y.shape)

# Step 2: expand the categorical 'Location' column into indicator columns.
# drop_first=True omits the indicator for the first category, so that category
# is represented by all remaining Location_* indicators being False.
X = pd.get_dummies(X, columns=['Location'], drop_first=True)

print("\nFeatures (X) shape after one-hot encoding:", X.shape)
print("Features (X) head after one-hot encoding:", X.head(), sep="\n")

# Step 3: hold out 20% of the rows for testing; the fixed random_state makes
# the shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("\nShape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)


Original features (X) shape: (164, 8)
Target (y) shape: (164,)

Features (X) shape after one-hot encoding: (164, 10)
Features (X) head after one-hot encoding:
   sleep_hours  nap_hours  coffee_intake  exercise_binary  emotion_score  \
0          6.0        0.0            2.0                0              3   
1          6.0        0.0            3.0                0              2   
2          6.0        0.0            2.0                1              4   
3          7.5        0.0            1.0                0              3   
4          7.0        0.0            2.0                0              2   

   product_score  hour_of_day  Location_2  Location_3  Location_4  
0              4           11       False       False       False  
1              2           15       False       False       False  
2              4           19        True       False       False  
3              4           11       False       False       False  
4              3           15       False       False       False  

## Train Decision Tree Classifier

In [6]:
# Build the decision tree and train it on the training split in one step.
# fit() returns the fitted estimator itself, so the result can be bound directly.
# random_state=42 keeps the tree construction repeatable across runs.
dtc_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

print("Decision Tree Classifier model trained successfully.")

Decision Tree Classifier model trained successfully.


## Evaluate Model Performance


### Classification Report
Make predictions on the test set and evaluate the model's performance.


In [7]:
# Predict an energy level for every row of the held-out test features.
y_pred = dtc_model.predict(X_test)

# Overall accuracy: the fraction of test rows whose prediction matches the label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

# Per-class precision / recall / F1. zero_division=0 reports 0 (instead of
# emitting a warning) for classes where a metric's denominator is zero.
print(
    "\nClassification Report:",
    classification_report(y_true=y_test, y_pred=y_pred, zero_division=0),
    sep="\n",
)

# Confusion matrix: rows are actual classes, columns are predicted classes.
print(
    "\nConfusion Matrix:",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
    sep="\n",
)

Model Accuracy: 0.4545

Classification Report:
              precision    recall  f1-score   support

           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         2
           3       0.29      0.25      0.27         8
           4       0.27      0.38      0.32         8
           5       0.71      0.67      0.69        15

    accuracy                           0.45        33
   macro avg       0.25      0.26      0.25        33
weighted avg       0.46      0.45      0.45        33


Confusion Matrix:
[[ 0  0  0  0  0]
 [ 0  0  1  0  1]
 [ 0  0  2  6  0]
 [ 0  0  2  3  3]
 [ 1  0  2  2 10]]


### Mean Absolute Error

In [8]:
# Average absolute gap between actual and predicted energy levels, with the
# class labels treated as plain numbers.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Absolute Error (MAE): {mae:.4f}")

Mean Absolute Error (MAE): 0.7576


### Feature Importance
Identify the most and least important **features** according to the trained decision tree.

In [9]:
# Read the importance scores from the fitted tree and print the raw array.
# The next cell pairs these values positionally with X.columns.
feature_importances = dtc_model.feature_importances_
print("Feature Importances:\n", feature_importances)

Feature Importances:
 [0.10162459 0.07391052 0.07116339 0.03310427 0.25230554 0.18850613
 0.16915366 0.05923041 0.0100769  0.04092459]


In [10]:
# Pair each feature name with its importance score in a two-column DataFrame,
# then rank the rows from most to least important.
feature_importance_df = (
    pd.DataFrame({'feature': X.columns, 'importance': feature_importances})
    .sort_values(by='importance', ascending=False)
)

print("\nFeature Importances (Sorted):", feature_importance_df, sep="\n")



Feature Importances (Sorted):
           feature  importance
4    emotion_score    0.252306
5    product_score    0.188506
6      hour_of_day    0.169154
0      sleep_hours    0.101625
1        nap_hours    0.073911
2    coffee_intake    0.071163
7       Location_2    0.059230
9       Location_4    0.040925
3  exercise_binary    0.033104
8       Location_3    0.010077


# User Interactive Scripts

### Interactive Model Tester

Use the sliders and selectors below to input feature values and see the predicted `energy_level` from the trained Decision Tree Classifier model.

In [12]:
# Input controls for the interactive model tester: one widget per model feature.
# Slider ranges come from the observed min/max of each column in X, and each
# slider starts at that column's mean (truncated to an int for integer sliders).

# Options common to every slider. continuous_update=False means the value is
# only reported when the user releases the handle, not while dragging.
_shared_slider_options = {
    'continuous_update': False,
    'orientation': 'horizontal',
    'readout': True,
}

# --- Continuous features -----------------------------------------------------
sleep_hours_slider = widgets.FloatSlider(
    value=X['sleep_hours'].mean(),
    min=X['sleep_hours'].min(),
    max=X['sleep_hours'].max(),
    step=0.5,
    description='Sleep Hours:',
    **_shared_slider_options,
    readout_format='.1f',
)

nap_hours_slider = widgets.FloatSlider(
    value=X['nap_hours'].mean(),
    min=X['nap_hours'].min(),
    max=X['nap_hours'].max(),
    step=0.5,
    description='Nap Hours:',
    **_shared_slider_options,
    readout_format='.1f',
)

coffee_intake_slider = widgets.FloatSlider(
    value=X['coffee_intake'].mean(),
    min=X['coffee_intake'].min(),
    max=X['coffee_intake'].max(),
    step=1.0,
    description='Coffee Intake:',
    **_shared_slider_options,
    readout_format='.0f',
)

# --- Binary feature ----------------------------------------------------------
exercise_binary_toggle = widgets.Checkbox(
    value=False,
    description='Exercise (0=No, 1=Yes)',
    disabled=False,
    indent=False
)

# --- Integer-valued features -------------------------------------------------
emotion_score_slider = widgets.IntSlider(
    value=int(X['emotion_score'].mean()),
    min=X['emotion_score'].min(),
    max=X['emotion_score'].max(),
    step=1,
    description='Emotion Score:',
    **_shared_slider_options,
    readout_format='d',
)

product_score_slider = widgets.IntSlider(
    value=int(X['product_score'].mean()),
    min=X['product_score'].min(),
    max=X['product_score'].max(),
    step=1,
    description='Product Score:',
    **_shared_slider_options,
    readout_format='d',
)

hour_of_day_slider = widgets.IntSlider(
    value=int(X['hour_of_day'].mean()),
    min=X['hour_of_day'].min(),
    max=X['hour_of_day'].max(),
    step=1,
    description='Hour of Day:',
    **_shared_slider_options,
    readout_format='d',
)

# --- Categorical feature -----------------------------------------------------
# Each option is a (label shown to the user, numeric location code) pair.
location_dropdown = widgets.Dropdown(
    options=[
        ('Location 1', 1),
        ('Location 2', 2),
        ('Location 3', 3),
        ('Location 4', 4),
    ],
    value=1,
    description='Location:',
)

# Output area that the prediction callback renders its result into.
output = widgets.Output()

In [13]:
def predict_energy_level(
    sleep_hours, nap_hours, coffee_intake, exercise_binary, emotion_score,
    product_score, hour_of_day, location
):
    """Predict the energy level for one set of inputs and show it in ``output``.

    Builds a single-row DataFrame laid out like ``X_train``, runs it through
    ``dtc_model`` and renders the predicted class as HTML inside the ``output``
    widget, replacing whatever was displayed there before.

    Parameters
    ----------
    sleep_hours, nap_hours, coffee_intake : numeric feature values.
    exercise_binary : truthy/falsy flag, stored as 1 or 0.
    emotion_score, product_score, hour_of_day : numeric feature values.
    location : location code; 2, 3 and 4 each switch on their own indicator
        column, any other value leaves all three indicators at 0.

    Returns
    -------
    None. The result is displayed, not returned.
    """
    # Location code -> name of the one-hot indicator column it activates.
    indicator_for_location = {
        2: 'Location_2',
        3: 'Location_3',
        4: 'Location_4',
    }

    with output:
        # wait=True defers clearing until new content arrives, avoiding flicker.
        output.clear_output(wait=True)

        # Start with every location indicator switched off.
        feature_row = {
            'sleep_hours': sleep_hours,
            'nap_hours': nap_hours,
            'coffee_intake': coffee_intake,
            'exercise_binary': int(bool(exercise_binary)),
            'emotion_score': emotion_score,
            'product_score': product_score,
            'hour_of_day': hour_of_day,
            'Location_2': 0, 'Location_3': 0, 'Location_4': 0
        }

        # Switch on the indicator that corresponds to the chosen location, if any.
        active_indicator = indicator_for_location.get(location)
        if active_indicator is not None:
            feature_row[active_indicator] = 1

        # columns=X_train.columns puts the row in the column order used for training.
        input_df = pd.DataFrame([feature_row], columns=X_train.columns)

        # predict() returns one value per input row; take the only one.
        prediction = dtc_model.predict(input_df)[0]

        display(HTML(f"<h3>Predicted Energy Level: <span style=\"color:blue;\">{prediction}</span></h3>"))

# Map each argument of predict_energy_level to the widget that supplies it.
# The insertion order here is the order the controls are passed to interactive().
_widget_for_argument = {
    'sleep_hours': sleep_hours_slider,
    'nap_hours': nap_hours_slider,
    'coffee_intake': coffee_intake_slider,
    'exercise_binary': exercise_binary_toggle,
    'emotion_score': emotion_score_slider,
    'product_score': product_score_slider,
    'hour_of_day': hour_of_day_slider,
    'location': location_dropdown,
}

# Bind the controls to the prediction callback.
interactive_plot = widgets.interactive(predict_energy_level, **_widget_for_argument)

# Show the controls followed by the area the callback writes its result into.
display(interactive_plot, output)

interactive(children=(FloatSlider(value=6.778658536585366, continuous_update=False, description='Sleep Hours:'…

Output()