# Install Packages


In [16]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error
!pip install mord
from mord import LogisticIT
from ipywidgets import interact, FloatSlider, IntSlider, Dropdown, Checkbox
from IPython.display import display



# Load Data


In [17]:
# Assuming the file is a CSV as per the instructions
df = pd.read_csv('/NguyenEnergyDataCleaned.csv')

print("First 5 rows of the DataFrame:")
print(df.head())

print("\nDataFrame Information:")
df.info()

First 5 rows of the DataFrame:
   Location  sleep_hours  nap_hours  coffee_intake  exercise_binary  \
0         1          6.0        0.0            2.0                0   
1         1          6.0        0.0            3.0                0   
2         2          6.0        0.0            2.0                1   
3         1          7.5        0.0            1.0                0   
4         1          7.0        0.0            2.0                0   

   emotion_score  energy_level  product_score  hour_of_day  
0              3             4              4           11  
1              2             3              2           15  
2              4             4              4           19  
3              3             3              4           11  
4              2             3              3           15  

DataFrame Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164 entries, 0 to 163
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
--

# Train & Evaluate the Model
Develop and evaluate an ordinal logistic regression model to predict the 'energy_level' from the dataset/

## Preprocess Data

Define features (X) and target (y), apply one-hot encoding to the 'Location' column, scale the numerical features, and split the data into training and testing sets.


In [18]:
# Define target variable (y) and features (X)
y = df['energy_level']
X = df.drop('energy_level', axis=1)

# Apply one-hot encoding to the 'Location' column
X = pd.get_dummies(X, columns=['Location'], drop_first=True)

# Identify numerical columns for scaling (all columns except the one-hot encoded 'Location' columns, which are already handled)
numerical_cols = X.select_dtypes(include=['float64', 'int64']).columns.tolist()

# Exclude any potential boolean columns if drop_first=True creates them and they shouldn't be scaled
# In this case, get_dummies creates new int type columns (0 or 1), which are fine for scaling or can be left if preferred not to scale them.
# For this context, let's assume we scale all numerical columns created, including the dummy variables.

# Initialize and fit StandardScaler
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)
print("First 5 rows of X_train after preprocessing:")
print(X_train.head())

Shape of X_train: (131, 10)
Shape of X_test: (33, 10)
Shape of y_train: (131,)
Shape of y_test: (33,)
First 5 rows of X_train after preprocessing:
    sleep_hours  nap_hours  coffee_intake  exercise_binary  emotion_score  \
84    -0.948459  -0.251454       1.087894        -0.340068      -1.001343   
2     -0.948459  -0.251454       1.087894         2.940588       0.252247   
94     0.878643  -0.251454       1.087894         2.940588      -1.001343   
45     0.269609  -0.251454       1.087894        -0.340068       0.252247   
42     0.269609  -0.251454      -1.331288        -0.340068       0.252247   

    product_score  hour_of_day  Location_2  Location_3  Location_4  
84       0.228543     1.988363       False       False        True  
2        0.228543     1.237577        True       False       False  
94       0.228543     0.987315        True       False       False  
45      -0.873842     0.987315        True       False       False  
42       1.330929    -1.515307       False   

## Train Ordinal Logistic Regression Model

Train an ordinal logistic regression model using the 'mord.LogisticIT' class with the preprocessed and scaled training data.


In [19]:
# Instantiate the Ordinal Logistic Regression model
# Using max_iter and solver explicitly for better control and to avoid future warnings
ordinal_model = LogisticIT(alpha=0, max_iter=1000) # alpha=0 for logistic regression, 1000 iter as default is often 100

# Train the model
ordinal_model.fit(X_train, y_train)

print("Ordinal Logistic Regression Model trained successfully.")

Ordinal Logistic Regression Model trained successfully.


## Evaluate Model Performance

Evaluate the trained model's performance on the test set using appropriate metrics for ordinal classification.


### Accuracy and Mean Absolute Error

In [20]:
# Make predictions on the test set
y_pred = ordinal_model.predict(X_test)

# Calculate Accuracy Score
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy Score: {accuracy:.4f}")

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.4f}")

Accuracy Score: 0.4242
Mean Absolute Error: 0.6667


### Feature Weights from Ordinal Logistic Regression Model

In [21]:
# Get the feature names from the training data
feature_names = X_train.columns

# Get the coefficients from the trained ordinal model
# LogisticIT model has a coef_ attribute which is a 2D array if there are multiple classes.
# For ordinal regression, it's often a single set of coefficients.
# We'll take the first row if it's a 2D array, assuming it represents the main coefficients.
if ordinal_model.coef_.ndim > 1:
    feature_weights = ordinal_model.coef_[0]
else:
    feature_weights = ordinal_model.coef_

# Create a DataFrame to display feature names and their corresponding weights
feature_importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Weight': feature_weights
})

# Sort by absolute weight to see the most influential features
feature_importance_df['Absolute_Weight'] = abs(feature_importance_df['Weight'])
feature_importance_df = feature_importance_df.sort_values(by='Absolute_Weight', ascending=False).drop(columns='Absolute_Weight')

print("Feature Weights from Ordinal Logistic Regression Model (sorted by absolute weight):")
display(feature_importance_df)

Feature Weights from Ordinal Logistic Regression Model (sorted by absolute weight):


Unnamed: 0,Feature,Weight
9,Location_4,-0.990512
4,emotion_score,0.791849
5,product_score,0.473125
0,sleep_hours,-0.339103
6,hour_of_day,-0.192866
2,coffee_intake,-0.144972
1,nap_hours,0.108063
8,Location_3,-0.05747
3,exercise_binary,0.044051
7,Location_2,0.020966


# User interactive Scripts



## Predict_energy_level function
returns prediction from trained ordinal regression model



In [26]:
def predict_energy_level(sleep_hours, nap_hours, coffee_intake, exercise_binary, emotion_score, product_score, hour_of_day, location):
    # Create a DataFrame from the input features
    input_data = pd.DataFrame([{
        'sleep_hours': sleep_hours,
        'nap_hours': nap_hours,
        'coffee_intake': coffee_intake,
        'exercise_binary': exercise_binary,
        'emotion_score': emotion_score,
        'product_score': product_score,
        'hour_of_day': hour_of_day,
        'Location': location
    }])

    # Apply one-hot encoding to 'Location' column, matching the training data's drop_first=True and dtype=int
    input_encoded = pd.get_dummies(input_data, columns=['Location'], drop_first=True, dtype=int)

    # Align columns of input_encoded with the columns that the model was trained on (X_train.columns)
    # X_train.columns correctly represents the feature set after one-hot encoding and before scaling,
    # including all dummy variables that could possibly exist. We use reindex to ensure all columns
    # are present and in the correct order, filling any missing (e.g., location dummies not in this input) with 0.
    aligned_input_df = input_encoded.reindex(columns=X_train.columns, fill_value=0)

    # Scale the numerical features using the *fitted* scaler.
    # 'numerical_cols' list already correctly contains all columns that were scaled in X_train.
    aligned_input_df[numerical_cols] = scaler.transform(aligned_input_df[numerical_cols])

    # Predict the energy level
    prediction = ordinal_model.predict(aligned_input_df)

    return prediction[0] # return energy level prediction

## Interactive Energy Level Prediction

Use the interactive widgets below to input different feature values and predict the energy level using the trained model.

In [30]:
def interactive_predict(sleep_hours, nap_hours, coffee_intake, exercise_binary, emotion_score, product_score, hour_of_day, location):
    predicted_level = predict_energy_level(
        sleep_hours, nap_hours, coffee_intake,
        exercise_binary, emotion_score, product_score,
        hour_of_day, location
    )
    print(f"Predicted Energy Level: {int(predicted_level)}")

# Get unique locations from the original dataframe for the dropdown
locations = sorted(df['Location'].unique().tolist())

# Create interactive widgets
interact(interactive_predict,
    sleep_hours=FloatSlider(min=4.0, max=12.0, step=0.5, value=7.0, description='Sleep Hours:'),
    nap_hours=FloatSlider(min=0.0, max=3.0, step=0.1, value=0.0, description='Nap Hours:'),
    coffee_intake=FloatSlider(min=0.0, max=5.0, step=0.5, value=2.0, description='Coffee Intake:'),
    exercise_binary=Checkbox(value=False, description='Exercise (Binary):'),
    emotion_score=IntSlider(min=1, max=5, step=1, value=3, description='Emotion Score:'),
    product_score=IntSlider(min=1, max=5, step=1, value=4, description='Product Score:'),
    hour_of_day=IntSlider(min=0, max=23, step=1, value=14, description='Hour of Day:'),
    location=Dropdown(options=locations, value=locations[0] if locations else 1, description='Location:')
);


interactive(children=(FloatSlider(value=7.0, description='Sleep Hours:', max=12.0, min=4.0, step=0.5), FloatSlâ€¦