In [1]:
# ## 06 - Inference: Using the Trained Model for Predictions
#
# **Objective:** Demonstrate how to load the saved, trained LightGBM model and use it
# to predict the cognitive load on new, unseen data. This notebook simulates a real-world
# application where you would use your model's intelligence.
#
# **Input:**
# 1. ../models/lgbm_model.pkl (The saved model payload)
#
# **Output:**
# 1. Printed cognitive-load predictions, with per-class probabilities, for three sample data points.

import pandas as pd
import numpy as np
import joblib
import os

# --- Configuration ---
# Folder that holds the serialized model artifacts (a relative path).
MODELS_DIR = '../models/'
# Location of the saved payload that the loading step reads.
MODEL_FILE = os.path.join(
    MODELS_DIR,
    'lgbm_model.pkl',
)

# --- Load the Trained Model and Scaler ---
# The payload is read as a mapping with a 'model' entry and a 'scaler' entry.
# NOTE: joblib.load unpickles arbitrary Python objects, so only load model
# files that come from a source you trust.
try:
    # Only the load itself can raise FileNotFoundError, so keep the try minimal.
    model_payload = joblib.load(MODEL_FILE)
except FileNotFoundError:
    print(f"Error: Model file not found at {MODEL_FILE}")
    print("Please run the '03_modeling_baseline_classification.ipynb' notebook first to train and save the model.")
    # Raise SystemExit directly rather than calling exit(): exit() is a helper
    # that the site module adds for interactive shells, and called without an
    # argument it ends the process with a success status. A non-zero code
    # reports the failure to whatever launched this code.
    raise SystemExit(1) from None

model = model_payload['model']
scaler = model_payload['scaler']
print("Successfully loaded the trained model and data scaler.")

# --- Simulate New, Unseen Data ---
# A live system would receive these values from a sensor feed after running
# the same feature-engineering steps used for training. For this demo, three
# hypothetical feature rows are written out by hand.

# Example 1: high GSR together with a high Theta/Alpha ratio (likely 'High Load')
sample_high_load = dict(
    EEG_Delta_Mean=0.8,
    EEG_Delta_Var=0.05,
    EEG_Theta_Mean=0.9,
    EEG_Theta_Var=0.04,
    EEG_Alpha_Mean=0.5,
    EEG_Alpha_Var=0.02,
    EEG_Beta_Mean=0.6,
    EEG_Beta_Var=0.01,
    EEG_Gamma_Mean=0.3,
    EEG_Gamma_Var=0.01,
    EEG_Theta_Alpha_Ratio=1.8,  # High ratio
    EEG_Theta_Beta_Ratio=1.5,
    GSR_Mean=25.0,  # High skin conductance
    GSR_Var=5.0,
)

# Example 2: intermediate values across the board (likely 'Medium Load')
sample_medium_load = dict(
    EEG_Delta_Mean=0.6,
    EEG_Delta_Var=0.03,
    EEG_Theta_Mean=0.6,
    EEG_Theta_Var=0.02,
    EEG_Alpha_Mean=0.45,
    EEG_Alpha_Var=0.015,
    EEG_Beta_Mean=0.4,
    EEG_Beta_Var=0.01,
    EEG_Gamma_Mean=0.2,
    EEG_Gamma_Var=0.007,
    EEG_Theta_Alpha_Ratio=1.33,  # Intermediate ratio
    EEG_Theta_Beta_Ratio=1.5,
    GSR_Mean=15.0,  # Intermediate skin conductance
    GSR_Var=2.0,
)

# Example 3: low GSR with balanced EEG bands (likely 'Low Load')
sample_low_load = dict(
    EEG_Delta_Mean=0.4,
    EEG_Delta_Var=0.02,
    EEG_Theta_Mean=0.3,
    EEG_Theta_Var=0.01,
    EEG_Alpha_Mean=0.4,
    EEG_Alpha_Var=0.01,
    EEG_Beta_Mean=0.3,
    EEG_Beta_Var=0.01,
    EEG_Gamma_Mean=0.1,
    EEG_Gamma_Var=0.005,
    EEG_Theta_Alpha_Ratio=0.75,  # Balanced ratio
    EEG_Theta_Beta_Ratio=1.0,
    GSR_Mean=5.0,  # Low skin conductance
    GSR_Var=0.5,
)


# Stack the three sample dicts into a DataFrame (one row per sample) and echo it.
new_data = pd.DataFrame(
    data=[sample_high_load, sample_medium_load, sample_low_load],
)
print("\n--- Sample New Data ---")
print(new_data)

# --- Make Predictions ---
# The saved scaler supplies the feature names; the new data is arranged in
# that column order before it is scaled and passed to the model.

# 1. Ask the saved scaler for its feature names.
model_feature_names = scaler.get_feature_names_out()

# 2. Select the new data's columns in exactly that order.
new_data_reordered = new_data.loc[:, model_feature_names]

# 3. Apply the saved scaler to the reordered frame.
new_data_scaled = scaler.transform(new_data_reordered)

# 4. Predict each sample's class, then the per-class probabilities.
predictions, prediction_probabilities = (
    model.predict(new_data_scaled),
    model.predict_proba(new_data_scaled),
)

# 5. Interpret and display the results.
# Maps the model's integer class codes to readable names.
# NOTE(review): assumes training encoded the target as 0=Low, 1=Medium, 2=High
# -- confirm against the training notebook.
label_map = {0: 'Low Load', 1: 'Medium Load', 2: 'High Load'}
predicted_labels = [label_map[p] for p in predictions]

print("\n--- Model Predictions ---")
for sample_number, (label, probabilities) in enumerate(
    zip(predicted_labels, prediction_probabilities), start=1
):
    print(f"\nSample {sample_number}:")
    print(f"  Predicted Cognitive Load: '{label}'")
    # Print the model's confidence for each class.
    print("  Model Confidence:")
    # predict_proba columns are ordered like model.classes_, so pair each
    # probability with its class code instead of assuming that columns
    # 0/1/2 are always Low/Medium/High.
    for class_code, probability in zip(model.classes_, probabilities):
        # 'Low Load' -> 'Low'; a code missing from label_map is shown as-is.
        short_name = label_map.get(class_code, str(class_code)).replace(' Load', '')
        heading = short_name + ':'
        # Pad the heading to 7 characters so the percentages line up.
        print(f"    - {heading:<7} {probability:.2%}")



Successfully loaded the trained model and data scaler.

--- Sample New Data ---
   EEG_Delta_Mean  EEG_Delta_Var  EEG_Theta_Mean  EEG_Theta_Var  \
0             0.8           0.05             0.9           0.04   
1             0.6           0.03             0.6           0.02   
2             0.4           0.02             0.3           0.01   

   EEG_Alpha_Mean  EEG_Alpha_Var  EEG_Beta_Mean  EEG_Beta_Var  EEG_Gamma_Mean  \
0            0.50          0.020            0.6          0.01             0.3   
1            0.45          0.015            0.4          0.01             0.2   
2            0.40          0.010            0.3          0.01             0.1   

   EEG_Gamma_Var  EEG_Theta_Alpha_Ratio  EEG_Theta_Beta_Ratio  GSR_Mean  \
0          0.010                   1.80                   1.5      25.0   
1          0.007                   1.33                   1.5      15.0   
2          0.005                   0.75                   1.0       5.0   

   GSR_Var  
0      5.0  
1      2.0  
2      0.5  


--- Model Predictions ---

Sample 1:
  Predicted Cognitive Load: 'High Load'
  Model Confidence:
    - Low:    0.39%
    - Medium: 0.90%
    - High:   98.71%

Sample 2:
  Predicted Cognitive Load: 'High Load'
  Model Confidence:
    - Low:    0.34%
    - Medium: 19.58%
    - High:   80.08%

Sample 3:
  Predicted Cognitive Load: 'Low Load'
  Model Confidence:
    - Low:    59.48%
    - Medium: 10.93%
    - High:   29.59%


