In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
from scipy.stats import zscore
from sklearn.cluster import KMeans
from sklearn.preprocessing import RobustScaler
from sklearn.metrics.pairwise import rbf_kernel

import warnings
warnings.filterwarnings("ignore", message="KMeans is known to have a memory leak")

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve, average_precision_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Activation, Dropout, Bidirectional, Multiply, Flatten, Lambda,RepeatVector, Permute 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from scipy.stats import zscore
from sklearn.metrics.pairwise import rbf_kernel
from scipy.signal import resample, butter, filtfilt, welch
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.cluster import KMeans

In [3]:
def load_physiological_data(file_path):
    """Read a headerless, tab-separated physiological recording.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame whose columns are named, in file order: time, ECG, BVP,
        GSR, Resp, Skin_Temp, EMG_z, EMG_c, EMG_t.
    """
    signal_names = "time ECG BVP GSR Resp Skin_Temp EMG_z EMG_c EMG_t".split()
    return pd.read_csv(file_path, names=signal_names, sep="\t")

# Load valence-arousal data
def load_valence_arousal_data(file_path):
    """Read a headerless, tab-separated annotation file.

    Args:
        file_path: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        DataFrame with the columns jsttime, valence and arousal.
    """
    annotation_names = ["jsttime", "valence", "arousal"]
    return pd.read_csv(file_path, names=annotation_names, sep="\t")

In [4]:
def preprocess_data(physio_df, va_df, window_size=5):
    """Downsample both streams and aggregate them into fixed-size windows.

    Args:
        physio_df: Physiological samples with a ``time`` column plus the
            signal columns aggregated below.
        va_df: Annotation samples with a ``jsttime`` column.
        window_size: Window length, in the same units as the time columns.

    Returns:
        (physio_segmented, va_segmented): one row per window each. The
        physiological frame holds per-window means (and std for BVP, GSR and
        skin temperature) plus ``GSR_slope``; the annotation frame holds the
        per-window mean of every annotation column.
    """
    # Keep every 1000th physiological row and every 20th annotation row.
    physio_sub = physio_df.iloc[::1000].copy()
    va_sub = va_df.iloc[::20].copy()

    # Window IDs are counted from the earliest timestamp found in either stream.
    t0 = min(physio_sub["time"].min(), va_sub["jsttime"].min())
    physio_sub["window"] = ((physio_sub["time"] - t0) // window_size).astype(int)
    va_sub["window"] = ((va_sub["jsttime"] - t0) // window_size).astype(int)

    # Statistics computed per window for each signal.
    window_stats = {
        "ECG": "mean",
        "BVP": ["mean", "std"],
        "GSR": ["mean", "std"],
        "Resp": "mean",
        "Skin_Temp": ["mean", "std"],
        "EMG_c": "mean",
        "EMG_t": "mean",
    }
    physio_segmented = physio_sub.groupby("window").agg(window_stats).reset_index()

    # Collapse the (signal, statistic) column pairs into "signal_statistic".
    flat_names = ["window"]
    for signal, stat in physio_segmented.columns[1:]:
        flat_names.append(f"{signal}_{stat}" if stat else signal)
    physio_segmented.columns = flat_names

    # Means take the bare signal name; skin temperature is spelled "Skin_temp".
    final_names = {f"{signal}_mean": signal
                   for signal in ("ECG", "BVP", "GSR", "Resp", "EMG_c", "EMG_t")}
    final_names["Skin_Temp_mean"] = "Skin_temp"
    final_names["Skin_Temp_std"] = "Skin_temp_std"
    physio_segmented = physio_segmented.rename(columns=final_names)

    # Slope is the change in mean GSR from the previous window (0 for the first).
    gsr_delta = physio_segmented["GSR"].diff()
    physio_segmented["GSR_slope"] = gsr_delta.fillna(0)

    # Annotations: plain per-window mean of every column.
    va_segmented = va_sub.groupby("window").mean().reset_index()

    return physio_segmented, va_segmented

In [5]:
def compute_rulsif_change_scores(X, alpha=0.1, sigma=0.1, lambda_param=1e-3):
    """Score the change between each pair of consecutive rows of ``X``.

    For rows ``i`` and ``i + 1`` the values inside each row are treated as
    1-D sample points: an RBF kernel matrix is built per row, a regularised
    linear system is solved for the weights ``theta``, and the score is the
    mean squared deviation of ``K_next @ theta`` from 1.

    Args:
        X: Sequence of equally sized 1-D NumPy rows (e.g. a 2-D array).
        alpha: Mixing weight between the two kernel matrices.
        sigma: RBF kernel width; ``gamma = 1 / (2 * sigma**2)``.
        lambda_param: Ridge term added to the diagonal of the system matrix.

    Returns:
        1-D float array of length ``max(len(X) - 1, 0)``; entry ``i`` is the
        score for the transition from row ``i`` to row ``i + 1``.
    """
    # Fewer than two rows means there is no transition to score; without the
    # clamp an empty input would request a negative-length array and crash.
    n = max(len(X) - 1, 0)
    change_scores = np.zeros(n)

    # The kernel width does not depend on the row, so compute it once.
    gamma = 1 / (2 * sigma ** 2)

    for i in range(n):
        X_t, X_t_next = X[i], X[i + 1]

        # Gaussian kernel matrices over the values of each row.
        K_t = rbf_kernel(X_t.reshape(-1, 1), X_t.reshape(-1, 1), gamma=gamma)
        K_t_next = rbf_kernel(X_t_next.reshape(-1, 1), X_t_next.reshape(-1, 1), gamma=gamma)

        # Regularised least-squares fit of the importance weights.
        H = alpha * K_t + (1 - alpha) * K_t_next + lambda_param * np.eye(K_t.shape[0])
        h = np.mean(K_t, axis=1)

        theta = np.linalg.solve(H, h)

        # Change score: how far the weighted next-row kernel is from 1.
        change_scores[i] = np.mean(np.square(K_t_next.dot(theta) - 1))

    return change_scores

# Identify significant changes and label opportune moments
def label_opportune_moments(change_scores):
    """Return the positions in ``change_scores`` flagged as opportune.

    A position is flagged when its score is either
      * above mean + 3 * std of all scores, or
      * among the remaining scores, assigned to the higher of two K-means
        clusters and above that cluster's centroid.

    Args:
        change_scores: 1-D NumPy array of scores.

    Returns:
        1-D integer array of flagged positions, in ascending order.
    """
    cutoff = np.mean(change_scores) + 3 * np.std(change_scores)
    is_outlier = change_scores > cutoff

    # Only the non-outlier scores take part in the clustering step.
    inlier_idx = np.nonzero(~is_outlier)[0]
    inlier_scores = change_scores[inlier_idx]

    # Defaults to "nothing flagged" when there are too few points to cluster.
    is_high = np.zeros_like(change_scores, dtype=bool)
    if len(inlier_scores) > 1:
        km = KMeans(n_clusters=2, random_state=42).fit(inlier_scores.reshape(-1, 1))
        centers = km.cluster_centers_.flatten()
        top = np.argmax(centers)
        # Scatter the per-inlier decision back to the original positions.
        is_high[inlier_idx] = (km.labels_ == top) & (inlier_scores > centers[top])

    return np.where(is_outlier | is_high)[0]

In [6]:
# Align valence-arousal data with physiological data windows
def align_data(physiological_data, valence_arousal_data, window_size=5):
    """Inner-join annotations onto physiological windows.

    Annotation timestamps are converted to window indices relative to the
    earliest physiological ``time`` and the two frames are merged on
    ``window``.

    Args:
        physiological_data: Frame with ``time`` and ``window`` columns.
        valence_arousal_data: Frame with a ``jsttime`` column. It is not
            modified; the window index is computed on a derived frame.
        window_size: Window length, in the same units as the time columns.

    Returns:
        Merged DataFrame containing only windows present in both inputs.
    """
    start_time = physiological_data["time"].min()

    # Use assign() so the caller's annotation frame is left untouched.
    va_windowed = valence_arousal_data.assign(
        window=((valence_arousal_data["jsttime"] - start_time) // window_size).astype(int)
    )

    return pd.merge(physiological_data, va_windowed, on="window", how="inner")

def scale_valence_arousal(df):
    """Return a copy of ``df`` with valence and arousal divided by their peak.

    Each of the two columns is divided by its largest absolute value. A
    column whose peak is exactly 0 is set to 0.0 instead. The input frame is
    not modified.
    """
    scaled = df.copy()
    for dim in ("valence", "arousal"):
        peak = np.max(np.abs(scaled[dim]))
        if peak != 0:
            scaled[dim] = scaled[dim] / peak
        else:
            scaled[dim] = 0.0
    return scaled

# Map valence-arousal to emotion classes
def map_to_emotion_classes(valence, arousal):
    """Map a (valence, arousal) pair to one of four emotion labels.

    Non-negative valence gives "Happy" (arousal >= 0) or "Relaxed"
    (arousal < 0); negative valence with non-negative arousal gives
    "Stressed". Every other combination, including values that compare false
    against 0 such as NaN, gives "Sad".
    """
    if valence >= 0:
        if arousal >= 0:
            return "Happy"
        if arousal < 0:
            return "Relaxed"
    elif valence < 0 and arousal >= 0:
        return "Stressed"
    return "Sad"

In [6]:
import joblib
import os

def load_student_model(user_id, model_name="random_forest"):
    """
    Load the pickled student model stored for one user.

    The file is looked up at
    ``../dump/final_models/models_distilled/<model_name>/<UserX>.pkl``.

    Args:
        user_id (str or int): Either 'User1' or just 1; an int is expanded
            to the 'UserX' form.
        model_name (str): Sub-directory name, e.g. "random_forest",
            "xgboost", or "svm".

    Returns:
        The object returned by ``joblib.load`` for that file.

    Raises:
        FileNotFoundError: If no file exists at the computed path.
    """
    user_tag = f"User{user_id}" if isinstance(user_id, int) else user_id
    model_path = os.path.join("../dump/final_models/models_distilled", model_name, f"{user_tag}.pkl")

    if os.path.exists(model_path):
        return joblib.load(model_path)

    raise FileNotFoundError(f"Model not found at: {model_path}")


In [7]:

def elliott(x, p):
    """Elliott-style squashing of ``x`` scaled by ``p``: ``p * x / (1 + |x|)``."""
    scaled_input = p * x
    damping = 1 + K.abs(x)
    return scaled_input / damping

# Derivative of PEF
def elliott_derivative(x, p):
    """Return ``p / (|x| + 1) ** 2``, the slope of ``p * x / (1 + |x|)``."""
    damping = K.abs(x) + 1
    return p / (damping ** 2)

# Swish Activation Function
def swish(x):
    """Swish activation: the input multiplied by its own sigmoid."""
    gate = K.sigmoid(x)
    return x * gate

# Custom PEF Activation Layer
class PEFLayer(Activation):
    """Activation layer that applies ``elliott`` with a layer-owned scale ``p``.

    The ``activation`` argument is forwarded to the ``Activation`` base class,
    but ``call`` below does not use it: the forward pass is always
    ``elliott(inputs, self.p)``.
    """

    def __init__(self, activation, **kwargs):
        """Initialise the base ``Activation`` layer and create ``p``.

        Args:
            activation: Passed through unchanged to ``Activation.__init__``.
            **kwargs: Extra keyword arguments for ``Activation.__init__``.
        """
        super(PEFLayer, self).__init__(activation, **kwargs)
        self.p = K.variable(1.0)  # Backend variable holding p, starting at 1.0

    def call(self, inputs):
        """Return ``elliott(inputs, self.p)`` for the given input tensor."""
        return elliott(inputs, self.p)

In [8]:
from tensorflow.keras.models import load_model
import os

def load_student_model(user_id):
    """Load the saved Keras student model for one user.

    The model directory is
    ``../saved_models_emotion_personalized/user_<UserX>/student_model``; an
    integer ``user_id`` is expanded to the 'UserX' form first. The model is
    loaded uncompiled, with ``PEFLayer``, ``elliott`` and ``swish`` registered
    as custom objects.

    Raises:
        FileNotFoundError: If the model directory does not exist.
    """
    user_tag = f"User{user_id}" if isinstance(user_id, int) else user_id

    #model_path = os.path.join("../dump", "final_models", "saved_models_emotion_personalized", f"user_{user_id}", "student_model")
    model_path = os.path.join("../","saved_models_emotion_personalized", f"user_{user_tag}", "student_model")

    if os.path.exists(model_path):
        # Names the saved model may refer to that Keras cannot resolve itself.
        keras_custom_objects = {
            'PEFLayer': PEFLayer,
            'elliott': elliott,
            'swish': swish
        }
        # compile=False: the model is only used for inference here.
        return load_model(model_path, custom_objects=keras_custom_objects, compile=False)

    raise FileNotFoundError(f"Model not found at: {model_path}")


In [7]:
def train_and_evaluate_classifier(X_train, X_test, y_train, y_test):
    """Fit a random forest on the training split and score it on the test split.

    Prints a per-class classification report for the test predictions.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.

    Returns:
        (accuracy, f1): accuracy and weighted-average F1 on the test split.
    """
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Predict on test set
    y_pred = model.predict(X_test)

    # Evaluate performance
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")

    # Print the header first so it labels the table that follows it.
    print("Classification Report:")
    print(classification_report(y_test, y_pred, zero_division=0))

    return accuracy, f1

# Load and Test Models

In [None]:
if __name__ == "__main__":
    # Per-user pipeline: label opportune windows from change scores, let the
    # saved student model pick opportune windows from the training portion,
    # train an emotion classifier on those picks and test it on held-out
    # opportune windows.

    # Create the output directory up front so a missing folder cannot discard
    # the results after every user has been processed.
    os.makedirs("baselines", exist_ok=True)
    results = []

    opportune_features = ['BVP', 'BVP_std', 'GSR', 'GSR_std', 'GSR_slope', 'Skin_temp', 'Skin_temp_std']
    model_features = ["ECG", "EMG_c", "EMG_t", "Resp", "Skin_temp"]
    change_score_features = ['BVP', 'GSR', 'Skin_temp']
    student_features = opportune_features
    threshold = 0.5  # probability cut-off applied to the student model output

    for user_id in range(1, 31):
        try:
            physio_data = load_physiological_data(
                f"../case_dataset-master/case_dataset-master/data/raw/physiological/sub{user_id}_DAQ.txt")
            va_data = load_valence_arousal_data(
                f"../case_dataset-master/case_dataset-master/data/raw/annotations/sub{user_id}_joystick.txt")

            physio_segmented, va_segmented = preprocess_data(physio_data, va_data)
            va_segmented = scale_valence_arousal(va_segmented)
            merged_data = pd.merge(physio_segmented, va_segmented, on="window", how="inner")

            merged_data["emotion"] = merged_data.apply(
                lambda row: map_to_emotion_classes(row["valence"], row["arousal"]), axis=1)

            # Detect RuLSIF opportune moments
            X_opportune = merged_data[change_score_features].values
            change_scores = compute_rulsif_change_scores(X_opportune)
            opportune_moments = label_opportune_moments(change_scores)

            if len(opportune_moments) < 5:
                print(f"Too few opportune moments for User {user_id}. Skipping...")
                continue

            # opportune_moments holds row positions in merged_data, not window
            # IDs; translate them so the filter still selects the right rows
            # when window IDs are not a gap-free 0..n-1 sequence.
            opportune_windows = merged_data["window"].iloc[opportune_moments]
            is_opportune = merged_data["window"].isin(opportune_windows)

            opportune_data = merged_data[is_opportune]
            opportune_data = opportune_data.sort_values("window").reset_index(drop=True)

            # Split opportune_data for emotion classifier test
            opportune_train_data, opportune_test_data = train_test_split(
                opportune_data, test_size=0.2, random_state=42)

            inopportune_data = merged_data[~is_opportune]

            # Combine opportune (80%) and inopportune data for detection model input
            detection_input_data = pd.concat([opportune_train_data, inopportune_data], ignore_index=True)
            detection_input_data = detection_input_data.sort_values("window").reset_index(drop=True)
            student_signal_data = detection_input_data[student_features].copy()

            # Compute RuLSIF change scores
            change_score_input = detection_input_data[change_score_features].values
            student_change_scores = compute_rulsif_change_scores(change_score_input)

            # One score per transition means one fewer score than rows:
            # zero-pad (or trim) so the scores match the row count.
            if len(student_change_scores) < len(student_signal_data):
                student_change_scores = np.append(student_change_scores, [0] * (len(student_signal_data) - len(student_change_scores)))
            elif len(student_change_scores) > len(student_signal_data):
                student_change_scores = student_change_scores[:len(student_signal_data)]

            # Add change score as a new feature
            student_signal_data["change_score"] = student_change_scores

            # Concatenate all features into a NumPy array
            student_input_array = student_signal_data.values  # shape: (n_samples, 8)

            # Apply RobustScaler on the whole array
            scaler = RobustScaler()
            student_input_scaled = scaler.fit_transform(student_input_array)  # shape: (n_samples, 8)

            # Convert to 3D for LSTM input
            student_input_3D = np.expand_dims(student_input_scaled, axis=1)  # shape: (n_samples, 1, 8)

            # Predict opportune moments with trained student model.
            # The prediction is unpacked into three outputs and only the last
            # one is thresholded — presumably the opportune probability;
            # confirm against the saved model's output heads.
            student_model = load_student_model(user_id)
            _, _, pred_probs = student_model.predict(student_input_3D, verbose=0)
            predicted_opportune_mask = (pred_probs.flatten() > threshold)
            detected_opportune_data = detection_input_data[predicted_opportune_mask]

            if len(detected_opportune_data) < 5:
                print(f"Too few predicted opportune moments for User {user_id}. Skipping...")
                continue

            # Train emotion classifier on predicted opportune moments
            X_train_emotion = detected_opportune_data[model_features]
            y_train_emotion = detected_opportune_data["emotion"]
            X_test_emotion = opportune_test_data[model_features]
            y_test_emotion = opportune_test_data["emotion"]

            # Show test indices and class info for training
            print(f"User {user_id} — Test windows (opportune_test_data): {sorted(opportune_test_data['window'].tolist())}")
            print(f"User {user_id} — Emotion class distribution in train set:\n{y_train_emotion.value_counts().to_string()}\n")

            accuracy_opportune, f1_opportune = train_and_evaluate_classifier(
                X_train_emotion, X_test_emotion, y_train_emotion, y_test_emotion
            )

            results.append({
                "User ID": user_id,
                "Accuracy (Opportune)": accuracy_opportune,
                "F1 Score (Opportune)": f1_opportune,
                "Opportune Moments Count": len(opportune_moments)
            })

            print(f"Successfully processed User {user_id}\n\n")

        except FileNotFoundError:
            print(f"Data files for User {user_id} not found. Skipping...")
            continue
        except Exception as e:
            # Best-effort batch run: report the failure and move on to the next user.
            print(f"Unexpected error processing User {user_id}: {str(e)}. Skipping...")
            continue

    results_df = pd.DataFrame(results)
    # NOTE(review): the "Personalized Model" cell writes to this same path, so
    # whichever cell runs last overwrites the other — confirm the intended name.
    results_df.to_csv("baselines/baseline_emotion_personalized.csv", index=False)
    print("\nFinal Results:")
    print(results_df)


# Personalized Model

In [8]:
""" This one is for personalized"""

from tensorflow.keras.models import load_model
import os

def load_student_model(user_id):
    """Load the personalized Keras student model for ``user_id``.

    The model directory is
    ``../saved_models_emotion_personalized/user_User<user_id>/student_model``
    and the model is loaded uncompiled.

    Raises:
        FileNotFoundError: If the model directory does not exist.
    """
    model_path = os.path.join("../", "saved_models_emotion_personalized", f"user_User{user_id}", "student_model")

    if os.path.exists(model_path):
        return load_model(model_path, compile=False)

    raise FileNotFoundError(f"Student model for User {user_id} not found at {model_path}")



if __name__ == "__main__":
    # Per-user pipeline: label opportune windows from change scores, let the
    # personalized student model pick opportune windows from the training
    # portion, train an emotion classifier on those picks and test it on
    # held-out opportune windows.

    # Create the output directory up front so a missing folder cannot discard
    # the results after every user has been processed.
    os.makedirs("baselines", exist_ok=True)
    results = []

    opportune_features = ['BVP', 'BVP_std', 'GSR', 'GSR_std', 'GSR_slope', 'Skin_temp', 'Skin_temp_std']
    model_features = ["ECG", "EMG_c", "EMG_t", "Resp", "Skin_temp"]
    change_score_features = ['BVP', 'GSR', 'Skin_temp']
    student_features = opportune_features
    threshold = 0.5  # probability cut-off applied to the student model output

    for user_id in range(1, 31):
        try:
            physio_data = load_physiological_data(
                f"../case_dataset-master/case_dataset-master/data/raw/physiological/sub{user_id}_DAQ.txt")
            va_data = load_valence_arousal_data(
                f"../case_dataset-master/case_dataset-master/data/raw/annotations/sub{user_id}_joystick.txt")

            physio_segmented, va_segmented = preprocess_data(physio_data, va_data)
            va_segmented = scale_valence_arousal(va_segmented)
            merged_data = pd.merge(physio_segmented, va_segmented, on="window", how="inner")

            merged_data["emotion"] = merged_data.apply(
                lambda row: map_to_emotion_classes(row["valence"], row["arousal"]), axis=1)

            # Detect RuLSIF opportune moments
            X_opportune = merged_data[change_score_features].values
            change_scores = compute_rulsif_change_scores(X_opportune)
            opportune_moments = label_opportune_moments(change_scores)

            if len(opportune_moments) < 5:
                print(f"Too few opportune moments for User {user_id}. Skipping...")
                continue

            # opportune_moments holds row positions in merged_data, not window
            # IDs; translate them so the filter still selects the right rows
            # when window IDs are not a gap-free 0..n-1 sequence.
            opportune_windows = merged_data["window"].iloc[opportune_moments]
            is_opportune = merged_data["window"].isin(opportune_windows)

            opportune_data = merged_data[is_opportune]
            opportune_data = opportune_data.sort_values("window").reset_index(drop=True)

            # Split opportune_data for emotion classifier test
            opportune_train_data, opportune_test_data = train_test_split(
                opportune_data, test_size=0.2, random_state=42)

            inopportune_data = merged_data[~is_opportune]

            # Combine opportune (80%) and inopportune data for detection model input
            detection_input_data = pd.concat([opportune_train_data, inopportune_data], ignore_index=True)
            detection_input_data = detection_input_data.sort_values("window").reset_index(drop=True)
            student_signal_data = detection_input_data[student_features].copy()

            # Compute RuLSIF change scores
            change_score_input = detection_input_data[change_score_features].values
            student_change_scores = compute_rulsif_change_scores(change_score_input)

            # One score per transition means one fewer score than rows:
            # zero-pad (or trim) so the scores match the row count.
            if len(student_change_scores) < len(student_signal_data):
                student_change_scores = np.append(student_change_scores, [0] * (len(student_signal_data) - len(student_change_scores)))
            elif len(student_change_scores) > len(student_signal_data):
                student_change_scores = student_change_scores[:len(student_signal_data)]

            # Add change score as a new feature
            student_signal_data["change_score"] = student_change_scores

            # Concatenate all features into a NumPy array
            student_input_array = student_signal_data.values  # shape: (n_samples, 8)

            # Apply RobustScaler on the whole array
            scaler = RobustScaler()
            student_input_scaled = scaler.fit_transform(student_input_array)  # shape: (n_samples, 8)

            # Convert to 3D for LSTM input
            student_input_3D = np.expand_dims(student_input_scaled, axis=1)  # shape: (n_samples, 1, 8)

            # Predict opportune moments with trained student model
            student_model = load_student_model(user_id)
            pred_probs = student_model.predict(student_input_3D, verbose=0)  # shape (n_samples, 1)
            predicted_opportune_mask = (pred_probs.flatten() > threshold)

            detected_opportune_data = detection_input_data[predicted_opportune_mask]

            if len(detected_opportune_data) < 5:
                print(f"Too few predicted opportune moments for User {user_id}. Skipping...")
                continue

            # Train emotion classifier on predicted opportune moments
            X_train_emotion = detected_opportune_data[model_features]
            y_train_emotion = detected_opportune_data["emotion"]
            X_test_emotion = opportune_test_data[model_features]
            y_test_emotion = opportune_test_data["emotion"]

            # Show test indices and class info for training
            print(f"User {user_id} — Test windows (opportune_test_data): {sorted(opportune_test_data['window'].tolist())}")
            print(f"User {user_id} — Emotion class distribution in train set:\n{y_train_emotion.value_counts().to_string()}\n")

            accuracy_opportune, f1_opportune = train_and_evaluate_classifier(
                X_train_emotion, X_test_emotion, y_train_emotion, y_test_emotion
            )

            results.append({
                "User ID": user_id,
                "Accuracy (Opportune)": accuracy_opportune,
                "F1 Score (Opportune)": f1_opportune,
                "Opportune Moments Count": len(opportune_moments)
            })

            print(f"Successfully processed User {user_id}\n\n")

        except FileNotFoundError as fnf_error:
            # Raised for missing data files as well as a missing student model.
            print(f"FileNotFoundError for User {user_id}: {fnf_error}. Skipping...")
            continue

        except Exception as e:
            # Best-effort batch run: report the failure and move on to the next user.
            print(f"Unexpected error processing User {user_id}: {str(e)}. Skipping...")
            continue

    results_df = pd.DataFrame(results)
    results_df.to_csv("baselines/baseline_emotion_personalized.csv", index=False)
    print("\nFinal Results:")
    print(results_df)


User 1 — Test windows (opportune_test_data): [60, 63, 66, 67, 69, 70, 75, 80, 81, 82, 89, 92, 93, 96, 106, 107, 111, 116, 129, 133, 141, 144, 159, 162, 164, 165, 166, 175, 188, 190, 192, 194, 197, 204, 213]
User 1 — Emotion class distribution in train set:
emotion
Happy       42
Stressed    12
Relaxed      4

              precision    recall  f1-score   support

       Happy       0.71      1.00      0.83        24
     Relaxed       0.00      0.00      0.00         4
    Stressed       1.00      0.14      0.25         7

    accuracy                           0.71        35
   macro avg       0.57      0.38      0.36        35
weighted avg       0.68      0.71      0.62        35

Classification Report:
Successfully processed User 1


User 2 — Test windows (opportune_test_data): [44, 66, 72, 73, 111, 112, 173, 174, 178, 183, 189, 226, 229, 235, 238, 437, 438, 439, 452, 454, 457, 462, 475, 477, 488]
User 2 — Emotion class distribution in train set:
emotion
Stressed    38
Happy        

# Fixed Time Based

In [None]:
if __name__ == "__main__":
    # Baseline: train the emotion classifier on every non-test window (80% of
    # the opportune windows plus all inopportune ones) and test on the
    # held-out 20% of opportune windows.
    os.makedirs("baselines", exist_ok=True)
    results = []

    opportune_features = ['BVP', 'BVP_std', 'GSR', 'GSR_std', 'GSR_slope', 'Skin_temp', 'Skin_temp_std']
    change_score_features = ['BVP', 'GSR', 'Skin_temp']
    model_features = ["ECG", "EMG_c", "EMG_t", "Resp", "Skin_temp"]
    threshold = 0.5

    for user_id in range(1, 31):
        try:
            physio_data = load_physiological_data(
                f"../case_dataset-master/case_dataset-master/data/raw/physiological/sub{user_id}_DAQ.txt")
            va_data = load_valence_arousal_data(
                f"../case_dataset-master/case_dataset-master/data/raw/annotations/sub{user_id}_joystick.txt")

            physio_segmented, va_segmented = preprocess_data(physio_data, va_data)
            va_segmented = scale_valence_arousal(va_segmented)

            # Sort to ensure reproducibility
            physio_segmented = physio_segmented.sort_values("window").reset_index(drop=True)
            va_segmented = va_segmented.sort_values("window").reset_index(drop=True)

            merged_data = pd.merge(physio_segmented, va_segmented, on="window", how="inner")
            merged_data["emotion"] = merged_data.apply(
                lambda row: map_to_emotion_classes(row["valence"], row["arousal"]), axis=1
            )

            # Detect true opportune moments using RuLSIF
            X_opportune = merged_data[change_score_features].apply(zscore).values
            change_scores = compute_rulsif_change_scores(X_opportune)
            opportune_moments = label_opportune_moments(change_scores)

            if len(opportune_moments) < 5:
                print(f"Too few opportune moments for User {user_id}. Skipping...")
                continue

            # opportune_moments holds row positions in merged_data, not window
            # IDs; translate them so the filter still selects the right rows
            # when window IDs are not a gap-free 0..n-1 sequence.
            opportune_windows = merged_data["window"].iloc[opportune_moments]
            is_opportune = merged_data["window"].isin(opportune_windows)

            opportune_data = merged_data[is_opportune].copy()
            opportune_data = opportune_data.sort_values("window").reset_index(drop=True)

            # Train/test split from real RuLSIF opportune moments
            # Split 80% opportune train and 20% opportune test
            opportune_train_data, opportune_test_data = train_test_split(
                opportune_data, test_size=0.2, random_state=42)

            # Print test window segment IDs
            test_windows = sorted(opportune_test_data["window"].tolist())
            print(f"User {user_id} - Test segment windows: {test_windows}")

            # Identify inopportune segments
            inopportune_data = merged_data[~is_opportune].copy()

            # Concatenate 80% opportune + inopportune segments for training
            combined_train_data = pd.concat([opportune_train_data, inopportune_data], ignore_index=True)

            X_train_emotion = combined_train_data[model_features]
            y_train_emotion = combined_train_data["emotion"]

            X_test_emotion = opportune_test_data[model_features]
            y_test_emotion = opportune_test_data["emotion"]

            # Fit the scaler on the training split only, then reuse it for the test split.
            scaler_class = RobustScaler()
            X_train_emotion_scaled = scaler_class.fit_transform(X_train_emotion)
            X_test_emotion_scaled = scaler_class.transform(X_test_emotion)

            accuracy, f1 = train_and_evaluate_classifier(
                X_train_emotion_scaled, X_test_emotion_scaled, y_train_emotion, y_test_emotion
            )

            results.append({
                "User ID": user_id,
                "Accuracy": accuracy,
                "F1 Score": f1,
                "Train Samples": len(X_train_emotion),
                "Test Samples": len(X_test_emotion)
            })

            print(f"Successfully processed User {user_id}")

        except FileNotFoundError:
            print(f"Data files for User {user_id} not found. Skipping...")
            continue
        except Exception as e:
            # Best-effort batch run: report the failure and move on to the next user.
            print(f"Unexpected error processing User {user_id}: {str(e)}. Skipping...")
            continue

    # Naming the columns keeps the frame well-formed even when every user was
    # skipped; without them the rounding step would raise KeyError.
    results_df = pd.DataFrame(
        results, columns=["User ID", "Accuracy", "F1 Score", "Train Samples", "Test Samples"])
    if not results_df.empty:
        results_df[["Accuracy", "F1 Score"]] = results_df[["Accuracy", "F1 Score"]].round(4)
    results_df.to_csv("baselines/baseline_emotion_fixed_time.csv", index=False)

    print("\nSaved results to baselines/baseline_emotion_fixed_time.csv")

User 1 - Test segment windows: [25, 33, 34, 39, 43, 46, 58, 59, 65, 89, 94, 95, 116, 122, 124, 133, 136, 142, 147, 149, 155, 169, 173, 193, 201, 211, 217, 249, 251, 261, 265, 272, 280, 287, 308, 324, 358, 375, 385, 391, 392, 394, 403, 411, 423, 427, 430, 435, 449, 450, 452, 454, 456, 471, 478, 486]
              precision    recall  f1-score   support

       Happy       0.82      0.93      0.87        40
     Relaxed       0.33      0.17      0.22         6
         Sad       0.00      0.00      0.00         2
    Stressed       0.75      0.75      0.75         8

    accuracy                           0.79        56
   macro avg       0.48      0.46      0.46        56
weighted avg       0.73      0.79      0.75        56

Classification Report:
Successfully processed User 1
User 2 - Test segment windows: [19, 23, 24, 31, 32, 37, 43, 52, 55, 72, 74, 100, 108, 109, 116, 118, 128, 135, 149, 168, 181, 191, 192, 197, 200, 201, 202, 207, 211, 241, 243, 247, 259, 269, 316, 362, 366, 391, 4

# Random Time Based

In [16]:
if __name__ == "__main__":
    # Baseline: train the emotion classifier on a random 50% sample of the
    # non-test windows and test on the held-out 20% of opportune windows.
    import random
    os.makedirs("baselines", exist_ok=True)
    results = []

    # Single source of truth for the output file, so the final message cannot
    # drift from the path actually written.
    output_path = "baselines/baseline_emotion_random_time.csv"

    opportune_features = ['BVP', 'BVP_std', 'GSR', 'GSR_std', 'GSR_slope', 'Skin_temp', 'Skin_temp_std']
    change_score_features = ['BVP', 'GSR', 'Skin_temp']
    model_features = ["ECG", "EMG_c", "EMG_t", "Resp", "Skin_temp"]
    threshold = 0.5

    for user_id in range(1, 31):
        try:
            physio_data = load_physiological_data(
                f"../case_dataset-master/case_dataset-master/data/raw/physiological/sub{user_id}_DAQ.txt")
            va_data = load_valence_arousal_data(
                f"../case_dataset-master/case_dataset-master/data/raw/annotations/sub{user_id}_joystick.txt")

            physio_segmented, va_segmented = preprocess_data(physio_data, va_data)
            va_segmented = scale_valence_arousal(va_segmented)

            # Sort to ensure reproducibility
            physio_segmented = physio_segmented.sort_values("window").reset_index(drop=True)
            va_segmented = va_segmented.sort_values("window").reset_index(drop=True)

            merged_data = pd.merge(physio_segmented, va_segmented, on="window", how="inner")
            merged_data["emotion"] = merged_data.apply(
                lambda row: map_to_emotion_classes(row["valence"], row["arousal"]), axis=1
            )

            # RuLSIF-based opportune detection
            X_opportune = merged_data[change_score_features].apply(zscore).values
            change_scores = compute_rulsif_change_scores(X_opportune)
            opportune_moments = label_opportune_moments(change_scores)

            if len(opportune_moments) < 5:
                print(f"Too few opportune moments for User {user_id}. Skipping...")
                continue

            # opportune_moments holds row positions in merged_data, not window
            # IDs; translate them so the filter still selects the right rows
            # when window IDs are not a gap-free 0..n-1 sequence.
            opportune_windows = merged_data["window"].iloc[opportune_moments]
            is_opportune = merged_data["window"].isin(opportune_windows)

            opportune_data = merged_data[is_opportune].copy()
            opportune_data = opportune_data.sort_values("window").reset_index(drop=True)

            # Train/test split from opportune moments
            opportune_train_data, opportune_test_data = train_test_split(
                opportune_data, test_size=0.2, random_state=42
            )

            # Print test segment window IDs
            test_windows = sorted(opportune_test_data["window"].tolist())
            print(f"User {user_id} - Test segment windows: {test_windows}")

            # Merge opportune train + all inopportune segments
            inopportune_data = merged_data[~is_opportune].copy()
            combined_train_data = pd.concat([opportune_train_data, inopportune_data], ignore_index=True)

            # Apply 50% random sampling on combined train set
            sampled_train_data = combined_train_data.sample(frac=0.5, random_state=42).reset_index(drop=True)

            # Prepare features/labels
            X_train_emotion = sampled_train_data[model_features]
            y_train_emotion = sampled_train_data["emotion"]

            X_test_emotion = opportune_test_data[model_features]
            y_test_emotion = opportune_test_data["emotion"]

            # Fit the scaler on the training split only, then reuse it for the test split.
            scaler = RobustScaler()
            X_train_emotion_scaled = scaler.fit_transform(X_train_emotion)
            X_test_emotion_scaled = scaler.transform(X_test_emotion)

            # Train & Evaluate
            accuracy, f1 = train_and_evaluate_classifier(
                X_train_emotion_scaled, X_test_emotion_scaled, y_train_emotion, y_test_emotion
            )

            results.append({
                "User ID": user_id,
                "Accuracy": accuracy,
                "F1 Score": f1,
                "Train Samples": len(X_train_emotion),
                "Test Samples": len(X_test_emotion)
            })

            print(f"Successfully processed User {user_id}")

        except FileNotFoundError:
            print(f"Data files for User {user_id} not found. Skipping...")
            continue
        except Exception as e:
            # Best-effort batch run: report the failure and move on to the next user.
            print(f"Unexpected error processing User {user_id}: {str(e)}. Skipping...")
            continue

    # Save results. Naming the columns keeps the frame well-formed even when
    # every user was skipped; without them the rounding step would raise KeyError.
    results_df = pd.DataFrame(
        results, columns=["User ID", "Accuracy", "F1 Score", "Train Samples", "Test Samples"])
    if not results_df.empty:
        results_df[["Accuracy", "F1 Score"]] = results_df[["Accuracy", "F1 Score"]].round(4)
    results_df.to_csv(output_path, index=False)

    # Report the file that was actually written.
    print(f"\nSaved results to {output_path}")


User 1 - Test segment windows: [25, 33, 34, 39, 43, 46, 58, 59, 65, 89, 94, 95, 116, 122, 124, 133, 136, 142, 147, 149, 155, 169, 173, 193, 201, 211, 217, 249, 251, 261, 265, 272, 280, 287, 308, 324, 358, 375, 385, 391, 392, 394, 403, 411, 423, 427, 430, 435, 449, 450, 452, 454, 456, 471, 478, 486]
              precision    recall  f1-score   support

       Happy       0.75      0.90      0.82        40
     Relaxed       0.50      0.17      0.25         6
         Sad       0.00      0.00      0.00         2
    Stressed       0.50      0.38      0.43         8

    accuracy                           0.71        56
   macro avg       0.44      0.36      0.37        56
weighted avg       0.66      0.71      0.67        56

Classification Report:
Successfully processed User 1
User 2 - Test segment windows: [19, 23, 24, 31, 32, 37, 43, 52, 55, 72, 74, 100, 108, 109, 116, 118, 128, 135, 149, 168, 181, 191, 192, 197, 200, 201, 202, 207, 211, 241, 243, 247, 259, 269, 316, 362, 366, 391, 4

# xgboost/svm/random_forest

In [10]:
if __name__ == "__main__":
    # Per-user experiment:
    #   1. load + preprocess physiological and valence/arousal data, merge on `window`
    #   2. label opportune windows from change scores and hold out 20% of them as the test set
    #   3. let a pre-trained student model pick opportune rows from the remaining data
    #   4. train the emotion classifier on the picked rows, evaluate on the held-out set
    # One result row per successfully processed user is collected in `results`.
    results = []

    opportune_features = ['BVP', 'BVP_std', 'GSR', 'GSR_std', 'GSR_slope', 'Skin_temp', 'Skin_temp_std']
    model_features = ["ECG", "EMG_c", "EMG_t", "Resp", "Skin_temp"]
    change_score_features = ['BVP', 'GSR', 'Skin_temp']
    threshold = 0.5

    # Single switch for the student model: used both to load the model and to
    # name the output file, so the two cannot drift apart when it is changed.
    model_name = "random_forest"
    output_path = f"../dump/emotion_{model_name}_opportune.csv"

    for user_id in range(1, 31):
        try:
            # Load data
            physio_data = load_physiological_data(
                f"../case_dataset-master/case_dataset-master/data/raw/physiological/sub{user_id}_DAQ.txt")
            va_data = load_valence_arousal_data(
                f"../case_dataset-master/case_dataset-master/data/raw/annotations/sub{user_id}_joystick.txt")

            physio_segmented, va_segmented = preprocess_data(physio_data, va_data)
            va_segmented = scale_valence_arousal(va_segmented)

            # Keep only windows present in both the physiological and annotation frames.
            merged_data = pd.merge(
                physio_segmented.sort_values("window").reset_index(drop=True),
                va_segmented.sort_values("window").reset_index(drop=True),
                on="window", how="inner"
            )

            merged_data["emotion"] = merged_data.apply(
                lambda row: map_to_emotion_classes(row["valence"], row["arousal"]), axis=1
            )

            # Detect RuLSIF opportune moments
            change_scores = compute_rulsif_change_scores(merged_data[change_score_features].values)
            opportune_moments = label_opportune_moments(change_scores)

            if len(opportune_moments) < 5:
                print(f"Too few opportune moments for User {user_id}. Skipping...")
                continue

            # `opportune_moments` is matched against the `window` column to split the rows.
            is_opportune = merged_data["window"].isin(opportune_moments)
            opportune_data = merged_data[is_opportune].sort_values("window")
            inopportune_data = merged_data[~is_opportune].sort_values("window")

            # Split RuLSIF-opportune moments: 20% test, 80% train
            opportune_train_data, opportune_test_data = train_test_split(
                opportune_data, test_size=0.2, random_state=42
            )
            test_windows = sorted(opportune_test_data["window"].tolist())
            print(f"User {user_id} - Test segment windows: {test_windows}")

            # Student model detects opportune moments on:
            # [inopportune + 80% RuLSIF-opportune]
            # (the held-out 20% never reaches the student or the classifier's training set)
            detection_input_data = pd.concat([inopportune_data, opportune_train_data]).sort_values("window")

            student_signal_data = detection_input_data[opportune_features]
            student_change_scores = compute_rulsif_change_scores(student_signal_data[change_score_features].values)
            # Pad with one trailing zero so the scores line up row-for-row with the
            # feature matrix -- presumably the helper yields one score fewer than
            # the number of rows; TODO confirm against its definition.
            student_change_scores = np.append(student_change_scores, 0)  # pad to match

            student_input_array = np.column_stack([student_signal_data.values, student_change_scores])
            # NOTE(review): the scaler is re-fit on this user's detection input rather
            # than reusing a scaler from student training -- confirm that is intended.
            scaler_student = RobustScaler()
            student_input_scaled = scaler_student.fit_transform(student_input_array)

            student_model = load_student_model(user_id, model_name=model_name)
            # Flatten to 1-D so the result is a valid row mask even if the model
            # returns a column vector of shape (n, 1).
            # NOTE(review): if `predict` returns hard 0/1 labels, `threshold` only
            # has an effect at 0.5-like values -- confirm what the student returns.
            pred_probs = np.asarray(student_model.predict(student_input_scaled)).ravel()
            predicted_opportune_mask = (pred_probs >= threshold)
            detected_opportune_data = detection_input_data[predicted_opportune_mask]

            if len(detected_opportune_data) < 5:
                print(f"Too few predicted opportune moments for User {user_id}. Skipping...")
                continue

            # Train emotion classifier
            X_train_emotion = detected_opportune_data[model_features]
            y_train_emotion = detected_opportune_data["emotion"]
            X_test_emotion = opportune_test_data[model_features]
            y_test_emotion = opportune_test_data["emotion"]

            print("Train class distribution:")
            print(y_train_emotion.value_counts())
            print("Test class distribution:")
            print(y_test_emotion.value_counts())

            # Fit the scaler on training rows only, then apply it to the test rows.
            scaler_class = RobustScaler()
            X_train_emotion_scaled = scaler_class.fit_transform(X_train_emotion)
            X_test_emotion_scaled = scaler_class.transform(X_test_emotion)

            accuracy_opportune, f1_opportune = train_and_evaluate_classifier(
                X_train_emotion_scaled, X_test_emotion_scaled, y_train_emotion, y_test_emotion
            )

            results.append({
                "User ID": user_id,
                "Accuracy": round(accuracy_opportune, 4),
                "F1 Score": round(f1_opportune, 4),
                "Opportune Moments Count": len(opportune_moments)
            })

            print(f"Successfully processed User {user_id}\n")

        except FileNotFoundError:
            print(f"Data files for User {user_id} not found. Skipping...")
            continue
        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next user.
            print(f"Unexpected error processing User {user_id}: {str(e)}. Skipping...")
            continue

    results_df = pd.DataFrame(results)
    # Create the dump directory if needed so a long run is not lost on the final write.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    results_df.to_csv(output_path, index=False)

    print("\nFinal Results:")
    print(results_df)


User 1 - Test segment windows: [60, 63, 66, 67, 69, 70, 75, 80, 81, 82, 89, 92, 93, 96, 106, 107, 111, 116, 129, 133, 141, 144, 159, 162, 164, 165, 166, 175, 188, 190, 192, 194, 197, 204, 213]
Too few predicted opportune moments for User 1. Skipping...
User 2 - Test segment windows: [44, 66, 72, 73, 111, 112, 173, 174, 178, 183, 189, 226, 229, 235, 238, 437, 438, 439, 452, 454, 457, 462, 475, 477, 488]
Train class distribution:
emotion
Happy       29
Stressed    27
Sad          2
Relaxed      1
Name: count, dtype: int64
Test class distribution:
emotion
Stressed    14
Happy        7
Relaxed      3
Sad          1
Name: count, dtype: int64
              precision    recall  f1-score   support

       Happy       0.75      0.43      0.55         7
     Relaxed       0.00      0.00      0.00         3
         Sad       0.20      1.00      0.33         1
    Stressed       0.81      0.93      0.87        14

    accuracy                           0.68        25
   macro avg       0.44      