In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve, average_precision_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Activation, Dropout, Bidirectional, Multiply, Flatten, Lambda,RepeatVector, Permute 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from scipy.stats import zscore
from sklearn.metrics.pairwise import rbf_kernel
from scipy.signal import resample, butter, filtfilt, welch
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.cluster import KMeans

In [2]:
# Constants for sampling rates
PHYSIO_SAMPLING_RATE = 1000  # Hz
VALENCE_SAMPLING_RATE = 20    # Hz
DOWNSAMPLE_FACTOR = PHYSIO_SAMPLING_RATE // VALENCE_SAMPLING_RATE  # 50

In [3]:
def load_data(file_path):
    columns = ["time", "ECG", "BVP", "GSR", "Resp", "Skin_Temp", "EMG_z", "EMG_c", "EMG_t"]
    
    # Read all data first
    df = pd.read_csv(file_path, sep="\t", names=columns)
    
    # Downsample physiological data to match valence-arousal sampling rate
    downsampled_df = df.iloc[::DOWNSAMPLE_FACTOR, :].copy()
    
    # Reset time to new sampling rate
    downsampled_df["time"] = np.arange(len(downsampled_df)) / VALENCE_SAMPLING_RATE
    
    return downsampled_df


In [4]:
# Segment into 5-second windows with proper alignment
def segment_data(physio_df, window_size=5):  # Removed valence_df parameter
    # Calculate number of samples per window
    samples_per_window = window_size * VALENCE_SAMPLING_RATE
    
    # Segment physiological data only
    segments = []
    for i in range(0, len(physio_df), samples_per_window):
        segment = physio_df.iloc[i:i+samples_per_window]
        if len(segment) == samples_per_window:  # only complete segments
            # Calculate time-domain features
            features = {
                "time": segment["time"].mean(),
                "ECG_mean": segment["ECG"].mean(),
                "ECG_std": segment["ECG"].std(),
                "ECG_hr": 60 / (segment["ECG"].diff().abs().mean() + 1e-6),
                "BVP_mean": segment["BVP"].mean(),
                "BVP_std": segment["BVP"].std(),
                "GSR_mean": segment["GSR"].mean(),
                "GSR_std": segment["GSR"].std(),
                "GSR_slope": np.polyfit(np.arange(len(segment)), segment["GSR"], 1)[0],
                "Resp_mean": segment["Resp"].mean(),
                "Resp_std": segment["Resp"].std(),
                "Resp_rate": len(np.where(np.diff(np.sign(segment["Resp"] - segment["Resp"].mean())))[0]) / 2,
                "Skin_temp_mean": segment["Skin_Temp"].mean(),
                "Skin_temp_std": segment["Skin_Temp"].std(),
                "EMG_mean": segment[["EMG_z", "EMG_c", "EMG_t"]].mean().mean(),
                "EMG_std": segment[["EMG_z", "EMG_c", "EMG_t"]].std().mean()
            }
            segments.append(features)
    
    return pd.DataFrame(segments)

In [5]:
def extract_physiological_features(segment):
    """Extract time and frequency domain features from a physiological segment"""
    features = {}
    
    # Time-domain features
    for signal in ['ECG', 'BVP', 'GSR', 'Resp', 'Skin_Temp']:
        sig_data = segment[signal].values
        features.update({
            f"{signal}_mean": np.mean(sig_data),
            f"{signal}_std": np.std(sig_data),
            f"{signal}_max": np.max(sig_data),
            f"{signal}_min": np.min(sig_data),
            f"{signal}_range": np.ptp(sig_data),
            f"{signal}_slope": np.polyfit(np.arange(len(sig_data)), sig_data, 1)[0],
            f"{signal}_diff_mean": np.mean(np.diff(sig_data)),
            f"{signal}_diff_std": np.std(np.diff(sig_data))
        })
    
    # Frequency-domain features (using Welch's method)
    for signal in ['ECG', 'BVP', 'GSR']:
        sig_data = segment[signal].values
        f, Pxx = welch(sig_data, fs=VALENCE_SAMPLING_RATE, nperseg=min(len(sig_data), 256))
        features.update({
            f"{signal}_psd_mean": np.mean(Pxx),
            f"{signal}_psd_std": np.std(Pxx),
            f"{signal}_psd_max": np.max(Pxx),
            f"{signal}_psd_max_freq": f[np.argmax(Pxx)],
            f"{signal}_psd_ratio": np.sum(Pxx[f > 0.1]) / (np.sum(Pxx) + 1e-6)  # ratio of high freq components
        })
    
    # Combined features
    features['HRV'] = np.std(np.diff(np.where(np.diff(segment['ECG']) > 0.5 * np.std(segment['ECG']))[0]))
    features['GSR_peaks'] = len(find_peaks(segment['GSR'], height=np.mean(segment['GSR']))[0])
    
    return features

In [6]:
def compute_rulsif_change_scores(X, alpha=0.1, sigma=1.0, lambda_param=1e-3):
    n = len(X) - 1
    change_scores = np.zeros(n)
    
    for i in range(n):
        X_t, X_t_next = X[i], X[i + 1]
        
        # Compute Gaussian Kernel Matrix
        K_t = rbf_kernel(X_t.reshape(-1, 1), X_t.reshape(-1, 1), gamma=1/(2*sigma**2))
        K_t_next = rbf_kernel(X_t_next.reshape(-1, 1), X_t_next.reshape(-1, 1), gamma=1/(2*sigma**2))
        
        # Compute Weights using Least Squares Importance Fitting (LSIF)
        H = alpha * K_t + (1 - alpha) * K_t_next + lambda_param * np.eye(K_t.shape[0])
        h = np.mean(K_t, axis=1)
        
        theta = np.linalg.solve(H, h)
        
        # Compute Change Score
        change_scores[i] = np.mean(np.square(K_t_next.dot(theta) - 1))
    
    return change_scores

In [7]:
def label_opportune_moments(change_scores):
    mean, std = np.mean(change_scores), np.std(change_scores)
    threshold = mean + 3 * std
    outliers = change_scores > threshold
    
    # Clustering the remaining scores
    valid_indices = np.where(~outliers)[0]  # Indices of non-outliers
    valid_scores = change_scores[valid_indices]
    
    if len(valid_scores) > 1:  # Ensure there are enough samples for clustering
        kmeans = KMeans(n_clusters=2, random_state=42).fit(valid_scores.reshape(-1, 1))
        centroids = kmeans.cluster_centers_.flatten()
        high_cluster = np.argmax(centroids)
        high_values = (kmeans.labels_ == high_cluster) & (valid_scores > centroids[high_cluster])
        
        # Map high_values back to the original indices
        high_values_mapped = np.zeros_like(change_scores, dtype=bool)
        high_values_mapped[valid_indices] = high_values
    else:
        high_values_mapped = np.zeros_like(change_scores, dtype=bool)
    
    # Mark opportune moments
    opportune_moments = np.where(outliers | high_values_mapped)[0]
    return opportune_moments

In [8]:
def prepare_input(segmented_data, change_scores):
    # Select only physiological features
    features = [
        'ECG_mean', 'ECG_std', 'ECG_hr', 
        'BVP_mean', 'BVP_std',
        'GSR_mean', 'GSR_std', 'GSR_slope',
        'Resp_mean', 'Resp_std', 'Resp_rate',
        'Skin_temp_mean', 'Skin_temp_std',
        'EMG_mean', 'EMG_std'
    ]
    
    # Add change point scores
    segmented_data['change_score'] = np.concatenate([[0], change_scores])[:len(segmented_data)]
    
    # Prepare input sequence
    input_sequence = segmented_data[features + ['change_score']].values
    
    # Robust scaling
    scaler = RobustScaler()
    input_sequence = scaler.fit_transform(input_sequence)
    
    return input_sequence

def prepare_input_student(segmented_data, change_scores):
    # Features only from BVP, GSR, and Skin Temperature + change scores
    features = [
        'BVP_mean', 'BVP_std',
        'GSR_mean', 'GSR_std', 'GSR_slope',
        'Skin_temp_mean', 'Skin_temp_std'
    ]
    
    # Add change point scores (same as before)
    segmented_data['change_score'] = np.concatenate([[0], change_scores])[:len(segmented_data)]
    
    # Prepare input sequence with selected features
    input_sequence = segmented_data[features + ['change_score']].values
    
    # Apply robust scaling
    scaler = RobustScaler()
    input_sequence = scaler.fit_transform(input_sequence)
    
    return input_sequence


In [9]:
def elliott(x, p):
    return (p * x) / (1 + K.abs(x))

# Derivative of PEF
def elliott_derivative(x, p):
    return p / ((K.abs(x) + 1) ** 2)

# Swish Activation Function
def swish(x):
    return x * K.sigmoid(x)

# Custom PEF Activation Layer
class PEFLayer(Activation):
    def __init__(self, activation, **kwargs):
        super(PEFLayer, self).__init__(activation, **kwargs)
        self.p = K.variable(1.0)  # Initialize parameter p

    def call(self, inputs):
        return elliott(inputs, self.p)

# def build_p_lstm(input_shape):
#     inputs = Input(shape=input_shape)
    
#     # p-LSTM Layer with PEF activation
#     lstm_out = LSTM(32, return_sequences=False, kernel_regularizer=l2(0.01))(inputs)
#     lstm_out = Dropout(0.5)(lstm_out)  # Dropout for regularization
#     lstm_out = PEFLayer(elliott)(lstm_out)
    
#     # Fully Connected Layers with PEF activation
#     dense1 = Dense(16, kernel_regularizer=l2(0.01))(lstm_out)
#     dense1 = Dropout(0.5)(dense1)  # Dropout for regularization
#     dense1 = PEFLayer(elliott)(dense1)
    
#     # Swish Activation Layer
#     swish_out = Activation(swish)(dense1)
    
#     # Sigmoid Output Layer for Binary Classification
#     outputs = Dense(1, activation="sigmoid")(swish_out)
    
#     # Define Model
#     model = Model(inputs, outputs)
#     return model

def build_p_lstm_for_distillation(input_shape):
    inputs = Input(shape=input_shape)
    
    # p-LSTM Layer with PEF activation (named)
    lstm_out = LSTM(32, return_sequences=False, kernel_regularizer=l2(0.01), name='lstm_layer')(inputs)
    lstm_out = Dropout(0.5)(lstm_out)  # Dropout for regularization
    lstm_out = PEFLayer(elliott)(lstm_out)
    
    # Fully Connected Layer with PEF activation (named)
    dense1 = Dense(16, kernel_regularizer=l2(0.01), name='dense_layer')(lstm_out)
    dense1 = Dropout(0.5)(dense1)
    dense1 = PEFLayer(elliott)(dense1)
    
    # Swish Activation Layer
    swish_out = Activation(swish)(dense1)
    
    # Sigmoid Output Layer for Binary Classification
    outputs = Dense(1, activation="sigmoid", name='output_layer')(swish_out)
    
    # Model outputs intermediate layers + final output
    model = Model(inputs=inputs, outputs=[lstm_out, dense1, outputs])
    return model


def build_student_lstm(input_shape):
    inputs = Input(shape=input_shape)
    
    # Smaller LSTM layer (named)
    lstm_out = LSTM(16, return_sequences=False, kernel_regularizer=l2(0.01), name='lstm_layer')(inputs)
    lstm_out = Dropout(0.5)(lstm_out)
    lstm_out = PEFLayer(elliott)(lstm_out)
    
    # Smaller Dense layer (named)
    dense1 = Dense(8, kernel_regularizer=l2(0.01), name='dense_layer')(lstm_out)
    dense1 = Dropout(0.5)(dense1)
    dense1 = PEFLayer(elliott)(dense1)
    
    # Swish Activation
    swish_out = Activation(swish)(dense1)
    
    # Sigmoid output for binary classification
    outputs = Dense(1, activation="sigmoid", name='output_layer')(swish_out)
    
    # Multi-output: intermediate features + final output
    model = Model(inputs=inputs, outputs=[lstm_out, dense1, outputs])
    return model


In [None]:
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report, roc_auc_score
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping

# def train_student_with_distillation(student, teacher, x_student, x_teacher, y_train, proj_lstm, proj_dense, epochs=50, batch_size=32):
#     optimizer = Adam()
#     bce_loss = BinaryCrossentropy()
#     mse_loss = MeanSquaredError()

#     # Combine inputs into a dataset: ((student_input, teacher_input), labels)
#     dataset = tf.data.Dataset.from_tensor_slices(((x_student, x_teacher), y_train))
#     dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)

#     for epoch in range(epochs):
#         total_hard_loss = 0.0
#         total_feat_loss = 0.0
#         total_batches = 0

#         for (x_batch_student, x_batch_teacher), y_batch in dataset:
#             with tf.GradientTape() as tape:
#                 # Teacher output (no gradients)
#                 t_lstm, t_dense, t_output = teacher(x_batch_teacher, training=False)

#                 # Student output (with gradients)
#                 s_lstm, s_dense, s_output = student(x_batch_student, training=True)

#                 # Hard label loss (true label)
#                 hard_loss = bce_loss(y_batch, s_output)

#                 # 🔁 Project student outputs to match teacher dimensions
#                 s_lstm_proj = proj_lstm(s_lstm)
#                 s_dense_proj = proj_dense(s_dense)

#                 # Feature distillation loss
#                 feat_loss = mse_loss(t_lstm, s_lstm_proj) + mse_loss(t_dense, s_dense_proj)

#                 # Total loss (weighted sum)
#                 total_loss = hard_loss + 0.5 * feat_loss

#             # Backpropagation
#             grads = tape.gradient(total_loss, student.trainable_weights + proj_lstm.trainable_weights + proj_dense.trainable_weights)
#             optimizer.apply_gradients(zip(grads, student.trainable_weights + proj_lstm.trainable_weights + proj_dense.trainable_weights))

#             total_hard_loss += hard_loss.numpy()
#             total_feat_loss += feat_loss.numpy()
#             total_batches += 1

#         print(f"Epoch {epoch+1}/{epochs} - Hard Loss: {total_hard_loss/total_batches:.4f} | Feature Loss: {total_feat_loss/total_batches:.4f}")

def train_student_with_distillation(
    student, teacher, x_student, x_teacher, y_train,
    proj_lstm, proj_dense,
    epochs=50, batch_size=32,
    patience=5, val_split=0.2
):
    optimizer = Adam()
    bce_loss = BinaryCrossentropy()
    mse_loss = MeanSquaredError()

    # Split train/val manually
    num_val = int(len(y_train) * val_split)
    x_student_train, x_student_val = x_student[:-num_val], x_student[-num_val:]
    x_teacher_train, x_teacher_val = x_teacher[:-num_val], x_teacher[-num_val:]
    y_train_train, y_val = y_train[:-num_val], y_train[-num_val:]

    print(f"Teacher Train: {len(x_teacher_train)} | Teacher Val: {len(x_teacher_val)} | Student Train: {len(x_student_train)} | Student Val: {len(x_student_val)}")
    # Prepare datasets
    train_dataset = tf.data.Dataset.from_tensor_slices(((x_student_train, x_teacher_train), y_train_train))
    train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

    val_dataset = tf.data.Dataset.from_tensor_slices(((x_student_val, x_teacher_val), y_val))
    val_dataset = val_dataset.batch(batch_size)

    best_loss = np.inf
    wait = 0
    best_weights = None

    for epoch in range(epochs):
        # Training loop
        total_hard_loss = 0.0
        total_feat_loss = 0.0
        total_batches = 0

        for (x_batch_student, x_batch_teacher), y_batch in train_dataset:
            with tf.GradientTape() as tape:
                t_lstm, t_dense, t_output = teacher(x_batch_teacher, training=False)
                s_lstm, s_dense, s_output = student(x_batch_student, training=True)

                hard_loss = bce_loss(y_batch, s_output)
                s_lstm_proj = proj_lstm(s_lstm)
                s_dense_proj = proj_dense(s_dense)
                feat_loss = mse_loss(t_lstm, s_lstm_proj) + mse_loss(t_dense, s_dense_proj)

                total_loss = hard_loss + 0.5 * feat_loss

            grads = tape.gradient(total_loss, student.trainable_weights + proj_lstm.trainable_weights + proj_dense.trainable_weights)
            optimizer.apply_gradients(zip(grads, student.trainable_weights + proj_lstm.trainable_weights + proj_dense.trainable_weights))

            total_hard_loss += hard_loss.numpy()
            total_feat_loss += feat_loss.numpy()
            total_batches += 1

        train_loss = (total_hard_loss + 0.5 * total_feat_loss) / total_batches

        # Validation loop
        val_losses = []
        for (x_batch_student, x_batch_teacher), y_batch in val_dataset:
            t_lstm, t_dense, t_output = teacher(x_batch_teacher, training=False)
            s_lstm, s_dense, s_output = student(x_batch_student, training=False)

            hard_loss = bce_loss(y_batch, s_output)
            s_lstm_proj = proj_lstm(s_lstm)
            s_dense_proj = proj_dense(s_dense)
            feat_loss = mse_loss(t_lstm, s_lstm_proj) + mse_loss(t_dense, s_dense_proj)

            val_losses.append(hard_loss.numpy() + 0.5 * feat_loss.numpy())

        val_loss = np.mean(val_losses)

        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

        # Early stopping check
        if val_loss < best_loss:
            best_loss = val_loss
            wait = 0
            # Save best weights
            best_weights = (
                student.get_weights(),
                proj_lstm.get_weights(),
                proj_dense.get_weights()
            )
        else:
            wait += 1
            if wait >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                # Restore best weights
                student.set_weights(best_weights[0])
                proj_lstm.set_weights(best_weights[1])
                proj_dense.set_weights(best_weights[2])
                break


def main():
    # User profiles with predefined clusters
    df_profiles = pd.DataFrame({
        "User": [f"User{i}" for i in range(1,31) if i != 7],
        "Cluster": [0, 0, 0, 1, 1, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 1,
                    0, 0, 0, 0, 0, 0, 0, 1,
                    0, 0, 1, 0]
    })

    user_inputs = {}
    user_labels = {}

    for i in range(1, 31):
        if i == 7:  # Skip missing user
            continue
        user_id = f"User{i}"
        try:
            physio_file = f"case_dataset-master/case_dataset-master/data/raw/physiological/sub{i}_DAQ.txt"
            df_physio = load_data(physio_file)
            segmented_data = segment_data(df_physio)
            
            physio_features = segmented_data[['ECG_mean', 'BVP_mean', 'GSR_mean', 'Resp_mean', 'Skin_temp_mean', 'EMG_mean']].values
            change_scores = compute_rulsif_change_scores(physio_features)
            opportune_moments = label_opportune_moments(change_scores)
            
            labels = np.zeros(len(segmented_data))
            labels[opportune_moments] = 1
            
            # Prepare inputs for teacher and student separately
            input_teacher = prepare_input(segmented_data.copy(), change_scores)
            input_student = prepare_input_student(segmented_data.copy(), change_scores)
            
            # Remove rows with NaNs (if any)
            valid_indices_teacher = ~np.isnan(input_teacher).any(axis=1)
            valid_indices_student = ~np.isnan(input_student).any(axis=1)
            
            # Keep intersection of valid indices for teacher and student inputs
            valid_indices = valid_indices_teacher & valid_indices_student
            
            input_teacher = input_teacher[valid_indices]
            input_student = input_student[valid_indices]
            labels_cleaned = labels[valid_indices]
            
            user_inputs[user_id] = {
                'teacher': input_teacher,
                'student': input_student
            }
            user_labels[user_id] = labels_cleaned

        except FileNotFoundError:
            print(f"Data for {user_id} not found. Skipping...")
            continue


    for user, input_data in user_inputs.items():
        cluster = df_profiles[df_profiles["User"] == user]["Cluster"].values[0]
        similar_users = [u for u in df_profiles[df_profiles["Cluster"] == cluster]["User"] if u != user]

        # Preparing training data and labels for teacher and student separately
        train_data_teacher, train_data_student, train_labels = [], [], []
        for u in similar_users:
            if u in user_inputs:
                train_data_teacher.append(user_inputs[u]['teacher'])
                train_data_student.append(user_inputs[u]['student'])
                train_labels.append(user_labels[u])

        if not train_data_teacher or not train_data_student:
            print(f"No training data for {user}. Skipping...")
            continue

        proj_lstm = Dense(32)
        proj_dense = Dense(16)

        train_data_teacher = np.vstack(train_data_teacher)
        train_data_student = np.vstack(train_data_student)
        train_labels = np.hstack(train_labels)

        # Test data
        test_data_teacher = input_data['teacher']
        test_data_student = input_data['student']
        test_labels = user_labels[user]

        # Reshaping inputs for LSTM: [samples, timesteps, features]
        train_data_teacher = train_data_teacher.reshape((train_data_teacher.shape[0], 1, train_data_teacher.shape[1]))
        train_data_student = train_data_student.reshape((train_data_student.shape[0], 1, train_data_student.shape[1]))
        test_data_teacher = test_data_teacher.reshape((test_data_teacher.shape[0], 1, test_data_teacher.shape[1]))
        test_data_student = test_data_student.reshape((test_data_student.shape[0], 1, test_data_student.shape[1]))

        print(f"\nTraining {user} on: {[u for u in similar_users if u in user_inputs]}")
        print(f"Train samples for teacher: {len(train_data_teacher)}, Test samples: {len(test_data_teacher)}")

        print(f"\nTraining teacher model for {user}...")
        teacher_model = build_p_lstm_for_distillation((train_data_teacher.shape[1], train_data_teacher.shape[2]))
        teacher_model.compile(optimizer=Adam(0.001), loss=BinaryCrossentropy(), metrics=['accuracy'])
        class_weights = compute_class_weight(
            class_weight="balanced",
            classes=np.unique(train_labels),
            y=train_labels
        )
        class_weights = dict(enumerate(class_weights))

        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        teacher_model.fit(
            train_data_teacher, train_labels,
            validation_split=0.2,
            epochs=50,
            batch_size=32,
            callbacks=[early_stopping],
            class_weight=class_weights,
            verbose=0
        )

        t_lstm, t_dense, teacher_preds = teacher_model.predict(test_data_teacher)
        teacher_pred_classes = (teacher_preds > 0.5).astype(int)

        print(f"Teacher Evaluation for {user}:")
        print(classification_report(test_labels, teacher_pred_classes))
        print(f"AUC-ROC (Teacher): {roc_auc_score(test_labels, teacher_preds):.3f}")

        print(f"Train samples for student: {len(train_data_student)}, Test samples: {len(test_data_student)}")

        print(f"Training student model with distillation for {user}...")
        student_model = build_student_lstm((train_data_student.shape[1], train_data_student.shape[2]))
        train_student_with_distillation(
            student_model, teacher_model,
            train_data_student, train_data_teacher,
            train_labels,
            proj_lstm,
            proj_dense,
            epochs=50,
            batch_size=32
        )

        # Evaluate student on test student data
        s_lstm, s_dense, y_pred = student_model.predict(test_data_student)
        y_pred_classes = (y_pred > 0.5).astype(int)

        print(f"Evaluation for {user}:")
        print(classification_report(test_labels, y_pred_classes))
        print(f"AUC-ROC: {roc_auc_score(test_labels, y_pred):.3f}")


# Then call main()
if __name__ == "__main__":
    main()




Training User1 on: ['User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples for teacher: 11270, Test samples: 490

Training teacher model for User1...
Teacher Evaluation for User1:
              precision    recall  f1-score   support

         0.0       0.92      0.95      0.94       341
         1.0       0.88      0.82      0.85       149

    accuracy                           0.91       490
   macro avg       0.90      0.89      0.89       490
weighted avg       0.91      0.91      0.91       490

AUC-ROC (Teacher): 0.964
Train samples for student: 11270, Test samples: 490
Training student model with distillation for User1...
Teacher Train: 9016 | Teacher Val: 2254 | Student Train: 9016 | Student Val: 2254
Epoch 1/50 - Train Loss: 0.6482 | Val Loss: 0.4176
Epoch 2/50 - Train Loss: 0.3971 | Val Loss: 