In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve, average_precision_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Activation, Dropout, Bidirectional, Multiply, Flatten, Lambda,RepeatVector, Permute 
from tensorflow.keras.layers import Conv1D, BatchNormalization, MaxPooling1D, GlobalAveragePooling1D, Concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from scipy.stats import zscore
from sklearn.metrics.pairwise import rbf_kernel
from scipy.signal import resample, butter, filtfilt, welch
from tensorflow.keras.losses import BinaryCrossentropy
from sklearn.cluster import KMeans
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score, confusion_matrix
import csv
import os
from sklearn.model_selection import train_test_split

In [2]:
# Constants for sampling rates
# Rate of the raw physiological recordings.
PHYSIO_SAMPLING_RATE = 1000  # Hz
# Target rate after downsampling (named after the valence annotation stream).
VALENCE_SAMPLING_RATE = 20    # Hz
# Row stride used when decimating; integer division of the two rates above.
DOWNSAMPLE_FACTOR = PHYSIO_SAMPLING_RATE // VALENCE_SAMPLING_RATE  # 50

In [3]:
# Load physiological data with proper downsampling
def load_data(file_path):
    """Read one tab-separated physiological recording and decimate it.

    The file is parsed without a header row; its columns are named, in order,
    time, ECG, BVP, GSR, Resp, Skin_Temp, EMG_z, EMG_c and EMG_t.  Every
    ``DOWNSAMPLE_FACTOR``-th row is kept (plain row striding, no filtering is
    applied) and the ``time`` column is rebuilt as
    ``row_position / VALENCE_SAMPLING_RATE``.

    Args:
        file_path: Location of the recording, passed straight to
            ``pd.read_csv``.

    Returns:
        pd.DataFrame: A new frame with the retained rows.  The row index is
        the one inherited from the full-rate frame (it is not reset).
    """
    channel_names = [
        "time", "ECG", "BVP", "GSR", "Resp",
        "Skin_Temp", "EMG_z", "EMG_c", "EMG_t",
    ]
    full_rate = pd.read_csv(file_path, sep="\t", names=channel_names)

    # Keep one row per stride; ``assign`` copies, so the full-rate frame is
    # left untouched while ``time`` is overwritten in its original position.
    kept_rows = full_rate.iloc[::DOWNSAMPLE_FACTOR]
    new_time_axis = np.arange(len(kept_rows)) / VALENCE_SAMPLING_RATE
    return kept_rows.assign(time=new_time_axis)

In [4]:
# Segment into 5-second windows with proper alignment
def segment_data(physio_df, window_size=5):
    """Summarise a downsampled recording as one feature row per window.

    The frame is cut into consecutive, non-overlapping windows of
    ``window_size * VALENCE_SAMPLING_RATE`` rows; a trailing window with
    fewer rows is discarded.

    Args:
        physio_df: Frame with the columns time, ECG, BVP, GSR, Resp,
            Skin_Temp, EMG_z, EMG_c and EMG_t.
        window_size: Window length in seconds.

    Returns:
        pd.DataFrame: One row per complete window with the columns time,
        ECG_mean, ECG_std, ECG_hr, BVP_mean, BVP_std, GSR_mean, GSR_std,
        GSR_slope, Resp_mean, Resp_std, Resp_rate, Skin_temp_mean,
        Skin_temp_std, EMG_mean and EMG_std (in that order).
    """
    win_len = window_size * VALENCE_SAMPLING_RATE
    emg_columns = ["EMG_z", "EMG_c", "EMG_t"]

    feature_rows = []
    for start in range(0, len(physio_df), win_len):
        window = physio_df.iloc[start:start + win_len]
        if len(window) != win_len:
            # Incomplete trailing window: skip it.
            continue

        ecg = window["ECG"]
        bvp = window["BVP"]
        gsr = window["GSR"]
        resp = window["Resp"]
        skin_temp = window["Skin_Temp"]
        emg = window[emg_columns]

        # Positions where the sign of the mean-centred respiration changes.
        resp_sign = np.sign(resp - resp.mean())
        sign_flip_positions = np.where(np.diff(resp_sign))[0]
        sample_positions = np.arange(len(window))

        feature_rows.append({
            "time": window["time"].mean(),
            "ECG_mean": ecg.mean(),
            "ECG_std": ecg.std(),
            # 60 divided by the mean absolute first difference; the small
            # constant keeps the division finite for a flat signal.
            "ECG_hr": 60 / (ecg.diff().abs().mean() + 1e-6),
            "BVP_mean": bvp.mean(),
            "BVP_std": bvp.std(),
            "GSR_mean": gsr.mean(),
            "GSR_std": gsr.std(),
            # Leading coefficient of a degree-1 least-squares fit.
            "GSR_slope": np.polyfit(sample_positions, gsr, 1)[0],
            "Resp_mean": resp.mean(),
            "Resp_std": resp.std(),
            # Half the number of sign changes found above.
            "Resp_rate": len(sign_flip_positions) / 2,
            "Skin_temp_mean": skin_temp.mean(),
            "Skin_temp_std": skin_temp.std(),
            # Per-channel statistic first, then averaged over the channels.
            "EMG_mean": emg.mean().mean(),
            "EMG_std": emg.std().mean(),
        })

    return pd.DataFrame(feature_rows)

In [5]:
# RuLSIF-style change-point scoring between consecutive feature rows
def compute_rulsif_change_scores(X, alpha=0.1, sigma=1.0, lambda_param=1e-3):
    """Score the change between every pair of consecutive rows of ``X``.

    For each pair ``(X[i], X[i + 1])`` the values of each row are treated as
    a set of scalar samples.  A Gaussian kernel matrix is built for each row,
    a regularised linear system mixing both matrices is solved, and the score
    is the mean squared deviation of the fitted values from 1.

    Args:
        X: Sequence of equally long 1-D numpy arrays (e.g. a 2-D array with
            one row per window).
        alpha: Mixing weight between the current (``alpha``) and the next
            (``1 - alpha``) kernel matrix.
        sigma: Gaussian kernel width; the kernel uses
            ``gamma = 1 / (2 * sigma**2)``.
        lambda_param: Ridge term added to the diagonal of the system matrix.

    Returns:
        np.ndarray: ``len(X) - 1`` scores, where entry ``i`` belongs to the
        transition from row ``i`` to row ``i + 1``.  An empty array is
        returned when ``X`` has fewer than two rows.
    """
    n = len(X) - 1
    if n <= 0:
        # Fewer than two rows means there is no transition to score.
        # (Previously an empty X made np.zeros(-1) raise ValueError.)
        return np.zeros(0)

    # Loop-invariant kernel parameter.
    gamma = 1 / (2 * sigma ** 2)

    def _row_kernel(row):
        # Gaussian kernel between all pairs of scalar values in one row.
        return rbf_kernel(row.reshape(-1, 1), row.reshape(-1, 1), gamma=gamma)

    change_scores = np.zeros(n)

    # The "next" kernel of step i is the "current" kernel of step i + 1, so
    # each row's kernel is computed once and carried forward.
    K_t = _row_kernel(X[0])
    for i in range(n):
        K_t_next = _row_kernel(X[i + 1])

        # Regularised system matrix and right-hand side.
        H = alpha * K_t + (1 - alpha) * K_t_next + lambda_param * np.eye(K_t.shape[0])
        h = np.mean(K_t, axis=1)

        theta = np.linalg.solve(H, h)

        # Mean squared deviation of the fitted values from 1.
        change_scores[i] = np.mean(np.square(K_t_next.dot(theta) - 1))

        K_t = K_t_next

    return change_scores

# Identify significant changes and label opportune moments
def label_opportune_moments(change_scores):
    """Pick the indices of change scores that count as opportune moments.

    A score is selected when either
      * it exceeds ``mean + 3 * std`` of all scores, or
      * among the remaining scores (when there are at least two) it falls in
        the 2-means cluster with the larger centroid and is also above that
        centroid.

    Args:
        change_scores: 1-D numpy array of scores.

    Returns:
        np.ndarray: Sorted integer indices into ``change_scores``.
    """
    cutoff = np.mean(change_scores) + 3 * np.std(change_scores)
    is_outlier = change_scores > cutoff

    # Scores that did not pass the 3-sigma rule are clustered separately.
    inlier_idx = np.where(~is_outlier)[0]
    inlier_scores = change_scores[inlier_idx]

    is_high_inlier = np.zeros_like(change_scores, dtype=bool)
    if len(inlier_scores) > 1:  # two clusters need at least two samples
        km = KMeans(n_clusters=2, random_state=42).fit(inlier_scores.reshape(-1, 1))
        centers = km.cluster_centers_.flatten()
        top = np.argmax(centers)

        in_top_cluster = km.labels_ == top
        above_center = inlier_scores > centers[top]

        # Scatter the per-inlier decision back to positions in the full array.
        is_high_inlier[inlier_idx] = in_top_cluster & above_center

    return np.where(is_outlier | is_high_inlier)[0]

In [6]:
def build_cnn_encoder(input_shape):
    """Build a 1-D convolutional encoder for a single signal.

    Layout: two blocks of (Conv1D + BatchNorm) x 2 followed by max pooling,
    with 32 and then 64 filters, then one Conv1D + BatchNorm with 32 filters
    and global average pooling.  Every convolution uses kernel size 4 and
    ReLU activation.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.

    Returns:
        Model: Keras model mapping the input to the pooled feature vector.
    """
    def conv_bn(tensor, n_filters):
        # One convolution followed by batch normalisation.
        tensor = Conv1D(n_filters, kernel_size=4, activation='relu')(tensor)
        return BatchNormalization()(tensor)

    signal_in = Input(shape=input_shape)
    hidden = signal_in

    # Conv blocks 1 and 2: two conv+BN pairs, then halve the time axis.
    for n_filters in (32, 64):
        hidden = conv_bn(hidden, n_filters)
        hidden = conv_bn(hidden, n_filters)
        hidden = MaxPooling1D(pool_size=2)(hidden)

    # Conv block 3 (bottleneck), collapsed over time.
    hidden = conv_bn(hidden, 32)
    embedding = GlobalAveragePooling1D()(hidden)

    return Model(inputs=signal_in, outputs=embedding)



In [7]:
def get_raw_segments(physio_df, window_size=5):
    """Cut a recording into consecutive, non-overlapping raw windows.

    Args:
        physio_df: Downsampled recording (one row per sample).
        window_size: Window length in seconds; each window holds
            ``window_size * VALENCE_SAMPLING_RATE`` rows.

    Returns:
        list: One DataFrame slice per complete window, in order.  Rows left
        over after the last complete window are not returned.
    """
    win_len = window_size * VALENCE_SAMPLING_RATE
    # Largest start position that still leaves room for a full window.
    last_start = len(physio_df) - win_len
    return [
        physio_df.iloc[start:start + win_len]
        for start in range(0, last_start + 1, win_len)
    ]

def build_fusion_model(input_length, num_modalities):
    """Build the multi-input classifier that fuses per-modality CNN features.

    Each modality gets its own input of shape ``(input_length, 1)`` and its
    own, separately constructed encoder from ``build_cnn_encoder``.  The
    encoder outputs are concatenated and passed through Dense(64, relu),
    Dropout(0.5) and a single sigmoid unit.

    Args:
        input_length: Number of time steps per window.
        num_modalities: Number of signal inputs.

    Returns:
        Model: Uncompiled Keras model taking a list of ``num_modalities``
        inputs and producing one probability per sample.
    """
    branch_shape = (input_length, 1)

    # One input per modality, then one encoder per modality (no sharing).
    inputs = [Input(shape=branch_shape) for _ in range(num_modalities)]
    encoders = [build_cnn_encoder(branch_shape) for _ in range(num_modalities)]
    features = [encoder(branch_in) for encoder, branch_in in zip(encoders, inputs)]

    # Feature-level fusion followed by the classification head.
    fused = Concatenate()(features)
    hidden = Dense(64, activation='relu')(fused)
    hidden = Dropout(0.5)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=inputs, outputs=probability)


def prepare_multimodal_input(segments, modalities):
    """Turn a list of window frames into one array per modality.

    Args:
        segments: Iterable of DataFrames, one per window.
        modalities: Column names to extract, in the desired output order.

    Returns:
        list: For each name in ``modalities``, an array built from that
        column of every window, each window reshaped to a single-column
        2-D array.  With equally long windows the arrays have shape
        ``(n_windows, window_length, 1)``.
    """
    buckets = {}
    for name in modalities:
        buckets[name] = []

    for window in segments:
        for name in modalities:
            column = window[name].values
            # Add a trailing channel axis: (length,) -> (length, 1).
            buckets[name].append(column.reshape(-1, 1))

    stacked = []
    for name in modalities:
        stacked.append(np.array(buckets[name]))
    return stacked


In [8]:
def main():
    """Train and evaluate one CNN-fusion model per user and save the results.

    Pipeline:
      1. For subjects 1-30 (subject 7 is skipped) load the recording, cut it
         into raw windows, and derive a binary label per window from the
         change scores of the window-level mean features.
      2. For each user, train a fusion model on all *other* users in the same
         pre-defined cluster (80/20 stratified train/validation split),
         choose the decision threshold that maximises macro F1 on the
         validation split, and evaluate on the held-out user.
      3. Save each model and threshold under ``saved_models_cnn_fusion/`` and
         write one row of metrics per user to ``cnn_fusion_results.csv``.

    Users whose recording file is missing, whose recording holds no complete
    window, or who have no other loaded user in their cluster are skipped
    with a message.
    """
    # User profiles with pre-defined clusters
    df_profiles = pd.DataFrame({
        "User": ["User1", "User2", "User3", "User4", "User5", "User6", "User8", "User9", "User10",
                "User11", "User12", "User13", "User14", "User15", "User16", "User17", "User18",
                "User19", "User20", "User21", "User22", "User23", "User24", "User25", "User26",
                "User27", "User28", "User29", "User30"],
        "Cluster": [0, 0, 0, 1, 1, 0, 0, 0, 0, 
                   0, 0, 0, 0, 0, 0, 0, 1,
                   0, 0, 0, 0, 0, 0, 0, 1,
                   0, 0, 1, 0]
    })

    # Raw channels fed to the CNN branches, in model-input order.
    modalities = ["BVP", "GSR", "ECG", "Resp", "EMG_c"]
    # Window-level features whose change scores define the labels.
    change_feature_columns = ['ECG_mean', 'BVP_mean', 'GSR_mean',
                              'Resp_mean', 'Skin_temp_mean', 'EMG_mean']

    # Preprocess data for all users
    user_inputs = {}
    user_labels = {}
    for i in range(1, 31):
        user_id = f"User{i}"
        if i == 7:
            continue

        try:
            physio_file = f"../case_dataset-master/case_dataset-master/data/raw/physiological/sub{i}_DAQ.txt"

            # Load physiological data
            df_physio = load_data(physio_file)

            # Segment raw data
            raw_segments = get_raw_segments(df_physio)  # list of DataFrames
            if not raw_segments:
                # Without a single complete window there is nothing to label
                # or feed to the model (and the NaN mask below needs 3-D arrays).
                print(f"Recording for {user_id} has no complete window. Skipping...")
                continue

            # Prepare modality-specific raw inputs
            X_modalities = prepare_multimodal_input(raw_segments, modalities=modalities)

            # Create segmented data for feature extraction (mean features)
            segmented_data = segment_data(df_physio)

            # Compute change scores on physiological mean features
            physio_features = segmented_data[change_feature_columns].values
            change_scores = compute_rulsif_change_scores(physio_features)

            # Label opportune moments: score i marks window i as positive.
            opportune_moments = label_opportune_moments(change_scores)
            labels = np.zeros(len(raw_segments))
            labels[opportune_moments] = 1

            # Filter out segments with any NaNs across modalities
            valid_mask = ~np.any([np.isnan(X).any(axis=(1, 2)) for X in X_modalities], axis=0)
            X_modalities = [X[valid_mask] for X in X_modalities]
            labels = labels[valid_mask]

            user_inputs[user_id] = X_modalities  # list of modality arrays
            user_labels[user_id] = labels

        except FileNotFoundError:
            print(f"Data for {user_id} not found. Skipping...")
            continue

    results = []

    for user, X_modalities in user_inputs.items():
        # Get user's cluster
        cluster = df_profiles[df_profiles["User"] == user]["Cluster"].values[0]
        similar_users = [u for u in df_profiles[df_profiles["Cluster"] == cluster]["User"] if u != user]

        # Aggregate training data from users in the same cluster
        train_modalities = [[] for _ in range(len(X_modalities))]  # One list per modality
        train_labels = []

        for u in similar_users:
            if u in user_inputs:
                for mod_idx, modality_data in enumerate(user_inputs[u]):
                    train_modalities[mod_idx].append(modality_data)
                train_labels.append(user_labels[u])

        # Skip if no training data
        if not train_labels:
            print(f"No training data available for {user}. Skipping...")
            continue

        # Stack per-modality arrays
        train_modalities = [np.vstack(mod_data) for mod_data in train_modalities]
        train_labels = np.hstack(train_labels)

        # Prepare test data
        test_modalities = [np.array(modality) for modality in X_modalities]
        test_labels = user_labels[user]

        print(f"\nTraining {user} on: {[u for u in similar_users if u in user_inputs]}")
        print(f"Train samples: {len(train_labels)}, Test samples: {len(test_labels)}")
        print(f"Class distribution - Train: {np.bincount(train_labels.astype(int))}, Test: {np.bincount(test_labels.astype(int))}")

        # Build and compile CNN fusion model.  The window length is read from
        # the data so it always matches what get_raw_segments produced.
        model = build_fusion_model(input_length=train_modalities[0].shape[1],
                                   num_modalities=len(X_modalities))
        model.compile(optimizer=Adam(learning_rate=0.001), 
                    loss="binary_crossentropy", 
                    metrics=["accuracy"])

        # Compute class weights for imbalance.  Weights come back in the order
        # of `classes`, so key them by the class label itself, not by position.
        classes = np.unique(train_labels)
        weights = compute_class_weight(class_weight="balanced",
                                       classes=classes,
                                       y=train_labels)
        class_weights = {int(c): float(w) for c, w in zip(classes, weights)}

        # Train the model
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        # One stratified split applied to every modality and the labels at
        # once, so all arrays are guaranteed to share the same row partition.
        # Output layout: [X0_tr, X0_val, X1_tr, X1_val, ..., y_tr, y_val].
        split = train_test_split(*train_modalities, train_labels,
                                 test_size=0.2, random_state=42, stratify=train_labels)
        train_modalities_train = split[0:-2:2]
        train_modalities_val = split[1:-2:2]
        y_train, y_val = split[-2], split[-1]

        model.fit(
                train_modalities_train, y_train,
                validation_data=(train_modalities_val, y_val),
                epochs=50,
                batch_size=32,
                class_weight=class_weights,
                callbacks=[early_stopping],
                verbose=0
            )

        # Tune threshold on validation set (not test!)
        y_val_probs = model.predict(train_modalities_val).flatten()

        thresholds = np.arange(0.05, 0.95, 0.01)
        best_f1 = 0
        best_threshold = 0.5

        for t in thresholds:
            preds = (y_val_probs > t).astype(int)
            f1 = f1_score(y_val, preds, average='macro', zero_division=0)
            if f1 > best_f1:
                best_f1 = f1
                best_threshold = t

        # Now use that threshold on the test set
        y_pred_probs = model.predict(test_modalities).flatten()
        y_pred_classes = (y_pred_probs > best_threshold).astype(int)

        # Create a directory for saved models
        os.makedirs("saved_models_cnn_fusion", exist_ok=True)

        # Create a subfolder for this user
        user_dir = os.path.join("saved_models_cnn_fusion", f"user_{user}")
        os.makedirs(user_dir, exist_ok=True)

        # Save the model
        model.save(os.path.join(user_dir, "model.h5"))

        # Also save the best threshold
        with open(os.path.join(user_dir, "threshold.txt"), "w") as f:
            f.write(str(round(best_threshold, 4)))

        # Fix the label set so the matrix is always 2x2, even when the test
        # labels and predictions happen to contain a single class.
        tn, fp, fn, tp = confusion_matrix(test_labels.astype(int), y_pred_classes,
                                          labels=[0, 1]).ravel()

        TPR = recall_score(test_labels, y_pred_classes)               # Sensitivity / Recall
        TNR = tn / (tn + fp) if (tn + fp) > 0 else 0.0               # Specificity
        FPR = fp / (fp + tn) if (fp + tn) > 0 else 0.0               # False positive rate
        F1 = f1_score(test_labels, y_pred_classes, average='weighted')
        # ROC AUC is undefined (roc_auc_score raises) with one class present.
        if np.unique(test_labels).size > 1:
            AUC = roc_auc_score(test_labels, y_pred_probs)
        else:
            AUC = float("nan")
        ACC = accuracy_score(test_labels, y_pred_classes)

        results.append([
                        user,
                        round(TPR, 4),
                        round(TNR, 4),
                        round(FPR, 4),
                        round(F1, 4),
                        round(AUC, 4),
                        round(ACC, 4),
                        round(best_threshold, 4)
                    ])

        print(classification_report(test_labels, y_pred_classes))
        print(f"AUC-ROC: {AUC:.3f}")

    # Save results to CSV
    output_file = "cnn_fusion_results.csv"
    with open(output_file, mode="w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["User", "TPR", "TNR", "FPR", "Weighted F1", "AUC", "Accuracy", "Threshold"])
        writer.writerows(results)

    print(f"\nResults saved to {output_file}")


if __name__ == "__main__":
    main()




Training User1 on: ['User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9680 1590], Test: [341 149]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.70      1.00      0.82       341
         1.0       0.00      0.00      0.00       149

    accuracy                           0.70       490
   macro avg       0.35      0.50      0.41       490
weighted avg       0.48      0.70      0.57       490

AUC-ROC: 0.876

Training User2 on: ['User1', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9591 1679], Test: [430  60]
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99       430
         1.0       1.00      0.82      0.90        60

    accuracy                           0.98       490
   macro avg       0.99      0.91      0.94       490
weighted avg       0.98      0.98      0.98

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.88      1.00      0.94       433
         1.0       0.00      0.00      0.00        57

    accuracy                           0.88       490
   macro avg       0.44      0.50      0.47       490
weighted avg       0.78      0.88      0.83       490

AUC-ROC: 0.997

Training User5 on: ['User4', 'User18', 'User26', 'User29']
Train samples: 1960, Test samples: 490
Class distribution - Train: [1697  263], Test: [420  70]
              precision    recall  f1-score   support

         0.0       0.88      1.00      0.93       420
         1.0       1.00      0.14      0.25        70

    accuracy                           0.88       490
   macro avg       0.94      0.57      0.59       490
weighted avg       0.89      0.88      0.84       490

AUC-ROC: 0.947

Training User6 on: ['User1', 'User2', 'User3', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'Use

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.88      1.00      0.93       429
         1.0       0.00      0.00      0.00        61

    accuracy                           0.88       490
   macro avg       0.44      0.50      0.47       490
weighted avg       0.77      0.88      0.82       490

AUC-ROC: 0.189

Training User8 on: ['User1', 'User2', 'User3', 'User6', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9576 1694], Test: [445  45]
              precision    recall  f1-score   support

         0.0       1.00      0.96      0.98       445
         1.0       0.73      0.98      0.84        45

    accuracy                           0.97       490
   macro avg       0.87      0.97      0.91       490
weighted avg       0.97      0.97      0.97

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.85      1.00      0.92       417
         1.0       0.00      0.00      0.00        73

    accuracy                           0.85       490
   macro avg       0.43      0.50      0.46       490
weighted avg       0.72      0.85      0.78       490

AUC-ROC: 0.838

Training User12 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9581 1689], Test: [440  50]
              precision    recall  f1-score   support

         0.0       1.00      0.88      0.94       440
         1.0       0.49      1.00      0.65        50

    accuracy                           0.89       490
   macro avg       0.74      0.94      0.79       490
weighted avg       0.95      0.89      0.91

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.88      1.00      0.93       430
         1.0       0.00      0.00      0.00        60

    accuracy                           0.88       490
   macro avg       0.44      0.50      0.47       490
weighted avg       0.77      0.88      0.82       490

AUC-ROC: 0.994

Training User17 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User19', 'User20', 'User21', 'User22', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9594 1676], Test: [427  63]
              precision    recall  f1-score   support

         0.0       1.00      0.91      0.95       427
         1.0       0.62      1.00      0.76        63

    accuracy                           0.92       490
   macro avg       0.81      0.95      0.86       490
weighted avg       0.95      0.92      0.93

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.92      1.00      0.96       450
         1.0       0.00      0.00      0.00        40

    accuracy                           0.92       490
   macro avg       0.46      0.50      0.48       490
weighted avg       0.84      0.92      0.88       490

AUC-ROC: 0.299

Training User19 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User20', 'User21', 'User22', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9575 1695], Test: [446  44]
              precision    recall  f1-score   support

         0.0       1.00      0.94      0.97       446
         1.0       0.61      1.00      0.76        44

    accuracy                           0.94       490
   macro avg       0.81      0.97      0.86       490
weighted avg       0.97      0.94      0.95

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.77      1.00      0.87       378
         1.0       0.00      0.00      0.00       112

    accuracy                           0.77       490
   macro avg       0.39      0.50      0.44       490
weighted avg       0.60      0.77      0.67       490

AUC-ROC: 0.503

Training User22 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User23', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9679 1591], Test: [342 148]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.70      1.00      0.82       342
         1.0       0.00      0.00      0.00       148

    accuracy                           0.70       490
   macro avg       0.35      0.50      0.41       490
weighted avg       0.49      0.70      0.57       490

AUC-ROC: 0.712

Training User23 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User24', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9589 1681], Test: [432  58]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.88      1.00      0.94       432
         1.0       0.00      0.00      0.00        58

    accuracy                           0.88       490
   macro avg       0.44      0.50      0.47       490
weighted avg       0.78      0.88      0.83       490

AUC-ROC: 0.791

Training User24 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User23', 'User25', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9610 1660], Test: [411  79]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.84      1.00      0.91       411
         1.0       0.00      0.00      0.00        79

    accuracy                           0.84       490
   macro avg       0.42      0.50      0.46       490
weighted avg       0.70      0.84      0.77       490

AUC-ROC: 0.806

Training User25 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'User19', 'User20', 'User21', 'User22', 'User23', 'User24', 'User27', 'User28', 'User30']
Train samples: 11270, Test samples: 490
Class distribution - Train: [9573 1697], Test: [448  42]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.91      1.00      0.96       448
         1.0       0.00      0.00      0.00        42

    accuracy                           0.91       490
   macro avg       0.46      0.50      0.48       490
weighted avg       0.84      0.91      0.87       490

AUC-ROC: 0.375

Training User26 on: ['User4', 'User5', 'User18', 'User29']
Train samples: 1960, Test samples: 490
Class distribution - Train: [1697  263], Test: [420  70]
              precision    recall  f1-score   support

         0.0       0.89      1.00      0.94       420
         1.0       1.00      0.27      0.43        70

    accuracy                           0.90       490
   macro avg       0.95      0.64      0.68       490
weighted avg       0.91      0.90      0.87       490

AUC-ROC: 0.990

Training User27 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'Use

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

         0.0       0.84      1.00      0.92       414
         1.0       0.00      0.00      0.00        76

    accuracy                           0.84       490
   macro avg       0.42      0.50      0.46       490
weighted avg       0.71      0.84      0.77       490

AUC-ROC: 0.126

Training User29 on: ['User4', 'User5', 'User18', 'User26']
Train samples: 1960, Test samples: 490
Class distribution - Train: [1723  237], Test: [394  96]
              precision    recall  f1-score   support

         0.0       1.00      0.61      0.76       394
         1.0       0.38      1.00      0.55        96

    accuracy                           0.68       490
   macro avg       0.69      0.80      0.65       490
weighted avg       0.88      0.68      0.72       490

AUC-ROC: 1.000

Training User30 on: ['User1', 'User2', 'User3', 'User6', 'User8', 'User9', 'User10', 'User11', 'User12', 'User13', 'User14', 'User15', 'User16', 'User17', 'Use

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
