# LambdaMART (RECOLA)

In [None]:
# pip install lightgbm

In [1]:
import os
import numpy as np
import pandas as pd
import lightgbm as lgb
from scipy.stats import pearsonr, kendalltau
from sklearn.model_selection import GroupKFold

In [2]:
# Make the project folder the working directory so the relative data and
# output paths used later in this notebook resolve against it.
# NOTE(review): this is a machine-specific absolute path; it has to be edited
# before the notebook can run anywhere else.
project_directory = r'C:\Users\marco\OneDrive\Desktop\Final Year Project'
os.chdir(project_directory)
# Working directory after the chdir above.
base_dir = os.getcwd()

- Evaluation for arousal

In [5]:

def print_evaluation_callback(period=1, show_stdv=True):
    """Build a training callback that prints the current evaluation metrics.

    Args:
        period: Print on every iteration whose index is a multiple of
            ``period``. A value <= 0 disables printing altogether.
        show_stdv: Despite the name, this only selects the number format:
            when true, values are printed with six decimals; otherwise they
            are printed with Python's default formatting.

    Returns:
        A callable that takes the callback environment (it reads
        ``env.iteration`` and ``env.evaluation_result_list``). Its ``order``
        attribute is set to 10.
    """
    value_spec = ".6f" if show_stdv else ""

    def callback(env):
        if period <= 0 or env.iteration % period != 0:
            return
        # Only the first three fields of each entry are printed. The starred
        # target lets entries carry any number of trailing fields instead of
        # requiring exactly four.
        line = "".join(
            f"{data_name}'s {eval_name}: {format(value, value_spec)} "
            for data_name, eval_name, value, *_ in env.evaluation_result_list
        )
        print(line)

    callback.order = 10
    return callback

# CSV location, relative to the current working directory.
input_path = os.path.join('RECOLA Ranking Algorithms', 'RECOLA_Intervals_Data','ArousalValenceTimeSeries_Quartiles.csv')

# Load the whole file into a DataFrame; the code below selects its feature,
# target and participant columns by name.
df = pd.read_csv(input_path)

def concordance_correlation_coefficient(y_true, y_pred):
    """Return the concordance correlation coefficient (CCC) of two sequences.

    Computed with population (ddof=0) moments as::

        2 * cov(y_true, y_pred)
        / (var(y_true) + var(y_pred) + (mean(y_true) - mean(y_pred)) ** 2)

    The covariance is computed directly rather than as
    ``correlation * sd_true * sd_pred``: the correlation is undefined (NaN)
    when either input is constant, whereas the covariance is simply 0, so a
    constant prediction now scores 0 instead of NaN.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        The CCC as a float, or NaN when the denominator is zero (both inputs
        constant and equal), where the coefficient is undefined.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    mean_true, mean_pred = y_true.mean(), y_pred.mean()
    covariance = np.mean((y_true - mean_true) * (y_pred - mean_pred))
    denominator = y_true.var() + y_pred.var() + (mean_true - mean_pred) ** 2

    if denominator == 0:
        return np.nan
    return 2 * covariance / denominator

def pearson_correlation_coefficient(y_true, y_pred):
    """Return the Pearson correlation between ``y_true`` and ``y_pred``.

    Only the correlation statistic from ``scipy.stats.pearsonr`` is returned;
    the accompanying p-value is discarded.
    """
    statistic, _p_value = pearsonr(y_true, y_pred)
    return statistic

def kendalls_tau_coefficient(y_true, y_pred):
    """Return Kendall's tau between ``y_true`` and ``y_pred``.

    Only the tau statistic from ``scipy.stats.kendalltau`` is returned; the
    accompanying p-value is discarded.
    """
    tau, _p_value = kendalltau(y_true, y_pred)
    return tau

def evaluate_individual_performance(ranker, X_test, Y_test, group_labels):
    """Score the ranker separately for each participant of a test fold.

    For every distinct value in ``group_labels`` the matching rows are
    selected, centred on their per-feature mean, passed to
    ``ranker.predict``, and the predictions are compared with the labels.

    Args:
        ranker: Fitted model exposing ``predict``.
        X_test: Feature array; rows are selected with a boolean mask.
        Y_test: Label array aligned row-for-row with ``X_test``.
        group_labels: Array of participant ids aligned with ``X_test``.

    Returns:
        A DataFrame with one row per participant and the columns
        ``Participant ID``, ``PCC``, ``CCC`` and ``KendallTau``.
    """
    def score_participant(participant_id):
        mask = group_labels == participant_id
        features, labels = X_test[mask], Y_test[mask]

        # Express every window relative to the participant's mean feature vector.
        # NOTE(review): the ranker in this file is fitted on un-centred
        # features, so it sees shifted inputs here - confirm this is intended.
        centred_features = features - features.mean(axis=0)
        scores = ranker.predict(centred_features)

        return {
            'Participant ID': participant_id,
            'PCC': pearson_correlation_coefficient(labels, scores),
            'CCC': concordance_correlation_coefficient(labels, scores),
            'KendallTau': kendalls_tau_coefficient(labels, scores),
        }

    per_participant = [score_participant(pid) for pid in np.unique(group_labels)]
    return pd.DataFrame(per_participant)
eval_results = []
all_evaluation_results = []

# Select features and target: every column that is not an identifier, a raw
# median or one of the quartile targets is used as a feature.
non_feature_cols = ['participant_id', 'median_arousal', 'median_valence', 'time_window', 'arousal_quartile', 'valence_quartile']
feature_cols = [col for col in df.columns if col not in non_feature_cols]
targets = ['arousal_quartile']
group_kfold = GroupKFold(n_splits=10)
# NDCG cut-offs evaluated on each validation fold.
eval_at = [4, 8, 15, 50]

# Iterate over each target
for target in targets:
    print(f"\nProcessing for {target}:")
    X = df[feature_cols].values
    y = df[target].values
    group_col = df['participant_id'].values

    # One tuple of NDCG scores (in `eval_at` order) per fold.
    eval_results = []

    for train_idx, test_idx in group_kfold.split(X, y, group_col):
        # The ranker needs each participant's rows to be contiguous, with the
        # group sizes listed in row order. np.unique returns counts in
        # sorted-id order, so reorder the rows the same way (stable sort keeps
        # the within-participant order). This is a no-op when the CSV is
        # already sorted by participant.
        train_idx = train_idx[np.argsort(group_col[train_idx], kind='stable')]
        test_idx = test_idx[np.argsort(group_col[test_idx], kind='stable')]

        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        train_group_counts = np.unique(group_col[train_idx], return_counts=True)[1]
        test_group_counts = np.unique(group_col[test_idx], return_counts=True)[1]

        # Linear gain per relevance level, covering the largest label seen in
        # either split so that validation labels are always in range.
        max_label = int(max(y_train.max(), y_test.max()))

        # LightGBM parameters
        ranker = lgb.LGBMRanker(
            objective="lambdarank",
            boosting_type="gbdt",
            n_estimators=15,
            importance_type="gain",
            metric="ndcg",
            num_leaves=31,
            learning_rate=0.05,
            max_depth=-1,
            label_gain=list(range(max_label + 1))
        )

        # Training the model
        ranker.fit(
            X=X_train,
            y=y_train,
            group=train_group_counts,
            eval_set=[(X_test, y_test)],
            eval_group=[test_group_counts],
            eval_at=eval_at,
            callbacks=[print_evaluation_callback(period=1)]
        )

        # Collect evaluation results (exactly once per fold)
        print("========================================================================================\n")
        fold_scores = ranker.best_score_['valid_0']
        eval_result = tuple(fold_scores[f'ndcg@{k}'] for k in eval_at)
        eval_results.append(eval_result)

        individual_results = evaluate_individual_performance(ranker, X_test, y_test, group_col[test_idx])

        all_evaluation_results.append(individual_results)

# Concatenate and save all individual evaluations
final_results_df = pd.concat(all_evaluation_results, ignore_index=True)
final_results_df.sort_values(by='Participant ID', inplace=True)
output_folder = 'RECOLA Ranking Algorithms/Evaluation/LambdaMART'
output_file = 'individual_evaluation_results_arousal.csv'
# Create the output folder on first run so that saving cannot fail on a missing directory.
os.makedirs(output_folder, exist_ok=True)
final_results_df.to_csv(os.path.join(output_folder, output_file), index=False)

print("Final individual evaluation results saved.")


# Calculate and print the average NDCG scores across folds for the target
avg_ndcg_at_4 = np.mean([result[0] for result in eval_results])
avg_ndcg_at_8 = np.mean([result[1] for result in eval_results])
avg_ndcg_at_15 = np.mean([result[2] for result in eval_results])
avg_ndcg_at_50 = np.mean([result[3] for result in eval_results])

print(f"\nAverage NDCG@4 Score for {target}: {avg_ndcg_at_4:.4f}")
print(f"Average NDCG@8 Score for {target}: {avg_ndcg_at_8:.4f}")
print(f"Average NDCG@15 Score for {target}: {avg_ndcg_at_15:.4f}")
print(f"Average NDCG@50 Score for {target}: {avg_ndcg_at_50:.4f}")


Processing for arousal_quartile:
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010340 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76853


found 0 physical cores < 1
  File "c:\Users\marco\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


[LightGBM] [Info] Number of data points in the train set: 2375, number of used features: 302
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 1.000000 valid_0's ndcg@50: 0.966169 
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 0.992891 valid_0's ndcg@50: 0.972768 
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 0.976916 valid_0's ndcg@50: 0.975626 
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 0.984672 valid_0's ndcg@50: 0.973324 
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 0.991780 valid_0's ndcg@50: 0.976966 
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 0.991780 valid_0's ndcg@50: 0.974594 
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 0.991780 valid_0's ndcg@50: 0.974836 
valid_0's ndcg@4: 1.000000 valid_0's ndcg@8: 1.000000 valid_0's ndcg@15: 0.991440 valid_0's ndcg@50: 0.972264 
valid_0's ndcg@4: 1

- Evaluation for valence

In [6]:

def print_evaluation_callback(period=1, show_stdv=True):
    """Build a training callback that prints the current evaluation metrics.

    Args:
        period: Print on every iteration whose index is a multiple of
            ``period``. A value <= 0 disables printing altogether.
        show_stdv: Despite the name, this only selects the number format:
            when true, values are printed with six decimals; otherwise they
            are printed with Python's default formatting.

    Returns:
        A callable that takes the callback environment (it reads
        ``env.iteration`` and ``env.evaluation_result_list``). Its ``order``
        attribute is set to 10.
    """
    value_spec = ".6f" if show_stdv else ""

    def callback(env):
        if period <= 0 or env.iteration % period != 0:
            return
        # Only the first three fields of each entry are printed. The starred
        # target lets entries carry any number of trailing fields instead of
        # requiring exactly four.
        line = "".join(
            f"{data_name}'s {eval_name}: {format(value, value_spec)} "
            for data_name, eval_name, value, *_ in env.evaluation_result_list
        )
        print(line)

    callback.order = 10
    return callback

# CSV location, relative to the current working directory.
input_path = os.path.join('RECOLA Ranking Algorithms', 'RECOLA_Intervals_Data','ArousalValenceTimeSeries_Quartiles.csv')

# Load the whole file into a DataFrame; the code below selects its feature,
# target and participant columns by name.
df = pd.read_csv(input_path)

def concordance_correlation_coefficient(y_true, y_pred):
    """Return the concordance correlation coefficient (CCC) of two sequences.

    Computed with population (ddof=0) moments as::

        2 * cov(y_true, y_pred)
        / (var(y_true) + var(y_pred) + (mean(y_true) - mean(y_pred)) ** 2)

    The covariance is computed directly rather than as
    ``correlation * sd_true * sd_pred``: the correlation is undefined (NaN)
    when either input is constant, whereas the covariance is simply 0, so a
    constant prediction now scores 0 instead of NaN.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        The CCC as a float, or NaN when the denominator is zero (both inputs
        constant and equal), where the coefficient is undefined.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    mean_true, mean_pred = y_true.mean(), y_pred.mean()
    covariance = np.mean((y_true - mean_true) * (y_pred - mean_pred))
    denominator = y_true.var() + y_pred.var() + (mean_true - mean_pred) ** 2

    if denominator == 0:
        return np.nan
    return 2 * covariance / denominator

def pearson_correlation_coefficient(y_true, y_pred):
    """Return the Pearson correlation between ``y_true`` and ``y_pred``.

    Only the correlation statistic from ``scipy.stats.pearsonr`` is returned;
    the accompanying p-value is discarded.
    """
    statistic, _p_value = pearsonr(y_true, y_pred)
    return statistic

def kendalls_tau_coefficient(y_true, y_pred):
    """Return Kendall's tau between ``y_true`` and ``y_pred``.

    Only the tau statistic from ``scipy.stats.kendalltau`` is returned; the
    accompanying p-value is discarded.
    """
    tau, _p_value = kendalltau(y_true, y_pred)
    return tau

def evaluate_individual_performance(ranker, X_test, Y_test, group_labels):
    """Score the ranker separately for each participant of a test fold.

    For every distinct value in ``group_labels`` the matching rows are
    selected, centred on their per-feature mean, passed to
    ``ranker.predict``, and the predictions are compared with the labels.

    Args:
        ranker: Fitted model exposing ``predict``.
        X_test: Feature array; rows are selected with a boolean mask.
        Y_test: Label array aligned row-for-row with ``X_test``.
        group_labels: Array of participant ids aligned with ``X_test``.

    Returns:
        A DataFrame with one row per participant and the columns
        ``Participant ID``, ``PCC``, ``CCC`` and ``KendallTau``.
    """
    def score_participant(participant_id):
        mask = group_labels == participant_id
        features, labels = X_test[mask], Y_test[mask]

        # Express every window relative to the participant's mean feature vector.
        # NOTE(review): the ranker in this file is fitted on un-centred
        # features, so it sees shifted inputs here - confirm this is intended.
        centred_features = features - features.mean(axis=0)
        scores = ranker.predict(centred_features)

        return {
            'Participant ID': participant_id,
            'PCC': pearson_correlation_coefficient(labels, scores),
            'CCC': concordance_correlation_coefficient(labels, scores),
            'KendallTau': kendalls_tau_coefficient(labels, scores),
        }

    per_participant = [score_participant(pid) for pid in np.unique(group_labels)]
    return pd.DataFrame(per_participant)
eval_results = []
all_evaluation_results = []

# Select features and target: every column that is not an identifier, a raw
# median or one of the quartile targets is used as a feature.
non_feature_cols = ['participant_id', 'median_arousal', 'median_valence', 'time_window', 'arousal_quartile', 'valence_quartile']
feature_cols = [col for col in df.columns if col not in non_feature_cols]
targets = ['valence_quartile']
group_kfold = GroupKFold(n_splits=10)
# NDCG cut-offs evaluated on each validation fold.
eval_at = [4, 8, 15, 50]

# Iterate over each target
for target in targets:
    print(f"\nProcessing for {target}:")
    X = df[feature_cols].values
    y = df[target].values
    group_col = df['participant_id'].values

    # One tuple of NDCG scores (in `eval_at` order) per fold.
    eval_results = []

    for train_idx, test_idx in group_kfold.split(X, y, group_col):
        # The ranker needs each participant's rows to be contiguous, with the
        # group sizes listed in row order. np.unique returns counts in
        # sorted-id order, so reorder the rows the same way (stable sort keeps
        # the within-participant order). This is a no-op when the CSV is
        # already sorted by participant.
        train_idx = train_idx[np.argsort(group_col[train_idx], kind='stable')]
        test_idx = test_idx[np.argsort(group_col[test_idx], kind='stable')]

        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]
        train_group_counts = np.unique(group_col[train_idx], return_counts=True)[1]
        test_group_counts = np.unique(group_col[test_idx], return_counts=True)[1]

        # Linear gain per relevance level, covering the largest label seen in
        # either split so that validation labels are always in range.
        max_label = int(max(y_train.max(), y_test.max()))

        # LightGBM parameters
        ranker = lgb.LGBMRanker(
            objective="lambdarank",
            boosting_type="gbdt",
            n_estimators=15,
            importance_type="gain",
            metric="ndcg",
            num_leaves=31,
            learning_rate=0.05,
            max_depth=-1,
            label_gain=list(range(max_label + 1))
        )

        # Training the model
        ranker.fit(
            X=X_train,
            y=y_train,
            group=train_group_counts,
            eval_set=[(X_test, y_test)],
            eval_group=[test_group_counts],
            eval_at=eval_at,
            callbacks=[print_evaluation_callback(period=1)]
        )

        # Collect evaluation results (exactly once per fold)
        print("========================================================================================\n")
        fold_scores = ranker.best_score_['valid_0']
        eval_result = tuple(fold_scores[f'ndcg@{k}'] for k in eval_at)
        eval_results.append(eval_result)

        individual_results = evaluate_individual_performance(ranker, X_test, y_test, group_col[test_idx])

        all_evaluation_results.append(individual_results)

# Concatenate and save all individual evaluations
final_results_df = pd.concat(all_evaluation_results, ignore_index=True)
final_results_df.sort_values(by='Participant ID', inplace=True)
output_folder = 'RECOLA Ranking Algorithms/Evaluation/LambdaMART'
output_file = 'individual_evaluation_results_valence.csv'
# Create the output folder on first run so that saving cannot fail on a missing directory.
os.makedirs(output_folder, exist_ok=True)
final_results_df.to_csv(os.path.join(output_folder, output_file), index=False)

print("Final individual evaluation results saved.")


# Calculate and print the average NDCG scores across folds for the target
avg_ndcg_at_4 = np.mean([result[0] for result in eval_results])
avg_ndcg_at_8 = np.mean([result[1] for result in eval_results])
avg_ndcg_at_15 = np.mean([result[2] for result in eval_results])
avg_ndcg_at_50 = np.mean([result[3] for result in eval_results])

print(f"\nAverage NDCG@4 Score for {target}: {avg_ndcg_at_4:.4f}")
print(f"Average NDCG@8 Score for {target}: {avg_ndcg_at_8:.4f}")
print(f"Average NDCG@15 Score for {target}: {avg_ndcg_at_15:.4f}")
print(f"Average NDCG@50 Score for {target}: {avg_ndcg_at_50:.4f}")





Processing for valence_quartile:
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014585 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76853
[LightGBM] [Info] Number of data points in the train set: 2375, number of used features: 302
valid_0's ndcg@4: 0.934937 valid_0's ndcg@8: 0.866549 valid_0's ndcg@15: 0.845825 valid_0's ndcg@50: 0.836909 
valid_0's ndcg@4: 0.934937 valid_0's ndcg@8: 0.926518 valid_0's ndcg@15: 0.928581 valid_0's ndcg@50: 0.899090 
valid_0's ndcg@4: 0.861355 valid_0's ndcg@8: 0.910166 valid_0's ndcg@15: 0.932299 valid_0's ndcg@50: 0.890280 
valid_0's ndcg@4: 0.934937 valid_0's ndcg@8: 0.957843 valid_0's ndcg@15: 0.954786 valid_0's ndcg@50: 0.887756 
valid_0's ndcg@4: 0.934937 valid_0's ndcg@8: 0.944544 valid_0's ndcg@15: 0.954035 valid_0's ndcg@50: 0.904177 
valid_0's ndcg@4: 0.934937 valid_0's ndcg@8: 0.957843 valid_0's ndcg@15: 0.948136 valid_0's ndcg@50: 0.886591 
valid_0's 