# Three Seconds Intervals (AGAIN Dataset)

In [2]:
# Required Libraries
import os
import joblib
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, kendalltau
from sklearn.model_selection import GroupKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
project_directory = r'C:\Users\marco\OneDrive\Desktop\Final Year Project'
os.chdir(project_directory)
base_dir = os.getcwd() 

In [5]:
base_dir = ('AGAIN Ranking Algorithms')
os.makedirs(base_dir, exist_ok=True)

transformed_dir = os.path.join(base_dir, 'Random Forest Results', 'transformed_data')
os.makedirs(transformed_dir, exist_ok=True) 

evaluation_dir = os.path.join(base_dir, 'Evaluation', 'RandomForest')
os.makedirs(evaluation_dir, exist_ok=True)

def sanitize_game_name(game_name):
    
    invalid_chars = '<>:"/\\|?*'
    for char in invalid_chars:
        game_name = game_name.replace(char, '')
    return game_name

def pairwise_transformation(X, Y, game_name):
    game_name = sanitize_game_name(game_name)  
    
    transformed_data_path =  os.path.join(transformed_dir, f'{game_name}_transformed_data.pkl')
    labels_path = os.path.join(transformed_dir, f'{game_name}_labels.pkl')

    # Check if the files exist
    if os.path.exists(transformed_data_path) and os.path.exists(labels_path):
        print(f"Loading transformed data for {game_name}...")
        transformed_data = joblib.load(transformed_data_path)
        labels = joblib.load(labels_path)
    else:
        print(f"Transforming data for {game_name}...")
        transformed_data, labels = [], []
        for i in range(len(X)):
            for j in range(i + 1, len(X)):
                xi, xj = X[i], X[j]
                yi, yj = Y[i], Y[j]
                if yi > yj:
                    transformed_data.append(xi - xj)
                    labels.append(1)
                    transformed_data.append(xj - xi)
                    labels.append(0)
                elif yi < yj:
                    transformed_data.append(xi - xj)
                    labels.append(0)
                    transformed_data.append(xj - xi)
                    labels.append(1)
        # Save the transformed data
        joblib.dump(np.array(transformed_data), transformed_data_path)
        joblib.dump(np.array(labels), labels_path)

    return np.array(transformed_data), np.array(labels)

def create_windowed_data(input_path, output_path):
    df = pd.read_csv(input_path)

    # Filter for Shooter games
    df = df[df['[control]genre'].isin([ 'Shooter'])]

    # Using the general features
    general_features = [
        '[general]time_passed', '[general]input_intensity',
        '[general]input_diversity', '[general]activity','[general]score', '[general]bot_count',
        '[general]bot_diversity','[general]bot_movement', '[general]player_movement', '[general]object_intensity',
        '[general]object_diversity', '[general]event_intensity', '[general]event_diversity'
    ]

    windowed_data = []

    # Extracting data over a window
    for i in range(0, len(df), 12):
        window = df.iloc[i:i+12]

        # Discard windows of length less than 12 rows
        if len(window) < 12:
            continue

        # Calculating the mean
        averaged_features = window[general_features].mean().to_dict()
        averaged_features['[output]arousal'] = window['[output]arousal'].mean()

        # Extracting related features from the window
        averaged_features['[control]player_id'] = window['[control]player_id'].iloc[0]
        averaged_features['[control]game'] = window['[control]game'].iloc[0]
        averaged_features['[control]genre'] = window['[control]genre'].iloc[0]
        averaged_features['[control]session_id'] = window['[control]session_id'].iloc[0]

        # Appending the computed data to the main list
        windowed_data.append(averaged_features)

    windowed_df = pd.DataFrame(windowed_data)

    # Assigning unique number to each unique player_id
    player_ids = windowed_df['[control]player_id'].unique()
    player_mapping = {player_id: idx for idx, player_id in enumerate(player_ids)}
    windowed_df['player_int_id'] = windowed_df['[control]player_id'].map(player_mapping)

    windowed_df.to_csv(output_path, index=False)


def concordance_correlation_coefficient(y_true, y_pred):
    cor = np.corrcoef(y_true, y_pred)[0][1]
    mean_true, mean_pred = np.mean(y_true), np.mean(y_pred)
    var_true, var_pred = np.var(y_true), np.var(y_pred)
    sd_true, sd_pred = np.std(y_true), np.std(y_pred)
    numerator = 2 * cor * sd_true * sd_pred
    denominator = var_true + var_pred + (mean_true - mean_pred)**2
    ccc = numerator / denominator
    return ccc

def pearson_correlation_coefficient(y_true, y_pred):
    pcc, _ = pearsonr(y_true, y_pred)
    return pcc

def kendalls_tau_coefficient(y_true, y_pred):

    tau, _ = kendalltau(y_true, y_pred)
    return tau

def evaluate_individual_performance(clf, X_test, Y_test, group_labels, game):
    evaluation_results = []

    for participant_id in np.unique(group_labels):

        idx = group_labels == participant_id
        participant_features = X_test[idx]
        participant_labels = Y_test[idx]

        # Compute mean feature values for participant
        mean_features = np.mean(participant_features, axis=0)

        # Pairwise transformation
        transformed_features = participant_features - mean_features

        # Pass the transformed data through the trained RF model to predict probabilities
        predicted_probabilities = clf.predict_proba(transformed_features)[:, 1]

        # Calculate measures with the raw arousal values
        pcc_value = pearsonr(participant_labels, predicted_probabilities)[0]
        ccc_value = concordance_correlation_coefficient(participant_labels, predicted_probabilities)
        kendall_tau_value = kendalltau(participant_labels, predicted_probabilities)[0]

        evaluation_results.append({
            'Game': game,
            'Participant ID': participant_id,
            'PCC': pcc_value,
            'CCC': ccc_value,
            'KendallTau': kendall_tau_value
        })

    return pd.DataFrame(evaluation_results)

# File paths
input_path = os.path.join('Data', 'AGAIN', 'clean_data', 'clean_data.csv')


output_path = os.path.join(base_dir,'intervals_data.csv')

# Creating new file
create_windowed_data(input_path, output_path)
df = pd.read_csv(output_path)

games = df['[control]game'].unique()
train_accuracies = []
test_accuracies = []
accuracy_results_df = pd.DataFrame(columns=['Game', 'Training Accuracy', 'Test Accuracy'])
evaluation_results=[]

# GroupKFold Implementation
group_kfold = GroupKFold(n_splits=10)

# Loops through each games
for game in games:
    game_df = df[df['[control]game'] == game]
    X = game_df[[
        '[general]time_passed', '[general]input_intensity',
        '[general]input_diversity', '[general]activity', '[general]score', '[general]bot_count',
        '[general]bot_diversity', '[general]bot_movement', '[general]player_movement', '[general]object_intensity',
        '[general]object_diversity', '[general]event_intensity', '[general]event_diversity'
    ]].values
    Y = game_df['[output]arousal'].values
    groups = game_df['[control]player_id'].values

    train_accuracies_game = []
    test_accuracies_game = []

    for train_index, test_index in group_kfold.split(X, Y, groups):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = Y[train_index], Y[test_index]
        group_labels_test = groups[test_index]

        # Applies pairwise transformation
        X_train_transformed, Y_train_transformed = pairwise_transformation(X_train, Y_train, game)
        X_test_transformed, Y_test_transformed = pairwise_transformation(X_test, Y_test, game)

        # Train the model
        clf = RandomForestClassifier(n_estimators=10, max_depth=10, max_features='sqrt', min_samples_split=4, min_samples_leaf=2, n_jobs=-1)
        clf.fit(X_train_transformed, Y_train_transformed)

        individual_results_df = evaluate_individual_performance(clf, X_test, Y_test, group_labels_test, game)
        evaluation_results.append(individual_results_df)

        # Train and test accuracy
        train_accuracy = accuracy_score(Y_train_transformed, clf.predict(X_train_transformed))
        test_accuracy = accuracy_score(Y_test_transformed, clf.predict(X_test_transformed))

        train_accuracies_game.append(train_accuracy)
        test_accuracies_game.append(test_accuracy)

    # Calculate and append the results for the current game
    avg_train_accuracy = np.mean(train_accuracies_game) * 100
    avg_test_accuracy = np.mean(test_accuracies_game) * 100

    new_row = pd.DataFrame({
        'Game': [game],
        'Training Accuracy': [avg_train_accuracy],
        'Test Accuracy': [avg_test_accuracy]
    })
    accuracy_results_df = pd.concat([accuracy_results_df, new_row], ignore_index=True)

# Save the accuracy results to a CSV file
accuracy_results_file = os.path.join(evaluation_dir, 'accuracy_results.csv')
accuracy_results_df.to_csv(accuracy_results_file, index=False)
print("Accuracy results saved to:", accuracy_results_file)


combined_results_df = pd.concat(evaluation_results, ignore_index=True)
combined_evaluation_results_file = os.path.join(evaluation_dir, 'random_forest_evaluation_results.csv')
combined_results_df.to_csv(combined_evaluation_results_file, index=False)

print("Combined individual performance evaluation results saved.")



  df = pd.read_csv(input_path)


Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Heist!...
Loading transformed data for Shootout...
Loading transformed data for Shootout...
Loading transformed data for Shootout...
Loading transformed data for Shootout...
Loading transformed data for Shootout...
Loading transfo