## Visualisations 

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Run the notebook from the project root so that the relative paths used
# in later cells resolve against it.
# NOTE(review): this is an absolute, machine-specific Windows path; os.chdir
# raises if the directory does not exist on the current machine.
project_directory = r'C:\Users\marco\OneDrive\Desktop\Final Year Project'
os.chdir(project_directory)
# Working directory as reported by the OS after the change.
base_dir = os.getcwd() 

In [10]:
def calculate_ci(mean, std_dev, n, z=1.96):
    """Return a normal-approximation confidence interval around a mean.

    The margin of error is ``z * std_dev / sqrt(n)``, i.e. ``z`` standard
    errors on either side of ``mean``.

    Args:
        mean: Sample mean.
        std_dev: Sample standard deviation.
        n: Number of observations behind ``mean`` and ``std_dev``.
        z: Critical value of the standard normal distribution. The default
            of 1.96 corresponds to a two-sided 95% interval.

    Returns:
        A ``(lower, upper)`` tuple. If ``std_dev`` is NaN (which is what
        pandas reports for a single observation) both bounds are NaN.
    """
    margin_error = z * (std_dev / np.sqrt(n))
    return (mean - margin_error, mean + margin_error)

# Root folder holding every algorithm's evaluation output (relative to the
# current working directory).
file_dir = 'AGAIN Ranking Algorithms/Evaluation'

# Evaluation CSV (relative to file_dir) -> label shown on the plots.
# The '\n' in a label is a deliberate line break for the x-axis tick text.
# The three RankNet files share one label: if two of them contained the same
# game, the later file would overwrite the earlier one's entry and CSVs.
algorithm_names = {
    'RandomForest/random_forest_evaluation_results.csv': 'Random Forest\nPreference Learner',
    'Ordinal Logistic Regression/OLR_evaluation_result.csv': 'Ordinal Logistic\nRegression',
    'Ordinal Neural Network/ONN_evaluation_results.csv': 'Ordinal Neural\nNetwork',
    'RankNET/ranknet_evaluation_results_Heist!.csv': 'RankNet',
    'RankNET/ranknet_evaluation_results_Shootout.csv': 'RankNet',
    'RankNET/ranknet_evaluation_results_TopDown.csv': 'RankNet',
    'LambdaMart/individual_evaluation_results.csv': 'LambdaMART',
    'Regression/linear_regression_evaluation_results.csv': 'Linear\nRegression',
    'Regression/random_forest_evaluation_results.csv': 'Random Forest\nRegression',
    'Regression/mlp_evaluation_results.csv': 'Multi Layer\nPerceptron Regression',
}

# Output folders: one per kind of summary statistic, plus one for the plots.
base_results_dir = 'AGAIN Ranking Algorithms/Evaluation/Visualisations'
means_dir = os.path.join(base_results_dir, 'Means')
std_devs_dir = os.path.join(base_results_dir, 'StandardDeviations')
cis_dir = os.path.join(base_results_dir, 'ConfidenceIntervals')
plots_dir = os.path.join(base_results_dir, 'Plots')

for directory in (means_dir, std_devs_dir, cis_dir, plots_dir):
    os.makedirs(directory, exist_ok=True)

# game -> {algorithm label -> (pcc_mean, kendall_tau_mean, pcc_ci, kendall_tau_ci)}
model_performance = {}

for file_name, algorithm_name in algorithm_names.items():
    full_path = os.path.join(file_dir, file_name)

    # Line breaks are fine on a plot but not in a file name or a log line.
    safe_algorithm_name = algorithm_name.replace('\n', '_')

    if not os.path.exists(full_path):
        # Still best-effort, but say so: a silently skipped file would make
        # the algorithm disappear from the plots without any hint why.
        print(f'Skipping {safe_algorithm_name}: no results file at {full_path}')
        continue

    data = pd.read_csv(full_path)
    # The result files are not consistent about the name of the game column.
    game_column = 'Game Name' if 'Game Name' in data.columns else 'Game'

    # Rows lacking either metric cannot contribute to the statistics.
    scored = data.dropna(subset=['PCC', 'KendallTau'])

    # One pass over the file; sort=False keeps games in order of appearance.
    # Games whose rows were all dropped above simply produce no group.
    for game, game_data in scored.groupby(game_column, sort=False):
        pcc_mean = game_data['PCC'].mean()
        kendall_tau_mean = game_data['KendallTau'].mean()
        pcc_sd = game_data['PCC'].std()
        kendall_tau_sd = game_data['KendallTau'].std()
        sample_size = len(game_data)

        # Confidence intervals around each mean
        pcc_ci = calculate_ci(pcc_mean, pcc_sd, sample_size)
        kendall_tau_ci = calculate_ci(kendall_tau_mean, kendall_tau_sd, sample_size)

        # Each statistic goes to a one-row CSV of the same name in its own
        # folder. In the CI file every cell holds the (lower, upper) tuple.
        csv_name = f'{safe_algorithm_name}_{game}.csv'
        mean_df = pd.DataFrame({'PCC': [pcc_mean], 'Kendall Tau': [kendall_tau_mean]})
        std_dev_df = pd.DataFrame({'PCC': [pcc_sd], 'Kendall Tau': [kendall_tau_sd]})
        ci_df = pd.DataFrame({'PCC': [pcc_ci], 'Kendall Tau': [kendall_tau_ci]})

        mean_df.to_csv(os.path.join(means_dir, csv_name), index=False)
        std_dev_df.to_csv(os.path.join(std_devs_dir, csv_name), index=False)
        ci_df.to_csv(os.path.join(cis_dir, csv_name), index=False)

        # Keep the figures for the per-game comparison plots.
        model_performance.setdefault(game, {})[algorithm_name] = (
            pcc_mean, kendall_tau_mean, pcc_ci, kendall_tau_ci
        )

# Plotting function
def plot_results(models, pcc_means, kendall_means, pcc_cis, kendall_cis, game, output_path):
    """Save a grouped bar chart comparing algorithms on one game.

    Each algorithm gets two adjacent bars (mean PCC and mean Kendall Tau)
    with error bars derived from the confidence intervals. The figure is
    written to ``<output_path>/<game>_comparison.png`` and then closed.

    Args:
        models: Algorithm labels, one per bar pair, used as x tick labels.
        pcc_means: Mean PCC per algorithm, in the same order as ``models``.
        kendall_means: Mean Kendall Tau per algorithm, same order.
        pcc_cis: ``(lower, upper)`` interval per algorithm for PCC.
        kendall_cis: ``(lower, upper)`` interval per algorithm for Kendall Tau.
        game: Game name; used in the title and in the output file name.
        output_path: Existing directory the PNG is saved into.
    """
    fig, ax = plt.subplots(figsize=(19, 9))

    index = np.arange(len(models))
    bar_width = 0.35

    # Error-bar length is half the interval width, which assumes each
    # interval is symmetric about its mean.
    pcc_errors = [(ci[1] - ci[0]) / 2 for ci in pcc_cis]
    kendall_errors = [(ci[1] - ci[0]) / 2 for ci in kendall_cis]

    ax.bar(index - bar_width / 2, pcc_means, bar_width, yerr=pcc_errors,
           capsize=5, color='purple', label='PCC')
    ax.bar(index + bar_width / 2, kendall_means, bar_width, yerr=kendall_errors,
           capsize=5, color='plum', label='Kendall Tau')

    ax.set_xlabel('Algorithms', fontsize=16)
    ax.set_ylabel('Average', fontsize=16)
    ax.set_title(f'Averages for {game} Arousal Ranking and Regression Algorithms', fontsize=18)
    ax.set_xticks(index)
    ax.set_xticklabels(models, rotation=0, ha='center', fontsize=16)
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis='y', labelsize=16)

    # Size the y-axis from everything that is drawn, not just the bar tops:
    # error-bar caps can rise above the tallest bar, and both metrics are
    # correlations, so a bar may be negative. NaN bounds are ignored.
    extremes = np.asarray(
        list(pcc_means)
        + list(kendall_means)
        + [bound for ci in pcc_cis for bound in ci]
        + [bound for ci in kendall_cis for bound in ci],
        dtype=float,
    )
    extremes = extremes[np.isfinite(extremes)]
    highest = max(extremes.max(), 0.0) if extremes.size else 0.0
    lowest = min(extremes.min(), 0.0) if extremes.size else 0.0
    # The axis still starts at 0 unless something actually dips below it.
    bottom = lowest - 0.1 if lowest < 0 else 0
    ax.set_ylim(bottom, highest + 0.1)

    fig.tight_layout()
    fig.savefig(os.path.join(output_path, f'{game}_comparison.png'))
    # Close this figure explicitly so repeated calls do not accumulate figures.
    plt.close(fig)

# Plot results for each game
# Draw one comparison chart per game from the collected statistics.
for game, algorithms in model_performance.items():
    models = list(algorithms)

    # Each stored value is (pcc_mean, kendall_mean, pcc_ci, kendall_ci);
    # transposing them gives one list per plotted series.
    pcc_means, kendall_means, pcc_cis, kendall_cis = (
        list(series) for series in zip(*algorithms.values())
    )

    plot_results(models, pcc_means, kendall_means, pcc_cis, kendall_cis, game, plots_dir)
