# Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kruskal, mannwhitneyu
import seaborn as sns

In [None]:
def load_and_process(file_path):
    """Load an experiment-results CSV and parse its list-valued columns.

    The 'Time Steps', 'Gini Over Time' and 'Agents Over Time' columns are
    read as strings such as "[0.1, 0.2]" and replaced by lists of floats.

    Args:
        file_path: Location of the CSV file, passed to ``pd.read_csv``.

    Returns:
        The loaded DataFrame with the three columns converted.

    Raises:
        KeyError: If one of the three expected columns is missing.
        ValueError: If a non-empty list element cannot be parsed as a float.
    """
    data = pd.read_csv(file_path)

    def convert_str_to_list(list_str):
        """Parse a string like "[1, 2.5]" into a list of floats."""
        # Strip surrounding whitespace before the brackets and skip empty
        # tokens, so "[]" (or a trailing comma) yields no element instead of
        # failing on float('').
        tokens = list_str.strip().strip('[]').split(',')
        return [float(token) for token in tokens if token.strip()]

    # Apply the conversion to the relevant columns
    for column in ('Time Steps', 'Gini Over Time', 'Agents Over Time'):
        data[column] = data[column].apply(convert_str_to_list)

    return data

def plot_results(data, title, metric, y_label, filename):
    """Plot the mean of a metric over time with a 95% band and save the image.

    One line (plus a shaded band) is drawn for every
    ('Tax Scheme', 'Distributer Scheme', 'Tax Rate') combination in ``data``.

    Args:
        data: DataFrame with the three grouping columns, a 'Time Steps'
            column and the ``metric`` column; the latter two hold one list
            per row.
        title: Experiment name shown in the plot title.
        metric: Name of the list-valued column to aggregate.
        y_label: Y-axis label, also used in the plot title.
        filename: Name of the image file written below the 'plots' directory.
    """
    import os

    fig, ax = plt.subplots(figsize=(12, 6))
    fig.patch.set_facecolor('white')
    ax.set_facecolor('white')

    group_keys = ['Tax Scheme', 'Distributer Scheme', 'Tax Rate']
    for (tax_scheme, distributer_scheme, tax_rate), group_data in data.groupby(group_keys):
        # The x axis is taken from the first row of the group.
        # NOTE(review): assumes every row in a group has the same time steps
        # and equally long metric lists -- confirm against the experiment runner.
        time_steps = group_data.iloc[0]['Time Steps']
        values = np.array(group_data[metric].tolist())
        mean_values = np.mean(values, axis=0)
        # Standard error from np.std's default (ddof=0); the band is the
        # normal-approximation interval mean +/- 1.96 * SE.
        std_error = np.std(values, axis=0) / np.sqrt(values.shape[0])
        ci95 = 1.96 * std_error

        label = f"{tax_scheme}, {distributer_scheme}, Tax Rate {tax_rate}"
        ax.plot(time_steps, mean_values, label=label)
        ax.fill_between(time_steps, mean_values - ci95, mean_values + ci95, alpha=0.3)

    ax.set_xlabel('Time Steps', fontsize=14)
    ax.set_ylabel(y_label, fontsize=14)
    ax.set_title(f'Average {y_label} Over Time for {title}', fontsize=16)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)
    plt.tight_layout()

    # savefig does not create missing directories, so make sure the target
    # directory exists before writing.
    output_path = f'plots/{filename}'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    plt.savefig(output_path, bbox_inches='tight')
    plt.show()
    # Release the figure: this function is called once per file and metric,
    # and unclosed figures otherwise accumulate in memory.
    plt.close(fig)

# Load data from the fifteen result files: meta settings 1-5, each with the
# 'split', 'top_heavy' and 'uniform' variants. Paths and titles are derived
# from the same (meta, variant) pairs so the two lists cannot drift apart.
experiment_settings = [
    (meta, variant)
    for meta in range(1, 6)
    for variant in ('split', 'top_heavy', 'uniform')
]

file_paths = [
    f'Experimental Results/experiments_results_meta={meta}_{variant}.csv'
    for meta, variant in experiment_settings
]

titles = [f'meta_{meta}_{variant}' for meta, variant in experiment_settings]

all_data = [load_and_process(file_path) for file_path in file_paths]

# Plot results for each file and save the plots
for data, title in zip(all_data, titles):
    plot_results(data, title, 'Gini Over Time', 'Gini Coefficient', f'{title}_gini_over_time.png')
    plot_results(data, title, 'Agents Over Time', 'Number of Agents', f'{title}_agents_over_time.png')


# Boxplot & Statistical Test

In [None]:

def load_and_process(file_path):
    """Load an experiment-results CSV and parse its 'Gini Over Time' column.

    The 'Gini Over Time' column is read as strings such as "[0.1, 0.2]" and
    replaced by lists of floats. All other columns are left as read.

    Args:
        file_path: Location of the CSV file, passed to ``pd.read_csv``.

    Returns:
        The loaded DataFrame with 'Gini Over Time' converted.

    Raises:
        KeyError: If the 'Gini Over Time' column is missing.
        ValueError: If a non-empty list element cannot be parsed as a float.
    """
    data = pd.read_csv(file_path)

    def convert_str_to_list(list_str):
        """Parse a string like "[1, 2.5]" into a list of floats."""
        # Strip surrounding whitespace before the brackets and skip empty
        # tokens, so "[]" (or a trailing comma) yields no element instead of
        # failing on float('').
        tokens = list_str.strip().strip('[]').split(',')
        return [float(token) for token in tokens if token.strip()]

    # Apply the conversion to the relevant columns
    data['Gini Over Time'] = data['Gini Over Time'].apply(convert_str_to_list)

    return data

def summarize_significant_results(results, alpha=0.05):
    """Collect the comparisons whose p-value is below ``alpha``.

    Args:
        results: Iterable of dicts, each with at least a 'p_value' key.
        alpha: Significance threshold; entries with ``p_value < alpha`` are
            kept. Defaults to 0.05.

    Returns:
        A DataFrame with one row per significant result. When results were
        supplied but none is significant, the frame is empty yet keeps the
        keys of the first result as columns.
    """
    results = list(results)
    significant_results = [res for res in results if res['p_value'] < alpha]
    if significant_results or not results:
        return pd.DataFrame(significant_results)
    # Nothing passed the threshold: keep the column layout so downstream
    # code and printed output still show which fields exist.
    return pd.DataFrame(columns=list(results[0].keys()))

def visualize_gini_distributions(data, title, last_n=20):
    """Draw one box per parameter combination of the late-run mean Gini.

    For every ('Tax Scheme', 'Distributer Scheme', 'Tax Rate') group, each
    row's 'Gini Over Time' list is reduced to the mean of its last
    ``last_n`` entries, and the resulting values form one box.

    Args:
        data: DataFrame with the three grouping columns and a
            'Gini Over Time' column holding one list per row.
        title: Experiment name shown in the plot title.
        last_n: Number of trailing entries to average. Defaults to 20.

    Raises:
        ValueError: If ``last_n`` is smaller than 1.
    """
    if last_n < 1:
        # A window of 0 would silently select the whole list (x[-0:] == x[:]).
        raise ValueError("last_n must be at least 1")

    fig, ax = plt.subplots(figsize=(14, 8))
    group_keys = ['Tax Scheme', 'Distributer Scheme', 'Tax Rate']
    data_to_plot = []
    labels = []

    for (tax_scheme, distributer_scheme, tax_rate), group_data in data.groupby(group_keys):
        final_gini = [np.mean(gini[-last_n:]) for gini in group_data['Gini Over Time']]
        data_to_plot.append(final_gini)
        labels.append(f"{tax_scheme}, {distributer_scheme}, Tax Rate {tax_rate}")

    ax.boxplot(data_to_plot)
    ax.set_xticklabels(labels, rotation=90)
    ax.set_ylabel(f'Average Gini Coefficient (Last {last_n} Timesteps)')
    ax.set_title(f'Average Gini Coefficient Distributions for {title}')
    plt.tight_layout()
    plt.show()
    # Release the figure; this function is called once per result file.
    plt.close(fig)

def identify_best_combination(data, title):
    """Print and return the parameter combination with the lowest mean Gini.

    Rows are grouped by ('Tax Scheme', 'Distributer Scheme', 'Tax Rate');
    the mean and standard deviation of 'Final Gini' are computed per group
    and the group with the smallest mean is reported.

    Args:
        data: DataFrame with the three grouping columns and a 'Final Gini'
            column (the caller must have added it beforehand).
        title: Experiment name used in the printed header.

    Returns:
        A Series with 'mean_gini' and 'std_gini' whose ``name`` is the
        winning (tax scheme, distributer scheme, tax rate) tuple, or None
        when ``data`` contains no groups.

    Raises:
        KeyError: If a grouping column or 'Final Gini' is missing.
    """
    group_keys = ['Tax Scheme', 'Distributer Scheme', 'Tax Rate']
    summary = data.groupby(group_keys).agg(
        mean_gini=('Final Gini', 'mean'),
        std_gini=('Final Gini', 'std')
    ).sort_values(by='mean_gini')

    if summary.empty:
        # Without this guard, iloc[0] would raise IndexError on empty input.
        print(f"No combinations found for {title}.")
        return None

    best_combination = summary.iloc[0]
    print(f"Best combination for {title}:")
    print(best_combination)
    return best_combination

import itertools

# Load data from the fifteen result files: meta settings 1-5, each with the
# 'split', 'top_heavy' and 'uniform' variants. Paths and titles are derived
# from the same (meta, variant) pairs so the two lists cannot drift apart.
experiment_settings = [
    (meta, variant)
    for meta in range(1, 6)
    for variant in ('split', 'top_heavy', 'uniform')
]

file_paths = [
    f'Experimental Results/experiments_results_meta={meta}_{variant}.csv'
    for meta, variant in experiment_settings
]

titles = [f'meta_{meta}_{variant}' for meta, variant in experiment_settings]

all_data = [load_and_process(file_path) for file_path in file_paths]

group_keys = ['Tax Scheme', 'Distributer Scheme', 'Tax Rate']

# Perform analysis for each map
for data, title in zip(all_data, titles):
    # Get the average Gini coefficient for the last 20 timesteps for each run
    data['Final Gini'] = data['Gini Over Time'].apply(lambda x: np.mean(x[-20:]))

    # Split the per-run values by parameter combination once; the same
    # arrays feed both the omnibus test and the pairwise comparisons.
    final_gini_by_group = {
        name: group['Final Gini'].values
        for name, group in data.groupby(group_keys)
    }
    group_data = list(final_gini_by_group.values())

    # Perform Kruskal-Wallis test
    results = []
    try:
        kruskal_result = kruskal(*group_data)
    except ValueError as error:
        # SciPy rejects some inputs (e.g. fewer than two groups). Report it
        # and carry on so one file does not abort the whole analysis.
        kruskal_result = None
        print(f"Kruskal-Wallis Test for {title} could not be computed: {error}")
    else:
        print(f"Kruskal-Wallis Test for {title}: H-statistic={kruskal_result.statistic}, p-value={kruskal_result.pvalue}")

    # If significant, perform pairwise comparisons
    # NOTE(review): the pairwise p-values are not corrected for multiple
    # comparisons; consider a Bonferroni/Holm adjustment before reporting.
    if kruskal_result is not None and kruskal_result.pvalue < 0.05:
        group_pairs = itertools.combinations(final_gini_by_group.items(), 2)
        for (combination_1, group1), (combination_2, group2) in group_pairs:
            # State the alternative explicitly so the result does not depend
            # on the installed SciPy version's default.
            u_statistic, p_value = mannwhitneyu(group1, group2, alternative='two-sided')
            results.append({
                'combination_1': combination_1,
                'combination_2': combination_2,
                'u_statistic': u_statistic,
                'p_value': p_value
            })

    # Summarize significant results
    summary = summarize_significant_results(results)
    print(f"Significant Comparisons for {title}:")
    print(summary)

    # Visualize Gini coefficient distributions
    visualize_gini_distributions(data, title)

    # Identify the best combination for reducing Gini coefficient
    identify_best_combination(data, title)
