In [9]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import csv
from collections import defaultdict

In [10]:
def split_into_size_groups(data: pd.DataFrame):
    # This function splits up the dataframe into 3 corresponding to the sizes of 25x4, 10x10, and 4x25 for analysis by group

    # Contexts' 0-24 are the first 25x4, contexts' 25-34 are the first 10x10, contexts' 35-38 are the first 4x25. then the context value resets
    #   to 0 and this continues
    df_list = []
    size_groups = [(0, 25), (25, 35), (35, 39)]
    for start, end in size_groups:
        df_group = data[(data['context'] >= start) & (data['context'] < end)].reset_index(drop=True)
        df_list.append(df_group)

    return df_list

In [11]:
def unpack_data(filename: str):
    df = pd.read_csv(filename)
    return df

In [12]:
print("DEBUG: Unpacking data")
plot_savename = "/home/ia23938/Documents/GitHub/ValueSystemsAggregation/bluepebble_plots/"
results_path = "/home/ia23938/Documents/GitHub/ValueSystemsAggregation/bluepebble_runs/experiment_results_2024-09-27/"
results_filename = {'egal': "egal_society/egal_societyegal_society.csv", 'norm': "norm_society/norm_societynorm_society.csv", "util": "util_society/util_societyutil_society.csv", "random": "rand_society/rand_societyrand_society.csv"}

data = unpack_data(results_path + results_filename["util"])

#data = split_into_size_groups(data)

#data[0]

#for name, filename in results_filename.items():
#    data = unpack_data(results_path + filename)


DEBUG: Unpacking data


### Max/Min over time

In [13]:
"""
For every context, find the agent that is the worst off in terms of divergence, and store as a list
see if one agent (or a small group of agents in a minority) are consistently the worst off
"""
df_dict = {}
unique_p_values = data['p_value'].unique()
# Split the DataFrame by P_Value
for p_value in unique_p_values:
    df_dict[f'df_p_{p_value}'] = data[data['p_value'] == p_value].reset_index(drop=True)
for key in df_dict.keys():
    df_dict[key] = df_dict[key].sort_values(by=['agent', 'context']).reset_index(drop=True)
results_dict = {}
# Iterate over each DataFrame in df_dict
for key, df in df_dict.items():
    # Group by 'context'
    grouped = df.groupby('context')
    max_min_list = []
    for name, group in grouped:
        max_row = group.loc[group['satisfaction'].idxmax()]
        min_row = group.loc[group['satisfaction'].idxmin()]
        max_min_list.append({
            'context': name,
            'agent_max': max_row['agent'],
            'satisfaction_max': max_row['satisfaction'],
            'agent_min': min_row['agent'],
            'satisfaction_min': min_row['satisfaction']
        })
    results_dict[key] = pd.DataFrame(max_min_list)

# Save results_dict to a .csv file
for key, df in results_dict.items():
    df.to_csv(f"{key}_results.csv", index=False)


group max is:        context  p_value  agent  satisfaction  group
0           0        0    0.0      0.021147      1
1           1        0    0.0      0.664921      2
2           2        0    0.0      0.204440      3
3           3        0    0.0      0.916383      4
4           4        0    0.0      0.608712      5
...       ...      ...    ...           ...    ...
3871       34        0   99.0      0.538164   3872
3872       35        0   99.0      0.428489   3873
3873       36        0   99.0      0.387305   3874
3874       37        0   99.0      0.360441   3875
3875       38        0   99.0      0.441942   3876

[3876 rows x 5 columns]
group min is:        context  p_value  agent  satisfaction  group
0           0        0    0.0      0.021147      1
1           1        0    0.0      0.065461      2
2           2        0    0.0      0.023643      3
3           3        0    0.0      0.044744      4
4           4        0    0.0      0.142438      5
...       ...      ...    .

In [14]:
results_dict['df_p_0_1']

KeyError: 'df_p_0_1'

## Cumulative Divergence

In [46]:
df_dict = {}
unique_p_values = data['p_value'].unique()
print("unique_p_values: ", unique_p_values)
# Split the DataFrame by P_Value
for p_value in unique_p_values:
    df_dict[f'df_p_{p_value}'] = data[data['p_value'] == p_value].reset_index(drop=True)
print("Df keys: ", df_dict.keys())

unique_p_values:  [0 1 2 3]
Df keys:  dict_keys(['df_p_0', 'df_p_1', 'df_p_2', 'df_p_3'])


In [47]:
print(df_dict['df_p_0'])

       context  p_value  agent  satisfaction
0            0        0   78.0      0.193128
1            0        0   69.0      0.123980
2            0        0   68.0      0.326464
3            0        0   25.0      0.022045
4            1        0   41.0      0.162506
...        ...      ...    ...           ...
32395       38        0   42.0      0.185328
32396       38        0   90.0      0.188128
32397       38        0   23.0      0.209505
32398       38        0   62.0      0.221764
32399       38        0   89.0      0.027335

[32400 rows x 4 columns]


In [48]:
results_dict = {}
# Iterate over each DataFrame in df_dict
for key, df in df_dict.items():
    # Group by continuous segments where 'context' is unchanging
    df['group'] = (df['context'] != df['context'].shift()).cumsum()
    # Sum the 'satisfaction' values for each group
    grouped = df.groupby('group').agg({
        'context': 'first',
        'satisfaction': 'sum'
    }).reset_index(drop=True)
    
    # Add the p_value to the grouped DataFrame
    grouped['p_value'] = df['p_value'].iloc[0]
    
    # Store the result in the dictionary with p_value as the key
    results_dict[f'df_p_{grouped["p_value"].iloc[0]}'] = grouped

# Print the keys of the results dictionary to verify
print(results_dict.keys())
print(results_dict['df_p_0'])

dict_keys(['df_p_0', 'df_p_1', 'df_p_2', 'df_p_3'])
      context  satisfaction  p_value
0           0      0.665617        0
1           1      1.076129        0
2           2      1.176029        0
3           3      1.294335        0
4           4      1.594306        0
...       ...           ...      ...
4207       34      1.545451        0
4208       35      5.131808        0
4209       36      5.342436        0
4210       37      4.751475        0
4211       38      3.872615        0

[4212 rows x 3 columns]
