# Fit models to human data

In [120]:
import json
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm

import sys
sys.path.append("../src")

import warnings
warnings.filterwarnings("ignore")

In [121]:
from Config.config import PATHS
from Classes.parameter_recovery import ParameterFit
from Classes.cognitive_model_agents import MODELS

### Load human data

In [122]:
# Load data into a dataframe

two_player = False  # Set to True for 2-player UR experiment, False for multi-player IU experiment
if two_player:
	file_name = '2-player-UR.csv' # <= Data from 2-player UR experiment
	best_fits_file = PATHS['parameter_fit_results'] / Path('best_fit_2P.json')
else:
	file_name = '3-player-IU.csv' # <= Data from 3-player IU experiment
	# file_name = 'multi-player.csv' # <= Data from multi-player IU experiment
	best_fits_file = PATHS['parameter_fit_results'] / Path('best_fit_MP.json')

data_folder = PATHS['human_data']
file = data_folder / Path(file_name)
print(f'Loading data from {file}...')

data = pd.read_csv(file)
columns = ['threshold', 'num_players', 'group', 'round', 'player', 'score', 'decision']
drop_columns = [col for col in data.columns if col not in columns]
data.drop(columns=drop_columns, inplace=True)
data.head(2)

Loading data from /Users/edgar/Documents/GitHub/Cognitive_Models_El_Farol_Bar_Problem/data/human/3-player-IU.csv...


Unnamed: 0,threshold,round,player,decision,score,num_players,group
0,0.333333,1.0,406360458182849,1.0,-1.0,3,17
1,0.333333,1.0,679877000681823,1.0,-1.0,3,17


### Select models

In [123]:
# check_out_these = [model.name() for model in MODELS]
# check_out_these = ['Payoff-M2', 'Fairness-M2', 'AvailableSpace-M2']
# check_out_these = ['Attendance-M2', 'MFP-M2', 'Payoff-M2']
# check_out_these = ['Payoff-M2', 'Fairness-M2']
# check_out_these = ['Fairness-M2']
check_out_these = ['WSLS-M2']
# check_out_these = ['FRA']
# check_out_these = ['FRA+Payoff+Attendance']
my_models = [model for model in MODELS if model.name() in check_out_these]

### Fit with scipy

In [124]:
ParameterFit.run(
    data=data,
    model_list=my_models,
    best_fit_path=PATHS['parameter_fit_results'] / 'best_fit_Scipy.json',
    optimizer_type='scipy',
    hyperparameters=None,
    new_file=True
)

Fitting models...:   0%|          | 0/1 [00:00<?, ?it/s]

Fitting data to model WSLS-M2...
Creating parameter recovery class...
Running optimizer...
Finding deviance for 3 players and threshold 0.6666666666666666...
Optimal parameters for 3 players and threshold 0.6666666666666666:
{'inverse_temperature': np.float64(6.325977217774903), 'wsls_strength': np.float64(1.170112101352312), 'heuristic_strength': np.float64(0.6283120880875657)}
Deviance: -3821.571123959583
AIC: 7649.142247919166
--------------------------------------------------
Finding deviance for 3 players and threshold 0.3333333333333333...
Optimal parameters for 3 players and threshold 0.3333333333333333:
{'inverse_temperature': np.float64(1.9802047951962227), 'wsls_strength': np.float64(4.222684892287479), 'heuristic_strength': np.float64(0.015826549111873534)}
Deviance: -3944.655536850956
AIC: 7895.311073701912
--------------------------------------------------
{'model_name': 'WSLS-M2', 'WSLS-M2_N=3_mu=0.6666666666666666': {'model': 'WSLSM2', 'fixed_parameters': {'num_agents': 

### Fit with Bayesian Optimizer

In [125]:
hyperparameters = {
    'init_points':128,
    'n_iter':64
}

ParameterFit.run(
    data=data,
    model_list=my_models,
    best_fit_path=PATHS['parameter_fit_results'] / 'best_fit_Bayesian.json',
    optimizer_type='bayesian',
    hyperparameters=hyperparameters,
    new_file=True
)

Fitting models...:   0%|          | 0/1 [00:00<?, ?it/s]

Fitting data to model WSLS-M2...
Creating parameter recovery class...
Running optimizer...
Finding deviance for 3 players and threshold 0.6666666666666666...
|   iter    |  target   | invers... | wsls_s... | heuris... |
-------------------------------------------------------------
| [39m1        [39m | [39m-3600.554[39m | [39m27.272386[39m | [39m7.2032449[39m | [39m0.0001143[39m |
| [39m2        [39m | [39m-3600.554[39m | [39m20.046952[39m | [39m1.4675589[39m | [39m0.0923385[39m |
| [39m3        [39m | [39m-3655.809[39m | [39m12.734393[39m | [39m3.4556072[39m | [39m0.3967674[39m |
| [39m4        [39m | [39m-3821.571[39m | [39m34.945454[39m | [39m4.1919451[39m | [39m0.6852195[39m |
| [39m5        [39m | [39m-3600.554[39m | [39m13.880491[39m | [39m8.7811743[39m | [39m0.0273875[39m |
| [39m6        [39m | [39m-3821.571[39m | [39m43.239453[39m | [39m4.1730480[39m | [39m0.5586898[39m |
| [39m7        [39m | [39m-3821.571[39m | 

### Keep best fit from both optimizers

In [126]:
# Load the best fit results from both optimizers
df_Scipy = pd.read_json(PATHS['parameter_fit_results'] / 'best_fit_Scipy.json', lines=True)
df_Bayesian = pd.read_json(PATHS['parameter_fit_results'] / 'best_fit_Bayesian.json', lines=True)

df_list = []
best_optimizer = []
# Iterate through each model and compare the deviance from both optimizers
for model in my_models:
    best_fits = {'model_name': model.name()}
    # Filter the dataframes for the current model
    df1_Scipy = df_Scipy[df_Scipy['model_name'] == model.name()]
    df1_Bayesian = df_Bayesian[df_Bayesian['model_name'] == model.name()]
    # Iterate through each num_agents and threshold combination
    cols = [col for col in df1_Bayesian.columns if col != 'model_name']
    for col in cols:
        num_agents = df1_Scipy[col].values[0]['fixed_parameters']['num_agents']
        threshold = df1_Scipy[col].values[0]['fixed_parameters']['threshold']
        best_fits['num_agents'] = num_agents
        best_fits['threshold'] = threshold
        deviance_Scipy = df1_Scipy[col].values[0]['deviance']
        deviance_Bayesian = df1_Bayesian[col].values[0]['deviance']
        # Compare deviance from both optimizers
        if deviance_Scipy > deviance_Bayesian:
            best_optimizer.append('Scipy')
            best_fits['deviance'] = deviance_Scipy
            best_fits['AIC'] = df1_Scipy[col].values[0]['AIC']
            best_fits['free_parameters'] = df1_Scipy[col].values[0]['free_parameters']
        else:
            best_optimizer.append('Bayesian')
            best_fits['deviance'] = deviance_Bayesian
            best_fits['AIC'] = df1_Bayesian[col].values[0]['AIC']
            best_fits['free_parameters'] = df1_Bayesian[col].values[0]['free_parameters']
        best_fits['fixed_parameters'] = df1_Scipy[col].values[0]['fixed_parameters']
        df = pd.DataFrame().from_dict(best_fits, orient='index').T
        df_list.append(df)
df_best_fits = pd.concat(df_list, ignore_index=True)
print('Otimizer with best results:')
pd.Series(best_optimizer).value_counts(normalize=True).reset_index().rename(columns={'index': 'optimizer', 0: 'proportion'}).sort_values(by='proportion', ascending=False)


Otimizer with best results:


Unnamed: 0,optimizer,proportion
0,Bayesian,1.0


### Save on file

In [130]:
new_file = False

if new_file:
    df_best_fits.to_json(best_fits_file, orient='records', lines=True)
else:
    best_fit_old = pd.read_json(best_fits_file, lines=True)
    for model_name in df_best_fits['model_name'].unique():
        print(f'Processing model: {model_name}')
        df_model = df_best_fits[df_best_fits['model_name'] == model_name]
        if model_name not in best_fit_old['model_name'].unique():
            best_fit_new = pd.concat([best_fit_old, df_model], ignore_index=True)
        else:
            list_fixed_parameters_new = df_model['fixed_parameters'].values
            for fixed_parameters_new in list_fixed_parameters_new:
                print(f'Processing fixed parameters: {fixed_parameters_new}')
                df_parameters = df_model[df_model['fixed_parameters'] == fixed_parameters_new]
                mask = (best_fit_old['model_name'] == model_name) & (best_fit_old['fixed_parameters'] == fixed_parameters_new)
                if mask.sum() == 0:
                    best_fit_new = pd.concat([best_fit_old, df_parameters], ignore_index=True)
                else:
                    # Update the existing entry with the new data
                    best_fit_new = best_fit_old.copy()                    
                    best_fit_new.loc[mask, df_parameters.columns] = df_parameters.values
                

    best_fit_new.to_json(best_fits_file, orient='records', lines=True)

Processing model: WSLS-M2


---