# Fit models to human data

In [80]:
import json
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm

# Add the sibling "src" directory to the import search path.
# NOTE(review): presumably this is where the Config and Classes packages
# imported in the next cell live; the path is relative, so it only resolves
# when the kernel's working directory is this notebook's folder — confirm.
import sys
sys.path.append("../src")

# Suppress every Python warning for the rest of the session, so nothing issued
# through the warnings module will be shown while the cells below run.
import warnings
warnings.filterwarnings("ignore")

In [81]:
from Config.config import PATHS
from Classes.parameter_recovery import ParameterFit
from Classes.cognitive_model_agents import MODELS

### Load human data

In [82]:
# Read the human-behaviour CSV and keep only the columns used for fitting.

# Experiment switch: True -> 2-player UR experiment, False -> multi-player IU experiment.
two_player = False

# Each experiment has its own raw-data CSV and its own best-fit results file.
# (Alternative multi-player dataset: '3-player-IU.csv', the 3-player IU experiment.)
file_name, best_fits_name = (
    ('2-player-UR.csv', 'best_fit_2P.json')
    if two_player
    else ('multi-player.csv', 'best_fit_MP.json')
)
best_fits_file = PATHS['parameter_fit_results'] / Path(best_fits_name)

data_folder = PATHS['human_data']
file = data_folder / Path(file_name)
print(f'Loading data from {file}...')

# Columns retained for the analysis; every other column in the CSV is discarded.
columns = ['threshold', 'num_players', 'group', 'round', 'player', 'score', 'decision']
raw_data = pd.read_csv(file)
drop_columns = [col for col in raw_data.columns if col not in columns]
data = raw_data.drop(columns=drop_columns)
data.head(2)

Loading data from C:\Users\andra\Documents\Cognitive_Models_El_Farol_Bar_Problem\data\human\multi-player.csv...


Unnamed: 0,threshold,round,player,decision,score,num_players,group
0,0.875,1.0,589037721408239,1.0,1.0,8,7
1,0.875,1.0,586994455133371,1.0,1.0,8,7


### Select models

In [83]:
# Names of the models to fit in this session. Exactly one assignment to
# `check_out_these` should be active; the commented lines are ready-made presets.
# check_out_these = [model.name() for model in MODELS]
# check_out_these = ['Attendance-M1', 'Attendance-M2', 'Attendance-M3']
# check_out_these = ['WSLS-M1', 'WSLS-M2', 'WSLS-M3']
check_out_these = ['AvailableSpace-M1', 'AvailableSpace-M2', 'AvailableSpace-M3']
# check_out_these = ['Payoff-M1', 'Payoff-M2', 'Payoff-M3']
# check_out_these = ['Payoff-M2', 'Fairness-M2', 'AvailableSpace-M2']
# check_out_these = ['Attendance-M2', 'MFP-M2', 'Payoff-M2']
# check_out_these = ['Payoff-M2', 'Fairness-M2']
# check_out_these = ['Fairness-M2']
# check_out_these = ['FRA']
# check_out_these = ['FRA+Payoff+Attendance']

# Fail fast on names that match no entry of MODELS. Without this check a typo
# is silently dropped by the filter below and only shows up after the (slow)
# fitting cells have run on an incomplete or empty model list.
available_names = {model.name() for model in MODELS}
unknown_names = [name for name in check_out_these if name not in available_names]
if unknown_names:
    raise ValueError(
        f'Unknown model name(s) in check_out_these: {unknown_names}. '
        f'Available models: {sorted(available_names)}'
    )

# Keep the model entries, in MODELS order, whose name was requested.
my_models = [model for model in MODELS if model.name() in check_out_these]

### Fit with scipy

In [84]:
# Fit the selected models with the 'scipy' optimizer. The results go to
# best_fit_Scipy.json, which the "Keep best fit from both optimizers" cell
# reads back.
# NOTE(review): new_file=True presumably makes ParameterFit start that file
# from scratch rather than append to it — confirm in ParameterFit.run.
scipy_fit_options = dict(
    data=data,
    model_list=my_models,
    best_fit_path=PATHS['parameter_fit_results'] / 'best_fit_Scipy.json',
    optimizer_type='scipy',
    hyperparameters=None,  # no optimizer settings are passed for this run
    new_file=True,
)
ParameterFit.run(**scipy_fit_options)

Fitting models...:   0%|          | 0/3 [00:00<?, ?it/s]

Fitting data to model AvailableSpace-M1...
Creating parameter recovery class...
Running optimizer...
Finding deviance for 4 players and threshold 0.25...
Optimal parameters for 4 players and threshold 0.25:
{'inverse_temperature': 16.159105404416895, 'learning_rate': 0.0025450583580916247, 'bias': 0.14096270398875876}
Deviance: -1259.414198987431
AIC: 2524.828397974862
--------------------------------------------------
Finding deviance for 4 players and threshold 0.5...
Optimal parameters for 4 players and threshold 0.5:
{'inverse_temperature': 1.0, 'learning_rate': 0.022937723301753067, 'bias': 0.7168647568418854}
Deviance: -1371.826215068871
AIC: 2749.652430137742
--------------------------------------------------
Finding deviance for 4 players and threshold 0.75...
Optimal parameters for 4 players and threshold 0.75:
{'inverse_temperature': 14.286750344645174, 'learning_rate': 0.01018831343442416, 'bias': 0.7149883543254818}
Deviance: -1139.6672488790523
AIC: 2285.3344977581046
----

### Fit with Bayesian Optimizer

In [85]:
# Settings handed to the 'bayesian' optimizer.
# NOTE(review): presumably the number of initial exploration points and of
# subsequent optimization iterations — confirm in ParameterFit.
hyperparameters = dict(init_points=128, n_iter=64)

# Fit the selected models with the 'bayesian' optimizer. The results go to
# best_fit_Bayesian.json, which the "Keep best fit from both optimizers" cell
# reads back.
bayesian_fit_path = PATHS['parameter_fit_results'] / 'best_fit_Bayesian.json'
ParameterFit.run(
    data=data,
    model_list=my_models,
    best_fit_path=bayesian_fit_path,
    optimizer_type='bayesian',
    hyperparameters=hyperparameters,
    new_file=True,
)

Fitting models...:   0%|          | 0/3 [00:00<?, ?it/s]

Fitting data to model AvailableSpace-M1...
Creating parameter recovery class...
Running optimizer...
Finding deviance for 4 players and threshold 0.25...
Optimal parameters for 4 players and threshold 0.25:
{'bias': 0.01988013383979559, 'inverse_temperature': 2.6512921732963144, 'learning_rate': 0.028306488020794607}
Deviance: -1280.6087342365731
AIC: 2567.2174684731463
--------------------------------------------------
Finding deviance for 4 players and threshold 0.5...
Optimal parameters for 4 players and threshold 0.5:
{'bias': 0.417022004702574, 'inverse_temperature': 46.38044308685596, 'learning_rate': 0.00011437481734488664}
Deviance: -1373.6608184344232
AIC: 2753.3216368688463
--------------------------------------------------
Finding deviance for 4 players and threshold 0.75...
Optimal parameters for 4 players and threshold 0.75:
{'bias': 0.28777533858634874, 'inverse_temperature': 9.191800043451494, 'learning_rate': 0.019366957870297075}
Deviance: -1142.3493854371504
AIC: 2290

### Keep best fit from both optimizers

In [86]:
# For every selected model and every (num_agents, threshold) combination, keep
# whichever optimizer produced the larger 'deviance' value.
# NOTE(review): the values stored under 'deviance' appear to be log-likelihoods
# (the fitting logs print negative values with AIC = 2k - 2*deviance), which is
# why the LARGER value is treated as the better fit — confirm in ParameterFit.


def _fit_entry(df_model, col):
    """Return the fit-result dict stored in column `col` of a one-model frame.

    Returns None when the frame has no row for the model, lacks the column, or
    the cell holds something other than a dict (e.g. NaN padding).
    """
    if df_model.empty or col not in df_model.columns:
        return None
    cell = df_model[col].values[0]
    return cell if isinstance(cell, dict) else None


def _comparable_deviance(fit):
    """Return the fit's 'deviance', mapping a missing/NaN value to -inf so it can never win."""
    value = fit.get('deviance')
    return float('-inf') if pd.isna(value) else value


# Load the best fit results from both optimizers
df_Scipy = pd.read_json(PATHS['parameter_fit_results'] / 'best_fit_Scipy.json', lines=True)
df_Bayesian = pd.read_json(PATHS['parameter_fit_results'] / 'best_fit_Bayesian.json', lines=True)

# Result columns present in either file (Bayesian order first), model_name excluded.
result_cols = [
    col
    for col in dict.fromkeys([*df_Bayesian.columns, *df_Scipy.columns])
    if col != 'model_name'
]

records = []         # one dict per (model, num_agents, threshold) combination
best_optimizer = []  # winning optimizer label, parallel to `records`
for model in my_models:
    model_name = model.name()
    # Rows of each results file that belong to the current model
    df1_Scipy = df_Scipy[df_Scipy['model_name'] == model_name]
    df1_Bayesian = df_Bayesian[df_Bayesian['model_name'] == model_name]
    for col in result_cols:
        fit_Scipy = _fit_entry(df1_Scipy, col)
        fit_Bayesian = _fit_entry(df1_Bayesian, col)
        if fit_Scipy is None and fit_Bayesian is None:
            continue  # neither optimizer has a result for this combination
        # Scipy wins only with a strictly larger value; ties go to Bayesian.
        scipy_wins = fit_Bayesian is None or (
            fit_Scipy is not None
            and _comparable_deviance(fit_Scipy) > _comparable_deviance(fit_Bayesian)
        )
        winner, best = ('Scipy', fit_Scipy) if scipy_wins else ('Bayesian', fit_Bayesian)
        best_optimizer.append(winner)
        # The fixed parameters identify the combination; prefer the Scipy copy when it exists.
        reference = fit_Scipy if fit_Scipy is not None else fit_Bayesian
        fixed_parameters = reference['fixed_parameters']
        records.append({
            'model_name': model_name,
            'num_agents': fixed_parameters['num_agents'],
            'threshold': fixed_parameters['threshold'],
            'deviance': best['deviance'],
            'AIC': best['AIC'],
            'free_parameters': best['free_parameters'],
            'fixed_parameters': fixed_parameters,
        })

# Build the frame once from the collected records instead of concatenating
# one single-row frame per combination.
df_best_fits = pd.DataFrame(
    records,
    columns=['model_name', 'num_agents', 'threshold', 'deviance', 'AIC',
             'free_parameters', 'fixed_parameters'],
)
print('Otimizer with best results:')
# Share of combinations won by each optimizer; naming the axis and the value
# column explicitly gives the same column labels on every pandas version.
(
    pd.Series(best_optimizer, dtype='object')
    .value_counts(normalize=True)
    .rename_axis('optimizer')
    .reset_index(name='proportion')
    .sort_values(by='proportion', ascending=False)
)


Otimizer with best results:


Unnamed: 0,optimizer,proportion
0,Scipy,0.615385
1,Bayesian,0.384615


### Save to file

In [87]:
# Write df_best_fits to best_fits_file, either replacing the file or merging
# the new rows into what is already stored there.
new_file = False # Set to True if you want to create a new file, False to update the existing one

# With nothing on disk there is nothing to merge into, so fall back to
# creating the file instead of failing inside read_json.
create_file = new_file or not Path(best_fits_file).exists()
if create_file and not new_file:
    print(f'{best_fits_file} does not exist yet; creating it.')

if create_file:
    df_best_fits.to_json(best_fits_file, orient='records', lines=True)
else:
    best_fit = pd.read_json(best_fits_file, lines=True)
    # Merge at the record level: one dict per row keeps the nested
    # fixed_parameters / free_parameters dicts intact and avoids shape-sensitive
    # DataFrame assignment.
    stored_rows = best_fit.to_dict(orient='records')
    incoming_rows = df_best_fits.to_dict(orient='records')
    stored_models = {row.get('model_name') for row in stored_rows}

    for model_name in df_best_fits['model_name'].unique():
        model_rows = [row for row in incoming_rows if row['model_name'] == model_name]
        if model_name not in stored_models:
            # Model not in the file yet: append all of its rows.
            print(f'Adding new model: {model_name}')
            stored_rows.extend(dict(row) for row in model_rows)
            continue
        for new_row in model_rows:
            # A stored row is "the same fit" when both the model and its fixed parameters match.
            matches = [
                row for row in stored_rows
                if row.get('model_name') == model_name
                and row.get('fixed_parameters') == new_row['fixed_parameters']
            ]
            if matches:
                # Update the existing entry in place so its position in the file is kept.
                for row in matches:
                    row.update(new_row)
            else:
                stored_rows.append(dict(new_row))

    best_fit = pd.DataFrame(stored_rows)
    best_fit.to_json(best_fits_file, orient='records', lines=True)

print(f'Best fit results saved to {best_fits_file}')

Adding new model: AvailableSpace-M1
Adding new model: AvailableSpace-M2
Adding new model: AvailableSpace-M3
Best fit results saved to C:\Users\andra\Documents\Cognitive_Models_El_Farol_Bar_Problem\reports\MLE\best_fit_MP.json


---