In [1]:
import os
import sys
import scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import import_ipynb
from model import Model, ModelLocBias
from model import fast_optimize as optimize

importing Jupyter notebook from model.ipynb
The source folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/data/source
The figure folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/fig
The backup folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/data/backup


In [2]:
FIG_FOLDER = 'fig'
SOURCE_FOLDER = os.path.join('data', 'source')
BACKUP_FOLDER = os.path.join('data', 'backup')
print(f"The source folder is: {os.path.abspath(SOURCE_FOLDER)}")
print(f"The figure folder is: {os.path.abspath(FIG_FOLDER)}")
print(f"The backup folder is: {os.path.abspath(BACKUP_FOLDER)}")

# Create folders
for f in SOURCE_FOLDER, FIG_FOLDER, BACKUP_FOLDER:
    os.makedirs(f, exist_ok=True)

The source folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/data/source
The figure folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/fig
The backup folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/data/backup


In [3]:
DATASET = "B"
MODEL = ModelLocBias

# Import data

In [4]:
df_bhv = pd.read_csv(os.path.join(BACKUP_FOLDER, f"df_bhv{DATASET}.csv"))
df_bhv.date = pd.to_datetime(df_bhv.date)
df_bhv

Unnamed: 0,monkey,date,c,p0,x0,p1,x1,time_response,left_X,left_Y,...,is_same_x,is_best_left,is_best_right,pair_id,is_control,is_risky,is_neither_risky_nor_control,is_reversed,choose_risky,choose_best
0,Ola,2020-06-25,0,0.75,2,0.75,3,528,1195.0,131.0,...,False,False,True,0,True,False,False,True,False,False
1,Ola,2020-06-25,0,0.50,3,0.25,3,506,1195.0,131.0,...,True,True,False,1,True,False,False,False,False,True
2,Ola,2020-06-25,0,0.75,2,0.75,-2,394,469.0,131.0,...,False,True,False,2,True,False,False,False,False,True
3,Ola,2020-06-25,0,0.25,-2,0.25,-3,396,469.0,131.0,...,False,True,False,3,True,False,False,False,False,True
4,Ola,2020-06-25,0,0.75,-1,0.75,-3,329,469.0,131.0,...,False,True,False,4,True,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137292,Alv,2020-10-25,0,0.25,-2,1.00,-1,1074,1195.0,131.0,...,False,False,False,83,False,True,False,False,True,False
137293,Alv,2020-10-25,0,1.00,-2,1.00,-3,1995,469.0,131.0,...,False,True,False,30,True,False,False,False,False,True
137294,Alv,2020-10-25,1,0.25,3,0.75,2,843,1195.0,131.0,...,False,False,False,9,False,True,False,False,False,False
137295,Alv,2020-10-25,0,1.00,1,0.25,1,703,469.0,131.0,...,True,True,False,17,True,False,False,False,False,True


## Trial with Bar

In [5]:
m = "Bar"
cd = "gain" 

# Select the date
df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]

# Drop columns with identical values
nunique = df_m.apply(pd.Series.nunique)
cols_to_drop = nunique[nunique == 1].index
df_m.drop(cols_to_drop, axis=1)

df_m

Unnamed: 0,monkey,date,c,p0,x0,p1,x1,time_response,left_X,left_Y,...,is_same_x,is_best_left,is_best_right,pair_id,is_control,is_risky,is_neither_risky_nor_control,is_reversed,choose_risky,choose_best
35,Bar,2020-02-22,1,1.00,2,0.75,3,2000,469.0,412.0,...,False,False,False,29,False,True,False,True,True,False
155,Bar,2020-02-22,0,0.75,1,0.25,2,1219,469.0,412.0,...,False,False,False,88,False,True,False,True,False,False
162,Bar,2020-02-22,1,0.50,2,1.00,1,1680,469.0,412.0,...,False,False,False,91,False,True,False,False,False,False
172,Bar,2020-02-22,1,0.25,3,0.50,1,2672,469.0,412.0,...,False,False,False,95,False,True,False,False,False,False
180,Bar,2020-02-22,0,0.75,2,0.25,3,1161,469.0,412.0,...,False,False,False,67,False,True,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
487324,Bar,2020-06-22,0,0.75,2,0.50,3,624,469.0,412.0,...,False,False,False,56,False,True,False,True,False,False
487326,Bar,2020-06-22,1,0.50,2,0.75,1,854,469.0,412.0,...,False,False,False,14,False,True,False,False,False,False
487329,Bar,2020-06-22,1,0.25,2,1.00,1,589,469.0,412.0,...,False,False,False,130,False,True,False,False,False,False
487812,Bar,2020-06-22,1,0.75,1,0.50,3,1073,469.0,412.0,...,False,False,False,84,False,True,False,True,True,False


### Fit all the data at once...

In [9]:
best_param, best_value = optimize(model=MODEL, data=df_m)
print("param", MODEL.param_labels)
print("param", MODEL.fit_bounds)
print("best_param", best_param)

param ['distortion', 'precision', 'risk_aversion', 'loc_bias']
param [(0.25, 4), (0.1, 10.0), (-3, 0.75), (-10.0, 10.0)]
best_param [ 0.91550932  0.49280347 -0.10914122 -0.11728342]


### Fit the data day by day...

In [None]:
dates = df_m.date.unique()
n_days = len(dates)
n_param = len(Model.param_labels)

r = np.zeros((n_days, n_param))
n_trial = np.zeros(n_days, dtype=int)
for i in tqdm(range(n_days), file=sys.stdout):
    slide = df_m.date == dates[i]
    n_trial[i] = np.sum(slide)
    best_param, best_value = optimize(model=Model, data=df_m[slide])
    r[i] = best_param

df_fit_bar = pd.DataFrame(r, index=dates, columns=Model.param_labels)
df_fit_bar["n"] = n_trial
df_fit_bar

### Fit the data using a window

In [None]:
w = 10

dates = df_m.date.unique()
n_days = len(dates)
n_param = len(Model.param_labels)

r = np.zeros((n_days, n_param))
n_trial = np.zeros(n_days, dtype=int)
for i in tqdm(range(n_days), file=sys.stdout):
    
    # Apply mask for dates
    b0 = dates[max(i-(w-1), 0)]
    b1 = dates[min(i+w, n_days-1)]
    mask = (df_m.date >= b0) & (df_m.date <= b1)
    
    # Optimize
    best_param, best_value = optimize(model=Model, data=df_m[mask])
    r[i] = best_param
    
    # Count thre trials
    n_trial[i] = np.sum(mask)

df_fit_bar_window = pd.DataFrame(r, index=dates, columns=Model.param_labels)
df_fit_bar_window["n"] = n_trial
df_fit_bar_window

## Day by day (with all the monkeys)

### Do the fit

In [None]:
df_fit_day = pd.DataFrame()

cond = "gain", "loss"
monkeys = df_bhv.monkey.unique()

for i_m, m in enumerate(monkeys):

    print(f"monkey {i_m+1}/{len(monkeys)}")

    for cd in cond:
        
        # Select the data
        df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]
        
        # Get the dates
        dates = df_m.date.unique()
        n_days = len(dates)
        
        # Get the number of parameters
        n_param = len(Model.param_labels)
        
        # Optimize for each day separately
        r = np.zeros((n_days, n_param))
        n_trial = np.zeros(n_days, dtype=int)
        for i in tqdm(range(n_days), file=sys.stdout):
            slide = df_m.date == dates[i]
            n_trial[i] = np.sum(slide)
            best_param, best_value = optimize(model=Model, data=df_m[slide])
            r[i] = best_param
        
        # Backup
        df_m_cd = pd.DataFrame(r, columns=Model.param_labels)
        df_m_cd["date"] = dates
        df_m_cd["monkey"] = m
        df_m_cd["condition"] = cd.replace("is_", "")
        df_m_cd["n"] = n_trial
        df_fit_day = pd.concat((df_fit_day, df_m_cd))

df_fit_day.to_csv(os.path.join(BACKUP_FOLDER, "df_fit_day.csv"), index=False)

### Load the results

In [None]:
df_fit_day = pd.read_csv(os.path.join(BACKUP_FOLDER, "df_fit_day.csv"))
df_fit_day.date = pd.to_datetime(df_fit_day.date)
df_fit_day

## Using a slide window (with all the monkeys)

### Dot the fit

In [None]:
# How many days before and after?
w = 10

df_fit = pd.DataFrame()

cond = "gain", "loss"
monkeys = df_bhv.monkey.unique()

for i_m, m in enumerate(monkeys):

    print(f"monkey {i_m+1}/{len(monkeys)}")

    for cd in cond:
        
        # Get data
        df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]
        
        # Get dates
        dates = df_m.date.unique()
        n_days = len(dates)
        n_param = len(Model.param_labels)
        
        # Optimize for each 'windowed' day
        r = np.zeros((n_days, n_param))
        n_trial = np.zeros(n_days, dtype=int)
        for i in tqdm(range(n_days), file=sys.stdout):
            
            # Get the mask
            b0 = dates[max(i-w, 0)]
            b1 = dates[min(i+w, n_days-1)]
            mask = (df_m.date >= b0) & (df_m.date <= b1)
            
            # Optimize
            best_param, best_value = optimize(model=Model, data=df_m[mask])
            r[i] = best_param
            
            # Count thre trials
            n_trial[i] = np.sum(mask)
        
        # Backup
        df_m_cd = pd.DataFrame(r, columns=Model.param_labels)
        df_m_cd["date"] = dates
        df_m_cd["monkey"] = m
        df_m_cd["condition"] = cd
        df_m_cd["n"] = n_trial
        df_fit = pd.concat((df_fit, df_m_cd))

df_fit.to_csv(os.path.join(BACKUP_FOLDER, f"df_fit_w{w}.csv"))

### Load the results

In [None]:
df_fit = pd.read_csv(os.path.join(BACKUP_FOLDER, f"df_fit_w{w}.csv"), index_col=0)
df_fit.date = pd.to_datetime(df_fit.date)
df_fit

# At once

In [11]:
cond = "gain", "loss"
monkeys = df_bhv.monkey.unique()

df_fit_overall = pd.DataFrame()

for i_m, m in tqdm(enumerate(monkeys), file=sys.stdout, total=len(monkeys)):

    for cd in cond:
        
        # Select the data
        df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]
        
        # Get the dates
        dates = df_m.date.unique()
        if len(dates) < 1:
            continue
        
        # Get the number of parameters
        n_param = len(MODEL.param_labels)
        
        # Optimize
        best_param, best_value = optimize(model=MODEL, data=df_m)
        
        # Backup
        df_fit_m = pd.DataFrame(best_param.reshape((1, len(best_param))), columns=MODEL.param_labels)
        df_fit_m["date_begin"] = dates[0]
        df_fit_m["date_end"] = dates[-1]
        df_fit_m["monkey"] = m
        df_fit_m["condition"] = cd
        df_fit_m["n"] = len(df_m)
        df_fit_overall = pd.concat((df_fit_overall, df_fit_m))
        
df_fit_overall.to_csv(os.path.join(BACKUP_FOLDER, f"df_fit_overall{DATASET}_{MODEL.__name__}.csv"))

100%|██████████| 15/15 [00:06<00:00,  2.18it/s]


### Load the results

In [12]:
df_fit_overall = pd.read_csv(os.path.join(BACKUP_FOLDER, f"df_fit_overall{DATASET}_{MODEL.__name__}.csv"))
for c in ("date_begin", "date_end"):
    df_fit_overall[c] = pd.to_datetime(df_fit_overall[c])
df_fit_overall.drop(df_fit_overall.filter(regex="Unname"),axis=1, inplace=True)
df_fit_overall

Unnamed: 0,distortion,precision,risk_aversion,loc_bias,date_begin,date_end,monkey,condition,n
0,1.327775,0.285684,0.206004,0.051842,2020-02-22,2020-06-22,Ala,gain,8326
1,1.051529,0.23133,-0.582592,-0.071694,2020-02-22,2020-06-22,Ala,loss,8285
2,0.768159,0.447276,0.075584,-0.936214,2020-02-22,2020-06-22,Alv,gain,9996
3,0.462213,0.229596,-0.398661,-0.675575,2020-02-22,2020-06-22,Alv,loss,9988
4,0.915509,0.492803,-0.109141,-0.117283,2020-02-22,2020-06-22,Bar,gain,10361
5,0.769959,0.189979,-0.640682,-0.121369,2020-02-22,2020-06-22,Bar,loss,10365
6,0.25,1.612589,0.083753,-10.0,2020-02-23,2020-06-22,Ola,gain,5235
7,0.25,0.3302,-0.903815,-1.570219,2020-02-23,2020-06-22,Ola,loss,5237
8,0.610421,0.704556,0.75,2.518428,2020-05-20,2020-06-18,Yin,gain,389
9,0.888765,2.47764,0.571941,10.0,2020-05-20,2020-06-19,Yin,loss,382
