In [6]:
import os
import sys
import scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import import_ipynb
from model import Model 
from model import fast_optimize as optimize

In [7]:
FIG_FOLDER = 'fig'
SOURCE_FOLDER = os.path.join('data', 'source')
BACKUP_FOLDER = os.path.join('data', 'backup')
print(f"The source folder is: {os.path.abspath(SOURCE_FOLDER)}")
print(f"The figure folder is: {os.path.abspath(FIG_FOLDER)}")
print(f"The backup folder is: {os.path.abspath(BACKUP_FOLDER)}")

# Create folders
for f in SOURCE_FOLDER, FIG_FOLDER, BACKUP_FOLDER:
    os.makedirs(f, exist_ok=True)

The source folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/data/source
The figure folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/fig
The backup folder is: /Users/aureliennioche/Documents/PythonProjects/ProspecTonk/data/backup


# Import data

In [8]:
df_bhv = pd.read_csv(os.path.join(BACKUP_FOLDER, "df_bhv.csv"))
df_bhv.date = pd.to_datetime(df_bhv.date)
df_bhv

Unnamed: 0,monkey,date,c,p0,x0,p1,x1,time_response,left_X,left_Y,...,is_same_x,is_best_left,is_best_right,pair_id,is_control,is_risky,is_neither_risky_nor_control,is_reversed,choose_risky,choose_best
0,Ola,2020-06-25,0,0.75,2,0.75,3,528,1195.0,131.0,...,False,False,True,0,True,False,False,True,False,False
1,Ola,2020-06-25,0,0.50,3,0.25,3,506,1195.0,131.0,...,True,True,False,1,True,False,False,False,False,True
2,Ola,2020-06-25,0,0.75,2,0.75,-2,394,469.0,131.0,...,False,True,False,2,True,False,False,False,False,True
3,Ola,2020-06-25,0,0.25,-2,0.25,-3,396,469.0,131.0,...,False,True,False,3,True,False,False,False,False,True
4,Ola,2020-06-25,0,0.75,-1,0.75,-3,329,469.0,131.0,...,False,True,False,4,True,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137292,Alv,2020-10-25,0,0.25,-2,1.00,-1,1074,1195.0,131.0,...,False,False,False,83,False,True,False,False,True,False
137293,Alv,2020-10-25,0,1.00,-2,1.00,-3,1995,469.0,131.0,...,False,True,False,30,True,False,False,False,False,True
137294,Alv,2020-10-25,1,0.25,3,0.75,2,843,1195.0,131.0,...,False,False,False,9,False,True,False,False,False,False
137295,Alv,2020-10-25,0,1.00,1,0.25,1,703,469.0,131.0,...,True,True,False,17,True,False,False,False,False,True


## Trial with Bar

In [9]:
m = "Bar"
cd = "gain" 

# Select the date
df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]

# Drop columns with identical values
nunique = df_m.apply(pd.Series.nunique)
cols_to_drop = nunique[nunique == 1].index
df_m.drop(cols_to_drop, axis=1)

df_m

Unnamed: 0,monkey,date,c,p0,x0,p1,x1,time_response,left_X,left_Y,...,is_same_x,is_best_left,is_best_right,pair_id,is_control,is_risky,is_neither_risky_nor_control,is_reversed,choose_risky,choose_best
618,Bar,2020-06-25,1,0.25,3,0.75,1,666,1195.0,131.0,...,False,False,False,88,False,True,False,False,False,False
628,Bar,2020-06-25,1,0.25,3,0.50,2,655,1195.0,131.0,...,False,False,False,43,False,True,False,False,False,False
642,Bar,2020-06-25,1,1.00,1,0.75,3,845,1195.0,131.0,...,False,False,False,15,False,True,False,True,True,False
672,Bar,2020-06-25,1,0.50,3,0.75,2,1374,469.0,131.0,...,False,False,False,81,False,True,False,False,False,False
697,Bar,2020-06-25,1,0.75,1,0.50,2,1094,469.0,131.0,...,False,False,False,12,False,True,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137103,Bar,2020-10-25,1,1.00,1,0.50,2,584,469.0,131.0,...,False,False,False,29,False,True,False,True,True,False
137117,Bar,2020-10-25,1,1.00,1,0.25,2,635,469.0,131.0,...,False,False,False,99,False,True,False,True,True,False
137122,Bar,2020-10-25,0,0.50,3,0.75,1,701,469.0,131.0,...,False,False,False,23,False,True,False,False,True,False
137127,Bar,2020-10-25,1,0.25,3,0.50,1,577,1195.0,131.0,...,False,False,False,95,False,True,False,False,False,False


### Fit all the data at once...

In [10]:
best_param, best_value = optimize(model=Model, data=df_m)
print("best_param", best_param)

best_param [ 0.72158789  0.72756213 -0.20691694]


### Fit the data day by day...

In [11]:
dates = df_m.date.unique()
n_days = len(dates)
n_param = len(Model.param_labels)

r = np.zeros((n_days, n_param))
n_trial = np.zeros(n_days, dtype=int)
for i in tqdm(range(n_days), file=sys.stdout):
    slide = df_m.date == dates[i]
    n_trial[i] = np.sum(slide)
    best_param, best_value = optimize(model=Model, data=df_m[slide])
    r[i] = best_param

df_fit_bar = pd.DataFrame(r, index=dates, columns=Model.param_labels)
df_fit_bar["n"] = n_trial
df_fit_bar

100%|██████████| 113/113 [00:02<00:00, 41.24it/s]


Unnamed: 0,distortion,precision,risk_aversion,n
2020-06-25,0.831596,1.281070,-0.295729,115
2020-06-26,0.930450,1.079993,-0.067523,138
2020-06-27,0.869438,0.615552,0.111349,116
2020-06-28,0.935447,0.585639,-0.097054,118
2020-06-29,0.744479,0.490433,-0.255992,130
...,...,...,...,...
2020-10-21,0.618827,0.223106,0.466525,14
2020-10-22,0.606719,0.100000,-0.135914,12
2020-10-23,1.261552,0.100000,-0.289819,12
2020-10-24,0.309839,0.251927,0.398676,6


### Fit the data using a window

In [12]:
w = 10

dates = df_m.date.unique()
n_days = len(dates)
n_param = len(Model.param_labels)

r = np.zeros((n_days, n_param))
n_trial = np.zeros(n_days, dtype=int)
for i in tqdm(range(n_days), file=sys.stdout):
    
    # Apply mask for dates
    b0 = dates[max(i-(w-1), 0)]
    b1 = dates[min(i+w, n_days-1)]
    mask = (df_m.date >= b0) & (df_m.date <= b1)
    
    # Optimize
    best_param, best_value = optimize(model=Model, data=df_m[mask])
    r[i] = best_param
    
    # Count thre trials
    n_trial[i] = np.sum(mask)

df_fit_bar_window = pd.DataFrame(r, index=dates, columns=Model.param_labels)
df_fit_bar_window["n"] = n_trial
df_fit_bar_window

100%|██████████| 113/113 [00:03<00:00, 30.13it/s]


Unnamed: 0,distortion,precision,risk_aversion,n
2020-06-25,0.861389,0.887938,-0.260100,855
2020-06-26,0.834342,0.865360,-0.257379,873
2020-06-27,0.802773,0.872041,-0.281786,902
2020-06-28,0.795199,0.874206,-0.281777,914
2020-06-29,0.801317,0.876550,-0.282040,926
...,...,...,...,...
2020-10-21,0.559044,0.270883,0.069137,160
2020-10-22,0.568057,0.273317,0.092846,149
2020-10-23,0.586313,0.281441,0.111589,137
2020-10-24,1.533834,0.617392,0.143454,125


## Day by day (with all the monkeys)

### Do the fit

In [13]:
df_fit_day = pd.DataFrame()

cond = "gain", "loss"
monkeys = df_bhv.monkey.unique()

for i_m, m in enumerate(monkeys):

    print(f"monkey {i_m+1}/{len(monkeys)}")

    for cd in cond:
        
        # Select the data
        df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]
        
        # Get the dates
        dates = df_m.date.unique()
        n_days = len(dates)
        
        # Get the number of parameters
        n_param = len(Model.param_labels)
        
        # Optimize for each day separately
        r = np.zeros((n_days, n_param))
        n_trial = np.zeros(n_days, dtype=int)
        for i in tqdm(range(n_days), file=sys.stdout):
            slide = df_m.date == dates[i]
            n_trial[i] = np.sum(slide)
            best_param, best_value = optimize(model=Model, data=df_m[slide])
            r[i] = best_param
        
        # Backup
        df_m_cd = pd.DataFrame(r, columns=Model.param_labels)
        df_m_cd["date"] = dates
        df_m_cd["monkey"] = m
        df_m_cd["condition"] = cd.replace("is_", "")
        df_m_cd["n"] = n_trial
        df_fit_day = pd.concat((df_fit_day, df_m_cd))

df_fit_day.to_csv(os.path.join(BACKUP_FOLDER, "df_fit_day.csv"), index=False)

monkey 1/15
100%|██████████| 86/86 [00:01<00:00, 52.30it/s]
100%|██████████| 86/86 [00:01<00:00, 52.73it/s]
monkey 2/15
100%|██████████| 103/103 [00:02<00:00, 50.40it/s]
100%|██████████| 103/103 [00:02<00:00, 42.60it/s]
monkey 3/15
100%|██████████| 60/60 [00:01<00:00, 48.74it/s]
100%|██████████| 60/60 [00:01<00:00, 38.63it/s]
monkey 4/15
100%|██████████| 113/113 [00:02<00:00, 43.43it/s]
100%|██████████| 113/113 [00:03<00:00, 37.39it/s]
monkey 5/15
100%|██████████| 66/66 [00:01<00:00, 51.37it/s]
100%|██████████| 66/66 [00:01<00:00, 53.48it/s]
monkey 6/15
100%|██████████| 111/111 [00:02<00:00, 41.97it/s]
100%|██████████| 111/111 [00:02<00:00, 40.91it/s]
monkey 7/15
100%|██████████| 113/113 [00:02<00:00, 41.58it/s]
100%|██████████| 113/113 [00:03<00:00, 34.77it/s]
monkey 8/15
100%|██████████| 106/106 [00:02<00:00, 43.99it/s]
100%|██████████| 106/106 [00:02<00:00, 39.67it/s]
monkey 9/15
100%|██████████| 98/98 [00:01<00:00, 49.40it/s]
100%|██████████| 98/98 [00:02<00:00, 42.50it/s]
monkey 1

### Load the results

In [14]:
df_fit_day = pd.read_csv(os.path.join(BACKUP_FOLDER, "df_fit_day.csv"))
df_fit_day.date = pd.to_datetime(df_fit_day.date)
df_fit_day

Unnamed: 0,distortion,precision,risk_aversion,date,monkey,condition,n
0,0.200000,10.000000,0.990000,2020-06-25,Ola,gain,46
1,1.800000,2.298493,0.095398,2020-06-26,Ola,gain,13
2,1.800000,0.641667,0.929759,2020-06-27,Ola,gain,18
3,0.200000,0.487869,0.004734,2020-06-28,Ola,gain,12
4,1.192238,0.917417,-0.192953,2020-06-29,Ola,gain,23
...,...,...,...,...,...,...,...
2607,0.200000,10.000000,0.990000,2020-10-20,Yin,loss,3
2608,0.200000,1.038210,-0.879448,2020-10-21,Yin,loss,6
2609,0.784174,0.100000,-0.990000,2020-10-22,Yin,loss,6
2610,1.800000,1.078414,0.873188,2020-10-24,Yin,loss,6


## Using a slide window (with all the monkeys)

### Dot the fit

In [15]:
# How many days before and after?
w = 10

df_fit = pd.DataFrame()

cond = "gain", "loss"
monkeys = df_bhv.monkey.unique()

for i_m, m in enumerate(monkeys):

    print(f"monkey {i_m+1}/{len(monkeys)}")

    for cd in cond:
        
        # Get data
        df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]
        
        # Get dates
        dates = df_m.date.unique()
        n_days = len(dates)
        n_param = len(Model.param_labels)
        
        # Optimize for each 'windowed' day
        r = np.zeros((n_days, n_param))
        n_trial = np.zeros(n_days, dtype=int)
        for i in tqdm(range(n_days), file=sys.stdout):
            
            # Get the mask
            b0 = dates[max(i-w, 0)]
            b1 = dates[min(i+w, n_days-1)]
            mask = (df_m.date >= b0) & (df_m.date <= b1)
            
            # Optimize
            best_param, best_value = optimize(model=Model, data=df_m[mask])
            r[i] = best_param
            
            # Count thre trials
            n_trial[i] = np.sum(mask)
        
        # Backup
        df_m_cd = pd.DataFrame(r, columns=Model.param_labels)
        df_m_cd["date"] = dates
        df_m_cd["monkey"] = m
        df_m_cd["condition"] = cd
        df_m_cd["n"] = n_trial
        df_fit = pd.concat((df_fit, df_m_cd))

df_fit.to_csv(os.path.join(BACKUP_FOLDER, f"df_fit_w{w}.csv"))

monkey 1/15
100%|██████████| 86/86 [00:01<00:00, 77.66it/s]
100%|██████████| 86/86 [00:01<00:00, 52.46it/s]
monkey 2/15
100%|██████████| 103/103 [00:02<00:00, 51.03it/s]
100%|██████████| 103/103 [00:03<00:00, 26.19it/s]
monkey 3/15
100%|██████████| 60/60 [00:01<00:00, 49.40it/s]
100%|██████████| 60/60 [00:02<00:00, 25.05it/s]
monkey 4/15
100%|██████████| 113/113 [00:02<00:00, 41.29it/s]
100%|██████████| 113/113 [00:04<00:00, 24.44it/s]
monkey 5/15
100%|██████████| 66/66 [00:01<00:00, 60.32it/s]
100%|██████████| 66/66 [00:01<00:00, 43.99it/s]
monkey 6/15
100%|██████████| 111/111 [00:03<00:00, 34.21it/s]
100%|██████████| 111/111 [00:04<00:00, 26.14it/s]
monkey 7/15
100%|██████████| 113/113 [00:03<00:00, 30.34it/s]
100%|██████████| 113/113 [00:04<00:00, 23.08it/s]
monkey 8/15
100%|██████████| 106/106 [00:03<00:00, 32.74it/s]
100%|██████████| 106/106 [00:04<00:00, 26.18it/s]
monkey 9/15
100%|██████████| 98/98 [00:02<00:00, 47.09it/s]
100%|██████████| 98/98 [00:03<00:00, 26.25it/s]
monkey 1

### Load the results

In [16]:
df_fit = pd.read_csv(os.path.join(BACKUP_FOLDER, f"df_fit_w{w}.csv"), index_col=0)
df_fit.date = pd.to_datetime(df_fit.date)
df_fit

Unnamed: 0,distortion,precision,risk_aversion,date,monkey,condition,n
0,0.200000,4.270038,0.990000,2020-06-25,Ola,gain,184
1,0.200000,2.771034,0.371044,2020-06-26,Ola,gain,190
2,0.200000,3.808867,0.430676,2020-06-27,Ola,gain,208
3,0.200000,3.722660,0.085007,2020-06-28,Ola,gain,220
4,0.200000,4.670819,0.132592,2020-06-29,Ola,gain,244
...,...,...,...,...,...,...,...
57,0.200000,10.000000,-0.990000,2020-10-20,Yin,loss,77
58,0.313025,10.000000,-0.990000,2020-10-21,Yin,loss,71
59,0.200000,10.000000,-0.990000,2020-10-22,Yin,loss,70
60,1.159001,10.000000,0.790311,2020-10-24,Yin,loss,66


# At once

In [17]:
cond = "gain", "loss"
monkeys = df_bhv.monkey.unique()

df_fit_overall = pd.DataFrame()

for i_m, m in tqdm(enumerate(monkeys), file=sys.stdout, total=len(monkeys)):

    for cd in cond:
        
        # Select the data
        df_m = df_bhv[(df_bhv.monkey == m) & (df_bhv.is_risky == True) & (df_bhv[f"is_{cd}"] == True)]
        
        # Get the dates
        dates = df_m.date.unique()
        
        # Get the number of parameters
        n_param = len(Model.param_labels)
        
        # Optimize
        best_param, best_value = fast_optimize(model=Model, data=df_m)
        
        # Backup
        df_fit_m = pd.DataFrame(best_param.reshape((1, len(best_param))), columns=Model.param_labels)
        df_fit_m["date_begin"] = dates[0]
        df_fit_m["date_end"] = dates[-1]
        df_fit_m["monkey"] = m
        df_fit_m["condition"] = cd
        df_fit_m["n"] = len(df_m)
        df_fit_overall = pd.concat((df_fit_overall, df_fit_m))
        
df_fit_overall.to_csv(os.path.join(BACKUP_FOLDER, f"df_fit_overall.csv"))

100%|██████████| 15/15 [00:01<00:00,  8.84it/s]


### Load the results

In [18]:
df_fit_overall = pd.read_csv(os.path.join(BACKUP_FOLDER, f"df_fit_overall.csv"))
for c in ("date_begin", "date_end"):
    df_fit_overall[c] = pd.to_datetime(df_fit_overall[c])
df_fit_overall.drop(df_fit_overall.filter(regex="Unname"),axis=1, inplace=True)
df_fit_overall

Unnamed: 0,distortion,precision,risk_aversion,date_begin,date_end,monkey,condition,n
0,0.2,10.0,0.99,2020-06-25,2020-10-25,Ola,gain,837
1,0.25969,5.82262,-0.99,2020-06-25,2020-10-25,Ola,loss,844
2,1.037591,3.41175,0.408652,2020-06-25,2020-10-25,Abr,gain,1873
3,0.643684,0.496632,-0.406756,2020-06-25,2020-10-25,Abr,loss,1874
4,1.500298,2.562927,-0.172519,2020-06-25,2020-10-25,Nem,gain,800
5,0.61801,0.304122,-0.437583,2020-06-25,2020-10-25,Nem,loss,803
6,1.258861,1.165837,-0.345788,2020-06-25,2020-10-25,Alv,gain,1825
7,0.776557,0.334887,-0.493689,2020-06-25,2020-10-25,Alv,loss,1824
8,0.2,1.572916,0.99,2020-06-25,2020-10-25,Ner,gain,465
9,1.767768,6.214167,0.535331,2020-06-25,2020-10-25,Ner,loss,462
