In [1]:
import pandas as pd
import numpy as np



# Starting from HMTG Observation Data (González et al.)

In [2]:
# Load the observation data (first CSV column is the row index), drop the
# columns that are not needed for the stimulus file, and rename 'im' -> 'IM'.
# Written as one chain so re-running the cell always starts from the file.
allDat = (
    pd.read_csv('turkData_exp2.csv', index_col=0, header=0)
    .drop(columns=['turker', 'turker_id', 'pred', 'exp', 'pred_rt', 'pred_prcnt', 'turker_pe', 'ret_prcnt'])
    # `columns=` is required here: a bare mapping passed to rename() is
    # applied to the index labels, which left the 'im' column unrenamed.
    .rename(columns={'im': 'IM'})
)


In [3]:
# Keep at most the first 57 rows of every trustee, stacking the trustees in
# order of first appearance. A single pd.concat replaces the repeated
# DataFrame.append calls (removed in pandas 2.0, and quadratic in a loop).
xx = pd.concat(
    [allDat.loc[allDat.trustee == p2][:57] for p2 in allDat.trustee.unique()]
)

# Renumber rows 0..n-1 and export the candidate stimuli.
xx = xx.reset_index(drop=True)
xx.to_csv('HMTG_possib_stim.csv', sep=",", header=True)

In [4]:

# Export the same table as a JSON array with one object per row
# (orient='records'). The file gets a '.js' extension but holds plain JSON.
xx.to_json('HMTG_possib_stim.js', orient='records')


# Probes

# Starting from Original HMTG Data

In [2]:
import pandas as pd
import numpy as np

# Column names are supplied explicitly through `names=`.
raw_columns = ['subj','block', 'trial', 'inv','baseMult','mult','exp','ret']
allData = pd.read_csv('allDataLongFollowup.csv', sep=",", names=raw_columns)

# Keep only the rows with baseMult == 4, then discard the columns that are
# not used further on.
allData = (
    allData.loc[allData.baseMult == 4]
    .drop(columns=['block', 'exp', 'baseMult'])
    .reset_index(drop=True)
)

# Subjects 7 and 43 are excluded on advice from Jeroen VBaar (via Slack);
# the rationale is not recorded in this notebook.
allData = allData.loc[~allData.subj.isin([7, 43])].reset_index(drop=True)
allData.head()

Unnamed: 0,subj,trial,inv,mult,ret
0,1,1,9,4,18
1,1,2,6,6,10
2,1,3,4,4,4
3,1,4,5,2,2
4,1,5,3,6,3


# Identifying trustees to use as stimuli

In [3]:
# The clustering map is read as a wide grid: a 'phi' column followed by one
# column per theta value (the column names are parsed as numbers below).
param_space = pd.read_csv('ClusteringMap_study-1_baseMult-4_model-MP_precision-100.csv', sep=",")

# Reshape to long format: one row per (phi, theta) grid point with its cluster.
param_space_melt = param_space.melt(id_vars='phi', value_vars=param_space.columns[1:], var_name='theta', value_name='cluster')
param_space_melt['theta'] = pd.to_numeric(param_space_melt.theta)

# Center of each cluster = column-wise mean of its grid points, kept as a
# Series so later cells can read `.theta` and `.phi`.
# Use DataFrame.mean(axis=0) explicitly: np.mean(DataFrame) forwards
# axis=None, which newer pandas reduces over both axes to a single scalar.
cluster_centers = [param_space_melt[param_space_melt.cluster == x].mean(axis=0) for x in np.arange(4)]

### Computing each trustee's distance from their cluster center

In [4]:
# Fitted parameters and cluster label per trustee; 'sub' is renamed to 'subj'
# to match the column name used in allData.
trustee_params = pd.read_csv('ParamsClusters_study-1_baseMult-4_model-MP_ppSOE_precision-100.csv',
                             sep=",", index_col=0).rename(index=str, columns={'sub': 'subj'})
trustee_params = trustee_params[(trustee_params.subj != 7) & (trustee_params.subj != 43)].reset_index(drop=True) #exclude 2 subjects

# Initialise as float: the column receives Euclidean distances, and writing
# floats into an int column triggers an incompatible-dtype warning in pandas.
trustee_params['dist2cntr'] = 0.0
closest_trustees = []

for cluster in trustee_params.clust.unique():
    in_cluster = trustee_params.clust == cluster
    center = cluster_centers[cluster]

    # Euclidean distance in (theta, phi) space from each trustee in this
    # cluster to the cluster center, computed for the whole cluster at once.
    # Assumes one row per subject (each subject belongs to a single cluster).
    trustee_params.loc[in_cluster, 'dist2cntr'] = np.sqrt(
        (trustee_params.loc[in_cluster, 'theta'] - center.theta) ** 2
        + (trustee_params.loc[in_cluster, 'phi'] - center.phi) ** 2)

    # Keep the five trustees closest to the center of this cluster.
    closest_trustees.append(
        trustee_params[in_cluster].sort_values('dist2cntr')[:5].subj.tolist())

# Flatten the per-cluster lists into a single list of subject ids.
subs_to_observe = [item for sublist in closest_trustees for item in sublist]


## A little more cleaning / formatting of stimulus variables for experiment

In [5]:

# Restrict the data to the trustees selected as stimuli.
allData = allData[allData['subj'].isin(subs_to_observe)].reset_index(drop=True)
# recomputing 'expectation' variable
allData['exp'] = allData.inv * 2
allData['IM'] = allData.inv * allData.mult

# remove inv == 0 trials
allData = allData[allData.inv > 0].reset_index(drop=True)

# Balance the multiplier contexts: for each trustee, keep the same number of
# x2, x4 and x6 trials (the smallest of the three counts) by randomly
# discarding the surplus. The generator is seeded so that the retained trials
# are identical on every run; the unseeded np.random.choice used before
# produced a different stimulus set each time the cell was executed.
rng = np.random.default_rng(42)
multipliers = (2, 4, 6)

allData['trial'] = 0

for sub in allData.subj.unique():
    # Snapshot of this trustee's rows; index labels are unique after the
    # reset_index above, so they identify rows of allData unambiguously.
    game = allData[allData.subj == sub]
    trials_per_context = min((game.mult == m).sum() for m in multipliers)
    for x in multipliers:
        x_trials = game[game.mult == x].index
        keep = rng.choice(x_trials, size=trials_per_context, replace=False)
        surplus = x_trials[~x_trials.isin(keep)]
        allData = allData.drop(surplus, axis=0)

    # Renumber this trustee's remaining trials 0..n-1.
    allData.loc[allData.subj == sub, 'trial'] = np.arange((allData.subj == sub).sum())


# NOTE: this renames 'subj' to 'trustee' (and casts index labels to str), so
# the cell cannot be re-run without first re-running the cells that build allData.
allData = allData.rename(index =str, columns={'subj': 'trustee'})

# Choosing a single trustee and random trial order

In [5]:
# Select the trials of trustee 4 and shuffle them with a fixed seed so the
# trial order is reproducible.
is_chosen_trustee = allData.trustee == 4
shuffled = allData.loc[is_chosen_trustee].sample(frac=1, random_state=42)
stim = shuffled.reset_index(drop=True)

# Number the trials in their new (shuffled) order.
stim['trial'] = range(len(stim))

# NOTE: this writes to the same path as the earlier observation-data export
# ('HMTG_possib_stim.csv'), so that file is overwritten.
stim.to_csv('HMTG_possib_stim.csv', sep=",", header=True)

In [5]:
# Show the selected trustee ids (up to five per cluster, closest to center first).
subs_to_observe

[54, 41, 65, 92, 32, 62, 52, 72, 91, 83, 97, 34, 47, 6, 4, 93, 100, 81, 56, 75]