# Imports etc.

In [None]:
import json
import numpy as np
import os
import pandas as pd
import plotnine as gg
# Make plotnine's classic theme the default for every plot created below
gg.theme_set(gg.theme_classic)
# Default figure size as (width, height) -- presumably in inches (TODO confirm);
# not referenced by any of the cells visible in this part of the notebook
default_figure_size = (6.4, 4.8)

In [None]:
# Root folder of the data set to analyze (switch by swapping the comment)
# data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/mTurk1'
data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/RPP34'

# Figures are written to a `figures` sub-folder of the data directory.
# os.path.join inserts the path separator that plain string concatenation
# left out (which created a sibling folder named '...RPP34figures' instead).
plot_dir = os.path.join(data_dir, 'figures')

# exist_ok=True makes this a no-op when the folder is already there
os.makedirs(plot_dir, exist_ok=True)

## Get all_data

In [None]:
# Load the rule table; the first CSV column is used as the row index
rule_data = pd.read_csv(
    os.path.join(data_dir, 'rule_data.csv'),
    index_col=0,
)

# Preview the first 30 rows of the 'high' phase (shown as the cell output)
rule_data[rule_data['phase'] == 'high'].head(30)

In [None]:
# Columns of interest in the trial-level data.
# Entries that are commented out are currently excluded from the list.
interesting_cols = [
    # 'sid',
    'trial_type',
    'phase',
    'block',
    'trial',
    'trial_',
    'subtrial',
    'points',
    'rt',
    # 'key_press',
    'action_id',
    'action_name',
    'middle_item',
    'middle_item_name',
    'goal_star',
    'goal_star_name',
    'correct',
    'unlocked_star',
    'unlocked_star_name',
    'timeout',
    'star_iteration',
    # 'middle_item_lowTransferRules', 'middle_item_lowRules', 'bool_middle_item_lowRules',
    # 'unlocked_star_highTransferRules', 'unlocked_star_highRules', 'bool_unlocked_star_highRules',
    'chance_performer',
]

In [None]:
# Load the trial-level data; the first CSV column is used as the row index
all_data = pd.read_csv(
    os.path.join(data_dir, 'all_data.csv'),
    index_col=0,
)

# Keep only the rows whose `inattentives` flag is not set
all_data = all_data[~all_data['inattentives']]
all_data

# Results

In [None]:
# Grouping keys: one learning curve per participant / trial type / phase,
# indexed either by trial position or by block
id_cols_trial = ['sid', 'trial_type', 'phase', 'phaseNum', 'trial']
id_cols_block = ['sid', 'trial_type', 'phase', 'phaseNum', 'block']

# Mean accuracy per group. Only `acc` is aggregated: averaging every column
# is wasted work and raises a TypeError on text columns in pandas >= 2.0.
# The resulting columns (grouping keys + 'acc') are the same as before.
learn_curves_trial = all_data.groupby(id_cols_trial)['acc'].mean().reset_index()
learn_curves_block = all_data.groupby(id_cols_block)['acc'].mean().reset_index()
learn_curves_trial

In [None]:
# Plot performance over trials (averaged over blocks).
# learn_curves_trial holds one mean accuracy per sid / trial_type / phase /
# phaseNum / trial group; stat_summary summarizes those rows at each x value,
# drawn once with its default geom and once as a connecting line.
# One panel per trial_type, one color per phase.
g = (gg.ggplot(learn_curves_trial, gg.aes('trial', 'acc', color='phase'))
     + gg.stat_summary(position=gg.position_dodge(width=0.1))
     + gg.stat_summary(position=gg.position_dodge(width=0.1), geom='line')
     + gg.facet_grid('~ trial_type')
     + gg.labs(y='Accuracy')
    )
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverTrials.png'))

# Plot performance over blocks (averaged over trials).
# Reuses the plot object from above: only the x mapping and the data are
# swapped, so layers, facets and labels stay the same.
g += gg.aes(x='block')
g.data = learn_curves_block
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverBlocks.png'))

In [None]:
# Same grouping keys as the overall learning curves, additionally split by
# goal star
id_cols_trial = ['sid', 'trial_type', 'phase', 'phaseNum', 'trial', 'goal_star']
id_cols_block = ['sid', 'trial_type', 'phase', 'phaseNum', 'block', 'goal_star']

# Mean accuracy per group. Only `acc` is aggregated: averaging every column
# is wasted work and raises a TypeError on text columns in pandas >= 2.0.
# The resulting columns (grouping keys + 'acc') are the same as before.
learn_curves_trial = all_data.groupby(id_cols_trial)['acc'].mean().reset_index()
learn_curves_block = all_data.groupby(id_cols_block)['acc'].mean().reset_index()
learn_curves_trial

In [None]:
# Plot performance over trials (averaged over blocks), split by goal star.
# learn_curves_trial holds one mean accuracy per sid / trial_type / phase /
# phaseNum / trial / goal_star group; stat_summary summarizes those rows at
# each x value, drawn once with its default geom and once as a line.
# Facet rows are goal stars, facet columns are trial types.
g = (gg.ggplot(learn_curves_trial, gg.aes('trial', 'acc', color='phase'))
     + gg.stat_summary(position=gg.position_dodge(width=0.1))
     + gg.stat_summary(position=gg.position_dodge(width=0.1), geom='line')
     + gg.facet_grid('goal_star ~ trial_type')
     + gg.labs(y='Accuracy')
    )
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverTrials_byGoalStar.png'))

# Plot performance over blocks (averaged over trials).
# Reuses the plot object from above: only the x mapping and the data are
# swapped, so layers, facets and labels stay the same.
g += gg.aes(x='block')
g.data = learn_curves_block
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverBlocks_byGoalStar.png'))

## Perseveration: Following old rules after transfer

In [None]:
# Grouping keys for the rule-use summaries: identical except for the unit
# (block vs. trial) over which the curve is drawn
id_vars_block, id_vars_trial = (
    ['sid', unit, 'phase', 'trial_type', 'phaseNum']
    for unit in ('block', 'trial')
)

# Indicator columns that get averaged and melted into long format:
# low-level rules are read from the middle item ...
melt_vars_low = [
    'bool_middle_item_lowRulesLearnOnly',
    'bool_middle_item_lowRulesTransferOnly',
    'bool_middle_item_lowRulesBoth',
]
# ... and high-level rules from the unlocked star
melt_vars_high = [
    'bool_unlocked_star_highRulesLearnOnly',
    'bool_unlocked_star_highRulesTransferOnly',
    'bool_unlocked_star_highRulesBoth',
]

In [None]:
def get_melt_dat(all_data, id_vars, melt_vars):
    """Average the `melt_vars` columns per `id_vars` group and melt to long format.

    Parameters
    ----------
    all_data : pandas.DataFrame
        Must contain a `subtrial` column plus every column named in
        `id_vars` and `melt_vars`.
    id_vars : list of str
        Grouping columns; they are kept as identifier columns in the result.
    melt_vars : list of str
        Columns to average. If the first name contains 'low', rows from
        subtrials 1 and 3 are used; otherwise only rows from subtrial 3.

    Returns
    -------
    pandas.DataFrame
        One row per (group, melted column), with the `id_vars` columns
        followed by `variable` (original column name) and `value` (group mean).
    """
    # Which subtrials count depends on whether these are low or high rules
    is_low_rule = 'low' in melt_vars[0]
    subtrials = [1, 3] if is_low_rule else [3]

    # Restrict to the relevant rows and columns, then average within each group
    row_mask = all_data['subtrial'].isin(subtrials)
    selected = all_data.loc[row_mask, id_vars + melt_vars]
    group_means = selected.groupby(id_vars).mean().reset_index()

    # One row per averaged column so that they can be plotted together
    return group_means.melt(id_vars=id_vars)

# Example use: per-block low-rule summary in long format. The result is only
# displayed as the cell output, not stored.
get_melt_dat(all_data, id_vars_block, melt_vars_low)

In [None]:
def combine_low_high(all_data, id_vars):
    """Stack the low-rule and high-rule summaries into one long table.

    Low-rule columns (`melt_vars_low`) are summarized with `get_melt_dat` and
    kept only for rows of the 'low' phase; high-rule columns
    (`melt_vars_high`) only for rows of the 'high' phase.

    Parameters
    ----------
    all_data : pandas.DataFrame
        Trial-level data, passed through to `get_melt_dat`.
    id_vars : list of str
        Grouping columns; must include 'phase', which is used for filtering.

    Returns
    -------
    pandas.DataFrame
        The concatenated long-format rows plus a `rulePhase` column holding
        the part of the melted column name after 'Rules' (e.g. 'LearnOnly').
    """
    def rule_phase(variable_name):
        # 'bool_middle_item_lowRulesLearnOnly' -> 'lowRulesLearnOnly' -> 'LearnOnly'
        last_token = variable_name.split('_')[-1]
        return last_token.split('Rules')[1]

    # Summarize each rule level and keep only the rows of its own phase
    pieces = []
    for phase_name, rule_cols in (('low', melt_vars_low), ('high', melt_vars_high)):
        melted = get_melt_dat(all_data, id_vars, rule_cols)
        pieces.append(melted.loc[melted['phase'] == phase_name])

    dat_both = pd.concat(pieces)
    dat_both['rulePhase'] = dat_both['variable'].apply(rule_phase)

    return dat_both

# Example use: combined low/high rule-use summary per trial. The result is
# only displayed as the cell output, not stored.
combine_low_high(all_data, id_vars_trial)

In [None]:
# Get data: long-format rule-use frequencies, once per trial and once per block
trial_dat = combine_low_high(all_data, id_vars_trial)
block_dat = combine_low_high(all_data, id_vars_block)

# Plot rule use over Trials (averaged over blocks).
# `value` is the per-group mean of the rule indicator columns; facet rows are
# the rulePhase suffixes, facet columns are the trial types.
g = (gg.ggplot(trial_dat, gg.aes('trial', 'value', color='phase'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.facet_grid('rulePhase ~ trial_type')
     + gg.labs(y='Frequency of rule use', color='Rule')
    )
g.save(os.path.join(plot_dir, '103_RuleUseOverTrials.png'))
print(g)

# Plot rule use over Blocks (averaged over trials).
# Reuses the plot object from above: only the x mapping and the data are
# swapped, so layers, facets and labels stay the same.
g += gg.aes(x='block')
g.data = block_dat
g.save(os.path.join(plot_dir, '103_RuleUseOverBlocks.png'))
print(g)

# RTs

In [None]:
# Mean of every numeric column per participant, block, phase and trial type.
# numeric_only=True keeps text columns out of the average; on pandas >= 2.0
# averaging them raises a TypeError instead of silently dropping them.
block_dat = (all_data
             .groupby(['sid', 'block', 'phase', 'trial_type'])
             .mean(numeric_only=True)
             .reset_index())

# Reaction time over blocks: stat_summary summarizes the per-participant
# means at each block, drawn once with its default geom and once as a line.
# One panel per trial type, one color per phase.
g = (gg.ggplot(block_dat, gg.aes('block', 'rt', color='phase'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.facet_grid(' ~ trial_type')
    )
print(g)
g.save(os.path.join(plot_dir, '103_RtOverBlock.png'))

In [None]:
# Mean of every numeric column per participant, trial, phase and trial type.
# numeric_only=True keeps text columns out of the average; on pandas >= 2.0
# averaging them raises a TypeError instead of silently dropping them.
trial_dat = (all_data
             .groupby(['sid', 'trial', 'phase', 'trial_type'])
             .mean(numeric_only=True)
             .reset_index())

# Reuse the existing plot object `g` (the reaction-time plot over blocks):
# only the x mapping and the data are swapped.
g += gg.aes(x='trial')
g.data = trial_dat
print(g)
g.save(os.path.join(plot_dir, '103_RtOverTrial.png'))

In [None]:
def get_diff_dat(dat, col):
    """Pair learning and transfer reaction times and compute their difference.

    Parameters
    ----------
    dat : pandas.DataFrame
        Must contain the columns `sid`, `phase`, `trial_type`, `rt` and `col`.
    col : str
        Name of the unit column to match rows on (e.g. 'trial' or 'block').

    Returns
    -------
    pandas.DataFrame
        One row per (`col`, `sid`, `phase`) combination present in both the
        'learning' and the 'transfer' rows, with the columns `rt_learn`,
        `rt_trans` and `rt_trans_minus_learn` (transfer minus learning).
    """
    key_cols = [col, 'sid', 'phase']
    keep_cols = key_cols + ['rt']

    is_learning = dat['trial_type'] == 'learning'
    is_transfer = dat['trial_type'] == 'transfer'

    # Default (inner) merge: only combinations found in both subsets survive
    diff_dat = dat.loc[is_learning, keep_cols].merge(
        dat.loc[is_transfer, keep_cols],
        on=key_cols,
        suffixes=['_learn', '_trans'],
    )
    diff_dat['rt_trans_minus_learn'] = diff_dat['rt_trans'] - diff_dat['rt_learn']

    return diff_dat

# Example use: transfer-minus-learning reaction time per trial. The result is
# only displayed as the cell output, not stored.
get_diff_dat(trial_dat, 'trial')

In [None]:
# Transfer-minus-learning reaction time differences, per trial and per block
diff_dat_trial = get_diff_dat(trial_dat, 'trial')
diff_dat_block = get_diff_dat(block_dat, 'block')

# Speed loss over trials: positive values mean the transfer rt was larger
# than the learning rt for the same sid / phase / trial. One color per phase.
g = (gg.ggplot(diff_dat_trial, gg.aes('trial', 'rt_trans_minus_learn', color='phase'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.labs(y='Speed loss in transfer')
    )
print(g)
g.save(os.path.join(plot_dir, '103_SpeedlossOverTrial.png'))

In [None]:
# Speed loss over blocks: reuse the existing plot object `g` (the speed-loss
# plot over trials), swapping in the per-block data and the x mapping.
g.data = diff_dat_block
g += gg.aes(x='block')
print(g)
g.save(os.path.join(plot_dir, '103_SpeedlossOverBlock.png'))