# Imports etc.

In [None]:
import json
import numpy as np
import os
import pandas as pd
import plotnine as gg
# Make plotnine's classic theme the default theme for the figures built below.
gg.theme_set(gg.theme_classic)
# Default figure size; presumably (width, height) in inches -- confirm where it is used.
default_figure_size = (6.4, 4.8)

In [None]:
# Root folder of the dataset to analyse. To switch datasets, swap which of the
# two assignments below is commented out.
# data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/mTurk1'
data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/RPP34'

# Figures are written to a 'figures' sub-folder of the data folder.
# os.path.join inserts the path separator; plain string concatenation
# (data_dir + 'figures') would instead point at a sibling folder 'RPP34figures'.
plot_dir = os.path.join(data_dir, 'figures')
# exist_ok=True makes re-running the cell a no-op when the folder already exists.
os.makedirs(plot_dir, exist_ok=True)

## Get all_data

In [None]:
# Load the rule table (first CSV column becomes the index) and preview the
# first 30 rows that belong to the 'high' phase.
rule_data = pd.read_csv(os.path.join(data_dir, 'rule_data.csv'), index_col=0)
rule_data[rule_data['phase'] == 'high'].head(30)

In [None]:
# Columns selected when a data frame is displayed for inspection
# (see `sub_dat[interesting_cols]` further down). Entries that are commented
# out are currently excluded from that view.
interesting_cols = [
#     'sid',
    'trial_type', 'phase', 'block', 'trial', 'trial_', 'subtrial', 'points', 'rt', #'key_press',
    'action_id', 'action_name', 'middle_item', 'middle_item_name', 'goal_star', 'goal_star_name', 'correct', 'unlocked_star',
    'unlocked_star_name', 'timeout', 'star_iteration',
#     'middle_item_lowTransferRules', 'middle_item_lowRules', 'bool_middle_item_lowRules',
#     'unlocked_star_highTransferRules', 'unlocked_star_highRules', 'bool_unlocked_star_highRules',
    'chance_performer',
]

In [None]:
# Load the trial-level data, then keep only the rows for which none of the three
# exclusion flags (inattentives, psych_disorder, head_trauma) is set.
all_data = pd.read_csv(os.path.join(data_dir, 'all_data.csv'), index_col=0)
all_data = all_data.loc[
    ~(all_data['inattentives'] | all_data['psych_disorder'] | all_data['head_trauma'])
]
all_data

# Results

In [None]:
# Identifier columns defining one learning-curve point per participant:
# once per trial position and once per block.
id_cols_trial = ['sid', 'trial_type', 'phase', 'phaseNum', 'trial']
id_cols_block = ['sid', 'trial_type', 'phase', 'phaseNum', 'block']

# Mean accuracy per group. The id columns and 'acc' are selected *before*
# grouping so the mean is never attempted on non-numeric columns (which raises
# a TypeError in pandas >= 2.0) and no unused column means are computed.
# The resulting columns are the same as before: the id columns followed by 'acc'.
learn_curves_trial = all_data[id_cols_trial + ['acc']].groupby(id_cols_trial).mean().reset_index()
learn_curves_block = all_data[id_cols_block + ['acc']].groupby(id_cols_block).mean().reset_index()
learn_curves_trial

In [None]:
# Accuracy by trial position (averaged over blocks), one panel per trial type.
# The first stat_summary layer draws the summary points, the second connects them.
g = (
    gg.ggplot(data=learn_curves_trial, mapping=gg.aes(x='trial', y='acc', color='phase'))
    + gg.stat_summary(position=gg.position_dodge(width=0.1))
    + gg.stat_summary(geom='line', position=gg.position_dodge(width=0.1))
    + gg.facet_grid('~ trial_type')
    + gg.labs(y='Accuracy')
)
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverTrials.png'))

# Same figure by block (averaged over trials): swap the data and the x aesthetic.
g.data = learn_curves_block
g += gg.aes(x='block')
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverBlocks.png'))

In [None]:
# Same learning curves as above, additionally split by goal star.
id_cols_trial = ['sid', 'trial_type', 'phase', 'phaseNum', 'trial', 'goal_star']
id_cols_block = ['sid', 'trial_type', 'phase', 'phaseNum', 'block', 'goal_star']

# Select the id columns and 'acc' *before* grouping so the mean is never
# attempted on non-numeric columns (a TypeError in pandas >= 2.0). The result
# has the same columns as before: the id columns followed by 'acc'.
learn_curves_trial = all_data[id_cols_trial + ['acc']].groupby(id_cols_trial).mean().reset_index()
learn_curves_block = all_data[id_cols_block + ['acc']].groupby(id_cols_block).mean().reset_index()
learn_curves_trial

In [None]:
# Accuracy by trial position (averaged over blocks); rows of panels are goal
# stars, columns are trial types.
g = (
    gg.ggplot(data=learn_curves_trial, mapping=gg.aes(x='trial', y='acc', color='phase'))
    + gg.stat_summary(position=gg.position_dodge(width=0.1))
    + gg.stat_summary(geom='line', position=gg.position_dodge(width=0.1))
    + gg.facet_grid('goal_star ~ trial_type')
    + gg.labs(y='Accuracy')
)
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverTrials_byGoalStar.png'))

# Same figure by block (averaged over trials): swap the data and the x aesthetic.
g.data = learn_curves_block
g += gg.aes(x='block')
print(g)
g.save(os.path.join(plot_dir, '103_PerformanceOverBlocks_byGoalStar.png'))

## Perseverance: Following old rules after transfer

In [None]:
# Grouping columns for the rule-use summaries: one set per block, one per trial.
id_vars_block = ['sid', 'block', 'phase', 'trial_type', 'phaseNum']
id_vars_trial = ['sid', 'trial', 'phase', 'trial_type', 'phaseNum']

# Indicator columns that get averaged and melted, for low-level (middle item)
# and high-level (unlocked star) rules. Presumably each flags whether the choice
# matched rules valid only in learning, only in transfer, or in both --
# confirm against the preprocessing that creates these columns.
melt_vars_low = ['bool_middle_item_lowRulesLearnOnly', 'bool_middle_item_lowRulesTransferOnly', 'bool_middle_item_lowRulesBoth']
melt_vars_high = ['bool_unlocked_star_highRulesLearnOnly', 'bool_unlocked_star_highRulesTransferOnly', 'bool_unlocked_star_highRulesBoth']

In [None]:
def get_melt_dat(all_data, id_vars, melt_vars):
    """Average indicator columns per group and return them in long format.

    Rows are first restricted by subtrial: subtrials 1 and 3 when the first
    entry of ``melt_vars`` contains ``'low'``, otherwise subtrial 3 only.
    The remaining rows are averaged within every combination of ``id_vars``
    and the averaged ``melt_vars`` columns are stacked on top of each other.

    Args:
        all_data: Data frame with a ``subtrial`` column plus all columns named
            in ``id_vars`` and ``melt_vars``.
        id_vars: List of grouping columns; kept as identifier columns.
        melt_vars: List of columns to average and melt.

    Returns:
        Data frame with the ``id_vars`` columns followed by ``variable``
        (the original column name) and ``value`` (the group mean).
    """
    # The first melt column decides which subtrials are relevant.
    relevant_subtrials = [1, 3] if 'low' in melt_vars[0] else [3]
    keep_rows = all_data['subtrial'].isin(relevant_subtrials)

    # One row per id_vars combination, holding the mean of each melt column.
    group_means = all_data.loc[keep_rows, id_vars + melt_vars].groupby(id_vars).mean()

    # Long format, so the different indicator columns can share one plot.
    return group_means.reset_index().melt(id_vars=id_vars)

# Example use: per-block averages of the low-level indicator columns, in long format
get_melt_dat(all_data, id_vars_block, melt_vars_low)

In [None]:
def combine_low_high(all_data, id_vars):
    """Stack low-rule use in the low phase on top of high-rule use in the high phase.

    For each phase the matching indicator columns (``melt_vars_low`` or
    ``melt_vars_high``) are summarised with ``get_melt_dat`` and only the rows
    of that phase are kept. ``id_vars`` must therefore contain ``'phase'``.

    Args:
        all_data: Trial-level data frame accepted by ``get_melt_dat``.
        id_vars: List of grouping columns.

    Returns:
        The concatenated long-format frame with an extra ``rulePhase`` column
        holding the text after ``'Rules'`` in the last underscore-separated
        part of ``variable`` (e.g. ``'LearnOnly'``).
    """
    phase_frames = []
    for phase_name, rule_cols in (('low', melt_vars_low), ('high', melt_vars_high)):
        melted = get_melt_dat(all_data, id_vars, rule_cols)
        phase_frames.append(melted.loc[melted['phase'] == phase_name])

    dat_both = pd.concat(phase_frames)
    # '..._lowRulesLearnOnly' -> 'lowRulesLearnOnly' -> 'LearnOnly'
    dat_both['rulePhase'] = dat_both['variable'].apply(
        lambda name: name.split('_')[-1].split('Rules')[1]
    )

    return dat_both

# Example use: per-trial rule-use table covering both phases
combine_low_high(all_data, id_vars_trial)

In [None]:
# Rule-use tables, summarised once per trial position and once per block.
trial_dat = combine_low_high(all_data, id_vars_trial)
block_dat = combine_low_high(all_data, id_vars_block)

# Rule use by trial position (averaged over blocks); rows of panels are the
# rule categories, columns are trial types.
g = (
    gg.ggplot(data=trial_dat, mapping=gg.aes(x='trial', y='value', color='phase'))
    + gg.stat_summary()
    + gg.stat_summary(geom='line')
    + gg.facet_grid('rulePhase ~ trial_type')
    + gg.labs(y='Frequency of rule use', color='Rule')
)
g.save(os.path.join(plot_dir, '103_RuleUseOverTrials.png'))
print(g)

# Same figure by block (averaged over trials): swap the data and the x aesthetic.
g.data = block_dat
g += gg.aes(x='block')
g.save(os.path.join(plot_dir, '103_RuleUseOverBlocks.png'))
print(g)

# RTs

In [None]:
# Per-participant means for every block, phase and trial type.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises a TypeError instead.
# All numeric columns are still kept, so later cells can use any of them.
block_dat = all_data.groupby(['sid', 'block', 'phase', 'trial_type']).mean(numeric_only=True).reset_index()

# Response time over blocks, one panel per trial type.
g = (gg.ggplot(block_dat, gg.aes('block', 'rt', color='phase'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.facet_grid(' ~ trial_type')
    )
print(g)
g.save(os.path.join(plot_dir, '103_RtOverBlock.png'))

In [None]:
# Per-participant means for every trial position, phase and trial type.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises a TypeError instead.
trial_dat = all_data.groupby(['sid', 'trial', 'phase', 'trial_type']).mean(numeric_only=True).reset_index()

# Re-use the existing RT figure `g`, now with trial position on the x-axis.
g += gg.aes(x='trial')
g.data = trial_dat
print(g)
g.save(os.path.join(plot_dir, '103_RtOverTrial.png'))

In [None]:
def get_diff_dat(dat, cols):
    """Pair learning and transfer response times and compute their difference.

    Rows with ``trial_type == 'learning'`` are inner-joined with rows with
    ``trial_type == 'transfer'`` on ``cols`` plus ``'sid'`` and ``'phase'``.

    Args:
        dat: Data frame with ``trial_type``, ``sid``, ``phase``, ``rt`` and
            every column named in ``cols``.
        cols: List of additional key columns (e.g. ``['trial']``); the
            caller's list is not modified.

    Returns:
        Data frame with the key columns, ``rt_learn``, ``rt_trans`` and
        ``rt_trans_minus_learn`` (transfer RT minus learning RT).
    """
    key_cols = cols + ['sid', 'phase']
    wanted_cols = key_cols + ['rt']

    is_learning = dat['trial_type'] == 'learning'
    is_transfer = dat['trial_type'] == 'transfer'

    # Inner join: only key combinations present in both trial types survive.
    paired = dat.loc[is_learning, wanted_cols].merge(
        dat.loc[is_transfer, wanted_cols],
        on=key_cols,
        suffixes=['_learn', '_trans'],
    )
    paired['rt_trans_minus_learn'] = paired['rt_trans'] - paired['rt_learn']

    return paired

# Example use: transfer-minus-learning RT difference per participant, phase and trial
get_diff_dat(trial_dat, ['trial'])

In [None]:
# Transfer-minus-learning RT differences, per trial position and per block.
diff_dat_trial = get_diff_dat(trial_dat, ['trial'])
diff_dat_block = get_diff_dat(block_dat, ['block'])

# Slowing in transfer relative to learning, by trial position.
g = (
    gg.ggplot(
        data=diff_dat_trial,
        mapping=gg.aes(x='trial', y='rt_trans_minus_learn', color='phase'),
    )
    + gg.stat_summary()
    + gg.stat_summary(geom='line')
    + gg.labs(y='Slowing in transfer')
)
print(g)
g.save(os.path.join(plot_dir, '103_speedlossOverTrial.png'))

In [None]:
# Per-participant means for every trial position, phase, trial type and goal star.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises a TypeError instead.
trial_dat_ = all_data.groupby(['sid', 'trial', 'phase', 'trial_type', 'goal_star']).mean(numeric_only=True).reset_index()
diff_dat_trial_star = get_diff_dat(trial_dat_, ['trial', 'goal_star'])

# `g + ...` builds a new plot, so the existing figure `g` stays unfaceted.
g_ = g + gg.facet_grid('~ goal_star')
g_.data = diff_dat_trial_star
print(g_)
g_.save(os.path.join(plot_dir, '103_transferSlowingOverTrialByStar.png'))

In [None]:
# Re-use the slowing figure `g` with blocks on the x-axis and block-wise data.
g += gg.aes(x='block')
g.data = diff_dat_block
print(g)
g.save(os.path.join(plot_dir, '103_speedlossOverBlock.png'))

In [None]:
# Per-participant means for every block, phase, trial type and goal star.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises a TypeError instead.
block_dat_ = all_data.groupby(['sid', 'block', 'phase', 'trial_type', 'goal_star']).mean(numeric_only=True).reset_index()
diff_dat_block_star = get_diff_dat(block_dat_, ['block', 'goal_star'])

# `+=` modifies the existing figure `g` in place: from here on it is faceted by goal star.
g += gg.facet_grid('~ goal_star')
g.data = diff_dat_block_star
print(g)
g.save(os.path.join(plot_dir, '103_transferSlowingOverBlockByStar.png'))

## Collect the wrong star

In [None]:
# Count, per participant, phase and trial type, how often each star was
# unlocked on rows where correct == 0.
id_cols = ['sid', 'phase', 'trial_type', 'unlocked_star']
sub_dat = all_data.loc[all_data.correct == 0].reset_index()
# size() counts the rows of every group directly. Combinations that never occur
# are absent from the result rather than NaN, so no NaN -> 0 replacement is
# needed (the earlier np.isnan-based fill could never match a row).
# NOTE(review): if never-unlocked stars should appear as explicit zeros, the
# table must be reindexed over all wanted combinations -- confirm the intent.
sum_dat = sub_dat.groupby(id_cols).size().reset_index(name='n_unlocked')

# Plot: one jittered point per participant, plus violin and summary per star.
g = (gg.ggplot(sum_dat, gg.aes('unlocked_star', 'n_unlocked', group='unlocked_star'))
     + gg.geom_point(gg.aes(color='factor(sid)'), position='jitter')
#          + gg.stat_summary(geom='bar')
     + gg.geom_violin()
     + gg.stat_summary()
     + gg.theme(legend_position='none')
     + gg.facet_grid('phase ~ trial_type')
    )
print(g)
g.save(os.path.join(plot_dir, '103_CollectingWrongStar.png'))

## First vs second star

Compare the same repetition between the star presented first and the star presented second: in high transfer, the second star does not benefit from the first, but in low transfer it does.

In [None]:
# Keep only the transfer trials and display the columns of interest.
sub_dat = all_data[all_data['trial_type'] == 'transfer']
# sub_dat.loc[(sub_dat['star_iteration'] == 0) & ()]
sub_dat[interesting_cols]

In [None]:
# Per-participant means in blocks 0 and 1, split by trial type, phase and goal star.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises a TypeError instead.
subj_dat = sub_dat.loc[sub_dat['block'].isin([0, 1])].groupby(
    ['sid', 'block', 'trial_type', 'phase', 'goal_star']).mean(numeric_only=True).reset_index()

# Bars and summary markers show the summary per goal star; points and lines
# show individual participants.
g = (gg.ggplot(subj_dat, gg.aes('factor(block)', 'acc'))
     + gg.stat_summary(gg.aes(fill='factor(goal_star)'), geom='bar', position=gg.position_dodge(width=0.9))
     + gg.stat_summary(gg.aes(fill='factor(goal_star)'), position=gg.position_dodge(width=0.9))
     + gg.geom_point(gg.aes(color='factor(sid)'), position=gg.position_dodge(width=0.5))
     + gg.geom_line(gg.aes(color='factor(sid)', group='factor(sid)', linetype='factor(goal_star)'), position=gg.position_dodge(width=0.5))
     + gg.facet_grid('phase ~ trial_type')
    )
g

In [None]:
id_cols = ['sid', 'trial_type', 'phase']
interest_cols = ['goal_star', 'acc']

# Per-participant means of goal_star and acc on subtrial 3, computed
# separately for block 0 (b0_dat) and block 1 (b1_dat).
b0_dat, b1_dat = (
    sub_dat.loc[
        (sub_dat['subtrial'] == 3) & (sub_dat['block'] == block_number),
        id_cols + interest_cols,
    ].groupby(id_cols).mean().reset_index()
    for block_number in (0, 1)
)
b0_dat

In [None]:
# Put block 0 and block 1 side by side for each participant and condition,
# then add the accuracy change from block 0 to block 1.
iteration_dat = b0_dat.merge(b1_dat, on=id_cols, suffixes=['_block0', '_block1'])
iteration_dat = iteration_dat.assign(
    acc1_minus_acc0=iteration_dat['acc_block1'] - iteration_dat['acc_block0']
)
iteration_dat

In [None]:
# Expectation:
#   - NO learning in high, because the 2-key sequences stay the same and what
#     was learned for one star does not help the other.
#   - LOTS of learning in low, because both stars rely on 2-key sequence c,
#     which changes; learning it for the first star should help the second.
# Observation (differs from the expectation):
#   - High and low differ when star 2 is presented first, but not when star 1
#     is presented first. Why?
g = (
    gg.ggplot(
        data=iteration_dat,
        mapping=gg.aes(x='phase', y='acc1_minus_acc0', color='factor(phase)'),
    )
    + gg.stat_summary()
    + gg.geom_hline(yintercept=0, linetype='dotted')
    + gg.labs(x='', y='Improvement 1st to 2nd star (transfer)', color='')
#      + gg.facet_grid('~ goal_star_block0', labeller='label_both')
)
g

In [None]:
# Every star gets better over time
# Per-participant means for every goal star, star iteration, phase and trial type.
# numeric_only=True restricts the mean to numeric columns: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises a TypeError instead.
subj_dat = sub_dat.groupby(['sid', 'goal_star', 'star_iteration', 'phase', 'trial_type']).mean(numeric_only=True).reset_index()

# Accuracy over star iterations, one line per goal star.
g = (gg.ggplot(subj_dat, gg.aes('factor(star_iteration)', 'acc', color='factor(goal_star)', group='factor(goal_star)'))
     + gg.stat_summary(position=gg.position_dodge(width=0.5))
     + gg.stat_summary(position=gg.position_dodge(width=0.5), geom='line')
     + gg.facet_grid('phase ~ trial_type')
    )
print(g)
g.save(os.path.join(plot_dir, '103_accOverStariteration.png'))