# Imports etc.

In [None]:
import json
import numpy as np
import os
import pandas as pd
import plotnine as gg
# Make the classic theme the default for all plotnine figures created in this notebook.
gg.theme_set(gg.theme_classic)
# Default figure size -- presumably (width, height) in inches; not referenced in the cells shown here.
default_figure_size = (6.4, 4.8)

In [None]:
# Root folder of the dataset to analyze; swap the active line to switch datasets.
# data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/mTurk1'
data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/RPP34'

# Figures are written to a 'figures' sub-folder of the data folder.
# os.path.join inserts the path separator; plain string concatenation would
# yield '<data_dir>figures', i.e. a sibling folder next to the data folder.
plot_dir = os.path.join(data_dir, 'figures')

# exist_ok=True keeps the cell safe to re-run once the folder exists.
os.makedirs(plot_dir, exist_ok=True)

## Get all_data

In [None]:
# Load the rule table; the first CSV column is used as the row index.
rule_data_path = os.path.join(data_dir, 'rule_data.csv')
rule_data = pd.read_csv(rule_data_path, index_col=0)

# Preview the first 30 rows whose phase is 'high'.
high_phase_rules = rule_data.loc[rule_data['phase'] == 'high']
high_phase_rules[:30]

In [None]:
# Load the full data table; the first CSV column is used as the row index.
all_data_path = os.path.join(data_dir, 'all_data.csv')
all_data = pd.read_csv(all_data_path, index_col=0)

# Keep only the rows whose 'inattentives' flag is not set.
not_inattentive = np.invert(all_data['inattentives'])
all_data = all_data.loc[not_inattentive]
all_data

# Results

## Learning curves

### Individual participants

In [None]:
# Learning curves, averaging over goal stars
def plot_LearnCurves_Trial(dat, suf=''):
    """Plot mean accuracy per trial, save the figure, and return it.

    The mean of 'acc' is drawn against 'trial_' as points connected by a line,
    in a grid with one row per phase / phaseNum combination and one column per
    trial_type.

    Parameters
    ----------
    dat : data frame with the columns 'trial_', 'acc', 'phase', 'phaseNum',
        and 'trial_type'.
    suf : str, suffix inserted into the file name before '.png'.

    Returns
    -------
    The plotnine plot, which is also saved as
    '2_LearnCurves_Trial<suf>.png' in `plot_dir`.
    """
    g = gg.ggplot(dat, gg.aes('trial_', 'acc'))
    # The same summary statistic (the mean) is drawn twice: as points and as a line
    g = g + gg.stat_summary(fun_y=np.mean, geom='point')
    g = g + gg.stat_summary(fun_y=np.mean, geom='line')
    g = g + gg.facet_grid('phase + phaseNum ~ trial_type')

    file_name = '2_LearnCurves_Trial{}.png'.format(suf)
    g.save(os.path.join(plot_dir, file_name))

    return g

# Use
# Learning curves over all participants.
learn_curves_all = plot_LearnCurves_Trial(all_data, '_all')
# Learning curves without the participants flagged as chance performers
# (same selection as used for the perseverance plots). This defines the
# `learn_curves` plot that is displayed here and extended in the following cells.
learn_curves = plot_LearnCurves_Trial(all_data.loc[np.invert(all_data['chance_performer'])])
learn_curves

In [None]:
# Learning curves per participant: 'trial' on the x-axis, one color per participant (sid)
block_png = os.path.join(plot_dir, '2_LearnCurves_Block.png')
per_participant = gg.aes(x='trial', color='factor(sid)')
learn_curves2 = learn_curves + per_participant
learn_curves2.save(block_png)
print(learn_curves2)

# Same for all participants; the line type additionally encodes 'chance_performer'
block_all_png = os.path.join(plot_dir, '2_LearnCurves_Block_all.png')
per_participant_all = gg.aes(x='trial', color='factor(sid)', linetype='chance_performer')
learn_curves_all2 = learn_curves_all + per_participant_all
learn_curves_all2.save(block_all_png)

In [None]:
# Learning curves per goal star: 'trial' on the x-axis, one color per goal_star
per_goal_star = gg.aes(x='trial', color='factor(goal_star)')

star_png = os.path.join(plot_dir, '2_LearnCurves_Star.png')
learn_curves3 = learn_curves + per_goal_star
learn_curves3.save(star_png)
print(learn_curves3)

# Same for all participants
star_all_png = os.path.join(plot_dir, '2_LearnCurves_Star_all.png')
learn_curves_all3 = learn_curves_all + per_goal_star
learn_curves_all3.save(star_all_png)

In [None]:
# Learning curves of all participants, with 'star_iteration' on the x-axis
x_star_iteration = gg.aes(x='star_iteration')
learn_curves_all4 = learn_curves_all + x_star_iteration
learn_curves_all4

In [None]:
# Learning curves of all participants, with 'block' on the x-axis
x_block = gg.aes(x='block')
learn_curves_all5 = learn_curves_all + x_block
learn_curves_all5

### Population average

In [None]:
# Mean accuracy per participant, trial type, phase, phase number, and trial.
id_cols = ['sid', 'trial_type', 'phase', 'phaseNum', 'trial']
# Select 'acc' before averaging: only this column is kept in the result, and
# averaging every column would fail on non-numeric columns in recent pandas.
learn_curves_sum = all_data.groupby(id_cols)['acc'].mean().reset_index()
learn_curves_sum

In [None]:
# Population-level learning curves: summary of the per-participant means,
# drawn with the default summary geom and as a line, slightly dodged per phase
g = gg.ggplot(learn_curves_sum, gg.aes('trial', 'acc', color='phase'))
g = g + gg.stat_summary(position=gg.position_dodge(width=0.1))
g = g + gg.stat_summary(position=gg.position_dodge(width=0.1), geom='line')
g = g + gg.facet_grid('phaseNum ~ trial_type')

print(g)
block_avg_png = os.path.join(plot_dir, '2_LearnCurves_BlockAvg.png')
g.save(block_avg_png)

## Perseverance: Following old rules after transfer

In [None]:
def plot_PerseveraceLowRules(dat, suf=''):
    """Plot the per-trial mean of 'bool_middle_item_lowRulesLearnOnly', save it, and return it.

    One colored curve (points plus line) is drawn per participant (sid); the
    line type encodes 'chance_performer'. Panels are arranged with one row per
    phase / phaseNum combination and one column per trial_type.

    Parameters
    ----------
    dat : data frame with the columns 'trial', 'bool_middle_item_lowRulesLearnOnly',
        'sid', 'chance_performer', 'phase', 'phaseNum', and 'trial_type'.
    suf : str, suffix inserted into the file name before '.png'.

    Returns
    -------
    The plotnine plot, which is also saved as
    '5_PerseveranceLowRules<suf>.png' in `plot_dir`.
    """
    mapping = gg.aes('trial', 'bool_middle_item_lowRulesLearnOnly',
                     color='factor(sid)', linetype='chance_performer')
    g = gg.ggplot(dat, mapping)
    # The mean is drawn twice: as points and as a connecting line
    g = g + gg.stat_summary(fun_y=np.mean, geom='point')
    g = g + gg.stat_summary(fun_y=np.mean, geom='line')
    g = g + gg.facet_grid('phase + phaseNum ~ trial_type')

    file_name = '5_PerseveranceLowRules{}.png'.format(suf)
    g.save(os.path.join(plot_dir, file_name))

    return g

# Use
# Restrict to subtrials 1 and 3
in_subtrial_1_or_3 = all_data['subtrial'].isin([1, 3])
sub_dat = all_data.loc[in_subtrial_1_or_3]
not_chance = np.invert(sub_dat['chance_performer'])

# Once with everybody, once without the participants flagged as chance performers
pers_all = plot_PerseveraceLowRules(sub_dat, '_all')
pers = plot_PerseveraceLowRules(sub_dat.loc[not_chance])
pers

In [None]:
# Same plots for the high-level rules: swap the y variable and keep subtrial 3 only
y_high_learn = gg.aes(y='bool_unlocked_star_highRulesLearnOnly')

pers_all2 = pers_all + y_high_learn
pers_all_rows = pers_all.data
pers_all2.data = pers_all_rows.loc[pers_all_rows['subtrial'] == 3]
pers_all2.save(os.path.join(plot_dir, '5_PerseveranceHighRules_all.png'))

pers2 = pers + y_high_learn
pers_rows = pers.data
pers2.data = pers_rows.loc[pers_rows['subtrial'] == 3]
pers2.save(os.path.join(plot_dir, '5_PerseveranceHighRules.png'))
print(pers2)

## Analyzing performance by level

In [None]:
# Low-level plots with 'bool_middle_item_lowRulesTransferOnly' on the y-axis
y_low_transfer = gg.aes(y='bool_middle_item_lowRulesTransferOnly')

low_transfer_all_png = os.path.join(plot_dir, '6_DetailsLowTransferRules_all.png')
pers_all3 = pers_all + y_low_transfer
pers_all3.save(low_transfer_all_png)

low_transfer_png = os.path.join(plot_dir, '6_DetailsLowTransferRules.png')
pers3 = pers + y_low_transfer
pers3.save(low_transfer_png)
pers3

In [None]:
# High-level plots (subtrial-3 data) with 'bool_unlocked_star_highRulesTransferOnly' on the y-axis
y_high_transfer = gg.aes(y='bool_unlocked_star_highRulesTransferOnly')

high_transfer_all_png = os.path.join(plot_dir, '6_DetailsHighTransferRules_all.png')
pers_all4 = pers_all2 + y_high_transfer
pers_all4.save(high_transfer_all_png)

high_transfer_png = os.path.join(plot_dir, '6_DetailsHighTransferRules.png')
pers4 = pers2 + y_high_transfer
pers4.save(high_transfer_png)
pers4

In [None]:
# Low-level plots with 'bool_middle_item_lowRulesBoth' on the y-axis
y_low_both = gg.aes(y='bool_middle_item_lowRulesBoth')

low_both_all_png = os.path.join(plot_dir, '6_DetailsLowBothRules_all.png')
pers_all6 = pers_all + y_low_both
pers_all6.save(low_both_all_png)

low_both_png = os.path.join(plot_dir, '6_DetailsLowBothRules.png')
pers6 = pers + y_low_both
pers6.save(low_both_png)
pers6

In [None]:
# High-level plots (subtrial-3 data) with 'bool_unlocked_star_highRulesBoth' on the y-axis
y_high_both = gg.aes(y='bool_unlocked_star_highRulesBoth')

high_both_all_png = os.path.join(plot_dir, '6_DetailsHighBothRules_all.png')
pers_all5 = pers_all2 + y_high_both
pers_all5.save(high_both_all_png)

high_both_png = os.path.join(plot_dir, '6_DetailsHighBothRules.png')
pers5 = pers2 + y_high_both
pers5.save(high_both_png)
pers5

## Time frame for learning low / high rules

In [None]:
# How fast are low rules learnt? How many middle-items are produced through learning?
def plot_TimeframeLearnRules(dat, suf):
    """Plot the per-trial mean of one rule indicator, save the figure, and return it.

    The rule level and the indicator column are chosen from `suf`:

    * 'Low' in `suf`:  subtrials 1 and 3 are used; y is
      'bool_middle_item_lowRulesLearnOnly', or
      'bool_middle_item_lowRulesTransferOnly' if `suf` also contains 'Trans'.
    * 'High' in `suf`: subtrial 3 is used; y is
      'bool_unlocked_star_highRulesLearnOnly', or
      'bool_unlocked_star_highRulesTransferOnly' if `suf` also contains 'Trans'.

    'Low' takes precedence if `suf` contains both.

    Parameters
    ----------
    dat : data frame with the columns 'subtrial', 'trial_', 'phase',
        'trial_type', and the selected indicator column.
    suf : str, selects level and indicator (see above) and is used as the
        suffix of the file name.

    Returns
    -------
    The plotnine plot, which is also saved as
    '7_TimeFrameLearnRules<suf>.png' in `plot_dir`.

    Raises
    ------
    ValueError
        If `suf` contains neither 'Low' nor 'High'.
    """
    use_transfer_rules = 'Trans' in suf

    # Pick subtrials and the y column
    if 'Low' in suf:
        subtrials = [1, 3]  # Subtrials 1 and 3, in which middle-items *can* appear
        y = 'bool_middle_item_lowRulesLearnOnly'
        if use_transfer_rules:
            y = 'bool_middle_item_lowRulesTransferOnly'
    elif 'High' in suf:
        subtrials = [3]  # Subtrial 3, in which stars *can* appear
        y = 'bool_unlocked_star_highRulesLearnOnly'
        if use_transfer_rules:
            y = 'bool_unlocked_star_highRulesTransferOnly'
    else:
        raise ValueError('suf must contain either "Low", or "High".')

    # Always filter the data frame that was passed in, not a notebook-level global
    sub_dat = dat.loc[dat['subtrial'].isin(subtrials)]

    # Plot
    g = (gg.ggplot(sub_dat, gg.aes('trial_', y, color='factor(subtrial)'))
         + gg.stat_summary(fun_y=np.mean, geom='point')
         + gg.stat_summary(fun_y=np.mean, geom='line')
         + gg.facet_grid('phase ~ trial_type')
        )
    # Explicit '.png' extension, like all other figures saved by this notebook
    g.save(os.path.join(plot_dir, '7_TimeFrameLearnRules{}.png'.format(suf)))

    return g

# Use
# Low-level rules on all data: 'LowLearn' selects the 'bool_middle_item_lowRulesLearnOnly'
# indicator, 'LowTrans' the 'bool_middle_item_lowRulesTransferOnly' indicator.
# Both figures are saved; only the second call is the cell's final expression.
plot_TimeframeLearnRules(all_data, 'LowLearn_all')
plot_TimeframeLearnRules(all_data, 'LowTrans_all')

In [None]:
# High-level rules on all data: 'HighLearn' selects the 'bool_unlocked_star_highRulesLearnOnly'
# indicator, 'HighTrans' the 'bool_unlocked_star_highRulesTransferOnly' indicator.
# Both figures are saved; only the second call is the cell's final expression.
plot_TimeframeLearnRules(all_data, 'HighLearn_all')
plot_TimeframeLearnRules(all_data, 'HighTrans_all')

## Overall rules in training / transfer high / low

In [None]:
# # Hypothesis: Previous rules (high-level or low-level) persist significantly after they stop being valuable
# # (compared to baseline of not-yet-learned rules). There is a difference between high- and low-level (especially
# # early-on, i.e., in the first few blocks).
# g = (gg.ggplot(item_sum_dat, gg.aes('trial_type', color='factor(sid)', group='factor(sid)'))
#      + gg.geom_point()
#      + gg.geom_line()
#      + gg.facet_grid('phaseNum ~ phase')
#     )

# for col in goal_cols:
#     name = col.split('_')[-1]
#     g_col = g + gg.aes(y=col)
#     g_col.save(os.path.join(plot_dir, '8_Performance{}.png'.format(name)))
#     print(g_col)

## Forgetting of old rules and learning of new rules during transfer

In [None]:
# NOTE(review): `goal_cols` (the list of rule-indicator columns to summarize) is not
# defined in any cell shown here -- it must be defined before this cell is run.
# 'phaseNum' is part of the grouping keys because the plot built from `sum_rules`
# facets on it; without it the column would be missing from the summary.
id_cols = ['sid', 'phase', 'phaseNum', 'trial_type', 'block']

# Mean of every rule indicator per participant / phase / phaseNum / trial type / block,
# using subtrials 1 and 3 only.
rule_rows = all_data.loc[all_data.subtrial.isin([1, 3]), id_cols + goal_cols]
sum_rules = rule_rows.groupby(id_cols).mean().reset_index()

# Long format: one row per grouping-key combination and rule
sum_rules = sum_rules.melt(id_vars=id_cols, var_name='rule', value_name='acc')

# Double the values of all rules whose name contains 'high'.
# NOTE(review): presumably compensates for averaging over two subtrials (1 and 3)
# although stars can only appear in subtrial 3 -- confirm.
is_high_rule = sum_rules['rule'].str.contains('high', regex=False)
sum_rules.loc[is_high_rule, 'acc'] *= 2
sum_rules

In [None]:
# Hypothesis: middle-layer items are easier (faster) to learn, but harder (slower) to unlearn than high-level stars
# Reason: There is a difference in the level of abstraction. The deeper down, the harder to unlearn. Maybe only
# the top-level is still malleable.
import copy  # cell-local import: used to give each plot its own copy of the base plot

# Base plot without data; the data is attached separately for the high and the low rules
g = (gg.ggplot(gg.aes('block', 'acc', color='rule'))
     + gg.stat_summary(position=gg.position_dodge(width=0.1))
     + gg.stat_summary(position=gg.position_dodge(width=0.1), geom='line')
     + gg.coord_cartesian(ylim=(0, 1))
     + gg.facet_grid('phaseNum ~ phase + trial_type')
    )

# Rules whose name contains 'high', restricted to the 'high' phase.
# A deep copy is used so that g_high, g_low, and g stay independent objects;
# plain assignment would make all three names refer to one plot, and attaching
# the low-rule data below would silently replace the data of g_high as well.
g_high = copy.deepcopy(g)
g_high.data = sum_rules.loc[
    sum_rules.rule.isin([c for c in set(sum_rules.rule) if 'high' in c]) &
    (sum_rules.phase == 'high') #& (sum_rules.trial_type == 'transfer')
]
g_high.save(os.path.join(plot_dir, '10_ForgettingRelearningHighRules.png'))
print(g_high)

# Rules whose name contains 'low', restricted to the 'low' phase
g_low = copy.deepcopy(g)
g_low.data = sum_rules.loc[
    sum_rules.rule.isin([c for c in set(sum_rules.rule) if 'low' in c]) &
    (sum_rules.phase == 'low') #& (sum_rules.trial_type == 'transfer')
]
g_low.save(os.path.join(plot_dir, '10_ForgettingRelearninglowRules.png'))
print(g_low)