# Prep

## Imports & paths

In [None]:
import json
import numpy as np
import os
import pandas as pd
import plotnine as gg
# Make plotnine's classic theme the default for the figures created below.
gg.theme_set(gg.theme_classic)
# Baseline figure size; cells that temporarily change gg.options.figure_size restore this value.
default_figure_size = (6.4, 4.8)

In [None]:
# Folder holding this dataset's CSV files; swap the commented line to analyse the other dataset.
# data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/mTurk1'
data_dir = 'C:/Users/maria/MEGAsync/Berkeley/CHaRLy/data/RPP34'
# Build the path with os.path.join so figures go into a 'figures' sub-folder of data_dir
# (plain string concatenation produced a sibling folder whose name ended in 'RPP34figures').
plot_dir = os.path.join(data_dir, 'figures')
# exist_ok=True makes the call a no-op when the folder is already there.
os.makedirs(plot_dir, exist_ok=True)

## Get all_data

In [None]:
# Load the rule table (first CSV column becomes the index) and preview
# the first 30 rows whose phase is 'high'.
rule_data = pd.read_csv(os.path.join(data_dir, 'rule_data.csv'), index_col=0)
rule_data[rule_data['phase'] == 'high'].head(30)

In [None]:
# Load the trial-level table (first CSV column becomes the index).
all_data = pd.read_csv(os.path.join(data_dir, 'all_data.csv'), index_col=0)
# incl_data keeps only rows flagged neither as chance_performer nor as inattentives.
incl_data = all_data.loc[~all_data['chance_performer'] & ~all_data['inattentives']]
all_data

In [None]:
# Columns of all_data selected for inspection. Commented-out entries are currently excluded.
# The order matters: a later cell uses interesting_cols[:-2], i.e. everything except the
# last two entries ('timeout', 'star_iteration').
interesting_cols = [
#     'sid',
    'trial_type', 'phase', 'block', 'trial', 'trial_', 'subtrial', 'points', 'rt', #'key_press',
    'action_id', 'action_name', 'middle_item', 'middle_item_name', 'goal_star', 'goal_star_name', 'correct', 'unlocked_star',
    'unlocked_star_name', 'timeout', 'star_iteration',
#     'middle_item_lowTransferRules', 'middle_item_lowRules', 'bool_middle_item_lowRules',
#     'unlocked_star_highTransferRules', 'unlocked_star_highRules', 'bool_unlocked_star_highRules',
#     'chance_performer',
]

## Functions to make dataframes

In [None]:
# rt_sum
def get_rt_sum(data, id_cols):
    """Summarise reaction times per subtrial relative to each group's mean RT.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns in `id_cols` plus 'subtrial', 'rt' and 'acc'.
    id_cols : list of str
        Columns that together define one group.

    Returns
    -------
    pandas.DataFrame
        One row per group and subtrial with the columns `id_cols`, 'mean_rt'
        (mean of 'rt' over the whole group), 'acc' (mean over the whole group),
        'subtrial', 'rt' (mean within group and subtrial) and
        'rt_delta' = 'rt' - 'mean_rt'.
    """
    id_cols = list(id_cols)

    # Pick the needed columns *before* averaging: taking the mean of the whole
    # frame raises a TypeError in pandas >= 2.0 once `data` has string columns.
    rt_sum_dat_subtrial = data.groupby(id_cols + ['subtrial'])['rt'].mean().reset_index()
    rt_sum_dat = data.groupby(id_cols)[['rt', 'acc']].mean().reset_index()
    rt_sum_dat = rt_sum_dat.rename(columns={'rt': 'mean_rt'})

    # Inner merge repeats each group's mean_rt / acc on every one of its subtrial rows.
    rt_sum = pd.merge(rt_sum_dat, rt_sum_dat_subtrial, on=id_cols)
    rt_sum['rt_delta'] = rt_sum['rt'] - rt_sum['mean_rt']

    return rt_sum

# # Example use
# get_rt_sum(incl_data, id_cols=['sid', 'phase', 'trial_type'])

In [None]:
def rt_sum2zigzag(rt_sum, id_cols):
    """Reshape per-subtrial RT deltas to wide format and compute the zigzag score.

    `rt_sum` needs the columns in `id_cols` plus 'acc', 'subtrial' and 'rt_delta'.
    The result has one row per combination of `id_cols` and 'acc', the columns
    'rt_delta_0' ... 'rt_delta_3' (one per subtrial 0-3), and
    'rt_zigzag' = rt_delta_0 - rt_delta_1 + rt_delta_2 - rt_delta_3.
    """
    wide = rt_sum.pivot_table(values='rt_delta', index=id_cols + ['acc'], columns='subtrial')
    wide = wide.reset_index()

    # Subtrial numbers 0-3 become named delta columns.
    delta_names = {subtrial: 'rt_delta_{}'.format(subtrial) for subtrial in range(4)}
    wide = wide.rename(columns=delta_names)

    wide['rt_zigzag'] = (
        wide['rt_delta_0'] - wide['rt_delta_1'] + wide['rt_delta_2'] - wide['rt_delta_3']
    )

    return wide

# # Example use
# rt_sum2zigzag(rt_sum, id_cols=['sid', 'phase', 'trial_type'])

In [None]:
def add_info_to_rt_wide(rt_wide, id_cols, all_data):
    """Attach group-level means computed from `all_data` to the wide RT table.

    Parameters
    ----------
    rt_wide : pandas.DataFrame
        Wide table that contains the columns in `id_cols`.
    id_cols : list of str
        Columns used both for grouping `all_data` and as merge keys.
    all_data : pandas.DataFrame
        Must contain `id_cols` and the info columns listed in the body.

    Returns
    -------
    pandas.DataFrame
        `rt_wide` inner-merged on `id_cols` with the per-group means of the
        info columns.
    """
    id_cols = list(id_cols)
    info_cols = [
        'goal_star', 'star_iteration',  # Only meaningful for rt_wide_block
        'rt',
        'bool_middle_item_lowRulesLearnOnly', 'bool_middle_item_lowRulesTransferOnly',
        'bool_unlocked_star_highRulesLearnOnly', 'bool_unlocked_star_highRulesTransferOnly',
    ]
    # A grouping column cannot be averaged as well; skip any already used as an id column.
    info_cols = [col for col in info_cols if col not in id_cols]

    # Pick the columns *before* averaging: taking the mean of the whole frame
    # raises a TypeError in pandas >= 2.0 once `all_data` has string columns.
    add_dat = all_data.groupby(id_cols)[info_cols].mean().reset_index()

    return pd.merge(rt_wide, add_dat, on=id_cols)

# # Example use
# id_cols = ['sid', 'phase', 'trial_type', 'block']
# add_info_to_rt_wide(rt_wide_block, id_cols, all_data)

In [None]:
# Build the per-group tables rt_sum (long) and rt_wide (wide), grouped without 'block'.
# RT summaries come from incl_data; the extra info columns are averaged over all_data.
id_cols = ['sid', 'chance_performer', 'phase', 'phaseNum', 'trial_type']
rt_sum = get_rt_sum(incl_data, id_cols)
rt_wide = rt_sum2zigzag(rt_sum, id_cols)
rt_wide = add_info_to_rt_wide(rt_wide, id_cols, all_data)
# The function itself marks these two columns as only meaningful at block level.
rt_wide = rt_wide.drop(columns=['goal_star', 'star_iteration'])
rt_wide

In [None]:
# Get rt_sum_block
# Same three steps as for rt_wide, but with 'block' added to the grouping columns.
# Note that the RT summary here is computed from all_data (not incl_data), and that
# 'goal_star' / 'star_iteration' are kept in the result.
id_cols = ['sid', 'chance_performer', 'phase', 'phaseNum', 'trial_type', 'block']
rt_sum_block = get_rt_sum(all_data, id_cols)
rt_wide_block = rt_sum2zigzag(rt_sum_block, id_cols)
rt_wide_block = add_info_to_rt_wide(rt_wide_block, id_cols, all_data)
rt_wide_block

# Results

## Population-wide

In [None]:
# Plot rt_delta against subtrial from rt_sum: one dodged point per row and a dotted line
# per sid, plus two stat_summary layers (default geom and a line) across all rows.
# Panels: rows = phase, columns = trial_type. The figure is saved to plot_dir and printed.
g = (gg.ggplot(rt_sum, gg.aes('subtrial', 'rt_delta'))
     + gg.geom_point(gg.aes(color='factor(sid)'), position=gg.position_dodge(width=0.2))
     + gg.geom_line(gg.aes(color='factor(sid)'), position=gg.position_dodge(width=0.2), linetype='dotted')
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.facet_grid('phase ~ trial_type')
)
g.save(os.path.join(plot_dir, '01_RTOverSubtrial.png'))
print(g)

In [None]:
# There is more extra zigzag during low transfer than high transfer
# => because middle-layer items need to be re-learned
# Plot: block-level rt_zigzag summarised per star_iteration (treated as a factor),
# coloured and grouped by phase; panels are phaseNum (rows) x trial_type (columns).
g = (gg.ggplot(rt_wide_block, gg.aes('factor(star_iteration)', 'rt_zigzag', color='phase', group='phase'))
     + gg.stat_summary(position=gg.position_dodge(width=0.1))
     + gg.stat_summary(position=gg.position_dodge(width=0.1), geom='line')
     + gg.facet_grid('phaseNum ~ trial_type', labeller='label_context')
    )
g.save(os.path.join(plot_dir, '01_RtzigzagOverStarIteration.png'))
print(g)

In [None]:
def zscore(values):
    """Standardise `values`: subtract their mean, then divide by their standard deviation.

    Mean and standard deviation are taken with np.mean / np.std over all of `values`.
    """
    centered = values - np.mean(values)
    spread = np.std(values)
    return centered / spread

# Example use
zscore(np.arange(6))

In [None]:
def zigzag(rts):
    """Return the alternating sum rts[0] - rts[1] + rts[2] - rts[3].

    `rts` must hold exactly four values (checked with an assert).
    """
    assert len(rts) == 4
    first, second, third, fourth = (rts[pos] for pos in range(4))
    return first - second + third - fourth

# Example use
zigzag([1, -1, 1, -1])

In [None]:
def add_zrt_and_rtzigzag(all_data, verbose=True):
    """Add the columns 'z_rt' and 'rt_zigzag' to `all_data` in place.

    For every combination of sid, phase, trial_type and block that has rows:

    * 'z_rt' is 'rt' z-scored within that subset (see `zscore`);
    * for every trial with exactly four rows in the subset, 'rt_zigzag' is set on
      all four rows to `zigzag` of their 'z_rt' values (in row order).

    Parameters
    ----------
    all_data : pandas.DataFrame
        Needs the columns 'sid', 'phase', 'trial_type', 'block', 'trial' and 'rt'.
        Modified in place.
    verbose : bool
        When True, also report combinations without rows and trials that do not
        have four rows. The per-sid progress line is printed regardless.

    Returns
    -------
    None
    """
    # The key columns are never modified below, so their unique values can be
    # collected once instead of inside every loop iteration.
    sids = np.unique(all_data['sid'])
    phases = np.unique(all_data['phase'])
    trial_types = np.unique(all_data['trial_type'])
    blocks = np.unique(all_data['block'])
    trials = np.unique(all_data['trial'])

    for sid in sids:
        print('sid {} / {}'.format(sid, len(sids)))
        sid_rows = all_data['sid'] == sid

        for phase in phases:
            phase_rows = sid_rows & (all_data['phase'] == phase)

            for trial_type in trial_types:
                type_rows = phase_rows & (all_data['trial_type'] == trial_type)

                for block in blocks:

                    # Rows for this sid, this phase, this trial_type, and this block
                    sub_idx = type_rows & (all_data['block'] == block)

                    if np.sum(sub_idx) == 0:
                        if verbose:
                            print('\tsid {}, phase {}, trial_type {}, block {} does not exist.'.format(sid, phase, trial_type, block))
                        continue

                    # Add z-scored RTs (z-scored within this subset)
                    all_data.loc[sub_idx, 'z_rt'] = zscore(all_data.loc[sub_idx, 'rt'])

                    # Add RT zigzag for every trial that has all four subtrial rows
                    for trial in trials:
                        sub_idxx = sub_idx & (all_data['trial'] == trial)
                        n_subtrials = np.sum(sub_idxx)

                        if n_subtrials == 4:
                            all_data.loc[sub_idxx, 'rt_zigzag'] = zigzag(all_data.loc[sub_idxx, 'z_rt'].values)
                        elif verbose:
                            # Report which parts of the data were missing
                            print('\t\tsid {}, phase {}, trial_type {}, block {}, trial {} has {} subtrials.'
                                  .format(sid, phase, trial_type, block, trial, n_subtrials))

# Use
add_zrt_and_rtzigzag(all_data)

In [None]:
# Subset of all_data: rows whose sid is one of 0, 1, ..., 9.
sub_dat = all_data.loc[all_data['sid'].isin(np.arange(10))]

In [None]:
# z_rt summarised per subtrial over all rows of all_data; panels are phase x trial_type.
# Leaving `g` as the last expression displays the plot in the notebook.
g = (gg.ggplot(all_data, gg.aes('subtrial', 'z_rt'))
     + gg.stat_summary()
     + gg.facet_grid('phase ~ trial_type')
    )
g

In [None]:
# Trial-level rt_zigzag summarised per block, coloured by phase;
# panels are phaseNum (rows) x trial_type (columns).
g = (gg.ggplot(all_data, gg.aes('block', 'rt_zigzag', color='phase'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.facet_grid('phaseNum ~ trial_type')
    )
g

In [None]:
# Trial-level rt_zigzag summarised per star_iteration, coloured by phase;
# panels are phaseNum (rows) x trial_type (columns).
g = (gg.ggplot(all_data, gg.aes('star_iteration', 'rt_zigzag', color='phase'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.facet_grid('phaseNum ~ trial_type')
    )
g

In [None]:
# Trial-level rt_zigzag summarised per value of acc (pointrange geom), coloured by phase;
# panels are phaseNum (rows) x trial_type (columns).
g = (gg.ggplot(all_data, gg.aes('acc', 'rt_zigzag', color='phase'))
     + gg.stat_summary(geom='pointrange')
     + gg.facet_grid('phaseNum ~ trial_type')
    )
g

In [None]:
# Mean rt_zigzag per sid x phase x trial_type, then low and high phase side by side.
id_cols = ['sid', 'phase', 'trial_type']
# Select 'rt_zigzag' before averaging: taking the mean of the whole frame raises a
# TypeError in pandas >= 2.0 because all_data also holds string columns.
dat = all_data.groupby(id_cols)['rt_zigzag'].mean().reset_index()
# Inner merge: only sid x trial_type combinations present in both phases are kept.
dat2 = pd.merge(
    dat.loc[dat['phase'] == 'low'].drop(columns=['phase']),
    dat.loc[dat['phase'] == 'high'].drop(columns=['phase']),
    on=['sid', 'trial_type'],
    suffixes=['_low', '_high']
)
dat2['rt_zigzag_low_minus_high'] = dat2['rt_zigzag_low'] - dat2['rt_zigzag_high']
dat2

In [None]:
# Low-minus-high rt_zigzag difference from dat2, summarised per trial_type.
g = (gg.ggplot(dat2, gg.aes('trial_type', 'rt_zigzag_low_minus_high'))
     + gg.stat_summary()
    )
g

In [None]:
# Mean rt_zigzag and acc per sid x trial_type x block x goal_star x phase,
# then low and high phase side by side.
id_cols = ['sid', 'trial_type', 'block', 'goal_star']
interest_cols = ['rt_zigzag', 'acc']
# Select the columns of interest before averaging: taking the mean of the whole frame
# raises a TypeError in pandas >= 2.0 because all_data also holds string columns.
dat = all_data.groupby(id_cols + ['phase'])[interest_cols].mean().reset_index()
# Inner merge: only id combinations present in both phases are kept.
dat2 = pd.merge(
    dat.loc[dat['phase'] == 'low'].drop(columns=['phase']),
    dat.loc[dat['phase'] == 'high'].drop(columns=['phase']),
    on=id_cols,
    suffixes=['_low', '_high']
)
dat2['rt_zigzag_low_minus_high'] = dat2['rt_zigzag_low'] - dat2['rt_zigzag_high']
dat2

In [None]:
# Group-level rt_zigzag (from dat) summarised per block, coloured by phase;
# one panel column per trial_type.
g = (gg.ggplot(dat, gg.aes('block', 'rt_zigzag', color='phase'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.facet_grid(' ~ trial_type')
    )
g

In [None]:
# Low-minus-high rt_zigzag difference (from dat2) summarised per block, with a dotted
# reference line at 0 (no difference between phases); one panel column per trial_type.
g = (gg.ggplot(dat2, gg.aes('block', 'rt_zigzag_low_minus_high'))
     + gg.stat_summary()
     + gg.stat_summary(geom='line')
     + gg.geom_hline(yintercept=0, linetype='dotted')
     + gg.facet_grid('~ trial_type')
    )
g

In [None]:
# acc against rt_zigzag from dat: one point per row (colour = sid, shape = goal_star),
# faint lines connecting each sid's points, and one black smooth over all points per panel.
# Panels: rows = phase, columns = trial_type + goal_star. Saving is currently disabled.
g = (gg.ggplot(dat, gg.aes('rt_zigzag', 'acc', color='factor(sid)', shape='factor(goal_star)'))
     + gg.geom_point()
     + gg.geom_line(gg.aes(group='factor(sid)'), alpha=0.2)
     + gg.geom_smooth(group=1, color='black')
     + gg.facet_grid('phase ~ trial_type + goal_star')
    )
print(g)
# g.save(os.path.join(plot_dir, '01_RtzigzagAcc_0.png'))

# g2 = g
# g2.data = rt_wide_block.loc[rt_wide_block.acc > 0]
# print(g2)
# g2.save(os.path.join(plot_dir, '01_RtzigzagAcc.png'))

In [None]:
# Show the columns of interest (minus the last two list entries) next to the two derived columns.
all_data[interesting_cols[:-2] + ['z_rt', 'rt_zigzag']]

In [None]:
# Show the rows for which z_rt is NaN.
all_data.loc[(np.isnan(all_data['z_rt']))]

In [None]:
# For the sub_dat subset: raw z_rt points per subtrial with a stat_summary bar layer on top.
(gg.ggplot(sub_dat, gg.aes('subtrial', 'z_rt'))
 + gg.geom_point()
 + gg.stat_summary(geom='bar')
)

In [None]:
# NOTE: this cell originally started with an empty subscript, `all_data[]`, which is a
# SyntaxError and kept the whole cell from running. It is preserved here only as a
# comment because the intended column selection is unknown.
# all_data[]
# RTs of the rows where subtrial == 0.
all_data.loc[all_data['subtrial'] == 0, 'rt']

In [None]:
# Display the block-level wide table.
rt_wide_block

In [None]:
# acc against rt_zigzag from rt_wide (one point per row, coloured by sid) with a single
# smooth over all points in each phase x trial_type panel. Saved to plot_dir and printed.
g = (gg.ggplot(rt_wide, gg.aes('rt_zigzag', 'acc', color='factor(sid)'))
     + gg.geom_point()
     + gg.geom_smooth(group=1)
     + gg.facet_grid('phase ~ trial_type')
    )
g.save(os.path.join(plot_dir, '01_CorrelationRtzigzagPerformance.png'))
print(g)

## Individuals

In [None]:
def plot_RTOverSubtrialDetails(dat, suf=''):
    """Plot per-participant RT summaries over subtrials, save the figure, and return it.

    `dat` needs the columns 'subtrial', 'rt', 'sid', 'chance_performer', 'phase',
    'phaseNum' and 'trial_type'. `suf` is inserted into the file name just before
    '.png'; the file is written into the notebook-level `plot_dir`.
    """
    aesthetics = gg.aes('subtrial', 'rt', color='factor(sid)', group='sid', linetype='chance_performer')

    plot = gg.ggplot(dat, aesthetics)
    plot = plot + gg.stat_summary(position=gg.position_dodge(width=0.2))
    plot = plot + gg.stat_summary(position=gg.position_dodge(width=0.2), geom='line')
    plot = plot + gg.facet_grid('phase ~ phaseNum + trial_type')

    out_path = os.path.join(plot_dir, '01_RTOverSubtrial_Details{}.png'.format(suf))
    plot.save(out_path)

    return plot

# Use
rts_all = plot_RTOverSubtrialDetails(all_data, '_all')
plot_RTOverSubtrialDetails(incl_data)

## RT pattern || behavior on a single-block level

In [None]:
# Temporarily enlarge the figure for this plot; the default size is restored at the end.
gg.options.figure_size = (10, 7)
# Block-level rt_zigzag per block: dodged points (size = acc, shape = goal_star,
# colour = sid) connected by one line per sid (linetype = chance_performer);
# panels are phase x trial_type.
g = (gg.ggplot(rt_wide_block, gg.aes('block', 'rt_zigzag', color='factor(sid)', shape='factor(goal_star)', linetype='chance_performer'))
     + gg.geom_point(gg.aes(size='acc'), position=gg.position_dodge(width=0.5))
     + gg.geom_line(gg.aes(group='factor(sid)'), position=gg.position_dodge(width=0.5))
     + gg.facet_grid('phase ~ trial_type')
    )
g.save(os.path.join(plot_dir, '01_RtzigzagOverBlocks.png'))
print(g)
gg.options.figure_size = default_figure_size

In [None]:
# Block-level rt_zigzag against acc: points (colour = sid, shape = goal_star), faint lines
# connecting each sid's points, and one black smooth over all points per panel.
g = (gg.ggplot(rt_wide_block, gg.aes('acc', 'rt_zigzag', color='factor(sid)', shape='factor(goal_star)'))
     + gg.geom_point()
     + gg.geom_line(gg.aes(group='factor(sid)'), alpha=0.2)
     + gg.geom_smooth(group=1, color='black')
     + gg.facet_grid('phase ~ trial_type')
    )
print(g)
# First version: all rows of rt_wide_block.
g.save(os.path.join(plot_dir, '01_RtzigzagAcc_0.png'))

# Second version: only rows with acc > 0.
# NOTE(review): `g2 = g` does not copy the plot; g2 and g are the same object, so
# assigning g2.data also replaces the data of `g`. Any later cell that builds on `g`
# therefore uses the acc > 0 subset as well. Confirm whether that is intended.
g2 = g
g2.data = rt_wide_block.loc[rt_wide_block.acc > 0]
print(g2)
g2.save(os.path.join(plot_dir, '01_RtzigzagAcc.png'))

In [None]:
# RT zigzag supports learning middle-layer items: no zigzag = no middle-layer items
# lots of zigzag = intermediate performance (learning); no zigzag = perfect performance (no discrimination)
# Same plot as `g`, with 'bool_middle_item_lowRulesLearnOnly' on the x axis instead.
gll = g + gg.aes(x='bool_middle_item_lowRulesLearnOnly')
gll.save(os.path.join(plot_dir, '01_RtzigzagLoWRulesLearn.png'))
gll

In [None]:
# ???
# Same plot as `g`, with 'bool_middle_item_lowRulesTransferOnly' on the x axis instead.
glt = g + gg.aes(x='bool_middle_item_lowRulesTransferOnly')
glt.save(os.path.join(plot_dir, '01_RtzigzagLoWRulesTransfer.png'))
glt

In [None]:
# Good star performance comes AFTER learning middle-layer items => when RT zigzag is gone completely.
# Same plot as `g`, with 'bool_unlocked_star_highRulesLearnOnly' on the x axis instead.
ghl = g + gg.aes(x='bool_unlocked_star_highRulesLearnOnly')
ghl.save(os.path.join(plot_dir, '01_RtzigzagHighRulesLearn.png'))
ghl

In [None]:
# High-level transfer does NOT require relearning middle-layer sequences
# => RT zigzag occurs in the beginning, when trying out different middle-layer sequences, and then dies out as we learn them
# Same plot as `g`, with 'bool_unlocked_star_highRulesTransferOnly' on the x axis instead.
ght = g + gg.aes(x='bool_unlocked_star_highRulesTransferOnly')
ght.save(os.path.join(plot_dir, '01_RtzigzagHighRulesTransfer.png'))
ght