In [1]:
try:
    import davos
except:
    %pip install davos
    import davos

davos.config.suppress_stdout = False

In [2]:
smuggle pandas as pd                                         # pip: pandas==1.5.1
smuggle numpy as np                                          # pip: numpy==1.22.3
smuggle seaborn as sns                                       # pip: seaborn==0.12.1
smuggle dill as pickle                                       # pip: dill==0.3.6

smuggle h5py                                                 # pip: h5py==3.7.0
smuggle pathos                                               # pip: pathos==0.3.0
smuggle quail                                                # pip: quail==0.2.2
smuggle requests                                             # pip: requests==2.28.1
smuggle jinja2                                                # pip: jinja2==3.1.2
smuggle os
smuggle pathlib
smuggle warnings
smuggle string
smuggle random as rand
from copy smuggle copy

from tqdm smuggle tqdm                                       # pip: tqdm==4.64.1
from matplotlib smuggle pyplot as plt                        # pip: matplotlib==3.6.2
from matplotlib.ticker smuggle MaxNLocator
from pathos.multiprocessing smuggle ProcessingPool as Pool   # pip: pathos==0.3.0
from multiprocessing smuggle cpu_count                       # pip: multiprocess==0.70.14
from sklearn.decomposition smuggle IncrementalPCA as PCA     # pip: scikit-learn==1.1.3
from scipy smuggle stats                                     # pip: scipy==1.10.0
from statsmodels.stats.multitest import multipletests        # pip: statsmodels==0.14.0

# local functions
from dataloader import datadir, grouping, feature_groupings, descriptions, sort_by_grouping, fetch_data
from analyze import analyze_data, recover_fingerprint_features, organize_by_listgroup, random, adaptive, non_adaptive_exclude_random, \
                    select_conds, select_lists, filter, get_diffs, stack_diffs, pnr_matrix, accuracy2df, adaptive_listnum2cond, \
                    clustering_matrices, average_by_cond, rename_features, fingerprint2temporal, get_boundaries, \
                    recall_accuracy_near_boundaries, results, results_by_list, analyses, listgroups, orders, ttest, merge_results, \
                    create_clustering_df

Set seeds for deterministic output (affects bootstrap-estimated confidence intervals)

In [3]:
np.random.seed(42)
rand.seed(42)

Set up functions for creating and saving tables of $t$-tests

In [4]:
table_dir = pathlib.Path(datadir).parent.parent.joinpath('paper', 'tables')
table_dir.mkdir(exist_ok=True)

class TTestTable(object):
    @staticmethod
    def _rename(name):
        replace = {'firstLetter': 'first letter', 'first_letter': 'first letter', 'wordLength': 'length', 'pos': 'location',
                   'temporal': 'temp', 'accuracy': 'acc', 'clustering': 'clust',
                   'semantic': 'sem', 'lexicographic': 'lex', 'visual': 'vis',
                   'category': 'cat', 'size': 'sz', 'length': 'len', 'location': 'loc', 'color': 'clr', 'first letter': '1\\textsuperscript{st} ltr'}
        
        name = name.lower()
        for k, v in replace.items():
            name = name.replace(k.lower(), v.lower())        
        return name.capitalize()

    def __init__(self):
        self.table = pd.DataFrame(columns=['$t$-value', 'df', 'Cohen\'s $d$', '$p$-value (raw)', '$p$-value (corrected)', '95\% CI (lower bound)', '95\% CI (upper bound)'])
    
    def add_stat(self, name, results, **kwargs):
        t, df, d, p_raw, ci_lower, ci_upper = results
        self.table.loc[TTestTable._rename(name)] = [t, df, d, p_raw, None, ci_lower, ci_upper]
        self.correct_pvalues(**kwargs)
    
    def correct_pvalues(self, alpha=0.05, method='fdr_bh'):
        _, p_corrected, _, _ = multipletests(self.table['$p$-value (raw)'], alpha=alpha, method=method)
        self.table['$p$-value (corrected)'] = p_corrected
    
    def to_latex(self, fname=None, **kwargs):        
        self.correct_pvalues(**kwargs)
        
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return self.table.to_latex(buf=fname, escape=False, formatters={
                '$t$-value': lambda x: f'{x:.3f}',
                'df': lambda x: f'{x:.0f}',
                'Cohen\'s $d$': lambda x: f'{x:.3f}',
                '$p$-value (raw)': lambda x: f'{x:.3f}' if x > 0.001 else '$< 0.001$',
                '$p$-value (corrected)': lambda x: f'{x:.3f}' if x > 0.001 else '$< 0.001$',
                '95\% CI (lower bound)': lambda x: f'{x:.3f}',
                '95\% CI (upper bound)': lambda x: f'{x:.3f}'
            })

Keep track of abbreviations used in tables

In [5]:
abbreviations = {'temporal': 'temp', 'accuracy': 'acc', 'clustering': 'clust',
                 'category': 'cat', 'size': 'sz', 'length': 'len',
                 'location': 'loc', 'color': 'clr', 
                 'first letter': '1\\textsuperscript{st} ltr'}

# reverse the dictionary and turn it into a dataframe

abbreviations = pd.DataFrame.from_dict({v: k for k, v in abbreviations.items()}, orient='index').reset_index()
abbreviations.columns = ['Abbreviation', 'Description']
abbreviations

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    abbreviations.to_latex(buf=table_dir.joinpath('abbreviations.tex'), index=False, escape=False)

# Performance on *feature-rich* versus *reduced* lists:
  - accuracy
  - temporal clustering
  - non-visual feature-based clustering (category, size, length, first letter)

In [6]:
feature_rich_vs_reduced = TTestTable()

print('Accuracy for feature-rich vs. reduced (all lists):')
feature_rich_vs_reduced.add_stat('Accuracy', ttest(results['accuracy']['feature-rich'], results['accuracy']['reduced']))

Accuracy for feature-rich vs. reduced (all lists):
t(126) = -0.290,~p = 0.772,~d = -0.051,~\mathrm{CI} = [-2.387,~1.768]


In [7]:
print('Temporal clustering for feature-rich vs. reduced (all lists):')
feature_rich_vs_reduced.add_stat('Temporal clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['reduced'], x_col='temporal', y_col='temporal'))

Temporal clustering for feature-rich vs. reduced (all lists):
t(126) = 10.632,~p < 0.001,~d = 1.882,~\mathrm{CI} = [7.786,~14.386]


In [8]:
features = ['category', 'size', 'wordLength', 'firstLetter']
for i, f in enumerate(features):
    print(f'{f} clustering for feature-rich vs. reduced (all lists):')
    feature_rich_vs_reduced.add_stat(f'{f} clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['reduced'], x_col=f, y_col=f))

    if i < len(features) - 1:
        print('\n')

category clustering for feature-rich vs. reduced (all lists):
t(126) = 10.148,~p < 0.001,~d = 1.796,~\mathrm{CI} = [7.324,~13.778]


size clustering for feature-rich vs. reduced (all lists):
t(126) = 12.033,~p < 0.001,~d = 2.129,~\mathrm{CI} = [9.030,~15.918]


wordLength clustering for feature-rich vs. reduced (all lists):
t(126) = 10.720,~p < 0.001,~d = 1.897,~\mathrm{CI} = [7.442,~15.174]


firstLetter clustering for feature-rich vs. reduced (all lists):
t(126) = 6.679,~p < 0.001,~d = 1.182,~\mathrm{CI} = [4.490,~9.611]


In [9]:
feature_rich_vs_reduced.to_latex(table_dir.joinpath('feature_rich_vs_reduced.tex'), alpha=0.05, method='fdr_bh')

# Performance on {*feature-rich*, *reduced*} lists versus *reduced ({early, late})* lists
We'll compare early and late lists separately (e.g., early to early, late to late, early to late, etc.)

Metrics:
  - accuracy
  - temporal clustering
  - non-visual feature-based clustering (category, size, length, first letter)

## Early vs. late (all conditions and metrics)

### Accuracy

In [10]:
early_vs_late_fr = TTestTable()     # feature rich
early_vs_late_r = TTestTable()      # reduced 
early_vs_late_early = TTestTable()  # reduced (early)
early_vs_late_late = TTestTable()   # reduced (late)

print('Accuracy for feature-rich (early lists) vs. feature-rich (late lists):')
early_vs_late_fr.add_stat('Accuracy', ttest(results['accuracy']['feature-rich'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Late', independent_sample=False))

Accuracy for feature-rich (early lists) vs. feature-rich (late lists):


t(66) = 4.553,~p < 0.001,~d = 0.233,~\mathrm{CI} = [2.427,~7.262]


In [11]:
print('Accuracy for reduced (early lists) vs. reduced (late lists):')
early_vs_late_r.add_stat('Accuracy', ttest(results['accuracy']['reduced'], results['accuracy']['reduced'], x_lists='Early', y_lists='Late', independent_sample=False))

Accuracy for reduced (early lists) vs. reduced (late lists):
t(60) = 2.434,~p = 0.018,~d = 0.134,~\mathrm{CI} = [0.493,~4.910]


In [12]:
print('Accuracy for reduced (early) (early lists) vs. reduced (early) (late lists):')
early_vs_late_early.add_stat('Accuracy', ttest(results['accuracy']['reduced (early)'], results['accuracy']['reduced (early)'], x_lists='Early', y_lists='Late', independent_sample=False))

Accuracy for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 1.499,~p = 0.141,~d = 0.098,~\mathrm{CI} = [-0.345,~3.579]


In [13]:
print('Accuracy for reduced (late) (early lists) vs. reduced (late) (late lists):')
early_vs_late_late.add_stat('Accuracy', ttest(results['accuracy']['reduced (late)'], results['accuracy']['reduced (late)'], x_lists='Early', y_lists='Late', independent_sample=False))

Accuracy for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 1.462,~p = 0.152,~d = 0.121,~\mathrm{CI} = [-0.376,~2.993]


### Temporal clustering

In [14]:
print('Temporal clustering for feature-rich (early lists) vs. feature-rich (late lists):')
early_vs_late_fr.add_stat('Temporal clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['feature-rich'], x_col='temporal', y_col='temporal', x_lists='Early', y_lists='Late', independent_sample=False))

Temporal clustering for feature-rich (early lists) vs. feature-rich (late lists):
t(66) = 2.268,~p = 0.027,~d = 0.181,~\mathrm{CI} = [0.437,~4.425]


In [15]:
print('Temporal clustering for reduced (early lists) vs. reduced (late lists):')
early_vs_late_r.add_stat('Temporal clustering', ttest(results['fingerprint']['reduced'], results['fingerprint']['reduced'], x_col='temporal', y_col='temporal', x_lists='Early', y_lists='Late', independent_sample=False))

Temporal clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 0.986,~p = 0.328,~d = 0.061,~\mathrm{CI} = [-0.897,~3.348]


In [16]:
print('Temporal clustering for reduced (early) (early lists) vs. reduced (early) (late lists):')
early_vs_late_early.add_stat('Temporal clustering', ttest(results['fingerprint']['reduced (early)'], results['fingerprint']['reduced (early)'], x_col='temporal', y_col='temporal', x_lists='Early', y_lists='Late', independent_sample=False))

Temporal clustering for reduced (early) (early lists) vs. reduced (early) (late lists):


t(41) = 0.857,~p = 0.396,~d = 0.068,~\mathrm{CI} = [-1.012,~2.896]


In [17]:
print('Temporal clustering for reduced (late) (early lists) vs. reduced (late) (late lists):')
early_vs_late_late.add_stat('Temporal clustering', ttest(results['fingerprint']['reduced (late)'], results['fingerprint']['reduced (late)'], x_col='temporal', y_col='temporal', x_lists='Early', y_lists='Late', independent_sample=False))

Temporal clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 1.244,~p = 0.221,~d = 0.128,~\mathrm{CI} = [-0.894,~3.088]


### Non-visual feature based clustering

In [18]:
for i, f in enumerate(features):
    print(f'{f} clustering for feature-rich (early lists) vs. feature-rich (late lists):')
    early_vs_late_fr.add_stat(f'{f} clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['feature-rich'], x_col=f, y_col=f, x_lists='Early', y_lists='Late', independent_sample=False))

    if i < len(features) - 1:
        print('\n')

category clustering for feature-rich (early lists) vs. feature-rich (late lists):


t(66) = 3.684,~p < 0.001,~d = 0.220,~\mathrm{CI} = [1.733,~5.732]


size clustering for feature-rich (early lists) vs. feature-rich (late lists):
t(66) = 1.629,~p = 0.108,~d = 0.100,~\mathrm{CI} = [-0.207,~3.905]


wordLength clustering for feature-rich (early lists) vs. feature-rich (late lists):
t(66) = -0.100,~p = 0.921,~d = -0.010,~\mathrm{CI} = [-2.217,~1.899]


firstLetter clustering for feature-rich (early lists) vs. feature-rich (late lists):
t(66) = -0.412,~p = 0.681,~d = -0.045,~\mathrm{CI} = [-2.461,~1.645]


In [19]:
for i, f in enumerate(features):
    print(f'{f} clustering for reduced (early lists) vs. reduced (late lists):')
    early_vs_late_r.add_stat(f'{f} clustering', ttest(results['fingerprint']['reduced'], results['fingerprint']['reduced'], x_col=f, y_col=f, x_lists='Early', y_lists='Late', independent_sample=False))

    if i < len(features) - 1:
        print('\n')

category clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 2.755,~p = 0.008,~d = 0.177,~\mathrm{CI} = [0.761,~5.189]


size clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 3.081,~p = 0.003,~d = 0.201,~\mathrm{CI} = [1.210,~5.326]


wordLength clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 3.762,~p < 0.001,~d = 0.261,~\mathrm{CI} = [1.604,~6.821]


firstLetter clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 1.721,~p = 0.090,~d = 0.175,~\mathrm{CI} = [-0.138,~4.098]


In [20]:
for i, f in enumerate(features):
    print(f'{f} clustering for reduced (early) (early lists) vs. reduced (early) (late lists):')
    early_vs_late_early.add_stat(f'{f} clustering', ttest(results['fingerprint']['reduced (early)'], results['fingerprint']['reduced (early)'], x_col=f, y_col=f, x_lists='Early', y_lists='Late', independent_sample=False))

    if i < len(features) - 1:
        print('\n')

category clustering for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 0.707,~p = 0.484,~d = 0.068,~\mathrm{CI} = [-1.314,~2.830]


size clustering for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 0.803,~p = 0.427,~d = 0.079,~\mathrm{CI} = [-1.142,~2.953]


wordLength clustering for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 0.461,~p = 0.648,~d = 0.060,~\mathrm{CI} = [-1.545,~2.462]


firstLetter clustering for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 0.781,~p = 0.439,~d = 0.101,~\mathrm{CI} = [-1.039,~2.881]


In [21]:
for i, f in enumerate(features):
    print(f'{f} clustering for reduced (late) (early lists) vs. reduced (late) (late lists):')
    early_vs_late_late.add_stat(f'{f} clustering', ttest(results['fingerprint']['reduced (late)'], results['fingerprint']['reduced (late)'], x_col=f, y_col=f, x_lists='Early', y_lists='Late', independent_sample=False))

    if i < len(features) - 1:
        print('\n')

category clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = -0.101,~p = 0.920,~d = -0.009,~\mathrm{CI} = [-2.307,~1.776]


size clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 0.555,~p = 0.582,~d = 0.058,~\mathrm{CI} = [-1.444,~2.274]


wordLength clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 1.482,~p = 0.146,~d = 0.126,~\mathrm{CI} = [-0.444,~3.743]


firstLetter clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = -0.143,~p = 0.887,~d = -0.017,~\mathrm{CI} = [-2.204,~1.830]


In [22]:
early_vs_late_fr.to_latex(table_dir.joinpath('early_vs_late_fr.tex'), alpha=0.05, method='fdr_bh')
early_vs_late_r.to_latex(table_dir.joinpath('early_vs_late_r.tex'), alpha=0.05, method='fdr_bh')
early_vs_late_early.to_latex(table_dir.joinpath('early_vs_late_early.tex'), alpha=0.05, method='fdr_bh')
early_vs_late_late.to_latex(table_dir.joinpath('early_vs_late_late.tex'), alpha=0.05, method='fdr_bh')

## feature-rich vs. reduced ({early, late})

### Accuracy

In [23]:
feature_rich_vs_reduced_early = TTestTable()
feature_rich_vs_reduced_late = TTestTable()

print('Accuracy for feature-rich vs.reduced (early) (all lists):')
feature_rich_vs_reduced_early.add_stat('Accuracy', ttest(results['accuracy']['feature-rich'], results['accuracy']['reduced (early)']))

Accuracy for feature-rich vs.reduced (early) (all lists):
t(107) = -2.230,~p = 0.028,~d = -0.439,~\mathrm{CI} = [-4.252,~-0.229]


In [24]:
print('Accuracy for feature-rich vs. reduced (late) (all lists):')
feature_rich_vs_reduced_late.add_stat('Accuracy', ttest(results['accuracy']['feature-rich'], results['accuracy']['reduced (late)']))

Accuracy for feature-rich vs. reduced (late) (all lists):
t(106) = -0.638,~p = 0.525,~d = -0.126,~\mathrm{CI} = [-2.720,~1.362]


### Temporal clustering

In [25]:
print('Temporal clustering for feature-rich vs. reduced (early) (all lists):')
feature_rich_vs_reduced_early.add_stat('Temporal clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['reduced (early)'], x_col='temporal', y_col='temporal'))

Temporal clustering for feature-rich vs. reduced (early) (all lists):
t(107) = -1.379,~p = 0.171,~d = -0.271,~\mathrm{CI} = [-3.319,~0.474]


In [26]:
print('Temporal clustering for feature-rich vs. reduced (late) (all lists):')
feature_rich_vs_reduced_late.add_stat('Temporal clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['reduced (late)'], x_col='temporal', y_col='temporal'))

Temporal clustering for feature-rich vs. reduced (late) (all lists):
t(106) = -0.535,~p = 0.593,~d = -0.106,~\mathrm{CI} = [-2.552,~1.237]


### Non-visual feature based clustering

In [27]:
features = ['category', 'size', 'wordLength', 'firstLetter']
for i, f in enumerate(features):
    print(f'{f} clustering for feature-rich vs. reduced (early) (all lists):')
    feature_rich_vs_reduced_early.add_stat(f'{f} clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['reduced (early)'], x_col=f, y_col=f))

    if i < len(features) - 1:
        print('\n')

category clustering for feature-rich vs. reduced (early) (all lists):
t(107) = 0.013,~p = 0.989,~d = 0.003,~\mathrm{CI} = [-2.003,~2.102]


size clustering for feature-rich vs. reduced (early) (all lists):
t(107) = -0.349,~p = 0.728,~d = -0.069,~\mathrm{CI} = [-2.244,~1.641]


wordLength clustering for feature-rich vs. reduced (early) (all lists):
t(107) = -0.581,~p = 0.563,~d = -0.114,~\mathrm{CI} = [-2.328,~1.291]


firstLetter clustering for feature-rich vs. reduced (early) (all lists):
t(107) = 0.636,~p = 0.526,~d = 0.125,~\mathrm{CI} = [-1.291,~2.940]


In [28]:
features = ['category', 'size', 'wordLength', 'firstLetter']
for i, f in enumerate(features):
    print(f'{f} clustering for feature-rich vs. reduced (late) (all lists):')
    feature_rich_vs_reduced_late.add_stat(f'{f} clustering', ttest(results['fingerprint']['feature-rich'], results['fingerprint']['reduced (late)'], x_col=f, y_col=f))

    if i < len(features) - 1:
        print('\n')

category clustering for feature-rich vs. reduced (late) (all lists):
t(106) = -1.345,~p = 0.181,~d = -0.267,~\mathrm{CI} = [-3.525,~0.660]


size clustering for feature-rich vs. reduced (late) (all lists):
t(106) = -1.441,~p = 0.153,~d = -0.286,~\mathrm{CI} = [-3.557,~0.382]


wordLength clustering for feature-rich vs. reduced (late) (all lists):
t(106) = -1.261,~p = 0.210,~d = -0.250,~\mathrm{CI} = [-3.611,~0.669]


firstLetter clustering for feature-rich vs. reduced (late) (all lists):
t(106) = 0.939,~p = 0.350,~d = 0.186,~\mathrm{CI} = [-1.018,~2.949]


In [29]:
feature_rich_vs_reduced_early.to_latex(table_dir.joinpath('feature_rich_vs_reduced_early.tex'), alpha=0.05, method='fdr_bh')
feature_rich_vs_reduced_late.to_latex(table_dir.joinpath('feature_rich_vs_reduced_late.tex'), alpha=0.05, method='fdr_bh')

## Reduced vs. reduced ({early, late})

### Accuracy

In [30]:
reduced_vs_reduced_early = TTestTable()
reduced_vs_reduced_late = TTestTable()

print('Accuracy for reduced vs. reduced (early) (all lists):')
reduced_vs_reduced_early.add_stat('Accuracy', ttest(results['accuracy']['reduced'], results['accuracy']['reduced (early)']))

Accuracy for reduced vs. reduced (early) (all lists):
t(101) = -2.045,~p = 0.043,~d = -0.410,~\mathrm{CI} = [-3.826,~0.112]


In [31]:
print('Accuracy for reduced vs. reduced (late) (all lists):')
reduced_vs_reduced_late.add_stat('Accuracy', ttest(results['accuracy']['reduced'], results['accuracy']['reduced (late)']))

Accuracy for reduced vs. reduced (late) (all lists):
t(100) = -0.407,~p = 0.685,~d = -0.082,~\mathrm{CI} = [-2.477,~1.626]


### Temporal clustering

In [32]:
print('Temporal clustering for reduced vs. reduced (early) (all lists):')
reduced_vs_reduced_early.add_stat('Temporal clustering', ttest(results['fingerprint']['reduced'], results['fingerprint']['reduced (early)'], x_col='temporal', y_col='temporal'))

Temporal clustering for reduced vs. reduced (early) (all lists):
t(101) = -10.689,~p < 0.001,~d = -2.143,~\mathrm{CI} = [-13.479,~-8.512]


In [33]:
print('Temporal clustering for reduced vs. reduced (late) (all lists):')
reduced_vs_reduced_late.add_stat('Temporal clustering', ttest(results['fingerprint']['reduced'], results['fingerprint']['reduced (late)'], x_col='temporal', y_col='temporal'))

Temporal clustering for reduced vs. reduced (late) (all lists):
t(100) = -9.885,~p < 0.001,~d = -1.996,~\mathrm{CI} = [-14.701,~-6.499]


### Non-visual feature based clustering

In [34]:
features = ['category', 'size', 'wordLength', 'firstLetter']
for i, f in enumerate(features):
    print(f'{f} clustering for reduced vs. reduced (early) (all lists):')
    reduced_vs_reduced_early.add_stat(f'{f} clustering', ttest(results['fingerprint']['reduced'], results['fingerprint']['reduced (early)'], x_col=f, y_col=f))

    if i < len(features) - 1:
        print('\n')

category clustering for reduced vs. reduced (early) (all lists):
t(101) = -9.538,~p < 0.001,~d = -1.912,~\mathrm{CI} = [-12.332,~-7.457]


size clustering for reduced vs. reduced (early) (all lists):
t(101) = -12.222,~p < 0.001,~d = -2.451,~\mathrm{CI} = [-15.311,~-9.954]


wordLength clustering for reduced vs. reduced (early) (all lists):
t(101) = -10.620,~p < 0.001,~d = -2.129,~\mathrm{CI} = [-13.902,~-8.239]


firstLetter clustering for reduced vs. reduced (early) (all lists):
t(101) = -5.213,~p < 0.001,~d = -1.045,~\mathrm{CI} = [-7.290,~-3.403]


In [35]:
features = ['category', 'size', 'wordLength', 'firstLetter']
for i, f in enumerate(features):
    print(f'{f} clustering for reduced vs. reduced (late) (all lists):')
    reduced_vs_reduced_late.add_stat(f'{f} clustering', ttest(results['fingerprint']['reduced'], results['fingerprint']['reduced (late)'], x_col=f, y_col=f))

    if i < len(features) - 1:
        print('\n')

category clustering for reduced vs. reduced (late) (all lists):
t(100) = -10.436,~p < 0.001,~d = -2.107,~\mathrm{CI} = [-15.607,~-6.940]


size clustering for reduced vs. reduced (late) (all lists):
t(100) = -12.413,~p < 0.001,~d = -2.507,~\mathrm{CI} = [-18.413,~-8.398]


wordLength clustering for reduced vs. reduced (late) (all lists):
t(100) = -9.672,~p < 0.001,~d = -1.953,~\mathrm{CI} = [-14.476,~-6.437]


firstLetter clustering for reduced vs. reduced (late) (all lists):
t(100) = -4.555,~p < 0.001,~d = -0.920,~\mathrm{CI} = [-7.332,~-2.538]


In [36]:
reduced_vs_reduced_early.to_latex(table_dir.joinpath('reduced_vs_reduced_early.tex'), alpha=0.05, method='fdr_bh')
reduced_vs_reduced_late.to_latex(table_dir.joinpath('reduced_vs_reduced_late.tex'), alpha=0.05, method='fdr_bh')

# Order manipulation analyses

- When lists are sorted by a given feature, how is memory performance affected (relative to feature-rich -- early lists only)?
- Do some order manipulations matter more than others?  E.g. compare semantic vs. lexicographic vs. visual -- early lists only

### Accuracy

In [37]:
order_manip_vs_feature_rich_accuracy_early = TTestTable()

print('Accuracy for category vs. feature-rich (early lists):')
order_manip_vs_feature_rich_accuracy_early.add_stat('Category', ttest(results['accuracy']['category'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early'))

print('\nAccuracy for size vs. feature-rich (early lists):')
order_manip_vs_feature_rich_accuracy_early.add_stat('Size', ttest(results['accuracy']['size'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early'))

print('\nAccuracy for length vs. feature-rich (early lists):')
order_manip_vs_feature_rich_accuracy_early.add_stat('Length', ttest(results['accuracy']['length'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early'))

print('\nAccuracy for first letter vs. feature-rich (early lists):')
order_manip_vs_feature_rich_accuracy_early.add_stat('First letter', ttest(results['accuracy']['first letter'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early'))

print('\nAccuracy for color vs. feature-rich (early lists):')
order_manip_vs_feature_rich_accuracy_early.add_stat('Color', ttest(results['accuracy']['color'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early'))

print('\nAccuracy for location vs. feature-rich (early lists):')
order_manip_vs_feature_rich_accuracy_early.add_stat('Location', ttest(results['accuracy']['location'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early'))

order_manip_vs_feature_rich_accuracy_early.to_latex(table_dir.joinpath('order_manip_vs_feature_rich_accuracy_early.tex'), alpha=0.05, method='fdr_bh')

Accuracy for category vs. feature-rich (early lists):
t(95) = 3.034,~p = 0.003,~d = 0.667,~\mathrm{CI} = [1.048,~5.113]

Accuracy for size vs. feature-rich (early lists):
t(95) = -1.013,~p = 0.314,~d = -0.223,~\mathrm{CI} = [-3.055,~0.865]

Accuracy for length vs. feature-rich (early lists):
t(95) = -0.550,~p = 0.584,~d = -0.121,~\mathrm{CI} = [-2.368,~1.363]

Accuracy for first letter vs. feature-rich (early lists):
t(95) = -0.690,~p = 0.492,~d = -0.152,~\mathrm{CI} = [-2.663,~1.119]

Accuracy for color vs. feature-rich (early lists):
t(96) = 1.850,~p = 0.067,~d = 0.402,~\mathrm{CI} = [-0.010,~3.712]

Accuracy for location vs. feature-rich (early lists):
t(95) = 0.043,~p = 0.966,~d = 0.010,~\mathrm{CI} = [-1.598,~1.729]


### Temporal clustering

In [38]:
order_manip_vs_feature_rich_temporal_early = TTestTable()

print('Temporal clustering for category vs. feature-rich (early lists):')
order_manip_vs_feature_rich_temporal_early.add_stat('Category', ttest(results['fingerprint']['category'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal'))

print('\nTemporal clustering for size vs. feature-rich (early lists):')
order_manip_vs_feature_rich_temporal_early.add_stat('Size', ttest(results['fingerprint']['size'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal'))

print('\nTemporal clustering for length vs. feature-rich (early lists):')
order_manip_vs_feature_rich_temporal_early.add_stat('Length', ttest(results['fingerprint']['length'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal'))

print('\nTemporal clustering for first letter vs. feature-rich (early lists):')
order_manip_vs_feature_rich_temporal_early.add_stat('First letter', ttest(results['fingerprint']['first letter'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal'))

print('\nTemporal clustering for color vs. feature-rich (early lists):')
order_manip_vs_feature_rich_temporal_early.add_stat('Color', ttest(results['fingerprint']['color'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal'))

print('\nTemporal clustering for location vs. feature-rich (early lists):')
order_manip_vs_feature_rich_temporal_early.add_stat('Location', ttest(results['fingerprint']['location'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal'))

order_manip_vs_feature_rich_temporal_early.to_latex(table_dir.joinpath('order_manip_vs_feature_rich_temporal_early.tex'), alpha=0.05, method='fdr_bh')

Temporal clustering for category vs. feature-rich (early lists):


t(95) = 8.813,~p < 0.001,~d = 1.936,~\mathrm{CI} = [6.793,~11.751]

Temporal clustering for size vs. feature-rich (early lists):
t(95) = 2.630,~p = 0.010,~d = 0.578,~\mathrm{CI} = [0.831,~4.866]

Temporal clustering for length vs. feature-rich (early lists):
t(95) = -1.547,~p = 0.125,~d = -0.340,~\mathrm{CI} = [-3.693,~0.341]

Temporal clustering for first letter vs. feature-rich (early lists):
t(95) = 2.858,~p = 0.005,~d = 0.628,~\mathrm{CI} = [1.031,~4.886]

Temporal clustering for color vs. feature-rich (early lists):
t(96) = -1.339,~p = 0.184,~d = -0.291,~\mathrm{CI} = [-3.238,~0.394]

Temporal clustering for location vs. feature-rich (early lists):
t(95) = 1.705,~p = 0.092,~d = 0.374,~\mathrm{CI} = [-0.155,~3.521]


### Feature-based clustering

In [39]:
conds = ['category', 'size', 'length', 'first letter', 'color', 'location']
features = ['category', 'size', 'wordLength', 'firstLetter', 'color', 'location']

for i, c in enumerate(conds):
    for j, f in enumerate(features):
        print(f'{f} clustering for {c} vs. feature-rich (early lists):')
        ttest(results['fingerprint'][c], results['fingerprint']['feature-rich'], x_col=f, y_col=f, x_lists='Early', y_lists='Early')

        if (j < len(features) - 1) or (i < len(conds) - 1):
            print('\n')
    
    if i < len(conds) - 1:
        print('--- \n')

category clustering for category vs. feature-rich (early lists):
t(95) = 4.429,~p < 0.001,~d = 0.973,~\mathrm{CI} = [2.995,~6.289]


size clustering for category vs. feature-rich (early lists):
t(95) = 3.727,~p < 0.001,~d = 0.819,~\mathrm{CI} = [1.983,~5.750]


wordLength clustering for category vs. feature-rich (early lists):
t(95) = 0.154,~p = 0.878,~d = 0.034,~\mathrm{CI} = [-1.764,~1.928]


firstLetter clustering for category vs. feature-rich (early lists):
t(95) = -1.610,~p = 0.111,~d = -0.354,~\mathrm{CI} = [-3.614,~0.280]


color clustering for category vs. feature-rich (early lists):
t(95) = -0.375,~p = 0.709,~d = -0.082,~\mathrm{CI} = [-2.553,~1.469]


location clustering for category vs. feature-rich (early lists):
t(95) = -0.347,~p = 0.730,~d = -0.076,~\mathrm{CI} = [-2.412,~1.475]


--- 

category clustering for size vs. feature-rich (early lists):
t(95) = 0.330,~p = 0.742,~d = 0.073,~\mathrm{CI} = [-1.719,~2.419]


size clustering for size vs. feature-rich (early lists):
t

## Combine "categories" of features:
  - semantic = category + size
  - lexicographic = length + first letter
  - visual = color + location

In [40]:
groups = {k: v for k, v in feature_groupings.items() if k in ['semantic', 'lexicographic', 'visual']}

merged_results = {}
for k, v in results.items():
    if k not in ['fingerprint', 'accuracy', 'corrected fingerprint']:
        continue
    merged_results[k] = merge_results(v, groups)

## Compare semantic vs. feature-rich

### Accuracy

In [41]:
print('Accuracy for semantic vs. feature-rich (early lists):')
ttest(merged_results['accuracy']['semantic'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early');

print('\nAccuracy for lexicographic vs. feature-rich (early lists):')
ttest(merged_results['accuracy']['lexicographic'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early');

print('\nAccuracy for visual vs. feature-rich (early lists):')
ttest(merged_results['accuracy']['visual'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early');

Accuracy for semantic vs. feature-rich (early lists):
t(125) = 1.197,~p = 0.233,~d = 0.213,~\mathrm{CI} = [-0.753,~3.199]

Accuracy for lexicographic vs. feature-rich (early lists):
t(125) = -0.776,~p = 0.439,~d = -0.138,~\mathrm{CI} = [-2.935,~1.206]

Accuracy for visual vs. feature-rich (early lists):
t(126) = 1.256,~p = 0.212,~d = 0.222,~\mathrm{CI} = [-0.805,~3.035]


In [42]:
print('Accuracy for semantic vs. feature-rich (early lists):')
ttest(merged_results['accuracy']['semantic'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early');

print('\nAccuracy for lexicographic vs. feature-rich (early lists):')
ttest(merged_results['accuracy']['lexicographic'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early');

print('\nAccuracy for visual vs. feature-rich (early lists):')
ttest(merged_results['accuracy']['visual'], results['accuracy']['feature-rich'], x_lists='Early', y_lists='Early');

Accuracy for semantic vs. feature-rich (early lists):
t(125) = 1.197,~p = 0.233,~d = 0.213,~\mathrm{CI} = [-1.034,~2.991]

Accuracy for lexicographic vs. feature-rich (early lists):
t(125) = -0.776,~p = 0.439,~d = -0.138,~\mathrm{CI} = [-3.002,~1.186]

Accuracy for visual vs. feature-rich (early lists):
t(126) = 1.256,~p = 0.212,~d = 0.222,~\mathrm{CI} = [-0.708,~3.008]


### Temporal clustering

In [43]:
print('Temporal clustering for semantic vs. feature-rich (early lists):')
ttest(merged_results['fingerprint']['semantic'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal');

print('\nTemporal clustering for lexicographic vs. feature-rich (early lists):')
ttest(merged_results['fingerprint']['lexicographic'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal');

print('\nTemporal clustering for visual vs. feature-rich (early lists):')
ttest(merged_results['fingerprint']['visual'], results['fingerprint']['feature-rich'], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal');

Temporal clustering for semantic vs. feature-rich (early lists):
t(125) = 6.474,~p < 0.001,~d = 1.151,~\mathrm{CI} = [4.383,~8.915]

Temporal clustering for lexicographic vs. feature-rich (early lists):
t(125) = 0.786,~p = 0.433,~d = 0.140,~\mathrm{CI} = [-1.188,~2.501]

Temporal clustering for visual vs. feature-rich (early lists):
t(126) = 0.254,~p = 0.800,~d = 0.045,~\mathrm{CI} = [-1.863,~2.210]


### Feature based clustering

In [44]:
semantic_vs_feature_rich_feature_early = TTestTable()
lexicographic_vs_feature_rich_feature_early = TTestTable()
visual_vs_feature_rich_feature_early = TTestTable()

conds = ['semantic', 'lexicographic', 'visual']
features = ['category', 'size', 'wordLength', 'firstLetter', 'color', 'location']

for i, c in enumerate(conds):
    for j, f in enumerate(features):
        print(f'{f} clustering for {c} vs. feature-rich (early lists):')

        if c == 'semantic':
            table = semantic_vs_feature_rich_feature_early
        elif c == 'lexicographic':
            table = lexicographic_vs_feature_rich_feature_early
        elif c == 'visual':
            table = visual_vs_feature_rich_feature_early

        table.add_stat(f, ttest(merged_results['fingerprint'][c], results['fingerprint']['feature-rich'], x_col=f, y_col=f, x_lists='Early', y_lists='Early'))

        if (j < len(features) - 1) or (i < len(conds) - 1):
            print('\n')
    
    if i < len(conds) - 1:
        print('--- \n')

semantic_vs_feature_rich_feature_early.to_latex(table_dir.joinpath('semantic_vs_feature_rich_feature_early.tex'), alpha=0.05, method='fdr_bh')
lexicographic_vs_feature_rich_feature_early.to_latex(table_dir.joinpath('lexicographic_vs_feature_rich_feature_early.tex'), alpha=0.05, method='fdr_bh')
visual_vs_feature_rich_feature_early.to_latex(table_dir.joinpath('visual_vs_feature_rich_feature_early.tex'), alpha=0.05, method='fdr_bh')

category clustering for semantic vs. feature-rich (early lists):
t(125) = 2.722,~p = 0.007,~d = 0.484,~\mathrm{CI} = [0.827,~4.932]


size clustering for semantic vs. feature-rich (early lists):
t(125) = 3.866,~p < 0.001,~d = 0.687,~\mathrm{CI} = [2.020,~5.983]


wordLength clustering for semantic vs. feature-rich (early lists):
t(125) = 0.521,~p = 0.603,~d = 0.093,~\mathrm{CI} = [-1.311,~2.333]


firstLetter clustering for semantic vs. feature-rich (early lists):
t(125) = -0.842,~p = 0.401,~d = -0.150,~\mathrm{CI} = [-2.825,~1.095]


color clustering for semantic vs. feature-rich (early lists):
t(125) = -0.650,~p = 0.517,~d = -0.116,~\mathrm{CI} = [-2.680,~1.249]


location clustering for semantic vs. feature-rich (early lists):
t(125) = -0.251,~p = 0.802,~d = -0.045,~\mathrm{CI} = [-2.257,~1.524]


--- 

category clustering for lexicographic vs. feature-rich (early lists):
t(125) = -1.040,~p = 0.301,~d = -0.185,~\mathrm{CI} = [-3.095,~1.092]


size clustering for lexicographic vs. fe

## Compare each pair of (category of) order manipulation conditions (semantic, lexicographic, and visual)

### Accuracy

In [45]:
order_accuracy_comparisons_early = TTestTable()
order_accuracy_comparisons_late = TTestTable()


for i, c1 in enumerate(conds):
    for c2 in conds[i+1:]:
        print(f'Accuracy for {c1} vs. {c2} (early lists):')
        order_accuracy_comparisons_early.add_stat(f'{c1} vs. {c2}', ttest(merged_results['accuracy'][c1], merged_results['accuracy'][c2], x_lists='Early', y_lists='Early'))

        print(f'\nAccuracy for {c1} vs. {c2} (late lists):')
        order_accuracy_comparisons_late.add_stat(f'{c1} vs. {c2}', ttest(merged_results['accuracy'][c1], merged_results['accuracy'][c2], x_lists='Late', y_lists='Late'))

        print('\n\n')

order_accuracy_comparisons_early.to_latex(table_dir.joinpath('order_accuracy_comparisons_early.tex'), alpha=0.05, method='fdr_bh')
order_accuracy_comparisons_late.to_latex(table_dir.joinpath('order_accuracy_comparisons_late.tex'), alpha=0.05, method='fdr_bh')

Accuracy for semantic vs. lexicographic (early lists):
t(118) = 1.936,~p = 0.055,~d = 0.353,~\mathrm{CI} = [0.057,~3.916]

Accuracy for semantic vs. lexicographic (late lists):
t(118) = -0.388,~p = 0.699,~d = -0.071,~\mathrm{CI} = [-2.361,~1.496]



Accuracy for semantic vs. visual (early lists):
t(119) = 0.113,~p = 0.910,~d = 0.021,~\mathrm{CI} = [-1.987,~2.097]

Accuracy for semantic vs. visual (late lists):
t(119) = -0.833,~p = 0.407,~d = -0.151,~\mathrm{CI} = [-2.917,~1.071]



Accuracy for lexicographic vs. visual (early lists):
t(119) = -2.145,~p = 0.034,~d = -0.390,~\mathrm{CI} = [-4.254,~-0.208]

Accuracy for lexicographic vs. visual (late lists):
t(119) = -0.352,~p = 0.726,~d = -0.064,~\mathrm{CI} = [-2.156,~1.565]





### Temporal clustering

In [46]:
order_temporal_comparisons_early = TTestTable()
order_temporal_comparisons_late = TTestTable()

for i, c1 in enumerate(conds):
    for c2 in conds[i+1:]:
        print(f'Temporal clustering for {c1} vs. {c2} (early lists):')
        order_temporal_comparisons_early.add_stat(f'{c1} vs. {c2}', ttest(merged_results['fingerprint'][c1], merged_results['fingerprint'][c2], x_lists='Early', y_lists='Early', x_col='temporal', y_col='temporal'))

        print(f'\nTemporal clustering for {c1} vs. {c2} (late lists):')
        order_temporal_comparisons_late.add_stat(f'{c1} vs. {c2}', ttest(merged_results['fingerprint'][c1], merged_results['fingerprint'][c2], x_lists='Late', y_lists='Late', x_col='temporal', y_col='temporal'))

        print('\n')

order_temporal_comparisons_early.to_latex(table_dir.joinpath('order_temporal_comparisons_early.tex'), alpha=0.05, method='fdr_bh')
order_temporal_comparisons_late.to_latex(table_dir.joinpath('order_temporal_comparisons_late.tex'), alpha=0.05, method='fdr_bh')

Temporal clustering for semantic vs. lexicographic (early lists):
t(118) = 5.620,~p < 0.001,~d = 1.026,~\mathrm{CI} = [3.486,~8.010]

Temporal clustering for semantic vs. lexicographic (late lists):
t(118) = -0.758,~p = 0.450,~d = -0.138,~\mathrm{CI} = [-2.886,~1.145]


Temporal clustering for semantic vs. visual (early lists):
t(119) = 6.613,~p < 0.001,~d = 1.202,~\mathrm{CI} = [4.481,~9.464]

Temporal clustering for semantic vs. visual (late lists):
t(119) = -0.322,~p = 0.748,~d = -0.059,~\mathrm{CI} = [-2.354,~1.514]


Temporal clustering for lexicographic vs. visual (early lists):
t(119) = 0.589,~p = 0.557,~d = 0.107,~\mathrm{CI} = [-1.336,~2.539]

Temporal clustering for lexicographic vs. visual (late lists):
t(119) = 0.562,~p = 0.575,~d = 0.102,~\mathrm{CI} = [-1.561,~2.345]




### Feature-based clustering

In [47]:
feature_comparisons_early = TTestTable()
feature_comparisons_late = TTestTable()

for i, c1 in enumerate(conds):

    for c2 in conds[i+1:]:
        for f in features:
            print(f'{f} clustering for {c1} vs. {c2} (early lists):')
            feature_comparisons_early.add_stat(f'{f}: {c1} vs. {c2}', ttest(merged_results['fingerprint'][c1], merged_results['fingerprint'][c2], x_col=f, y_col=f, x_lists='Early', y_lists='Early'))

            print(f'\n{f} clustering for {c1} vs. {c2} (late lists):')
            feature_comparisons_late.add_stat(f'{f}: {c1} vs. {c2}', ttest(merged_results['fingerprint'][c1], merged_results['fingerprint'][c2], x_col=f, y_col=f, x_lists='Late', y_lists='Late'))

            print('\n')

feature_comparisons_early.to_latex(table_dir.joinpath('feature_comparisons_early.tex'), alpha=0.05, method='fdr_bh')
feature_comparisons_late.to_latex(table_dir.joinpath('feature_comparisons_late.tex'), alpha=0.05, method='fdr_bh')

category clustering for semantic vs. lexicographic (early lists):
t(118) = 3.667,~p < 0.001,~d = 0.670,~\mathrm{CI} = [1.822,~5.942]

category clustering for semantic vs. lexicographic (late lists):
t(118) = -0.720,~p = 0.473,~d = -0.131,~\mathrm{CI} = [-2.670,~1.253]


size clustering for semantic vs. lexicographic (early lists):
t(118) = 4.043,~p < 0.001,~d = 0.738,~\mathrm{CI} = [2.145,~6.296]

size clustering for semantic vs. lexicographic (late lists):
t(118) = -1.897,~p = 0.060,~d = -0.346,~\mathrm{CI} = [-4.039,~0.041]


wordLength clustering for semantic vs. lexicographic (early lists):
t(118) = -3.390,~p < 0.001,~d = -0.619,~\mathrm{CI} = [-5.661,~-1.499]

wordLength clustering for semantic vs. lexicographic (late lists):
t(118) = 1.153,~p = 0.251,~d = 0.211,~\mathrm{CI} = [-0.662,~3.149]


firstLetter clustering for semantic vs. lexicographic (early lists):
t(118) = -5.705,~p < 0.001,~d = -1.042,~\mathrm{CI} = [-7.790,~-3.841]

firstLetter clustering for semantic vs. lexicogr

# When *early* lists are sorted by a given feature, how is memory performance on *late* lists affected (relative to *feature-rich* late lists)

### Accuracy

In [48]:
carryover_accuracy = TTestTable()

print('Accuracy for semantic vs. feature-rich (late lists):')
carryover_accuracy.add_stat('Semantic', ttest(merged_results['accuracy']['semantic'], results['accuracy']['feature-rich'], x_lists='Late', y_lists='Late'))

print('\nAccuracy for lexicographic vs. feature-rich (late lists):')
carryover_accuracy.add_stat('Lexicographic', ttest(merged_results['accuracy']['lexicographic'], results['accuracy']['feature-rich'], x_lists='Late', y_lists='Late'))

print('\nAccuracy for visual vs. feature-rich (late lists):')
carryover_accuracy.add_stat('Visual', ttest(merged_results['accuracy']['visual'], results['accuracy']['feature-rich'], x_lists='Late', y_lists='Late'))

carryover_accuracy.to_latex(table_dir.joinpath('carryover_accuracy.tex'), alpha=0.05, method='fdr_bh')

Accuracy for semantic vs. feature-rich (late lists):
t(125) = 0.487,~p = 0.627,~d = 0.087,~\mathrm{CI} = [-1.661,~2.323]

Accuracy for lexicographic vs. feature-rich (late lists):
t(125) = 0.878,~p = 0.382,~d = 0.156,~\mathrm{CI} = [-1.226,~3.044]

Accuracy for visual vs. feature-rich (late lists):
t(126) = 1.437,~p = 0.153,~d = 0.254,~\mathrm{CI} = [-0.447,~3.519]


### Temporal clustering

In [49]:
carryover_temporal_clustering = TTestTable()

print('Temporal clustering for semantic vs. feature-rich (late lists):')
carryover_temporal_clustering.add_stat('Semantic', ttest(merged_results['fingerprint']['semantic'], results['fingerprint']['feature-rich'], x_lists='Late', y_lists='Late', x_col='temporal', y_col='temporal'))

print('\nTemporal clustering for lexicographic vs. feature-rich (late lists):')
carryover_temporal_clustering.add_stat('Lexicographic', ttest(merged_results['fingerprint']['lexicographic'], results['fingerprint']['feature-rich'], x_lists='Late', y_lists='Late', x_col='temporal', y_col='temporal'))

print('\nTemporal clustering for visual vs. feature-rich (late lists):')
carryover_temporal_clustering.add_stat('Visual', ttest(merged_results['fingerprint']['visual'], results['fingerprint']['feature-rich'], x_lists='Late', y_lists='Late', x_col='temporal', y_col='temporal'))

carryover_temporal_clustering.to_latex(table_dir.joinpath('carryover_temporal_clustering.tex'), alpha=0.05, method='fdr_bh')

Temporal clustering for semantic vs. feature-rich (late lists):
t(125) = 0.157,~p = 0.875,~d = 0.028,~\mathrm{CI} = [-1.859,~1.974]

Temporal clustering for lexicographic vs. feature-rich (late lists):
t(125) = 0.998,~p = 0.320,~d = 0.177,~\mathrm{CI} = [-0.902,~2.920]

Temporal clustering for visual vs. feature-rich (late lists):
t(126) = 0.548,~p = 0.585,~d = 0.097,~\mathrm{CI} = [-1.450,~2.365]


## Feature-based clustering

In [50]:
semantic_carryover_feature_clustering = TTestTable()
lexicographic_carryover_feature_clustering = TTestTable()
visual_carryover_feature_clustering = TTestTable()

conds = ['semantic', 'lexicographic', 'visual']
features = ['category', 'size', 'wordLength', 'firstLetter', 'color', 'location']

for i, c in enumerate(conds):
    if c == 'semantic':
        table = semantic_carryover_feature_clustering
    elif c == 'lexicographic':
        table = lexicographic_carryover_feature_clustering
    elif c == 'visual':
        table = visual_carryover_feature_clustering

    for j, f in enumerate(features):
        print(f'{f} clustering for {c} vs. feature-rich (late lists):')
        table.add_stat(f, ttest(merged_results['fingerprint'][c], results['fingerprint']['feature-rich'], x_col=f, y_col=f, x_lists='Late', y_lists='Late'))

        if (j < len(features) - 1) or (i < len(conds) - 1):
            print('\n')
    
    if i < len(conds) - 1:
        print('--- \n')

semantic_carryover_feature_clustering.to_latex(table_dir.joinpath('semantic_carryover_feature_clustering.tex'), alpha=0.05, method='fdr_bh')
lexicographic_carryover_feature_clustering.to_latex(table_dir.joinpath('lexicographic_carryover_feature_clustering.tex'), alpha=0.05, method='fdr_bh')
visual_carryover_feature_clustering.to_latex(table_dir.joinpath('visual_carryover_feature_clustering.tex'), alpha=0.05, method='fdr_bh')

category clustering for semantic vs. feature-rich (late lists):
t(125) = -0.041,~p = 0.967,~d = -0.007,~\mathrm{CI} = [-2.088,~1.861]


size clustering for semantic vs. feature-rich (late lists):
t(125) = -0.989,~p = 0.324,~d = -0.176,~\mathrm{CI} = [-3.100,~0.948]


wordLength clustering for semantic vs. feature-rich (late lists):
t(125) = -0.045,~p = 0.964,~d = -0.008,~\mathrm{CI} = [-1.959,~1.870]


firstLetter clustering for semantic vs. feature-rich (late lists):
t(125) = -0.369,~p = 0.713,~d = -0.066,~\mathrm{CI} = [-2.338,~1.630]


color clustering for semantic vs. feature-rich (late lists):
t(125) = -0.602,~p = 0.548,~d = -0.107,~\mathrm{CI} = [-2.541,~1.273]


location clustering for semantic vs. feature-rich (late lists):
t(125) = -0.521,~p = 0.603,~d = -0.093,~\mathrm{CI} = [-2.592,~1.565]


--- 

category clustering for lexicographic vs. feature-rich (late lists):
t(125) = 0.678,~p = 0.499,~d = 0.121,~\mathrm{CI} = [-1.240,~2.608]


size clustering for lexicographic vs. fea

# Individual difference feature clustering analyses, part 1

Compute the correlations (across participants) between feature clustering, recall probability, and temporal clustering for early and late lists.  As a summary, also compute the correlations (across conditions) between the per-condition averages.  Note: for each feature clustering score, consider only the condition of interest-- e.g., for the category condition consider category clustering, for the length condition consider length clustering, and so on.

Start by creating a dataframe that combines across all of the order manipulation conditions:
  - index: subject, list group --- but rename subjects so they're unique across conditions
  - columns:
    - feature clustering score: pick out the appropriate element of that list/subject's fingerprint, based on the current condition
    - temporal clustering score
    - recall probability
    - condition

In [51]:
clustering_results = create_clustering_df(results)
clustering_results

Unnamed: 0,Subject,List,Condition,Feature clustering score,Corrected feature clustering score,Temporal clustering score,Recall probability
0,0,Early,Feature-rich,0.495229,0.485292,0.562750,0.484375
1,1,Early,Feature-rich,0.559542,0.582875,0.486750,0.859375
2,2,Early,Feature-rich,0.538500,0.532000,0.660500,0.625000
3,3,Early,Feature-rich,0.519375,0.474292,0.596500,0.531250
4,4,Early,Feature-rich,0.566917,0.565833,0.680000,0.546875
...,...,...,...,...,...,...,...
491,243,Late,Location,0.407500,0.388000,0.698875,0.484375
492,244,Late,Location,0.553750,0.503000,0.603500,0.671875
493,245,Late,Location,0.598250,0.563750,0.768250,0.609375
494,246,Late,Location,0.405000,0.428750,0.683750,0.539062


In [52]:
def corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists=None, ylists=None):
    clustering_results = clustering_results.query('Condition != "feature-rich"')

    def print_corr(a, b, label=None, n_iter=1000):
        corr = stats.pearsonr(a, b)

        # compute bootstrap-estimated 95% confidence interval
        bootstrapped = []        
        max_tries = 10
        tries = 0
        for i in range(n_iter):
            inds = np.random.randint(0, len(a), len(a))
            while len(np.unique(a.iloc[inds])) == 1 and len(np.unique(b.iloc[inds])) == 1 and tries < max_tries:
                inds = np.random.randint(0, len(a), len(a))
                tries += 1
            if tries >= max_tries:
                pass
            tries = 0
            bootstrapped.append(stats.pearsonr(a.iloc[inds], b.iloc[inds])[0])
        low, high = np.percentile(bootstrapped, [2.5, 97.5])

        if label is None:
            prefix = ''
        else:
            prefix = label + ': '

        if corr.pvalue < 0.001:
            p_string = 'p < 0.001'
        else:
            p_string = f'p = {corr.pvalue:.3f}'

        print(f'\t{prefix}: r({len(a) - 2}) = {corr.statistic:.3f},~{p_string},~' + '\mathrm{CI}' + f' = [{low:.3f},~{high:.3f}]')
        return corr

    if xlists is not None:
        x_results = clustering_results.query('List == @xlists')
    else:
        x_results = clustering_results
    
    if ylists is not None:
        y_results = clustering_results.query('List == @ylists')
    else:
        y_results = clustering_results
    
    print(f'Correlations between {x} and {y} (x lists: {xlists}, y lists: {ylists})')
    # Combine across all conditions, compute correlation across subjects
    print_corr(x_results[x], y_results[y], label='Combined (across subjects)')

    # Per-condition (across subjects)
    print('\n')
    min_r = np.inf
    max_p = -np.inf

    for c in clustering_results['Condition'].unique():
        corr = print_corr(x_results.query('Condition == @c')[x], y_results.query('Condition == @c')[y], label=f'{c} (across subjects)')
        min_r = min(min_r, corr.statistic)
        max_p = max(max_p, corr.pvalue)
    print(f'\tWithin condition, across subjects: all $r$s $\geq {min_r:.3f}$, all $p$s $\leq {max_p:.3f}$')

    # Across-condition correlation
    print('\n')
    x_ave = x_results.groupby('Condition').mean(numeric_only=True)[x]
    y_ave = y_results.groupby('Condition').mean(numeric_only=True)[y]
    print_corr(x_ave, y_ave, label='Across conditions')

## Recall probability vs. feature clustering

### Early vs. early

In [53]:
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Early', ylists='Early')

Correlations between Feature clustering score and Recall probability (x lists: Early, y lists: Early)
	Combined (across subjects): : r(246) = 0.495,~p < 0.001,~\mathrm{CI} = [0.378,~0.601]


	Feature-rich (across subjects): : r(65) = 0.758,~p < 0.001,~\mathrm{CI} = [0.540,~0.868]
	Category (across subjects): : r(28) = 0.862,~p < 0.001,~\mathrm{CI} = [0.774,~0.921]
	Size (across subjects): : r(28) = 0.897,~p < 0.001,~\mathrm{CI} = [0.807,~0.947]
	Length (across subjects): : r(28) = 0.481,~p = 0.007,~\mathrm{CI} = [0.051,~0.714]
	First letter (across subjects): : r(28) = 0.414,~p = 0.023,~\mathrm{CI} = [0.017,~0.745]
	Color (across subjects): : r(29) = 0.331,~p = 0.069,~\mathrm{CI} = [-0.043,~0.628]
	Location (across subjects): : r(28) = 0.360,~p = 0.051,~\mathrm{CI} = [0.055,~0.642]
	Within condition, across subjects: all $r$s $\geq 0.331$, all $p$s $\leq 0.069$


	Across conditions: : r(5) = 0.512,~p = 0.240,~\mathrm{CI} = [-0.915,~0.965]


### Late vs. late

In [54]:
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Late', ylists='Late')

Correlations between Feature clustering score and Recall probability (x lists: Late, y lists: Late)
	Combined (across subjects): : r(246) = 0.470,~p < 0.001,~\mathrm{CI} = [0.348,~0.581]


	Feature-rich (across subjects): : r(65) = 0.825,~p < 0.001,~\mathrm{CI} = [0.717,~0.903]
	Category (across subjects): : r(28) = 0.662,~p < 0.001,~\mathrm{CI} = [0.361,~0.887]
	Size (across subjects): : r(28) = 0.744,~p < 0.001,~\mathrm{CI} = [0.470,~0.912]
	Length (across subjects): : r(28) = 0.520,~p = 0.003,~\mathrm{CI} = [0.238,~0.741]
	First letter (across subjects): : r(28) = 0.404,~p = 0.027,~\mathrm{CI} = [-0.014,~0.735]
	Color (across subjects): : r(29) = 0.532,~p = 0.002,~\mathrm{CI} = [0.256,~0.737]
	Location (across subjects): : r(28) = 0.419,~p = 0.021,~\mathrm{CI} = [0.065,~0.655]
	Within condition, across subjects: all $r$s $\geq 0.404$, all $p$s $\leq 0.027$


	Across conditions: : r(5) = -0.292,~p = 0.526,~\mathrm{CI} = [-0.809,~0.591]


### Late vs. Early

In [55]:
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Early', ylists='Late')

Correlations between Feature clustering score and Recall probability (x lists: Early, y lists: Late)
	Combined (across subjects): : r(246) = 0.283,~p < 0.001,~\mathrm{CI} = [0.145,~0.417]


	Feature-rich (across subjects): : r(65) = 0.591,~p < 0.001,~\mathrm{CI} = [0.275,~0.768]
	Category (across subjects): : r(28) = 0.474,~p = 0.008,~\mathrm{CI} = [0.197,~0.698]
	Size (across subjects): : r(28) = 0.574,~p < 0.001,~\mathrm{CI} = [0.319,~0.746]
	Length (across subjects): : r(28) = 0.405,~p = 0.027,~\mathrm{CI} = [0.102,~0.615]
	First letter (across subjects): : r(28) = 0.385,~p = 0.035,~\mathrm{CI} = [-0.057,~0.759]
	Color (across subjects): : r(29) = 0.212,~p = 0.251,~\mathrm{CI} = [-0.194,~0.510]
	Location (across subjects): : r(28) = 0.320,~p = 0.085,~\mathrm{CI} = [0.017,~0.591]
	Within condition, across subjects: all $r$s $\geq 0.212$, all $p$s $\leq 0.251$


	Across conditions: : r(5) = -0.145,~p = 0.757,~\mathrm{CI} = [-0.898,~0.841]


### Early vs. Late

In [56]:
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Late', ylists='Early')

Correlations between Feature clustering score and Recall probability (x lists: Late, y lists: Early)
	Combined (across subjects): : r(246) = 0.495,~p < 0.001,~\mathrm{CI} = [0.359,~0.601]


	Feature-rich (across subjects): : r(65) = 0.693,~p < 0.001,~\mathrm{CI} = [0.436,~0.837]
	Category (across subjects): : r(28) = 0.687,~p < 0.001,~\mathrm{CI} = [0.475,~0.821]
	Size (across subjects): : r(28) = 0.561,~p = 0.001,~\mathrm{CI} = [0.304,~0.770]
	Length (across subjects): : r(28) = 0.438,~p = 0.015,~\mathrm{CI} = [0.051,~0.731]
	First letter (across subjects): : r(28) = 0.377,~p = 0.040,~\mathrm{CI} = [0.031,~0.653]
	Color (across subjects): : r(29) = 0.431,~p = 0.016,~\mathrm{CI} = [0.108,~0.656]
	Location (across subjects): : r(28) = 0.395,~p = 0.031,~\mathrm{CI} = [0.126,~0.649]
	Within condition, across subjects: all $r$s $\geq 0.377$, all $p$s $\leq 0.040$


	Across conditions: : r(5) = 0.445,~p = 0.317,~\mathrm{CI} = [-0.740,~0.986]


## Temporal clustering vs. feature clustering

### Early vs. early

In [57]:
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Early', ylists='Early')

Correlations between Feature clustering score and Temporal clustering score (x lists: Early, y lists: Early)
	Combined (across subjects): : r(246) = 0.838,~p < 0.001,~\mathrm{CI} = [0.777,~0.885]


	Feature-rich (across subjects): : r(65) = 0.538,~p < 0.001,~\mathrm{CI} = [0.087,~0.751]
	Category (across subjects): : r(28) = 0.965,~p < 0.001,~\mathrm{CI} = [0.912,~0.993]
	Size (across subjects): : r(28) = 0.926,~p < 0.001,~\mathrm{CI} = [0.869,~0.963]
	Length (across subjects): : r(28) = 0.945,~p < 0.001,~\mathrm{CI} = [0.886,~0.977]
	First letter (across subjects): : r(28) = 0.855,~p < 0.001,~\mathrm{CI} = [0.727,~0.929]
	Color (across subjects): : r(29) = 0.817,~p < 0.001,~\mathrm{CI} = [0.665,~0.901]
	Location (across subjects): : r(28) = 0.883,~p < 0.001,~\mathrm{CI} = [0.783,~0.947]
	Within condition, across subjects: all $r$s $\geq 0.538$, all $p$s $\leq 0.000$


	Across conditions: : r(5) = 0.941,~p = 0.002,~\mathrm{CI} = [0.643,~0.995]


### Late vs. late

In [58]:
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Late', ylists='Late')

Correlations between Feature clustering score and Temporal clustering score (x lists: Late, y lists: Late)
	Combined (across subjects): : r(246) = 0.302,~p < 0.001,~\mathrm{CI} = [0.169,~0.424]


	Feature-rich (across subjects): : r(65) = 0.467,~p < 0.001,~\mathrm{CI} = [0.027,~0.729]
	Category (across subjects): : r(28) = 0.293,~p = 0.116,~\mathrm{CI} = [-0.082,~0.588]
	Size (across subjects): : r(28) = 0.307,~p = 0.099,~\mathrm{CI} = [-0.107,~0.663]
	Length (across subjects): : r(28) = 0.353,~p = 0.056,~\mathrm{CI} = [-0.009,~0.687]
	First letter (across subjects): : r(28) = 0.660,~p < 0.001,~\mathrm{CI} = [0.437,~0.817]
	Color (across subjects): : r(29) = 0.333,~p = 0.068,~\mathrm{CI} = [-0.061,~0.669]
	Location (across subjects): : r(28) = 0.235,~p = 0.212,~\mathrm{CI} = [-0.142,~0.523]
	Within condition, across subjects: all $r$s $\geq 0.235$, all $p$s $\leq 0.212$


	Across conditions: : r(5) = -0.192,~p = 0.681,~\mathrm{CI} = [-0.833,~0.657]


### Late vs. early

In [59]:
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Early', ylists='Late')

Correlations between Feature clustering score and Temporal clustering score (x lists: Early, y lists: Late)
	Combined (across subjects): : r(246) = 0.269,~p < 0.001,~\mathrm{CI} = [0.114,~0.405]


	Feature-rich (across subjects): : r(65) = 0.358,~p = 0.003,~\mathrm{CI} = [-0.195,~0.649]
	Category (across subjects): : r(28) = 0.298,~p = 0.110,~\mathrm{CI} = [0.022,~0.521]
	Size (across subjects): : r(28) = 0.314,~p = 0.091,~\mathrm{CI} = [0.001,~0.562]
	Length (across subjects): : r(28) = 0.535,~p = 0.002,~\mathrm{CI} = [0.238,~0.768]
	First letter (across subjects): : r(28) = 0.443,~p = 0.014,~\mathrm{CI} = [0.057,~0.740]
	Color (across subjects): : r(29) = 0.491,~p = 0.005,~\mathrm{CI} = [0.050,~0.726]
	Location (across subjects): : r(28) = 0.355,~p = 0.054,~\mathrm{CI} = [0.042,~0.612]
	Within condition, across subjects: all $r$s $\geq 0.298$, all $p$s $\leq 0.110$


	Across conditions: : r(5) = 0.154,~p = 0.742,~\mathrm{CI} = [-0.506,~0.934]


### Early vs. late

In [60]:
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Late', ylists='Early')

Correlations between Feature clustering score and Temporal clustering score (x lists: Late, y lists: Early)
	Combined (across subjects): : r(246) = 0.493,~p < 0.001,~\mathrm{CI} = [0.373,~0.600]


	Feature-rich (across subjects): : r(65) = 0.336,~p = 0.005,~\mathrm{CI} = [-0.159,~0.659]
	Category (across subjects): : r(28) = 0.613,~p < 0.001,~\mathrm{CI} = [0.382,~0.773]
	Size (across subjects): : r(28) = 0.392,~p = 0.032,~\mathrm{CI} = [0.055,~0.655]
	Length (across subjects): : r(28) = 0.271,~p = 0.148,~\mathrm{CI} = [-0.093,~0.570]
	First letter (across subjects): : r(28) = 0.345,~p = 0.062,~\mathrm{CI} = [-0.050,~0.669]
	Color (across subjects): : r(29) = 0.005,~p = 0.980,~\mathrm{CI} = [-0.417,~0.415]
	Location (across subjects): : r(28) = 0.240,~p = 0.201,~\mathrm{CI} = [-0.087,~0.511]
	Within condition, across subjects: all $r$s $\geq 0.005$, all $p$s $\leq 0.980$


	Across conditions: : r(5) = 0.823,~p = 0.023,~\mathrm{CI} = [-0.084,~0.985]


## Feature clustering on early vs. late lists

In [61]:
corr_helper(clustering_results, x='Feature clustering score', y='Feature clustering score', xlists='Late', ylists='Early')

Correlations between Feature clustering score and Feature clustering score (x lists: Late, y lists: Early)
	Combined (across subjects): : r(246) = 0.551,~p < 0.001,~\mathrm{CI} = [0.431,~0.648]


	Feature-rich (across subjects): : r(65) = 0.706,~p < 0.001,~\mathrm{CI} = [0.261,~0.872]
	Category (across subjects): : r(28) = 0.590,~p < 0.001,~\mathrm{CI} = [0.366,~0.754]
	Size (across subjects): : r(28) = 0.488,~p = 0.006,~\mathrm{CI} = [0.156,~0.723]
	Length (across subjects): : r(28) = 0.384,~p = 0.036,~\mathrm{CI} = [0.032,~0.667]
	First letter (across subjects): : r(28) = 0.202,~p = 0.284,~\mathrm{CI} = [-0.252,~0.660]
	Color (across subjects): : r(29) = -0.183,~p = 0.325,~\mathrm{CI} = [-0.560,~0.253]
	Location (across subjects): : r(28) = 0.031,~p = 0.870,~\mathrm{CI} = [-0.235,~0.301]
	Within condition, across subjects: all $r$s $\geq -0.183$, all $p$s $\leq 0.870$


	Across conditions: : r(5) = 0.861,~p = 0.013,~\mathrm{CI} = [-0.404,~0.998]


# Difference analyses (early - late lists)

In [62]:
clustering_result_diffs = clustering_results.query('List == "Early"').copy()
clustering_result_diffs['List'] = 'Early - Late'
clustering_result_diffs['Feature clustering score'] = clustering_result_diffs['Feature clustering score'].values - clustering_results.query('List == "Late"')['Feature clustering score'].values
clustering_result_diffs['Temporal clustering score'] = clustering_result_diffs['Temporal clustering score'].values - clustering_results.query('List == "Late"')['Temporal clustering score'].values
clustering_result_diffs['Recall probability'] = clustering_result_diffs['Recall probability'].values - clustering_results.query('List == "Late"')['Recall probability'].values
clustering_result_diffs

Unnamed: 0,Subject,List,Condition,Feature clustering score,Corrected feature clustering score,Temporal clustering score,Recall probability
0,0,Early - Late,Feature-rich,-0.050729,0.485292,-0.045000,0.039062
1,1,Early - Late,Feature-rich,-0.058125,0.582875,-0.039750,-0.031250
2,2,Early - Late,Feature-rich,0.092667,0.532000,0.259625,0.148438
3,3,Early - Late,Feature-rich,0.070625,0.474292,-0.019375,0.125000
4,4,Early - Late,Feature-rich,0.089312,0.565833,0.194500,0.101562
...,...,...,...,...,...,...,...
461,243,Early - Late,Location,0.272875,0.397500,0.011625,-0.031250
462,244,Early - Late,Location,0.229750,0.527500,0.248500,-0.046875
463,245,Early - Late,Location,-0.001750,0.320750,-0.017250,-0.062500
464,246,Early - Late,Location,0.436625,0.647750,0.162500,0.109375


## Recall probability versus feature clustering (differences)

In [63]:
corr_helper(clustering_result_diffs, x='Feature clustering score', y='Recall probability', xlists='Early - Late', ylists='Early - Late')

Correlations between Feature clustering score and Recall probability (x lists: Early - Late, y lists: Early - Late)


	Combined (across subjects): : r(246) = 0.291,~p < 0.001,~\mathrm{CI} = [0.140,~0.434]


	Feature-rich (across subjects): : r(65) = 0.658,~p < 0.001,~\mathrm{CI} = [0.534,~0.784]
	Category (across subjects): : r(28) = 0.350,~p = 0.058,~\mathrm{CI} = [-0.007,~0.644]
	Size (across subjects): : r(28) = 0.708,~p < 0.001,~\mathrm{CI} = [0.429,~0.869]
	Length (across subjects): : r(28) = 0.205,~p = 0.276,~\mathrm{CI} = [-0.128,~0.489]
	First letter (across subjects): : r(28) = 0.081,~p = 0.672,~\mathrm{CI} = [-0.396,~0.605]
	Color (across subjects): : r(29) = 0.155,~p = 0.406,~\mathrm{CI} = [-0.153,~0.534]
	Location (across subjects): : r(28) = 0.052,~p = 0.787,~\mathrm{CI} = [-0.303,~0.381]
	Within condition, across subjects: all $r$s $\geq 0.052$, all $p$s $\leq 0.787$


	Across conditions: : r(5) = 0.337,~p = 0.459,~\mathrm{CI} = [-0.943,~0.912]


### Temporal clustering versus featuer clustering (differences)

In [64]:
corr_helper(clustering_result_diffs, x='Feature clustering score', y='Temporal clustering score', xlists='Early - Late', ylists='Early - Late')

Correlations between Feature clustering score and Temporal clustering score (x lists: Early - Late, y lists: Early - Late)
	Combined (across subjects): : r(246) = 0.436,~p < 0.001,~\mathrm{CI} = [0.319,~0.533]


	Feature-rich (across subjects): : r(65) = 0.427,~p < 0.001,~\mathrm{CI} = [0.188,~0.622]
	Category (across subjects): : r(28) = 0.110,~p = 0.564,~\mathrm{CI} = [-0.277,~0.439]
	Size (across subjects): : r(28) = 0.447,~p = 0.013,~\mathrm{CI} = [0.056,~0.725]
	Length (across subjects): : r(28) = 0.482,~p = 0.007,~\mathrm{CI} = [0.246,~0.707]
	First letter (across subjects): : r(28) = 0.584,~p < 0.001,~\mathrm{CI} = [0.255,~0.764]
	Color (across subjects): : r(29) = 0.406,~p = 0.023,~\mathrm{CI} = [0.078,~0.689]
	Location (across subjects): : r(28) = 0.498,~p = 0.005,~\mathrm{CI} = [0.282,~0.680]
	Within condition, across subjects: all $r$s $\geq 0.110$, all $p$s $\leq 0.564$


	Across conditions: : r(5) = 0.564,~p = 0.188,~\mathrm{CI} = [-0.483,~0.950]


# Adaptive condition

## Accuracy: comparing across each list type

### Stabilize vs. random

In [65]:
ttest(results['accuracy']['adaptive'], results['accuracy']['adaptive'], x_lists='stabilize', y_lists='random', independent_sample=False);

t(59) = 1.740,~p = 0.087,~d = 0.095,~\mathrm{CI} = [-0.184,~3.856]


### Destabilize vs. random

In [66]:
ttest(results['accuracy']['adaptive'], results['accuracy']['adaptive'], x_lists='destabilize', y_lists='random', independent_sample=False);

t(59) = -0.249,~p = 0.804,~d = -0.017,~\mathrm{CI} = [-2.327,~1.650]


### Stabilize vs. destabilize

In [67]:
ttest(results['accuracy']['adaptive'], results['accuracy']['adaptive'], x_lists='stabilize', y_lists='destabilize', independent_sample=False);

t(59) = 1.714,~p = 0.092,~d = 0.114,~\mathrm{CI} = [-0.057,~4.224]


## Temporal clustering: comparing across each list type

### Stabilize vs. random

In [68]:
ttest(results['fingerprint']['adaptive'], results['fingerprint']['adaptive'], x_lists='stabilize', y_lists='random', independent_sample=False, x_col='temporal', y_col='temporal');

t(59) = 3.428,~p = 0.001,~d = 0.306,~\mathrm{CI} = [1.623,~5.588]


### Destabilize vs. random

In [69]:
ttest(results['fingerprint']['adaptive'], results['fingerprint']['adaptive'], x_lists='destabilize', y_lists='random', independent_sample=False, x_col='temporal', y_col='temporal');

t(59) = -0.880,~p = 0.382,~d = -0.081,~\mathrm{CI} = [-3.157,~1.057]


### Stabilize vs. destabilize

In [70]:
ttest(results['fingerprint']['adaptive'], results['fingerprint']['adaptive'], x_lists='stabilize', y_lists='destabilize', independent_sample=False, x_col='temporal', y_col='temporal');

t(59) = 4.174,~p < 0.001,~d = 0.374,~\mathrm{CI} = [2.048,~6.661]


## Correlations between accuracy and temporal clustering (adaptive condition)

In [71]:
df = results['accuracy']['adaptive'].data.reset_index().query('List not in ["init"]').rename({0: 'Recall probability'}, axis=1)
df['Temporal clustering score'] = results['fingerprint']['adaptive'].data.reset_index().query('List not in ["init"]')['temporal']
df.rename({'List': 'Condition'}, axis=1, inplace=True)
df['List'] = 'All'
df

Unnamed: 0,Subject,Condition,Recall probability,Temporal clustering score,List
0,0,destabilize,0.578125,0.47100,All
1,1,destabilize,0.250000,0.34775,All
2,2,destabilize,0.515625,0.51500,All
3,3,destabilize,0.625000,0.86400,All
4,4,destabilize,0.500000,0.46650,All
...,...,...,...,...,...
235,55,stabilize,0.406250,0.58750,All
236,56,stabilize,0.703125,0.81600,All
237,57,stabilize,0.343750,0.41925,All
238,58,stabilize,0.343750,0.56250,All


In [72]:
corr_helper(df, x='Temporal clustering score', y='Recall probability', xlists='All', ylists='All')

Correlations between Temporal clustering score and Recall probability (x lists: All, y lists: All)
	Combined (across subjects): : r(178) = 0.701,~p < 0.001,~\mathrm{CI} = [0.579,~0.792]


	destabilize (across subjects): : r(58) = 0.674,~p < 0.001,~\mathrm{CI} = [0.454,~0.814]
	random (across subjects): : r(58) = 0.651,~p < 0.001,~\mathrm{CI} = [0.369,~0.832]
	stabilize (across subjects): : r(58) = 0.784,~p < 0.001,~\mathrm{CI} = [0.616,~0.881]
	Within condition, across subjects: all $r$s $\geq 0.651$, all $p$s $\leq 0.000$


	Across conditions: : r(1) = 0.998,~p = 0.044,~\mathrm{CI} = [0.998,~1.000]


# Fingerprint stability analysis (feature-rich)

Each participant in the feature-rich condition studied and recalled a total of 16 lists, yielding 16 sets of “fingerprints” for that participant.  Below we asked: holding out one of these fingerprints at a time, could we “match up” which participant it belonged to?  Specifically, we created two distributions of correlations.  The first distribution comprised “within-participant” correlations between the fingerprint from a held-out list and the average fingerprint from all remaining lists.  Each participant contributes a total of 16 correlations to this distribution.  The second distribution comprise  “across-participant” correlations between one the fingerprint from one held-out list from one participant, and the average fingerprints (across all lists) for each other participant.  We repeat these across-participant comparisons for each pairing of lists (from the “template” participant) and other participants.  Therefore each participant contributes $16 \times (N - 1)$ correlations to this second distribution (one per list, times $N - 1$--- i.e., the number of participants excluding the template participant). 

In [73]:
# load in *per list* fingerprints for the feature-rich condition
from analyze import results_file, analyze_data

raw_results, _, _ = analyze_data(savefile=results_file)
fingerprints = raw_results['fingerprint']['feature-rich'].data.reset_index()

include = features
include.append('temporal')

within_corrs = []
across_corrs = []

# for each participant, for each list, compute the correlation between the fingerprint for that list and the average fingerprint for the other lists
for x in tqdm(fingerprints['Subject'].unique()):
    for y in fingerprints.query('Subject == @x')['List'].unique():
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            ref = fingerprints.query('Subject == @x and List == @y')[include].mean().values
            within = fingerprints.query('Subject == @x and List != @y')[include].mean().values
            within_corrs.append(stats.pearsonr(ref, within)[0])

            for z in fingerprints['Subject'].unique():
                if z == x:
                    continue
                across = fingerprints.query('Subject == @z')[include].mean().values
                across_corrs.append(stats.pearsonr(ref, across)[0])


100%|██████████| 67/67 [00:52<00:00,  1.27it/s]


In [74]:
def ttest_helper(a, b, independent=True, n_iter=1000, alpha=0.05, prefix=''):
    if independent:
        results = stats.ttest_ind(a, b)
        df = len(a) + len(b) - 2
    else:
        results = stats.ttest_rel(a, b)
        df = len(a) - 1
    t, p = results.statistic, results.pvalue
    
    # compute bootstrap-estimated 95% confidence interval
    bootstrapped = []
    n_iter = 1000
    for i in range(n_iter):
        if independent:
            bootstrapped.append(stats.ttest_ind(np.random.choice(a, len(a), replace=True), np.random.choice(b, len(b), replace=True)).statistic)
        else:
            inds = np.random.randint(0, len(a), len(b))
            bootstrapped.append(stats.ttest_rel(a[inds], b[inds]).statistic)
    
    low, high = np.percentile(bootstrapped, [alpha * 50, 100 - (alpha * 50)])

    if p < 0.001:
        p_string = 'p < 0.001'
    else:
        p_string = f'p = {p:.3f}'

    d = (np.mean(within_corrs) - np.mean(across_corrs)) / np.sqrt((np.std(within_corrs) ** 2 + np.std(across_corrs) ** 2) / 2)

    print(f'{prefix}t({df}) = {t:.3f},~{p_string},~d = {d:.3f},~' + '\mathrm{CI}' + f' = [{low:.3f},~{high:.3f}]')
    return t, df, d, p, low, high

In [75]:
within_corrs = np.array(within_corrs)
across_corrs = np.array(across_corrs)

# remove nans
within_corrs = within_corrs[~np.isnan(within_corrs)]
across_corrs = across_corrs[~np.isnan(across_corrs)]

print(f'Within list correlation: {np.mean(within_corrs):.3f} +/- {np.std(within_corrs):.3f}')
print(f'Across list correlation: {np.mean(across_corrs):.3f} +/- {np.std(across_corrs):.3f}')

ttest_helper(within_corrs, across_corrs, independent=True, prefix='Within vs. across list correlation: ');

Within list correlation: 0.483 +/- 0.357
Across list correlation: 0.424 +/- 0.374
Within vs. across list correlation: t(70280) = 5.077,~p < 0.001,~d = 0.162,~\mathrm{CI} = [3.160,~6.856]


# Clustering by feature rank

In [76]:
# sort each row in descending order and rename the columns to match the sort ranks

x = results_by_list['fingerprint']['feature-rich'].data
x = x.groupby('Subject').mean().apply(lambda x: x.sort_values(ascending=False, ignore_index=True), axis=1)
x.columns = [f'Rank {i}' for i in range(1, len(x.columns) + 1)]
x

Unnamed: 0_level_0,Rank 1,Rank 2,Rank 3,Rank 4,Rank 5,Rank 6,Rank 7
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.759750,0.620000,0.585250,0.497000,0.487000,0.384125,0.375688
1,0.993250,0.906000,0.506625,0.464875,0.418250,0.401500,0.347750
2,0.730625,0.705000,0.530687,0.470625,0.423937,0.339438,0.283375
3,0.606187,0.576000,0.550312,0.546312,0.501312,0.369000,0.361437
4,0.675000,0.587500,0.582750,0.518000,0.469062,0.443000,0.441000
...,...,...,...,...,...,...,...
62,0.623812,0.602125,0.564813,0.464250,0.460813,0.363875,0.335500
63,0.724562,0.577125,0.435937,0.416500,0.399125,0.366250,0.358438
64,0.885250,0.762375,0.569000,0.455875,0.438375,0.408375,0.390875
65,0.723187,0.501437,0.461313,0.449437,0.440250,0.431812,0.409250


In [77]:
rank_table = TTestTable()

for rank in x.columns:
    rank_table.add_stat(f'{rank}', ttest_helper(x[rank], 0.5 * np.ones_like(x[rank]), independent=False, prefix=f'{rank}: '))

rank_table.to_latex(table_dir.joinpath('rank_table.tex'), alpha=0.05, method='fdr_bh')

Rank 1: t(66) = 12.751,~p < 0.001,~d = 0.162,~\mathrm{CI} = [8.741,~19.718]
Rank 2: t(66) = 8.196,~p < 0.001,~d = 0.162,~\mathrm{CI} = [4.849,~13.291]
Rank 3: t(66) = 3.243,~p = 0.002,~d = 0.162,~\mathrm{CI} = [1.049,~6.795]
Rank 4: t(66) = -3.112,~p = 0.003,~d = 0.162,~\mathrm{CI} = [-5.161,~-1.909]
Rank 5: t(66) = -7.154,~p < 0.001,~d = 0.162,~\mathrm{CI} = [-12.551,~-5.426]
Rank 6: t(66) = -12.608,~p < 0.001,~d = 0.162,~\mathrm{CI} = [-21.801,~-9.261]
Rank 7: t(66) = -18.397,~p < 0.001,~d = 0.162,~\mathrm{CI} = [-27.415,~-14.103]
