In [1]:
try:
    import davos
except:
    %pip install davos
    import davos

davos.config.suppress_stdout = False

In [2]:
smuggle pandas as pd # pip: pandas==1.5.1
smuggle numpy as np # pip: numpy==1.22.3
smuggle seaborn as sns # pip: seaborn==0.12.1
smuggle dill as pickle # pip: dill==0.3.6

smuggle h5py # pip: h5py==3.7.0
smuggle pathos # pip: pathos==0.3.0
smuggle quail # pip: quail==0.2.2
smuggle requests # pip: requests==2.28.1
smuggle os
smuggle warnings
smuggle string

from tqdm smuggle tqdm # pip: tqdm==4.64.1
from matplotlib smuggle pyplot as plt #pip: matplotlib==3.6.2
from matplotlib.ticker smuggle MaxNLocator
from pathos.multiprocessing smuggle ProcessingPool as Pool # pip: pathos==0.3.0
from multiprocessing smuggle cpu_count  # pip: multiprocess==0.70.14
from sklearn.decomposition smuggle IncrementalPCA as PCA # pip: scikit-learn==1.1.3
from scipy smuggle stats # pip: scipy==1.10.0

# local functions
from dataloader import datadir, grouping, feature_groupings, descriptions, sort_by_grouping, fetch_data
from analyze import analyze_data, recover_fingerprint_features, organize_by_listgroup, random, adaptive, non_adaptive_exclude_random, \
                    select_conds, select_lists, filter, get_diffs, stack_diffs, pnr_matrix, accuracy2df, adaptive_listnum2cond, \
                    clustering_matrices, average_by_cond, rename_features, fingerprint2temporal, get_boundaries, \
                    recall_accuracy_near_boundaries, results, results_by_list, analyses, listgroups, orders, ttest, merge_results, \
                    create_clustering_df

# Performance on *feature rich* versus *reduced* lists:
  - accuracy
  - temporal clustering
  - non-visual feature-based clustering (category, size, length, first letter)

In [3]:
# Accuracy: feature rich vs. reduced. No x_lists/y_lists filter is passed to ttest.
accuracy_by_cond = results['accuracy']

print('Accuracy for feature rich vs. reduced (all lists):')
ttest(accuracy_by_cond['feature rich'], accuracy_by_cond['reduced'])

Accuracy for feature rich vs. reduced (all lists):
t(126) = -0.290, p = 0.772, d = -0.051, CI = [-2.372, 1.644]


In [4]:
# Temporal clustering: feature rich vs. reduced, comparing the 'temporal'
# column on both sides (no list filter is passed).
fingerprint_by_cond = results['fingerprint']

print('Temporal clustering for feature rich vs. reduced (all lists):')
ttest(
    fingerprint_by_cond['feature rich'],
    fingerprint_by_cond['reduced'],
    x_col='temporal',
    y_col='temporal',
)

Temporal clustering for feature rich vs. reduced (all lists):
t(126) = 10.632, p < 0.001, d = 1.882, CI = [7.796, 13.872]


In [5]:
# Non-visual fingerprint features; later cells read `features` as well.
features = ['category', 'size', 'wordLength', 'firstLetter']

fingerprint_by_cond = results['fingerprint']
for idx, feature in enumerate(features):
    # A blank-line separator sits between consecutive reports (none after the last).
    if idx > 0:
        print('\n')

    print(f'{feature} clustering for feature rich vs. reduced (all lists):')
    ttest(fingerprint_by_cond['feature rich'], fingerprint_by_cond['reduced'], x_col=feature, y_col=feature)

category clustering for feature rich vs. reduced (all lists):
t(126) = 10.148, p < 0.001, d = 1.796, CI = [7.524, 13.695]


size clustering for feature rich vs. reduced (all lists):
t(126) = 12.033, p < 0.001, d = 2.129, CI = [8.955, 15.618]


wordLength clustering for feature rich vs. reduced (all lists):
t(126) = 10.720, p < 0.001, d = 1.897, CI = [7.492, 15.148]


firstLetter clustering for feature rich vs. reduced (all lists):
t(126) = 6.679, p < 0.001, d = 1.182, CI = [4.372, 9.383]


# Performance on {*feature rich*, *reduced*} lists versus *reduced ({early, late})* lists
We'll compare early and late lists separately (e.g., early to early, late to late, early to late, etc.)

Metrics:
  - accuracy
  - temporal clustering
  - non-visual feature-based clustering (category, size, length, first letter)

## Early vs. late (all conditions and metrics)

### Accuracy

In [6]:
# Early vs. late lists within the feature rich condition.
# independent_sample=False because both samples come from the same condition
# (presumably a paired test — confirm against analyze.ttest).
cond_data = results['accuracy']['feature rich']

print('Accuracy for feature rich (early lists) vs. feature rich (late lists):')
ttest(cond_data, cond_data, x_lists='Early', y_lists='Late', independent_sample=False)

Accuracy for feature rich (early lists) vs. feature rich (late lists):
t(66) = 4.553, p < 0.001, d = 0.233, CI = [2.425, 7.132]


In [7]:
# Early vs. late lists within the reduced condition (same data on both sides,
# split by the x_lists / y_lists filters).
cond_data = results['accuracy']['reduced']

print('Accuracy for reduced (early lists) vs. reduced (late lists):')
ttest(
    cond_data,
    cond_data,
    x_lists='Early',
    y_lists='Late',
    independent_sample=False,
)

Accuracy for reduced (early lists) vs. reduced (late lists):
t(60) = 2.434, p = 0.018, d = 0.134, CI = [0.361, 4.873]


In [8]:
# Early vs. late lists within the reduced (early) condition.
early_vs_late = dict(x_lists='Early', y_lists='Late', independent_sample=False)
cond_data = results['accuracy']['reduced (early)']

print('Accuracy for reduced (early) (early lists) vs. reduced (early) (late lists):')
ttest(cond_data, cond_data, **early_vs_late)

Accuracy for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 1.499, p = 0.141, d = 0.098, CI = [-0.419, 3.499]


In [9]:
# Early vs. late lists within the reduced (late) condition.
early_vs_late = dict(x_lists='Early', y_lists='Late', independent_sample=False)
cond_data = results['accuracy']['reduced (late)']

print('Accuracy for reduced (late) (early lists) vs. reduced (late) (late lists):')
ttest(cond_data, cond_data, **early_vs_late)

Accuracy for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 1.462, p = 0.152, d = 0.121, CI = [-0.301, 2.864]


### Temporal clustering

In [10]:
# Temporal clustering, early vs. late lists, within the feature rich condition.
cond_data = results['fingerprint']['feature rich']

print('Temporal clustering for feature rich (early lists) vs. feature rich (late lists):')
ttest(
    cond_data,
    cond_data,
    x_col='temporal',
    y_col='temporal',
    x_lists='Early',
    y_lists='Late',
    independent_sample=False,
)

Temporal clustering for feature rich (early lists) vs. feature rich (late lists):
t(66) = 2.268, p = 0.027, d = 0.181, CI = [0.272, 4.450]


In [11]:
# Temporal clustering, early vs. late lists, within the reduced condition.
cond_data = results['fingerprint']['reduced']

print('Temporal clustering for reduced (early lists) vs. reduced (late lists):')
ttest(
    cond_data,
    cond_data,
    x_col='temporal',
    y_col='temporal',
    x_lists='Early',
    y_lists='Late',
    independent_sample=False,
)

Temporal clustering for reduced (early lists) vs. reduced (late lists):


t(60) = 0.986, p = 0.328, d = 0.061, CI = [-0.980, 3.190]


In [12]:
# Temporal clustering, early vs. late lists, within the reduced (early) condition.
temporal_early_vs_late = dict(
    x_col='temporal',
    y_col='temporal',
    x_lists='Early',
    y_lists='Late',
    independent_sample=False,
)
cond_data = results['fingerprint']['reduced (early)']

print('Temporal clustering for reduced (early) (early lists) vs. reduced (early) (late lists):')
ttest(cond_data, cond_data, **temporal_early_vs_late)

Temporal clustering for reduced (early) (early lists) vs. reduced (early) (late lists):


t(41) = 0.857, p = 0.396, d = 0.068, CI = [-1.030, 2.929]


In [13]:
# Temporal clustering, early vs. late lists, within the reduced (late) condition.
temporal_early_vs_late = dict(
    x_col='temporal',
    y_col='temporal',
    x_lists='Early',
    y_lists='Late',
    independent_sample=False,
)
cond_data = results['fingerprint']['reduced (late)']

print('Temporal clustering for reduced (late) (early lists) vs. reduced (late) (late lists):')
ttest(cond_data, cond_data, **temporal_early_vs_late)

Temporal clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 1.244, p = 0.221, d = 0.128, CI = [-0.794, 3.152]


### Non-visual feature-based clustering

In [14]:
# Per-feature clustering, early vs. late lists, within the feature rich condition.
# `features` comes from an earlier cell.
cond_data = results['fingerprint']['feature rich']
for idx, feature in enumerate(features):
    # Separator between consecutive reports only.
    if idx > 0:
        print('\n')

    print(f'{feature} clustering for feature rich (early lists) vs. feature rich (late lists):')
    ttest(cond_data, cond_data, x_col=feature, y_col=feature, x_lists='Early', y_lists='Late', independent_sample=False)

category clustering for feature rich (early lists) vs. feature rich (late lists):


t(66) = 3.684, p < 0.001, d = 0.220, CI = [1.829, 5.851]


size clustering for feature rich (early lists) vs. feature rich (late lists):
t(66) = 1.629, p = 0.108, d = 0.100, CI = [-0.333, 3.859]


wordLength clustering for feature rich (early lists) vs. feature rich (late lists):
t(66) = -0.100, p = 0.921, d = -0.010, CI = [-2.099, 1.870]


firstLetter clustering for feature rich (early lists) vs. feature rich (late lists):
t(66) = -0.412, p = 0.681, d = -0.045, CI = [-2.388, 1.636]


In [15]:
# Per-feature clustering, early vs. late lists, within the reduced condition.
# `features` comes from an earlier cell.
cond_data = results['fingerprint']['reduced']
for idx, feature in enumerate(features):
    # Separator between consecutive reports only.
    if idx > 0:
        print('\n')

    print(f'{feature} clustering for reduced (early lists) vs. reduced (late lists):')
    ttest(cond_data, cond_data, x_col=feature, y_col=feature, x_lists='Early', y_lists='Late', independent_sample=False)

category clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 2.755, p = 0.008, d = 0.177, CI = [0.781, 5.266]


size clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 3.081, p = 0.003, d = 0.201, CI = [1.204, 5.389]


wordLength clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 3.762, p < 0.001, d = 0.261, CI = [1.694, 6.639]


firstLetter clustering for reduced (early lists) vs. reduced (late lists):
t(60) = 1.721, p = 0.090, d = 0.175, CI = [-0.121, 4.149]


In [16]:
# Per-feature clustering, early vs. late lists, within the reduced (early) condition.
# `features` comes from an earlier cell.
cond_data = results['fingerprint']['reduced (early)']
last_idx = len(features) - 1
for idx, feature in enumerate(features):
    print(f'{feature} clustering for reduced (early) (early lists) vs. reduced (early) (late lists):')
    ttest(cond_data, cond_data, x_col=feature, y_col=feature, x_lists='Early', y_lists='Late', independent_sample=False)

    # No trailing separator after the final feature.
    if idx != last_idx:
        print('\n')

category clustering for reduced (early) (early lists) vs. reduced (early) (late lists):


t(41) = 0.707, p = 0.484, d = 0.068, CI = [-1.307, 2.659]


size clustering for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 0.803, p = 0.427, d = 0.079, CI = [-1.214, 2.875]


wordLength clustering for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 0.461, p = 0.648, d = 0.060, CI = [-1.619, 2.350]


firstLetter clustering for reduced (early) (early lists) vs. reduced (early) (late lists):
t(41) = 0.781, p = 0.439, d = 0.101, CI = [-1.112, 2.967]


In [17]:
# Per-feature clustering, early vs. late lists, within the reduced (late) condition.
# `features` comes from an earlier cell.
cond_data = results['fingerprint']['reduced (late)']
last_idx = len(features) - 1
for idx, feature in enumerate(features):
    print(f'{feature} clustering for reduced (late) (early lists) vs. reduced (late) (late lists):')
    ttest(cond_data, cond_data, x_col=feature, y_col=feature, x_lists='Early', y_lists='Late', independent_sample=False)

    # No trailing separator after the final feature.
    if idx != last_idx:
        print('\n')

category clustering for reduced (late) (early lists) vs. reduced (late) (late lists):


t(40) = -0.101, p = 0.920, d = -0.009, CI = [-2.279, 1.760]


size clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 0.555, p = 0.582, d = 0.058, CI = [-1.675, 2.458]


wordLength clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = 1.482, p = 0.146, d = 0.126, CI = [-0.468, 3.756]


firstLetter clustering for reduced (late) (early lists) vs. reduced (late) (late lists):
t(40) = -0.143, p = 0.887, d = -0.017, CI = [-2.425, 1.811]


## Feature rich vs. reduced ({early, late})

### Accuracy

In [18]:
# Accuracy: feature rich vs. reduced (early); no list filter is passed.
accuracy_by_cond = results['accuracy']

print('Accuracy for feature rich vs. reduced (early) (all lists):')
ttest(accuracy_by_cond['feature rich'], accuracy_by_cond['reduced (early)'])

Accuracy for feature rich vs. reduced (early) (all lists):
t(107) = -2.230, p = 0.028, d = -0.439, CI = [-4.282, -0.199]


In [19]:
# Accuracy: feature rich vs. reduced (late); no list filter is passed.
accuracy_by_cond = results['accuracy']

print('Accuracy for feature rich vs. reduced (late) (all lists):')
ttest(accuracy_by_cond['feature rich'], accuracy_by_cond['reduced (late)'])

Accuracy for feature rich vs. reduced (late) (all lists):
t(106) = -0.638, p = 0.525, d = -0.126, CI = [-2.710, 1.255]


### Temporal clustering

In [20]:
# Temporal clustering: feature rich vs. reduced (early); no list filter is passed.
fingerprint_by_cond = results['fingerprint']

print('Temporal clustering for feature rich vs. reduced (early) (all lists):')
ttest(
    fingerprint_by_cond['feature rich'],
    fingerprint_by_cond['reduced (early)'],
    x_col='temporal',
    y_col='temporal',
)

Temporal clustering for feature rich vs. reduced (early) (all lists):
t(107) = -1.379, p = 0.171, d = -0.271, CI = [-3.361, 0.658]


In [21]:
# Temporal clustering: feature rich vs. reduced (late); no list filter is passed.
fingerprint_by_cond = results['fingerprint']

print('Temporal clustering for feature rich vs. reduced (late) (all lists):')
ttest(
    fingerprint_by_cond['feature rich'],
    fingerprint_by_cond['reduced (late)'],
    x_col='temporal',
    y_col='temporal',
)

Temporal clustering for feature rich vs. reduced (late) (all lists):
t(106) = -0.535, p = 0.593, d = -0.106, CI = [-2.554, 1.367]


### Non-visual feature-based clustering

In [22]:
# Non-visual features compared between feature rich and reduced (early).
features = ['category', 'size', 'wordLength', 'firstLetter']

fingerprint_by_cond = results['fingerprint']
for idx, feature in enumerate(features):
    # Separator between consecutive reports only.
    if idx > 0:
        print('\n')

    print(f'{feature} clustering for feature rich vs. reduced (early) (all lists):')
    ttest(fingerprint_by_cond['feature rich'], fingerprint_by_cond['reduced (early)'], x_col=feature, y_col=feature)

category clustering for feature rich vs. reduced (early) (all lists):
t(107) = 0.013, p = 0.989, d = 0.003, CI = [-1.776, 2.098]


size clustering for feature rich vs. reduced (early) (all lists):
t(107) = -0.349, p = 0.728, d = -0.069, CI = [-2.186, 1.601]


wordLength clustering for feature rich vs. reduced (early) (all lists):
t(107) = -0.581, p = 0.563, d = -0.114, CI = [-2.273, 1.451]


firstLetter clustering for feature rich vs. reduced (early) (all lists):
t(107) = 0.636, p = 0.526, d = 0.125, CI = [-1.279, 2.909]


In [23]:
# Non-visual features compared between feature rich and reduced (late).
features = ['category', 'size', 'wordLength', 'firstLetter']

fingerprint_by_cond = results['fingerprint']
for idx, feature in enumerate(features):
    # Separator between consecutive reports only.
    if idx > 0:
        print('\n')

    print(f'{feature} clustering for feature rich vs. reduced (late) (all lists):')
    ttest(fingerprint_by_cond['feature rich'], fingerprint_by_cond['reduced (late)'], x_col=feature, y_col=feature)

category clustering for feature rich vs. reduced (late) (all lists):
t(106) = -1.345, p = 0.181, d = -0.267, CI = [-3.492, 0.530]


size clustering for feature rich vs. reduced (late) (all lists):
t(106) = -1.441, p = 0.153, d = -0.286, CI = [-3.673, 0.509]


wordLength clustering for feature rich vs. reduced (late) (all lists):
t(106) = -1.261, p = 0.210, d = -0.250, CI = [-3.692, 0.886]


firstLetter clustering for feature rich vs. reduced (late) (all lists):
t(106) = 0.939, p = 0.350, d = 0.186, CI = [-1.046, 3.016]


## Reduced vs. reduced ({early, late})

### Accuracy

In [24]:
# Accuracy: reduced vs. reduced (early); no list filter is passed.
x_data = results['accuracy']['reduced']
y_data = results['accuracy']['reduced (early)']

print('Accuracy for reduced vs. reduced (early) (all lists):')
ttest(x_data, y_data)

Accuracy for reduced vs. reduced (early) (all lists):
t(101) = -2.045, p = 0.043, d = -0.410, CI = [-4.086, -0.154]


In [25]:
# Accuracy: reduced vs. reduced (late); no list filter is passed.
x_data = results['accuracy']['reduced']
y_data = results['accuracy']['reduced (late)']

print('Accuracy for reduced vs. reduced (late) (all lists):')
ttest(x_data, y_data)

Accuracy for reduced vs. reduced (late) (all lists):
t(100) = -0.407, p = 0.685, d = -0.082, CI = [-2.440, 1.623]


### Temporal clustering

In [26]:
# Temporal clustering: reduced vs. reduced (early); no list filter is passed.
x_data = results['fingerprint']['reduced']
y_data = results['fingerprint']['reduced (early)']

print('Temporal clustering for reduced vs. reduced (early) (all lists):')
ttest(x_data, y_data, x_col='temporal', y_col='temporal')

Temporal clustering for reduced vs. reduced (early) (all lists):
t(101) = -10.689, p < 0.001, d = -2.143, CI = [-13.488, -8.527]


In [27]:
# Temporal clustering: reduced vs. reduced (late); no list filter is passed.
x_data = results['fingerprint']['reduced']
y_data = results['fingerprint']['reduced (late)']

print('Temporal clustering for reduced vs. reduced (late) (all lists):')
ttest(x_data, y_data, x_col='temporal', y_col='temporal')

Temporal clustering for reduced vs. reduced (late) (all lists):
t(100) = -9.885, p < 0.001, d = -1.996, CI = [-14.408, -6.722]


### Non-visual feature-based clustering

In [28]:
# Non-visual features compared between reduced and reduced (early).
features = ['category', 'size', 'wordLength', 'firstLetter']

x_data = results['fingerprint']['reduced']
y_data = results['fingerprint']['reduced (early)']
last_idx = len(features) - 1
for idx, feature in enumerate(features):
    print(f'{feature} clustering for reduced vs. reduced (early) (all lists):')
    ttest(x_data, y_data, x_col=feature, y_col=feature)

    # No trailing separator after the final feature.
    if idx != last_idx:
        print('\n')

category clustering for reduced vs. reduced (early) (all lists):
t(101) = -9.538, p < 0.001, d = -1.912, CI = [-12.128, -7.347]


size clustering for reduced vs. reduced (early) (all lists):
t(101) = -12.222, p < 0.001, d = -2.451, CI = [-15.488, -9.883]


wordLength clustering for reduced vs. reduced (early) (all lists):
t(101) = -10.620, p < 0.001, d = -2.129, CI = [-13.777, -8.273]


firstLetter clustering for reduced vs. reduced (early) (all lists):
t(101) = -5.213, p < 0.001, d = -1.045, CI = [-7.416, -3.326]


In [29]:
# Non-visual features compared between reduced and reduced (late).
features = ['category', 'size', 'wordLength', 'firstLetter']

x_data = results['fingerprint']['reduced']
y_data = results['fingerprint']['reduced (late)']
last_idx = len(features) - 1
for idx, feature in enumerate(features):
    print(f'{feature} clustering for reduced vs. reduced (late) (all lists):')
    ttest(x_data, y_data, x_col=feature, y_col=feature)

    # No trailing separator after the final feature.
    if idx != last_idx:
        print('\n')

category clustering for reduced vs. reduced (late) (all lists):
t(100) = -10.436, p < 0.001, d = -2.107, CI = [-15.817, -7.047]


size clustering for reduced vs. reduced (late) (all lists):
t(100) = -12.413, p < 0.001, d = -2.507, CI = [-18.340, -8.377]


wordLength clustering for reduced vs. reduced (late) (all lists):
t(100) = -9.672, p < 0.001, d = -1.953, CI = [-14.421, -6.222]


firstLetter clustering for reduced vs. reduced (late) (all lists):
t(100) = -4.555, p < 0.001, d = -0.920, CI = [-7.238, -2.377]


# Order manipulation analyses

- When lists are sorted by a given feature, how is memory performance affected (relative to feature rich -- early lists only)?
- Do some order manipulations matter more than others?  E.g. compare semantic vs. lexicographic vs. visual -- early lists only

### Accuracy

In [30]:
# Accuracy of each order-manipulation condition vs. feature rich, restricted to
# early lists on both sides. The condition keys double as the printed labels.
for idx, cond in enumerate(('category', 'size', 'length', 'first letter', 'color', 'location')):
    # Every heading after the first starts with a newline.
    lead = '\n' if idx else ''
    print(f'{lead}Accuracy for {cond} vs. feature rich (early lists):')
    ttest(results['accuracy'][cond], results['accuracy']['feature rich'], x_lists='Early', y_lists='Early')

Accuracy for category vs. feature rich (early lists):


t(95) = 3.034, p = 0.003, d = 0.667, CI = [1.168, 5.159]

Accuracy for size vs. feature rich (early lists):
t(95) = -1.013, p = 0.314, d = -0.223, CI = [-3.276, 0.801]

Accuracy for length vs. feature rich (early lists):
t(95) = -0.550, p = 0.584, d = -0.121, CI = [-2.559, 1.434]

Accuracy for first letter vs. feature rich (early lists):
t(95) = -0.690, p = 0.492, d = -0.152, CI = [-2.646, 1.348]

Accuracy for color vs. feature rich (early lists):
t(96) = 1.850, p = 0.067, d = 0.402, CI = [0.047, 3.857]

Accuracy for location vs. feature rich (early lists):
t(95) = 0.043, p = 0.966, d = 0.010, CI = [-1.838, 1.743]


### Temporal clustering

In [31]:
# Temporal clustering of each order-manipulation condition vs. feature rich,
# restricted to early lists on both sides.
for idx, cond in enumerate(('category', 'size', 'length', 'first letter', 'color', 'location')):
    # Every heading after the first starts with a newline.
    lead = '\n' if idx else ''
    print(f'{lead}Temporal clustering for {cond} vs. feature rich (early lists):')
    ttest(
        results['fingerprint'][cond],
        results['fingerprint']['feature rich'],
        x_lists='Early',
        y_lists='Early',
        x_col='temporal',
        y_col='temporal',
    )

Temporal clustering for category vs. feature rich (early lists):
t(95) = 8.813, p < 0.001, d = 1.936, CI = [6.758, 12.404]

Temporal clustering for size vs. feature rich (early lists):
t(95) = 2.630, p = 0.010, d = 0.578, CI = [0.632, 4.810]

Temporal clustering for length vs. feature rich (early lists):
t(95) = -1.547, p = 0.125, d = -0.340, CI = [-3.697, 0.270]

Temporal clustering for first letter vs. feature rich (early lists):
t(95) = 2.858, p = 0.005, d = 0.628, CI = [1.133, 4.906]

Temporal clustering for color vs. feature rich (early lists):
t(96) = -1.339, p = 0.184, d = -0.291, CI = [-3.074, 0.308]

Temporal clustering for location vs. feature rich (early lists):
t(95) = 1.705, p = 0.092, d = 0.374, CI = [-0.088, 3.604]


### Feature-based clustering

In [32]:
# Order-manipulation conditions (keys into results['fingerprint']) and the
# fingerprint columns compared for each of them.
conds = ['category', 'size', 'length', 'first letter', 'color', 'location']
features = ['category', 'size', 'wordLength', 'firstLetter', 'color', 'location']

final_cond = len(conds) - 1
final_feature = len(features) - 1
for cond_idx, cond in enumerate(conds):
    on_final_cond = cond_idx == final_cond

    for feat_idx, feature in enumerate(features):
        print(f'{feature} clustering for {cond} vs. feature rich (early lists):')
        ttest(results['fingerprint'][cond], results['fingerprint']['feature rich'], x_col=feature, y_col=feature, x_lists='Early', y_lists='Early')

        # Blank-line separator after every report except the very last one overall.
        if not (on_final_cond and feat_idx == final_feature):
            print('\n')

    # Rule between condition groups (none after the final group).
    if not on_final_cond:
        print('--- \n')

category clustering for category vs. feature rich (early lists):
t(95) = 4.429, p < 0.001, d = 0.973, CI = [3.027, 6.216]


size clustering for category vs. feature rich (early lists):
t(95) = 3.727, p < 0.001, d = 0.819, CI = [2.129, 5.753]


wordLength clustering for category vs. feature rich (early lists):
t(95) = 0.154, p = 0.878, d = 0.034, CI = [-1.797, 1.909]


firstLetter clustering for category vs. feature rich (early lists):
t(95) = -1.610, p = 0.111, d = -0.354, CI = [-3.880, 0.257]


color clustering for category vs. feature rich (early lists):
t(95) = -0.375, p = 0.709, d = -0.082, CI = [-2.648, 1.503]


location clustering for category vs. feature rich (early lists):
t(95) = -0.347, p = 0.730, d = -0.076, CI = [-2.237, 1.449]


--- 

category clustering for size vs. feature rich (early lists):
t(95) = 0.330, p = 0.742, d = 0.073, CI = [-1.783, 2.241]


size clustering for size vs. feature rich (early lists):
t(95) = 2.421, p = 0.017, d = 0.532, CI = [0.583, 4.484]


wordL

## Combine "categories" of features:
  - semantic = category + size
  - lexicographic = length + first letter
  - visual = color + location

In [33]:
# Keep only the three feature "categories" used to merge conditions.
groups = {
    name: spec
    for name, spec in feature_groupings.items()
    if name in ('semantic', 'lexicographic', 'visual')
}

# Merge only the 'fingerprint' and 'accuracy' entries of `results`;
# any other entries are skipped.
merged_results = {
    metric: merge_results(metric_results, groups)
    for metric, metric_results in results.items()
    if metric in ('fingerprint', 'accuracy')
}

## Compare semantic, lexicographic, and visual vs. feature rich

### Accuracy

In [34]:
# Accuracy of each merged condition group vs. feature rich (early lists on both sides).
for idx, group in enumerate(('semantic', 'lexicographic', 'visual')):
    # Every heading after the first starts with a newline.
    lead = '\n' if idx else ''
    print(f'{lead}Accuracy for {group} vs. feature rich (early lists):')
    ttest(merged_results['accuracy'][group], results['accuracy']['feature rich'], x_lists='Early', y_lists='Early')

Accuracy for semantic vs. feature rich (early lists):
t(125) = 1.197, p = 0.233, d = 0.213, CI = [-0.827, 3.186]

Accuracy for lexicographic vs. feature rich (early lists):
t(125) = -0.776, p = 0.439, d = -0.138, CI = [-2.780, 1.294]

Accuracy for visual vs. feature rich (early lists):
t(126) = 1.256, p = 0.212, d = 0.222, CI = [-0.620, 2.964]


In [35]:
# NOTE(review): this cell repeats the preceding accuracy cell (same labels, same
# ttest calls). In the saved outputs the t/p/d values match but the CI bounds
# differ between the two runs, which suggests the CI is estimated stochastically
# without a fixed seed — confirm in analyze.ttest, and consider removing this cell.
group_names = ('semantic', 'lexicographic', 'visual')
heading_leads = ('', '\n', '\n')
for lead, group in zip(heading_leads, group_names):
    print(f'{lead}Accuracy for {group} vs. feature rich (early lists):')
    ttest(merged_results['accuracy'][group], results['accuracy']['feature rich'], x_lists='Early', y_lists='Early')

Accuracy for semantic vs. feature rich (early lists):
t(125) = 1.197, p = 0.233, d = 0.213, CI = [-0.619, 3.149]

Accuracy for lexicographic vs. feature rich (early lists):
t(125) = -0.776, p = 0.439, d = -0.138, CI = [-2.815, 1.047]

Accuracy for visual vs. feature rich (early lists):
t(126) = 1.256, p = 0.212, d = 0.222, CI = [-0.771, 3.156]


### Temporal clustering

In [36]:
# Temporal clustering of each merged condition group vs. feature rich
# (early lists on both sides).
for idx, group in enumerate(('semantic', 'lexicographic', 'visual')):
    # Every heading after the first starts with a newline.
    lead = '\n' if idx else ''
    print(f'{lead}Temporal clustering for {group} vs. feature rich (early lists):')
    ttest(
        merged_results['fingerprint'][group],
        results['fingerprint']['feature rich'],
        x_lists='Early',
        y_lists='Early',
        x_col='temporal',
        y_col='temporal',
    )

Temporal clustering for semantic vs. feature rich (early lists):
t(125) = 6.474, p < 0.001, d = 1.151, CI = [4.411, 9.003]

Temporal clustering for lexicographic vs. feature rich (early lists):
t(125) = 0.786, p = 0.433, d = 0.140, CI = [-1.196, 2.779]

Temporal clustering for visual vs. feature rich (early lists):
t(126) = 0.254, p = 0.800, d = 0.045, CI = [-1.687, 2.183]


### Feature-based clustering

In [37]:
# Merged condition groups (keys into merged_results['fingerprint']) and the
# fingerprint columns compared for each; later cells read `conds` and `features`.
conds = ['semantic', 'lexicographic', 'visual']
features = ['category', 'size', 'wordLength', 'firstLetter', 'color', 'location']

final_cond = len(conds) - 1
final_feature = len(features) - 1
for cond_idx, cond in enumerate(conds):
    on_final_cond = cond_idx == final_cond

    for feat_idx, feature in enumerate(features):
        print(f'{feature} clustering for {cond} vs. feature rich (early lists):')
        ttest(merged_results['fingerprint'][cond], results['fingerprint']['feature rich'], x_col=feature, y_col=feature, x_lists='Early', y_lists='Early')

        # Blank-line separator after every report except the very last one overall.
        if not (on_final_cond and feat_idx == final_feature):
            print('\n')

    # Rule between condition groups (none after the final group).
    if not on_final_cond:
        print('--- \n')

category clustering for semantic vs. feature rich (early lists):
t(125) = 2.722, p = 0.007, d = 0.484, CI = [0.863, 4.894]


size clustering for semantic vs. feature rich (early lists):
t(125) = 3.866, p < 0.001, d = 0.687, CI = [2.054, 5.916]


wordLength clustering for semantic vs. feature rich (early lists):
t(125) = 0.521, p = 0.603, d = 0.093, CI = [-1.419, 2.313]


firstLetter clustering for semantic vs. feature rich (early lists):
t(125) = -0.842, p = 0.401, d = -0.150, CI = [-2.948, 1.254]


color clustering for semantic vs. feature rich (early lists):
t(125) = -0.650, p = 0.517, d = -0.116, CI = [-2.836, 1.236]


location clustering for semantic vs. feature rich (early lists):
t(125) = -0.251, p = 0.802, d = -0.045, CI = [-2.378, 1.738]


--- 

category clustering for lexicographic vs. feature rich (early lists):
t(125) = -1.040, p = 0.301, d = -0.185, CI = [-3.154, 0.948]


size clustering for lexicographic vs. feature rich (early lists):
t(125) = 0.006, p = 0.995, d = 0.001,

## Compare each pair of (category of) order manipulation conditions (semantic, lexicographic, and visual)

### Accuracy

In [38]:
# Accuracy for every unordered pair of merged condition groups, first on early
# lists and then on late lists. `conds` comes from an earlier cell.
for first_idx, first in enumerate(conds):
    for second in conds[first_idx + 1:]:
        for phase in ('Early', 'Late'):
            # The late-lists heading is preceded by a newline; the early one is not.
            lead = '' if phase == 'Early' else '\n'
            print(f'{lead}Accuracy for {first} vs. {second} ({phase.lower()} lists):')
            ttest(merged_results['accuracy'][first], merged_results['accuracy'][second], x_lists=phase, y_lists=phase)

        print('\n\n')

Accuracy for semantic vs. lexicographic (early lists):
t(118) = 1.936, p = 0.055, d = 0.353, CI = [0.199, 4.017]

Accuracy for semantic vs. lexicographic (late lists):
t(118) = -0.388, p = 0.699, d = -0.071, CI = [-2.462, 1.566]



Accuracy for semantic vs. visual (early lists):
t(119) = 0.113, p = 0.910, d = 0.021, CI = [-1.881, 2.053]

Accuracy for semantic vs. visual (late lists):
t(119) = -0.833, p = 0.407, d = -0.151, CI = [-2.862, 1.110]



Accuracy for lexicographic vs. visual (early lists):
t(119) = -2.145, p = 0.034, d = -0.390, CI = [-4.151, -0.212]

Accuracy for lexicographic vs. visual (late lists):
t(119) = -0.352, p = 0.726, d = -0.064, CI = [-2.239, 1.747]





### Temporal clustering

In [39]:
# Temporal clustering for every unordered pair of merged condition groups, first
# on early lists and then on late lists. `conds` comes from an earlier cell.
for first_idx, first in enumerate(conds):
    for second in conds[first_idx + 1:]:
        for phase in ('Early', 'Late'):
            # The late-lists heading is preceded by a newline; the early one is not.
            lead = '' if phase == 'Early' else '\n'
            print(f'{lead}Temporal clustering for {first} vs. {second} ({phase.lower()} lists):')
            ttest(
                merged_results['fingerprint'][first],
                merged_results['fingerprint'][second],
                x_lists=phase,
                y_lists=phase,
                x_col='temporal',
                y_col='temporal',
            )

        print('\n')

Temporal clustering for semantic vs. lexicographic (early lists):


t(118) = 5.620, p < 0.001, d = 1.026, CI = [3.596, 8.202]

Temporal clustering for semantic vs. lexicographic (late lists):
t(118) = -0.758, p = 0.450, d = -0.138, CI = [-2.756, 1.150]


Temporal clustering for semantic vs. visual (early lists):
t(119) = 6.613, p < 0.001, d = 1.202, CI = [4.354, 9.324]

Temporal clustering for semantic vs. visual (late lists):
t(119) = -0.322, p = 0.748, d = -0.059, CI = [-2.419, 1.715]


Temporal clustering for lexicographic vs. visual (early lists):
t(119) = 0.589, p = 0.557, d = 0.107, CI = [-1.461, 2.574]

Temporal clustering for lexicographic vs. visual (late lists):
t(119) = 0.562, p = 0.575, d = 0.102, CI = [-1.560, 2.538]




### Feature-based clustering

In [40]:
# Per-feature clustering for every unordered pair of merged condition groups,
# on early lists and then late lists. `conds` and `features` come from an earlier cell.
for first_idx, first in enumerate(conds):
    for second in conds[first_idx + 1:]:
        x_data = merged_results['fingerprint'][first]
        y_data = merged_results['fingerprint'][second]

        for feature in features:
            for phase in ('Early', 'Late'):
                # The late-lists heading is preceded by a newline; the early one is not.
                lead = '' if phase == 'Early' else '\n'
                print(f'{lead}{feature} clustering for {first} vs. {second} ({phase.lower()} lists):')
                ttest(x_data, y_data, x_col=feature, y_col=feature, x_lists=phase, y_lists=phase)

            print('\n')

category clustering for semantic vs. lexicographic (early lists):
t(118) = 3.667, p < 0.001, d = 0.670, CI = [1.704, 5.943]

category clustering for semantic vs. lexicographic (late lists):
t(118) = -0.720, p = 0.473, d = -0.131, CI = [-2.697, 1.131]


size clustering for semantic vs. lexicographic (early lists):
t(118) = 4.043, p < 0.001, d = 0.738, CI = [2.073, 6.171]

size clustering for semantic vs. lexicographic (late lists):
t(118) = -1.897, p = 0.060, d = -0.346, CI = [-3.929, 0.058]


wordLength clustering for semantic vs. lexicographic (early lists):
t(118) = -3.390, p < 0.001, d = -0.619, CI = [-5.503, -1.532]

wordLength clustering for semantic vs. lexicographic (late lists):
t(118) = 1.153, p = 0.251, d = 0.211, CI = [-0.837, 3.244]


firstLetter clustering for semantic vs. lexicographic (early lists):
t(118) = -5.705, p < 0.001, d = -1.042, CI = [-7.825, -3.781]

firstLetter clustering for semantic vs. lexicographic (late lists):
t(118) = -0.880, p = 0.381, d = -0.161, CI 

# When *early* lists are sorted by a given feature, how is memory performance on *late* lists affected (relative to *feature rich* late lists)?

### Accuracy

In [41]:
# Accuracy of each merged condition group vs. feature rich (late lists on both sides).
for idx, group in enumerate(('semantic', 'lexicographic', 'visual')):
    # Every heading after the first starts with a newline.
    lead = '\n' if idx else ''
    print(f'{lead}Accuracy for {group} vs. feature rich (late lists):')
    ttest(merged_results['accuracy'][group], results['accuracy']['feature rich'], x_lists='Late', y_lists='Late')

Accuracy for semantic vs. feature rich (late lists):
t(125) = 0.487, p = 0.627, d = 0.087, CI = [-1.444, 2.375]

Accuracy for lexicographic vs. feature rich (late lists):
t(125) = 0.878, p = 0.382, d = 0.156, CI = [-1.086, 2.872]

Accuracy for visual vs. feature rich (late lists):
t(126) = 1.437, p = 0.153, d = 0.254, CI = [-0.613, 3.496]


### Temporal clustering

In [42]:
# Late-list temporal clustering: compare each merged order-manipulation group
# against the feature rich results, using the 'temporal' fingerprint column.
feature_rich_fingerprint = results['fingerprint']['feature rich']

for position, group in enumerate(['semantic', 'lexicographic', 'visual']):
    # every label after the first is preceded by a blank line
    leading_newline = '\n' if position > 0 else ''
    print(f'{leading_newline}Temporal clustering for {group} vs. feature rich (late lists):')
    ttest(merged_results['fingerprint'][group], feature_rich_fingerprint, x_lists='Late', y_lists='Late', x_col='temporal', y_col='temporal')

Temporal clustering for semantic vs. feature rich (late lists):


t(125) = 0.157, p = 0.875, d = 0.028, CI = [-1.852, 1.958]

Temporal clustering for lexicographic vs. feature rich (late lists):
t(125) = 0.998, p = 0.320, d = 0.177, CI = [-0.966, 2.967]

Temporal clustering for visual vs. feature rich (late lists):
t(126) = 0.548, p = 0.585, d = 0.097, CI = [-1.662, 2.399]


## Feature-based clustering

In [43]:
conds = ['semantic', 'lexicographic', 'visual']
features = ['category', 'size', 'wordLength', 'firstLetter', 'color', 'location']

# Late-list clustering along every feature dimension: each merged
# order-manipulation group vs. the feature rich results.
final_cond_idx = len(conds) - 1
final_feature_idx = len(features) - 1
feature_rich_fingerprint = results['fingerprint']['feature rich']

for cond_idx, cond in enumerate(conds):
    more_conds_follow = cond_idx < final_cond_idx

    for feature_idx, feature in enumerate(features):
        print(f'{feature} clustering for {cond} vs. feature rich (late lists):')
        ttest(merged_results['fingerprint'][cond], feature_rich_fingerprint, x_col=feature, y_col=feature, x_lists='Late', y_lists='Late')

        # spacer after every comparison except the very last one overall
        if more_conds_follow or feature_idx < final_feature_idx:
            print('\n')

    # rule between groups, but not after the final group
    if more_conds_follow:
        print('--- \n')

category clustering for semantic vs. feature rich (late lists):
t(125) = -0.041, p = 0.967, d = -0.007, CI = [-2.011, 1.903]


size clustering for semantic vs. feature rich (late lists):
t(125) = -0.989, p = 0.324, d = -0.176, CI = [-3.042, 1.049]


wordLength clustering for semantic vs. feature rich (late lists):
t(125) = -0.045, p = 0.964, d = -0.008, CI = [-2.108, 1.821]


firstLetter clustering for semantic vs. feature rich (late lists):
t(125) = -0.369, p = 0.713, d = -0.066, CI = [-2.430, 1.570]


color clustering for semantic vs. feature rich (late lists):
t(125) = -0.602, p = 0.548, d = -0.107, CI = [-2.762, 1.335]


location clustering for semantic vs. feature rich (late lists):
t(125) = -0.521, p = 0.603, d = -0.093, CI = [-2.558, 1.284]


--- 

category clustering for lexicographic vs. feature rich (late lists):
t(125) = 0.678, p = 0.499, d = 0.121, CI = [-1.379, 2.591]


size clustering for lexicographic vs. feature rich (late lists):
t(125) = 0.915, p = 0.362, d = 0.163, C

# Individual difference feature clustering analyses, part 1

Compute the correlations (across participants) between feature clustering, recall probability, and temporal clustering for early and late lists. As a summary, also compute the correlations (across conditions) between the per-condition averages. Note: for each feature clustering score, consider only the condition of interest (e.g., for the category condition consider category clustering, for the length condition consider length clustering, and so on).

Start by creating a dataframe that combines across all of the order manipulation conditions:
  - index: subject, list group --- but rename subjects so they're unique across conditions
  - columns:
    - feature clustering score: pick out the appropriate element of that list/subject's fingerprint, based on the current condition
    - temporal clustering score
    - recall probability
    - condition

In [44]:
# Build the combined per-subject table from the precomputed `results`; the displayed output below
# shows one row per subject and list group with clustering scores and recall probability.
clustering_results = create_clustering_df(results)
clustering_results

Unnamed: 0,Subject,List,Condition,Feature clustering score,Corrected feature clustering score,Temporal clustering score,Recall probability
0,0,Early,Feature rich,0.495229,0.485292,0.562750,0.484375
1,1,Early,Feature rich,0.559542,0.582875,0.486750,0.859375
2,2,Early,Feature rich,0.538500,0.532000,0.660500,0.625000
3,3,Early,Feature rich,0.519375,0.474292,0.596500,0.531250
4,4,Early,Feature rich,0.566917,0.565833,0.680000,0.546875
...,...,...,...,...,...,...,...
491,243,Late,Location,0.407500,0.388000,0.698875,0.484375
492,244,Late,Location,0.553750,0.503000,0.603500,0.671875
493,245,Late,Location,0.598250,0.563750,0.768250,0.609375
494,246,Late,Location,0.405000,0.428750,0.683750,0.539062


In [45]:
def corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists=None, ylists=None, seed=None):
    """Print Pearson correlations (with bootstrap 95% CIs) between two columns.

    Rows whose ``Condition`` is "Feature rich" are dropped first. Three sets of
    correlations are then printed: all remaining rows combined, each condition
    separately (followed by a one-line LaTeX-style summary of the weakest r and
    largest p), and the per-condition means correlated across conditions.

    Parameters
    ----------
    clustering_results : pandas.DataFrame
        Must contain the columns ``Condition``, ``List``, ``x`` and ``y``.
    x, y : str
        Names of the columns to correlate.
    xlists, ylists : optional
        If given, only rows whose ``List`` equals this value supply the x
        (respectively y) observations; ``None`` keeps every row.
    seed : int, optional
        Seed for the bootstrap resampling. ``None`` (the default) keeps drawing
        from NumPy's global random state, as before; an integer makes the
        printed confidence intervals reproducible.

    Returns
    -------
    None
        Results are printed only.

    Notes
    -----
    x and y observations are paired by row position, not by subject label.
    NOTE(review): this assumes the x-list and y-list rows appear in the same
    subject order -- confirm against ``create_clustering_df``.
    """
    clustering_results = clustering_results.query('Condition != "Feature rich"')

    # A private generator when seeded; otherwise the legacy global stream.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def print_corr(a, b, label=None, n_iter=1000, max_tries=10):
        """Print r, p and a bootstrap 95% CI for Series a and b; return (r, p)."""
        r_value, p_value = stats.pearsonr(a, b)

        # Bootstrap-estimated 95% confidence interval. Pearson's r is undefined
        # when EITHER resampled variable is constant, so such draws are redrawn
        # (up to max_tries redraws); if every attempt is degenerate the
        # iteration is skipped instead of contributing a NaN to the percentiles.
        bootstrapped = []
        for _ in range(n_iter):
            for _attempt in range(max_tries + 1):
                inds = rng.randint(0, len(a), len(a))
                a_boot, b_boot = a.iloc[inds], b.iloc[inds]
                if len(np.unique(a_boot)) > 1 and len(np.unique(b_boot)) > 1:
                    bootstrapped.append(stats.pearsonr(a_boot, b_boot)[0])
                    break

        if bootstrapped:
            low, high = np.percentile(bootstrapped, [2.5, 97.5])
        else:
            low, high = np.nan, np.nan

        prefix = '' if label is None else f'{label}: '
        p_string = 'p < 0.001' if p_value < 0.001 else f'p = {p_value:.3f}'

        print(f'\t{prefix}r({len(a) - 2}) = {r_value:.3f}, {p_string}, CI = [{low:.3f}, {high:.3f}]')
        return r_value, p_value

    if xlists is not None:
        x_results = clustering_results.query('List == @xlists')
    else:
        x_results = clustering_results

    if ylists is not None:
        y_results = clustering_results.query('List == @ylists')
    else:
        y_results = clustering_results

    print(f'Correlations between {x} and {y} (x lists: {xlists}, y lists: {ylists})')
    # Combine across all conditions, compute correlation across subjects
    print_corr(x_results[x], y_results[y], label='Combined (across subjects)')

    # Per-condition (across subjects)
    print('\n')
    min_r = np.inf
    max_p = -np.inf

    for c in clustering_results['Condition'].unique():
        r_value, p_value = print_corr(x_results.query('Condition == @c')[x], y_results.query('Condition == @c')[y], label=f'{c} (across subjects)')
        min_r = min(min_r, r_value)
        max_p = max(max_p, p_value)

    # Avoid reporting a bound of "0.000" when every p-value is tiny.
    p_bound = '$< 0.001$' if max_p < 0.001 else f'$\\leq {max_p:.3f}$'
    print(f'\tWithin condition, across subjects: all $r$s $\\geq {min_r:.3f}$, all $p$s {p_bound}')

    # Across-condition correlation (groupby sorts by Condition, so the two
    # series of means line up condition-for-condition)
    print('\n')
    x_ave = x_results.groupby('Condition').mean(numeric_only=True)[x]
    y_ave = y_results.groupby('Condition').mean(numeric_only=True)[y]
    print_corr(x_ave, y_ave, label='Across conditions')

## Recall probability vs. feature clustering

### Early vs. early

In [46]:
# Early-list feature clustering (x) vs. early-list recall probability (y).
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Early', ylists='Early')

Correlations between Feature clustering score and Recall probability (x lists: Early, y lists: Early)


	Combined (across subjects): : r(179) = 0.492, p < 0.001, CI = [0.363, 0.607]


	Category (across subjects): : r(28) = 0.862, p < 0.001, CI = [0.774, 0.921]
	Size (across subjects): : r(28) = 0.897, p < 0.001, CI = [0.808, 0.948]
	Length (across subjects): : r(28) = 0.481, p = 0.007, CI = [0.077, 0.721]
	First letter (across subjects): : r(28) = 0.414, p = 0.023, CI = [-0.011, 0.733]
	Color (across subjects): : r(29) = 0.331, p = 0.069, CI = [-0.039, 0.619]
	Location (across subjects): : r(28) = 0.360, p = 0.051, CI = [0.042, 0.620]
	Within condition, across subjects: all $r$s $\geq 0.331$, all $p$s $\leq 0.069$


	Across conditions: : r(4) = 0.511, p = 0.300, CI = [-0.999, 0.997]


### Late vs. late

In [47]:
# Late-list feature clustering (x) vs. late-list recall probability (y).
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Late', ylists='Late')

Correlations between Feature clustering score and Recall probability (x lists: Late, y lists: Late)


	Combined (across subjects): : r(179) = 0.403, p < 0.001, CI = [0.263, 0.522]


	Category (across subjects): : r(28) = 0.662, p < 0.001, CI = [0.343, 0.871]
	Size (across subjects): : r(28) = 0.744, p < 0.001, CI = [0.488, 0.916]
	Length (across subjects): : r(28) = 0.520, p = 0.003, CI = [0.238, 0.730]
	First letter (across subjects): : r(28) = 0.404, p = 0.027, CI = [-0.033, 0.721]
	Color (across subjects): : r(29) = 0.532, p = 0.002, CI = [0.263, 0.745]
	Location (across subjects): : r(28) = 0.419, p = 0.021, CI = [0.089, 0.658]
	Within condition, across subjects: all $r$s $\geq 0.404$, all $p$s $\leq 0.027$


	Across conditions: : r(4) = -0.304, p = 0.559, CI = [-0.875, 0.684]


### Late vs. Early

In [48]:
# Early-list feature clustering (x) vs. late-list recall probability (y).
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Early', ylists='Late')

Correlations between Feature clustering score and Recall probability (x lists: Early, y lists: Late)
	Combined (across subjects): : r(179) = 0.230, p = 0.002, CI = [0.081, 0.369]


	Category (across subjects): : r(28) = 0.474, p = 0.008, CI = [0.207, 0.699]
	Size (across subjects): : r(28) = 0.574, p < 0.001, CI = [0.383, 0.747]
	Length (across subjects): : r(28) = 0.405, p = 0.027, CI = [0.113, 0.622]
	First letter (across subjects): : r(28) = 0.385, p = 0.035, CI = [-0.038, 0.751]
	Color (across subjects): : r(29) = 0.212, p = 0.251, CI = [-0.191, 0.533]
	Location (across subjects): : r(28) = 0.320, p = 0.085, CI = [-0.025, 0.603]
	Within condition, across subjects: all $r$s $\geq 0.212$, all $p$s $\leq 0.251$


	Across conditions: : r(4) = -0.338, p = 0.512, CI = [-0.988, 0.657]


### Early vs. Late

In [49]:
# Late-list feature clustering (x) vs. early-list recall probability (y).
corr_helper(clustering_results, x='Feature clustering score', y='Recall probability', xlists='Late', ylists='Early')

Correlations between Feature clustering score and Recall probability (x lists: Late, y lists: Early)
	Combined (across subjects): : r(179) = 0.464, p < 0.001, CI = [0.326, 0.580]


	Category (across subjects): : r(28) = 0.687, p < 0.001, CI = [0.478, 0.815]
	Size (across subjects): : r(28) = 0.561, p = 0.001, CI = [0.269, 0.754]
	Length (across subjects): : r(28) = 0.438, p = 0.015, CI = [0.074, 0.714]
	First letter (across subjects): : r(28) = 0.377, p = 0.040, CI = [0.057, 0.682]
	Color (across subjects): : r(29) = 0.431, p = 0.016, CI = [0.160, 0.655]
	Location (across subjects): : r(28) = 0.395, p = 0.031, CI = [0.112, 0.635]
	Within condition, across subjects: all $r$s $\geq 0.377$, all $p$s $\leq 0.040$


	Across conditions: : r(4) = 0.451, p = 0.369, CI = [-0.853, 0.998]


## Temporal clustering vs. feature clustering

### Early vs. early

In [50]:
# Early-list feature clustering (x) vs. early-list temporal clustering (y).
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Early', ylists='Early')

Correlations between Feature clustering score and Temporal clustering score (x lists: Early, y lists: Early)
	Combined (across subjects): : r(179) = 0.916, p < 0.001, CI = [0.888, 0.937]


	Category (across subjects): : r(28) = 0.965, p < 0.001, CI = [0.915, 0.993]
	Size (across subjects): : r(28) = 0.926, p < 0.001, CI = [0.869, 0.963]
	Length (across subjects): : r(28) = 0.945, p < 0.001, CI = [0.886, 0.976]
	First letter (across subjects): : r(28) = 0.855, p < 0.001, CI = [0.726, 0.934]
	Color (across subjects): : r(29) = 0.817, p < 0.001, CI = [0.685, 0.900]
	Location (across subjects): : r(28) = 0.883, p < 0.001, CI = [0.759, 0.950]
	Within condition, across subjects: all $r$s $\geq 0.817$, all $p$s $\leq 0.000$


	Across conditions: : r(4) = 0.946, p = 0.004, CI = [0.545, 1.000]


### Late vs. late

In [51]:
# Late-list feature clustering (x) vs. late-list temporal clustering (y).
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Late', ylists='Late')

Correlations between Feature clustering score and Temporal clustering score (x lists: Late, y lists: Late)


	Combined (across subjects): : r(179) = 0.273, p < 0.001, CI = [0.151, 0.381]


	Category (across subjects): : r(28) = 0.293, p = 0.116, CI = [-0.133, 0.582]
	Size (across subjects): : r(28) = 0.307, p = 0.099, CI = [-0.087, 0.653]
	Length (across subjects): : r(28) = 0.353, p = 0.056, CI = [-0.025, 0.668]
	First letter (across subjects): : r(28) = 0.660, p < 0.001, CI = [0.452, 0.812]
	Color (across subjects): : r(29) = 0.333, p = 0.068, CI = [-0.024, 0.667]
	Location (across subjects): : r(28) = 0.235, p = 0.212, CI = [-0.141, 0.533]
	Within condition, across subjects: all $r$s $\geq 0.235$, all $p$s $\leq 0.212$


	Across conditions: : r(4) = -0.190, p = 0.718, CI = [-0.958, 0.768]


### Late vs. early

In [52]:
# Early-list feature clustering (x) vs. late-list temporal clustering (y).
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Early', ylists='Late')

Correlations between Feature clustering score and Temporal clustering score (x lists: Early, y lists: Late)
	Combined (across subjects): : r(179) = 0.266, p < 0.001, CI = [0.140, 0.394]


	Category (across subjects): : r(28) = 0.298, p = 0.110, CI = [0.018, 0.520]
	Size (across subjects): : r(28) = 0.314, p = 0.091, CI = [0.013, 0.572]
	Length (across subjects): : r(28) = 0.535, p = 0.002, CI = [0.227, 0.776]
	First letter (across subjects): : r(28) = 0.443, p = 0.014, CI = [0.058, 0.753]
	Color (across subjects): : r(29) = 0.491, p = 0.005, CI = [0.116, 0.717]
	Location (across subjects): : r(28) = 0.355, p = 0.054, CI = [0.031, 0.624]
	Within condition, across subjects: all $r$s $\geq 0.298$, all $p$s $\leq 0.110$


	Across conditions: : r(4) = 0.064, p = 0.903, CI = [-0.920, 1.000]


### Early vs. late

In [53]:
# Late-list feature clustering (x) vs. early-list temporal clustering (y).
corr_helper(clustering_results, x='Feature clustering score', y='Temporal clustering score', xlists='Late', ylists='Early')

Correlations between Feature clustering score and Temporal clustering score (x lists: Late, y lists: Early)
	Combined (across subjects): : r(179) = 0.549, p < 0.001, CI = [0.434, 0.640]


	Category (across subjects): : r(28) = 0.613, p < 0.001, CI = [0.401, 0.778]
	Size (across subjects): : r(28) = 0.392, p = 0.032, CI = [0.086, 0.639]
	Length (across subjects): : r(28) = 0.271, p = 0.148, CI = [-0.063, 0.574]
	First letter (across subjects): : r(28) = 0.345, p = 0.062, CI = [-0.062, 0.657]
	Color (across subjects): : r(29) = 0.005, p = 0.980, CI = [-0.421, 0.379]
	Location (across subjects): : r(28) = 0.240, p = 0.201, CI = [-0.046, 0.516]
	Within condition, across subjects: all $r$s $\geq 0.005$, all $p$s $\leq 0.980$


	Across conditions: : r(4) = 0.855, p = 0.030, CI = [0.191, 0.996]


## Feature clustering on early vs. late lists

In [54]:
# Feature clustering on late lists (x) vs. feature clustering on early lists (y).
corr_helper(clustering_results, x='Feature clustering score', y='Feature clustering score', xlists='Late', ylists='Early')

Correlations between Feature clustering score and Feature clustering score (x lists: Late, y lists: Early)


	Combined (across subjects): : r(179) = 0.591, p < 0.001, CI = [0.478, 0.687]


	Category (across subjects): : r(28) = 0.590, p < 0.001, CI = [0.361, 0.758]
	Size (across subjects): : r(28) = 0.488, p = 0.006, CI = [0.138, 0.728]
	Length (across subjects): : r(28) = 0.384, p = 0.036, CI = [0.053, 0.678]
	First letter (across subjects): : r(28) = 0.202, p = 0.284, CI = [-0.227, 0.646]
	Color (across subjects): : r(29) = -0.183, p = 0.325, CI = [-0.533, 0.233]
	Location (across subjects): : r(28) = 0.031, p = 0.870, CI = [-0.256, 0.297]
	Within condition, across subjects: all $r$s $\geq -0.183$, all $p$s $\leq 0.870$


	Across conditions: : r(4) = 0.942, p = 0.005, CI = [0.488, 1.000]


# Difference analyses (early - late lists)

In [55]:
# Per-subject change scores (early lists minus late lists) for the three
# measures used in the difference analyses below.
score_columns = ['Feature clustering score', 'Temporal clustering score', 'Recall probability']

early_results = clustering_results.query('List == "Early"')
late_results = clustering_results.query('List == "Late"')

# The subtraction pairs rows purely by position, so fail loudly if the early
# and late rows do not describe the same subjects/conditions in the same order.
id_columns = ['Subject', 'Condition']
if not np.array_equal(early_results[id_columns].values, late_results[id_columns].values):
    raise ValueError('Early and Late rows of clustering_results are not aligned by Subject and Condition')

clustering_result_diffs = early_results.copy()
clustering_result_diffs['List'] = 'Early - Late'
for column in score_columns:
    clustering_result_diffs[column] = early_results[column].values - late_results[column].values

# NOTE(review): 'Corrected feature clustering score' is not differenced and keeps
# its early-list values in this table -- confirm that this is intended.
clustering_result_diffs

Unnamed: 0,Subject,List,Condition,Feature clustering score,Corrected feature clustering score,Temporal clustering score,Recall probability
0,0,Early - Late,Feature rich,-0.050729,0.485292,-0.045000,0.039062
1,1,Early - Late,Feature rich,-0.058125,0.582875,-0.039750,-0.031250
2,2,Early - Late,Feature rich,0.092667,0.532000,0.259625,0.148438
3,3,Early - Late,Feature rich,0.070625,0.474292,-0.019375,0.125000
4,4,Early - Late,Feature rich,0.089312,0.565833,0.194500,0.101562
...,...,...,...,...,...,...,...
461,243,Early - Late,Location,0.272875,0.397500,0.011625,-0.031250
462,244,Early - Late,Location,0.229750,0.527500,0.248500,-0.046875
463,245,Early - Late,Location,-0.001750,0.320750,-0.017250,-0.062500
464,246,Early - Late,Location,0.436625,0.647750,0.162500,0.109375


## Recall probability versus feature clustering (differences)

In [56]:
# Early-minus-late change in feature clustering (x) vs. the matching change in recall probability (y).
corr_helper(clustering_result_diffs, x='Feature clustering score', y='Recall probability', xlists='Early - Late', ylists='Early - Late')

Correlations between Feature clustering score and Recall probability (x lists: Early - Late, y lists: Early - Late)


	Combined (across subjects): : r(179) = 0.307, p < 0.001, CI = [0.136, 0.469]


	Category (across subjects): : r(28) = 0.350, p = 0.058, CI = [0.028, 0.630]
	Size (across subjects): : r(28) = 0.708, p < 0.001, CI = [0.462, 0.876]
	Length (across subjects): : r(28) = 0.205, p = 0.276, CI = [-0.111, 0.479]
	First letter (across subjects): : r(28) = 0.081, p = 0.672, CI = [-0.416, 0.593]
	Color (across subjects): : r(29) = 0.155, p = 0.406, CI = [-0.142, 0.515]
	Location (across subjects): : r(28) = 0.052, p = 0.787, CI = [-0.301, 0.360]
	Within condition, across subjects: all $r$s $\geq 0.052$, all $p$s $\leq 0.787$


	Across conditions: : r(4) = 0.635, p = 0.176, CI = [-0.927, 0.983]


## Temporal clustering versus feature clustering (differences)

In [57]:
# Early-minus-late change in feature clustering (x) vs. the matching change in temporal clustering (y).
corr_helper(clustering_result_diffs, x='Feature clustering score', y='Temporal clustering score', xlists='Early - Late', ylists='Early - Late')

Correlations between Feature clustering score and Temporal clustering score (x lists: Early - Late, y lists: Early - Late)
	Combined (across subjects): : r(179) = 0.426, p < 0.001, CI = [0.287, 0.536]


	Category (across subjects): : r(28) = 0.110, p = 0.564, CI = [-0.303, 0.447]
	Size (across subjects): : r(28) = 0.447, p = 0.013, CI = [0.077, 0.733]
	Length (across subjects): : r(28) = 0.482, p = 0.007, CI = [0.257, 0.703]
	First letter (across subjects): : r(28) = 0.584, p < 0.001, CI = [0.246, 0.764]
	Color (across subjects): : r(29) = 0.406, p = 0.023, CI = [0.055, 0.707]
	Location (across subjects): : r(28) = 0.498, p = 0.005, CI = [0.274, 0.695]
	Within condition, across subjects: all $r$s $\geq 0.110$, all $p$s $\leq 0.564$


	Across conditions: : r(4) = 0.649, p = 0.163, CI = [-0.784, 0.997]


# Adaptive condition

## Accuracy: comparing across each list type

### Stabilize vs. random

In [58]:
# Accuracy on 'stabilize' vs. 'random' lists, both taken from the adaptive condition.
# independent_sample=False presumably requests a paired test -- confirm in analyze.ttest.
ttest(results['accuracy']['adaptive'], results['accuracy']['adaptive'], x_lists='stabilize', y_lists='random', independent_sample=False)

t(59) = 1.740, p = 0.087, d = 0.095, CI = [-0.161, 3.980]


### Destabilize vs. random

In [59]:
# Accuracy on 'destabilize' vs. 'random' lists, both taken from the adaptive condition.
# independent_sample=False presumably requests a paired test -- confirm in analyze.ttest.
ttest(results['accuracy']['adaptive'], results['accuracy']['adaptive'], x_lists='destabilize', y_lists='random', independent_sample=False)

t(59) = -0.249, p = 0.804, d = -0.017, CI = [-2.431, 1.561]


### Stabilize vs. destabilize

In [60]:
# Accuracy on 'stabilize' vs. 'destabilize' lists, both taken from the adaptive condition.
# independent_sample=False presumably requests a paired test -- confirm in analyze.ttest.
ttest(results['accuracy']['adaptive'], results['accuracy']['adaptive'], x_lists='stabilize', y_lists='destabilize', independent_sample=False)

t(59) = 1.714, p = 0.092, d = 0.114, CI = [-0.237, 4.258]


## Temporal clustering: comparing across each list type

### Stabilize vs. random

In [61]:
# 'temporal' fingerprint column on 'stabilize' vs. 'random' lists of the adaptive condition.
# independent_sample=False presumably requests a paired test -- confirm in analyze.ttest.
ttest(results['fingerprint']['adaptive'], results['fingerprint']['adaptive'], x_lists='stabilize', y_lists='random', independent_sample=False, x_col='temporal', y_col='temporal')

t(59) = 3.428, p = 0.001, d = 0.306, CI = [1.616, 5.335]


### Destabilize vs. random

In [62]:
# 'temporal' fingerprint column on 'destabilize' vs. 'random' lists of the adaptive condition.
# independent_sample=False presumably requests a paired test -- confirm in analyze.ttest.
ttest(results['fingerprint']['adaptive'], results['fingerprint']['adaptive'], x_lists='destabilize', y_lists='random', independent_sample=False, x_col='temporal', y_col='temporal')

t(59) = -0.880, p = 0.382, d = -0.081, CI = [-3.166, 0.991]


### Stabilize vs. destabilize

In [63]:
# 'temporal' fingerprint column on 'stabilize' vs. 'destabilize' lists of the adaptive condition.
# independent_sample=False presumably requests a paired test -- confirm in analyze.ttest.
ttest(results['fingerprint']['adaptive'], results['fingerprint']['adaptive'], x_lists='stabilize', y_lists='destabilize', independent_sample=False, x_col='temporal', y_col='temporal')

t(59) = 4.174, p < 0.001, d = 0.374, CI = [2.072, 6.931]


## Correlations between accuracy and temporal clustering (adaptive condition)

In [64]:
# Assemble a corr_helper-compatible table for the adaptive condition: the list
# type plays the role of 'Condition', and a constant 'List' label is added.
adaptive_accuracy = results['accuracy']['adaptive'].data.reset_index().query('List not in ["init"]')
adaptive_temporal = results['fingerprint']['adaptive'].data.reset_index().query('List not in ["init"]')['temporal']

df = (
    adaptive_accuracy
    .rename(columns={0: 'Recall probability', 'List': 'Condition'})
    # the temporal scores are aligned to the accuracy rows by index label
    .assign(**{'Temporal clustering score': adaptive_temporal, 'List': 'All'})
)
df

Unnamed: 0,Subject,Condition,Recall probability,Temporal clustering score,List
0,0,destabilize,0.578125,0.47100,All
1,1,destabilize,0.250000,0.34775,All
2,2,destabilize,0.515625,0.51500,All
3,3,destabilize,0.625000,0.86400,All
4,4,destabilize,0.500000,0.46650,All
...,...,...,...,...,...
235,55,stabilize,0.406250,0.58750,All
236,56,stabilize,0.703125,0.81600,All
237,57,stabilize,0.343750,0.41925,All
238,58,stabilize,0.343750,0.56250,All


In [65]:
# Temporal clustering (x) vs. recall probability (y) for the adaptive data; here 'Condition' holds the
# list type (renamed from 'List' in the previous cell) and every row carries List == 'All'.
corr_helper(df, x='Temporal clustering score', y='Recall probability', xlists='All', ylists='All')

Correlations between Temporal clustering score and Recall probability (x lists: All, y lists: All)
	Combined (across subjects): : r(178) = 0.701, p < 0.001, CI = [0.584, 0.787]


	destabilize (across subjects): : r(58) = 0.674, p < 0.001, CI = [0.446, 0.812]
	random (across subjects): : r(58) = 0.651, p < 0.001, CI = [0.410, 0.838]
	stabilize (across subjects): : r(58) = 0.784, p < 0.001, CI = [0.616, 0.883]
	Within condition, across subjects: all $r$s $\geq 0.651$, all $p$s $\leq 0.000$


	Across conditions: : r(1) = 0.998, p = 0.044, CI = [0.998, 1.000]


# Difference scores between fingerprints and temporally corrected fingerprints