# Summary Table for Selected Experiments

### This notebook filters and summarizes experiments from the DARPA RAM dataset according to different criteria.

In [1]:
# from imports import * 
from cmlreaders import CMLReader, get_data_index
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import scipy
from scipy import stats
pd.set_option('display.max_columns', None)

Load data from all DARPA RAM experiments

In [2]:
# Load the data index for the "r1" and "pyfr" protocols.
df1 = get_data_index("r1")
df2 = get_data_index("pyfr")

# ignore_index=True gives the combined frame a fresh 0..N-1 index. Without it
# each input keeps its own 0-based labels, so the same label would refer to
# one row from each protocol and label-based lookups become ambiguous.
df = pd.concat([df1, df2], ignore_index=True)

# Sanity checks: no rows lost or duplicated, and every row has its own label.
assert len(df) == len(df1) + len(df2)
assert df.index.is_unique

In [3]:
# Show the combined index (pandas abbreviates long frames to the first and last rows).
df

Unnamed: 0,Recognition,all_events,contacts,experiment,import_type,localization,math_events,montage,original_experiment,original_session,pairs,ps4_events,session,subject,subject_alias,system_version,task_events
0,,protocols/r1/subjects/FBG490/experiments/EFRCo...,protocols/r1/subjects/FBG490/localizations/0/m...,EFRCourierOpenLoop,build,0,,0,,,protocols/r1/subjects/FBG490/localizations/0/m...,,0,FBG490,FBG490,4.0,protocols/r1/subjects/FBG490/experiments/EFRCo...
1,,protocols/r1/subjects/FBG490/experiments/EFRCo...,protocols/r1/subjects/FBG490/localizations/0/m...,EFRCourierOpenLoop,build,0,,0,,,protocols/r1/subjects/FBG490/localizations/0/m...,,1,FBG490,FBG490,4.0,protocols/r1/subjects/FBG490/experiments/EFRCo...
2,,protocols/r1/subjects/FBG490/experiments/EFRCo...,protocols/r1/subjects/FBG490/localizations/0/m...,EFRCourierOpenLoop,build,0,,0,,,protocols/r1/subjects/FBG490/localizations/0/m...,,2,FBG490,FBG490,4.0,protocols/r1/subjects/FBG490/experiments/EFRCo...
3,,protocols/r1/subjects/FBG490/experiments/EFRCo...,protocols/r1/subjects/FBG490/localizations/0/m...,EFRCourierReadOnly,build,0,,0,,,protocols/r1/subjects/FBG490/localizations/0/m...,,0,FBG490,FBG490,4.0,protocols/r1/subjects/FBG490/experiments/EFRCo...
4,,protocols/r1/subjects/FBG491/experiments/EFRCo...,protocols/r1/subjects/FBG491/localizations/0/m...,EFRCourierOpenLoop,build,0,,0,,,protocols/r1/subjects/FBG491/localizations/0/m...,,1,FBG491,FBG491,4.0,protocols/r1/subjects/FBG491/experiments/EFRCo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
373,,,,pyFR,,0,,0,,,,,2,UP045,,,
374,,,,pyFR,,0,,0,,,,,3,UP045,,,
375,,,,pyFR,,0,,0,,,,,0,UP046,,,
376,,,,pyFR,,0,,0,,,,,1,UP046,,,


In [20]:
# List the column names available in the combined index.
df.columns

Index(['Recognition', 'all_events', 'contacts', 'experiment', 'import_type',
       'localization', 'math_events', 'montage', 'original_experiment',
       'original_session', 'pairs', 'ps4_events', 'session', 'subject',
       'subject_alias', 'system_version', 'task_events'],
      dtype='object')

In [4]:
# Distinct experiment names present in the combined index, in order of first appearance.
exp_types = df["experiment"].unique()
exp_types

array(['EFRCourierOpenLoop', 'EFRCourierReadOnly', 'FR1', 'FR2', 'PAL1',
       'YC1', 'PAL2', 'catFR1', 'YC2', 'catFR2', 'PS1', 'ICatFR1',
       'ICatFR6', 'IFR1', 'IFR6', 'PS3', 'PS2', 'TH1', 'FR3', 'PS2.1',
       'PAL3', 'TH3', 'OPS', 'RepFR1', 'catFR3', 'FR5', 'PS4_catFR',
       'THR', 'PS4_FR', 'PAL5', 'THR1', 'catFR5', 'PS4_catFR5', 'FR6',
       'PS5_catFR', 'catFR6', 'TICL_FR', 'LocationSearch', 'TICL_catFR',
       'DBOY1', 'CatFR6', 'RepFR2', 'CPS', 'pyFR'], dtype=object)

## Count all verbal memory stimulation experiments

In [32]:
import pandas as pd
import re

# --- Step 1: Define experiment types from the dataframe ---
# Array of the distinct values in the 'experiment' column. The rest of this
# cell does not read `exp_types`; the filter below is applied to the column itself.
exp_types = df['experiment'].unique()

# --- Step 2: Function to filter experiments ---
def exp_filter(exp_name):
    """Return True if an experiment name passes the summary filter.

    A name passes when it contains none of the excluded task keywords
    (matched case-insensitively) and the first run of digits in the name,
    read as an integer, is greater than 1.

    Parameters
    ----------
    exp_name : str
        Experiment name, e.g. ``'FR2'`` or ``'PS4_catFR'``. Non-string
        values (such as NaN or None from a missing entry) are rejected.

    Returns
    -------
    bool
        True when the name should be kept, False otherwise.
    """
    # Missing entries arrive as float NaN / None when this is used with
    # Series.apply(); reject them instead of failing on .upper().
    if not isinstance(exp_name, str):
        return False

    # Upper-case once so keyword matching is case-insensitive.
    exp_upper = exp_name.upper()

    # Substring match, so 'TH' also rejects every name containing 'THR'.
    exclude_keywords = ('YC', 'TH', 'COURIER', 'DBOY')
    if any(k in exp_upper for k in exclude_keywords):
        return False

    # Only the first digit run is considered: 'PS2.1' -> 2, 'PS4_catFR5' -> 4.
    # Names without any digit (e.g. 'OPS', 'pyFR') are rejected.
    match = re.search(r'(\d+)', exp_name)
    return match is not None and int(match.group(1)) > 1

# --- Step 3: Keep only the rows whose experiment name passes exp_filter ---
# .copy() detaches the result from df so later edits cannot touch the original.
df_filtered = df.loc[df['experiment'].map(exp_filter)].copy()

# --- Step 4: Build the per-experiment summary table ---
# total_rows     : number of rows in each experiment group
# n_subjects     : distinct subject values in the group
# total_sessions : non-null 'session' entries in the group
summary_filtered = df_filtered.groupby('experiment').agg(
    total_rows=('experiment', 'size'),
    n_subjects=('subject', 'nunique'),
    total_sessions=('session', 'count'),
).sort_index()

# --- Step 5: Totals across all retained experiments ---
# A subject appearing in several experiments is counted once here, so this
# can be smaller than the sum of the n_subjects column.
n_sessions_total = df_filtered['session'].count()
n_subjects_total = df_filtered['subject'].nunique()

# --- Step 6: Report the table and the totals ---
print("\nSummary Table (Experiments with number >1, excluding YC/THR/Courier/DBOY):")
print(summary_filtered)
print(f"\nTotal unique subjects: {n_subjects_total}")
print(f"Total sessions: {n_sessions_total}")



Summary Table (Experiments with number >1, excluding YC/THR/Courier/DBOY):
            total_rows  n_subjects  total_sessions
experiment                                        
CatFR6               1           1               1
FR2                 79          38              79
FR3                 32          18              32
FR5                 24          16              24
FR6                  9           5               9
ICatFR6              8           5               8
IFR6                11           7              11
PAL2                26          20              26
PAL3                15           9              15
PAL5                 4           4               4
PS2                158          53             158
PS2.1              141          38             141
PS3                 45          24              45
PS4_FR               9           5               9
PS4_catFR            7           4               7
PS4_catFR5           5           3               5
PS5_ca