In [12]:
import re
import os.path as op
from glob import glob

import numpy as np
import nibabel as nib
import pandas as pd


def get_run(file_):
    """
    Extract the run label (e.g. 'run-02') from a file name or path.

    Parameters
    ----------
    file_ : str
        String searched for the pattern ``run-<digits>`` immediately
        followed by an underscore.

    Returns
    -------
    str
        The first matching run label, without the trailing underscore.

    Raises
    ------
    IndexError
        If ``file_`` contains no match for the pattern.
    """
    run_labels = re.findall('(run-[0-9]+)_', file_)
    # Only the first occurrence is used; indexing an empty list raises IndexError
    first_label = run_labels[0]
    return first_label

In [11]:
# Count the distinct runs found for each subject and report every subject
# whose count differs from the expected four (i.e. subjects with missing data)
subs = glob('first-levels/sub-*')
subs.sort()
for sub in subs:
    files = sorted(glob(op.join(sub, '*.tsv')))
    # A set collapses several files that belong to the same run into one label
    runs = sorted({get_run(f) for f in files})
    n_runs = len(runs)
    if n_runs == 4:
        # Complete subject: nothing to report
        continue
    print('{0}: {1}'.format(op.basename(sub), n_runs))

In [15]:
# Determine number of censored volumes per run per subject.
# Result: `out_df`, one row per subject (directory basename) and one column
# per run label; entries are the number of columns in that run's TSV whose
# name starts with 'censor' (presumably one such column per censored
# volume -- confirm against the first-level output format).
subs = sorted(glob('first-levels/sub-*'))
# Expected run columns; any unexpected run label found on disk is appended
# so that no data is silently dropped.
run_cols = ['run-01', 'run-02', 'run-03', 'run-04']
# subject label -> {run label: censor-column count}
censor_counts = {}
for sub in subs:
    files = sorted(glob(op.join(sub, '*.tsv')))
    for f in files:
        run = get_run(f)
        df = pd.read_csv(f, sep='\t')
        cens_cols = [c for c in df.columns if c.startswith('censor')]
        # If several files map to the same run, the last one (in sorted
        # order) wins.
        censor_counts.setdefault(op.basename(sub), {})[run] = len(cens_cols)
        if run not in run_cols:
            run_cols.append(run)

# Build the table once, with a numeric dtype, instead of enlarging an empty
# DataFrame one cell at a time inside the loop. Runs missing for a subject
# are NaN, so downstream mean/sum skip them.
out_df = pd.DataFrame(
    list(censor_counts.values()),
    index=list(censor_counts.keys()),
    columns=run_cols,
    dtype=float,
)

In [23]:
# Mean over rows of each row's across-column mean of `out_df`
row_means = out_df.mean(axis=1)
print(row_means.mean())
# Mean over rows of each row's across-column total of `out_df`
row_totals = out_df.sum(axis=1)
print(row_totals.mean())

6.627314814814815
26.50925925925926
