In [1]:
from pathlib import Path
import polars as pl

In [2]:
# Input data and per-run output locations (all under the 20250611_pilot data directory).
to_model_dir = Path('/data/MLDSST/nielsond/cogmood/data/20250611_pilot/to_model')
cleaned_dir = Path('/data/MLDSST/nielsond/cogmood/data/20250611_pilot/modeling/data_cleaned_20260127')
results_dir = Path('/data/MLDSST/nielsond/cogmood/data/20250611_pilot/modeling/results_20260127')

# Package directory that the generated swarm commands `cd` into, plus the
# directories used for swarm command files and logs.
code_dir = Path("/data/MLDSST/nielsond/cogmood/analysis/cogmood_analysis/packages/SupremePilot/")
swarm_dir = Path('/data/MLDSST/nielsond/cogmood/swarm/')
swarm_cmds = swarm_dir / 'swarm_commands'
swarm_logs = swarm_dir / 'swarm_logs'

# parents=True: without it, mkdir raises FileNotFoundError when an intermediate
# directory (e.g. `modeling/`) does not exist yet, which breaks a fresh run.
# exist_ok=True keeps the cell safe to rerun.
for out_dir in (cleaned_dir, results_dir, swarm_dir, swarm_cmds, swarm_logs):
    out_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Subject identifiers (28 entries). The command-building cells below loop over
# this tuple and use each ID to locate that subject's input files
# (`{model}-{subj}.csv`) and to name its cleaned-data directory and results file.
subjects = (
    "11nuj5ty67ojohm39cmzbt23", 
    "1ts935dhccck7dgtssvxv9nd",
    "2upuqdbw3wdpk3q43x89zysp",
    "48juqsgxp4m2o7797zvjxln9",
    "60pixcark57tgonq4abwctvs",
    "81987885tpc29718g2d8evdm",
    "d4hsof73ftqmz1sbm3vc82f6",
    "h3q7g3g6za07rl9qnhd87hoq",
    "hvann18ezp9i2kq8bvqivehs",
    "l8eyqget2wsecwew6bwabn1h",
    "lpbs1m834j0r6sbezpcotnei",
    "mglomvxjfi6gya3jmrt7o09w",
    "mjff7puqxr95bh6d945ru7z2",
    "ol8u2qd7k4zi7cd0p7idogr4",
    "p5t6r1pmhqfffx93vv5h8vxc",
    "q86m1zrqk9q16o5e3dfvb8yx",
    "qjvaxvijfmumaq0czvg4m55x",
    "qx2vl559ytpgxjwh26fre2fo",
    "reqevxyh9eqa3jyc8wvucdi2",
    "s7szmm610ygsyon54c67mlvj",
    "sz1p1qr5v5saov60ia90oqlh",
    "u5sdtc6ljckmewbo4acifldp",
    "v0pbbk0rbplhgr4tsqqrlvzm",
    "ww2y0qu0joyyxy7lsxirxvm4",
    "wxabwlnirq69pf95h6573i6y",
    "y6d1crfpg0rh1wpohfgi959w",
    "ycbg09io1hcmzyiosg50hgl8",
    "ztgwa65cvika8vs9szlbpbnt"
)

# Run CAB model

In [20]:
# Build one shell command per subject for the CAB model. Each command changes
# into the package directory, sets JAX_NUM_CPU_DEVICES, then runs the model's
# data-prep module followed by its per-subject fit module via `uv run`.
model = 'cab'
cab_cmds = []
# Subjects skipped because their task CSV is absent from to_model_dir.
bad_subs = []
for subj in subjects:
    subj_cleaned_dir = cleaned_dir / subj
    task_csv = to_model_dir / f'{model}-{subj}.csv'
    if not task_csv.exists():
        print(f"Subject {subj} missing required files")
        # Must append to the list defined above; appending to any other name
        # raises NameError as soon as one subject's CSV is missing.
        bad_subs.append(subj)
        continue

    # Only queue subjects whose results file does not exist yet, so this cell
    # can be rerun to resubmit failed jobs. The prefix is derived from `model`
    # so the check cannot drift from the model being run.
    out_dict = results_dir / f'{model}_results_dict_{subj}.json'
    if not out_dict.exists():
        # Adjacent literals are concatenated into a single command line.
        cmd = (
            f'cd {code_dir} '
            ' && export JAX_NUM_CPU_DEVICES=8'
            f' && uv run python -m {model}.{model}_data_prep "{subj}" "{task_csv}" "{subj_cleaned_dir.as_posix()}"'
            f' && uv run python -m {model}.{model}_subj_fit "{subj}" "{subj_cleaned_dir.as_posix()}" "{results_dir.as_posix()}"'
        )
        cab_cmds.append(cmd)

In [22]:
# Number of CAB commands queued, i.e. subjects whose task CSV exists but whose
# results file does not.
len(cab_cmds)

5

In [28]:
# Write the CAB commands, one per line, to the swarm command file.
# Only the first two commands are written here (a small trial submission).
# NOTE(review): the file lands directly in swarm_dir, while the swarm_cmds
# directory created in the config cell is unused; confirm which is intended.
swarm_file = swarm_dir.joinpath(f'{model}_cmds')
swarm_text = '\n'.join(cab_cmds[:2])
swarm_file.write_text(swarm_text)

1261

In [29]:
# Submit the CAB command file with `swarm` under the job name cab_test, logging to swarm_logs.
# NOTE(review): presumably -g is GB of memory and -t is threads per command; confirm against the swarm docs.
# NOTE(review): -t 10 is requested while the commands export JAX_NUM_CPU_DEVICES=8; confirm this is intended.
!swarm -g 5 -t 10 -J cab_test --log {swarm_logs} --partition quick --time 01:00:00 {swarm_file}

10497375


# Run RDM

In [25]:
# Build one shell command per subject for the RDM model. Each command changes
# into the package directory, sets JAX_NUM_CPU_DEVICES, then runs the model's
# data-prep module followed by its per-subject fit module via `uv run`.
model = 'rdm'
rdm_cmds = []
# Subjects skipped because their task CSV is absent from to_model_dir.
bad_subs = []
for subj in subjects:
    subj_cleaned_dir = cleaned_dir / subj
    task_csv = to_model_dir / f'{model}-{subj}.csv'
    if not task_csv.exists():
        print(f"Subject {subj} missing required files")
        # Must append to the list defined above; appending to any other name
        # raises NameError as soon as one subject's CSV is missing.
        bad_subs.append(subj)
        continue

    # Only queue subjects whose results file does not exist yet, so this cell
    # can be rerun to resubmit failed jobs. The prefix is derived from `model`
    # so the check cannot drift from the model being run.
    out_dict = results_dir / f'{model}_results_dict_{subj}.json'
    if not out_dict.exists():
        # Adjacent literals are concatenated into a single command line.
        cmd = (
            f'cd {code_dir} '
            ' && export JAX_NUM_CPU_DEVICES=8'
            f' && uv run python -m {model}.{model}_data_prep "{subj}" "{task_csv}" "{subj_cleaned_dir.as_posix()}"'
            f' && uv run python -m {model}.{model}_subj_fit "{subj}" "{subj_cleaned_dir.as_posix()}" "{results_dir.as_posix()}"'
        )
        rdm_cmds.append(cmd)
print(len(rdm_cmds))

2


In [30]:
# Write every queued RDM command, one per line, to the swarm command file.
# The cell's displayed value is the character count returned by write_text.
swarm_file = swarm_dir.joinpath(f'{model}_cmds')
swarm_text = '\n'.join(rdm_cmds)
swarm_file.write_text(swarm_text)

1261

In [31]:
# Submit the RDM command file with `swarm` under the job name rdm, logging to swarm_logs.
# NOTE(review): presumably -g is GB of memory and -t is threads per command; confirm against the swarm docs.
!swarm -g 5 -t 10 -J rdm --log {swarm_logs} --partition quick --time 01:00:00 {swarm_file}

10497386


# Run BART models

In [36]:
# Assemble the BART swarm commands: one data-prep + fit pipeline per subject.
model = 'bart'
bart_cmds = []
bad_subjs = []  # subjects lacking the task CSV or the pickle directory
for subj in subjects:
    subj_cleaned_dir = cleaned_dir / subj
    task_csv = to_model_dir / f'{model}-{subj}.csv'
    pickle_dir = to_model_dir / f'{model}-{subj}_pickles'

    # Both inputs must be present; otherwise report the subject and move on.
    if not (pickle_dir.exists() and task_csv.exists()):
        print(f"Subject {subj} missing required files")
        bad_subjs.append(subj)
        continue

    # If the results file is already on disk there is nothing to queue, which
    # lets this cell be rerun to resubmit only the failed jobs.
    out_dict = results_dir / f'bart_results_dict_{subj}.json'
    if out_dict.exists():
        continue

    # Adjacent literals are concatenated into a single command line.
    cmd = (
        f'cd {code_dir} '
        ' && export JAX_NUM_CPU_DEVICES=8'
        f' && uv run python -m {model}.{model}_data_prep "{subj}" "{task_csv}" "{subj_cleaned_dir.as_posix()}"'
        f' && uv run python -m {model}.{model}_subj_fit "{subj}" "{subj_cleaned_dir.as_posix()}" "{results_dir.as_posix()}"'
    )
    bart_cmds.append(cmd)
print(len(bart_cmds))

Subject 11nuj5ty67ojohm39cmzbt23 missing required files
Subject qjvaxvijfmumaq0czvg4m55x missing required files
Subject s7szmm610ygsyon54c67mlvj missing required files
Subject ztgwa65cvika8vs9szlbpbnt missing required files
0


In [20]:
# Write every queued BART command, one per line, to the swarm command file.
# The cell's displayed value is the character count returned by write_text.
swarm_file = swarm_dir.joinpath(f'{model}_cmds')
swarm_text = '\n'.join(bart_cmds)
swarm_file.write_text(swarm_text)

15263

In [21]:
# Submit the BART command file with `swarm` under the job name bart, logging to swarm_logs.
# NOTE(review): presumably -g is GB of memory, -t is threads per command and -b bundles several commands per job; confirm against the swarm docs.
# The time limit here is 00:10:00, shorter than the 01:00:00 used for the CAB and RDM submissions.
!swarm -g 5 -t 10 -b 5 -J bart --log {swarm_logs} --partition quick --time 00:10:00 {swarm_file}

10308524
