# Renaming ExtraLong-2021 sessions on Flywheel to preserve scanid and original project
**Project:** ExtraLong-2021 <br>
**Author:** Katja Zoner <br>
**Date:** 09/22/2021 <br>

## **Setup:** Create logger, get Flywheel client, etc.

In [143]:
import logging
import numpy as np
import pandas as pd
import flywheel

In [144]:
# Configure logging for the notebook and grab the logger the cells below write to
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
log = logging.getLogger('root')

In [145]:
# Open a Flywheel SDK client; fail loudly if no usable client comes back
fw = flywheel.Client()
if not fw:
    raise AssertionError("Your Flywheel CLI credentials aren't set!")

In [146]:
# Look up the new project on Flywheel by its label
PROJECT_LABEL = "ExtraLong_2021"
project = fw.projects.find_first(f'label="{PROJECT_LABEL}"')
if not project:
    raise AssertionError("Project not found!")



## **Some Useful Functions:** 
- `updateSubject()`:            updates subject label / info and returns subject object from Flywheel
- `updateSession()`:            updates session label / info and returns session object from Flywheel

In [117]:
def updateSubject(subid, label, info=None, dry_run = True):
    """Relabel a subject of the module-level ``project`` and optionally set its info.

    Args:
        subid: Current label of the subject to look up.
        label: New label to assign to the subject.
        info: Optional value passed to ``sub.update(info=...)`` after relabeling.
        dry_run: When True (the default) the subject is only looked up and
            logged; no update call is made.

    Returns:
        The subject object returned by the lookup (it is not reloaded after
        the update), or None when no subject matches ``subid``.
    """
    # Quote the label value so the filter has the same form as the project
    # and session lookups used elsewhere in this notebook.
    sub = project.subjects.find_first(f'label="{subid}"')
    if not sub:
        print(f"Subject {subid} not found on Flywheel!")
        return None

    log.info(f'Updating subject "{sub.label}"')
    if not dry_run:
        sub.update(label=label)
        if info:
            sub.update(info=info)
    return sub

In [194]:
def updateSession(sub, sesid, label, info=None, dry_run = True):
    """Relabel one session of ``sub`` and optionally set its info.

    Args:
        sub: Subject object whose sessions are searched. May be None, which
            is what ``updateSubject`` returns for a subject it could not find.
        sesid: Current label of the session to look up.
        label: New label to assign to the session.
        info: Optional value passed to ``ses.update(info=...)`` after relabeling.
        dry_run: When True (the default) the session is only looked up and
            logged; no update call is made.

    Returns:
        The session object returned by the lookup (it is not reloaded after
        the update), or None when the subject is None or no session matches.
    """
    # Without a subject there is nothing to search; report it instead of
    # failing with an AttributeError on ``sub.sessions``.
    if sub is None:
        print(f"Session {sesid} skipped: parent subject not found on Flywheel!")
        return None

    ses = sub.sessions.find_first(f'label="{sesid}"')
    if not ses:
        print(f"Subject {sub.label} Session {sesid} not found on Flywheel!")
        return None

    log.info(f'Updating session "{ses.label}" from subject {ses.subject.label}')
    if not dry_run:
        ses.update(label=label)
        if info:
            ses.update(info = info)
    return ses

In [147]:
# Load the ExtraLong 2021 datafreeze csv into a DataFrame and display it
fname = "./csv/ExtraLong-Datafreeze-2021-Updated-20210920.csv"
xl = pd.read_csv(filepath_or_buffer=fname)
xl

Unnamed: 0,bblid,scanid,sesid,scan_protocol,project,filename,acqid,doscan,timepoint,ntimepoints,scanage_months,sex,race,ethnic
0,11399,3468,DAY,808799 - DAY2,ExtraLong,sub-11399_ses-DAY21_T1w.nii.gz,5d9dfbaaa54d350032b1708a,2010-06-29,1,2,414.0,2.0,2.0,2.0
1,11399,3592,DAY,808799 - DAY2,ExtraLong,sub-11399_ses-DAY22_T1w.nii.gz,5d9dfbb3a54d350044c10aef,2010-07-29,2,2,415.0,2.0,2.0,2.0
2,11801,5145,DAY,808799 - DAY2,ExtraLong,sub-11801_ses-DAY21_T1w.nii.gz,5d9dfc1aa54d350040ba6838,2011-06-06,1,3,370.0,1.0,1.0,2.0
3,11801,5200,FNDM,810211 - FNDM,ExtraLong,sub-11801_ses-FNDM21_T1w.nii.gz,5d9e0cdea54d35003cb5bf19,2011-06-10,2,3,370.0,1.0,1.0,2.0
4,11801,8591,NEFF,818028 - Effort,NEFF_818028,MPRAGE_TI1100_ipat2_2.nii.gz,5c9e6639f546b60028eee5e1,2013-10-23,3,3,399.0,1.0,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2448,139272,10109,GRMPY,822831 - GRMPY,ExtraLong,sub-139272_ses-10109_T1w.nii.gz,5d9cac7ea54d35003eb749f4,2016-04-07,6,6,259.0,2.0,2.0,2.0
2449,139490,8461,PNC,810336 - Big GO,ExtraLong,sub-139490_ses-PNC1_T1w.nii.gz,5d9cd114a54d350039b28ece,2013-08-30,1,2,105.0,1.0,2.0,2.0
2450,139490,10564,CONTE,815814 - Conte,ExtraLong,sub-139490_ses-CONTE1_acq-moco_T1w.nii.gz,5d9ca3cfa54d350028b04b1d,2017-04-29,2,2,149.0,1.0,2.0,2.0
2451,139553,8410,PNC,810336 - Big GO,ExtraLong,sub-139553_ses-PNC1_T1w.nii.gz,5d9d14b1a54d350042bcbddd,2013-08-23,1,2,107.0,2.0,2.0,2.0


In [148]:
# Clean up csv --> no longer need project column, change sesid column to "orig_proj".
# drop(..., errors="ignore") and rename() both tolerate columns that are already
# gone, so this cell can be re-run without raising KeyError.
xl = (
    xl
    .drop(columns=["project"], errors="ignore")
    .rename(columns={"sesid": "orig_proj"})
)
xl

Unnamed: 0,bblid,scanid,orig_proj,scan_protocol,filename,acqid,doscan,timepoint,ntimepoints,scanage_months,sex,race,ethnic
0,11399,3468,DAY,808799 - DAY2,sub-11399_ses-DAY21_T1w.nii.gz,5d9dfbaaa54d350032b1708a,2010-06-29,1,2,414.0,2.0,2.0,2.0
1,11399,3592,DAY,808799 - DAY2,sub-11399_ses-DAY22_T1w.nii.gz,5d9dfbb3a54d350044c10aef,2010-07-29,2,2,415.0,2.0,2.0,2.0
2,11801,5145,DAY,808799 - DAY2,sub-11801_ses-DAY21_T1w.nii.gz,5d9dfc1aa54d350040ba6838,2011-06-06,1,3,370.0,1.0,1.0,2.0
3,11801,5200,FNDM,810211 - FNDM,sub-11801_ses-FNDM21_T1w.nii.gz,5d9e0cdea54d35003cb5bf19,2011-06-10,2,3,370.0,1.0,1.0,2.0
4,11801,8591,NEFF,818028 - Effort,MPRAGE_TI1100_ipat2_2.nii.gz,5c9e6639f546b60028eee5e1,2013-10-23,3,3,399.0,1.0,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2448,139272,10109,GRMPY,822831 - GRMPY,sub-139272_ses-10109_T1w.nii.gz,5d9cac7ea54d35003eb749f4,2016-04-07,6,6,259.0,2.0,2.0,2.0
2449,139490,8461,PNC,810336 - Big GO,sub-139490_ses-PNC1_T1w.nii.gz,5d9cd114a54d350039b28ece,2013-08-30,1,2,105.0,1.0,2.0,2.0
2450,139490,10564,CONTE,815814 - Conte,sub-139490_ses-CONTE1_acq-moco_T1w.nii.gz,5d9ca3cfa54d350028b04b1d,2017-04-29,2,2,149.0,1.0,2.0,2.0
2451,139553,8410,PNC,810336 - Big GO,sub-139553_ses-PNC1_T1w.nii.gz,5d9d14b1a54d350042bcbddd,2013-08-23,1,2,107.0,2.0,2.0,2.0


In [125]:
# Loop through subjects, relabeling each subject and then each of its sessions.
# groupby(sort=False) visits subjects in order of first appearance (the same
# order as xl.bblid.unique()) without re-filtering the whole frame per subject.
for bblid, scans in xl.groupby("bblid", sort=False):

    # Get new zero-padded subject label
    subLabel = str(bblid).zfill(6)

    sub = updateSubject(
        f"sub-{bblid}",
        subLabel,
        dry_run = False
    )
    if not sub:
        # updateSubject has already reported the missing subject; without a
        # subject object there are no sessions to search, so move on.
        continue

    for scan in scans.itertuples():
        # Get new zero-padded session label
        sesLabel = str(scan.scanid).zfill(5)

        # The session lookup uses the csv value as-is; only the stored
        # OriginalProject is normalized from "DAY" to "DAY2".
        op = "DAY2" if scan.orig_proj == "DAY" else scan.orig_proj

        updateSession(
            sub,
            f"ses-{scan.orig_proj}{scan.timepoint}",
            sesLabel,
            info = {
                "OriginalProject": op,
                "Timepoint": scan.timepoint
            },
            dry_run = False
        )
    

2021-09-24 11:45:57,225 INFO Updating subject "sub-11801"
2021-09-24 11:45:57,601 INFO Updating session "ses-DAY1" from subject 011801
2021-09-24 11:45:58,807 INFO Updating session "ses-FNDM2" from subject 011801
2021-09-24 11:46:00,030 INFO Updating session "ses-NEFF3" from subject 011801
2021-09-24 11:46:01,276 INFO Updating subject "sub-12073"
2021-09-24 11:46:01,599 INFO Updating session "ses-FNDM1" from subject 012073
2021-09-24 11:46:02,812 INFO Updating session "ses-FNDM2" from subject 012073
2021-09-24 11:46:04,038 INFO Updating subject "sub-12202"
2021-09-24 11:46:04,357 INFO Updating session "ses-DAY1" from subject 012202
2021-09-24 11:46:05,877 INFO Updating session "ses-FNDM2" from subject 012202
2021-09-24 11:46:07,538 INFO Updating session "ses-NEFF3" from subject 012202
2021-09-24 11:46:09,243 INFO Updating subject "sub-12835"
2021-09-24 11:46:09,720 INFO Updating session "ses-DAY1" from subject 012835
2021-09-24 11:46:11,007 INFO Updating session "ses-FNDM2" from subjec

In [150]:
# Fix DAY2 project label: rows recorded as "DAY" become "DAY2"
is_day = xl["orig_proj"].eq("DAY")
xl["orig_proj"] = xl["orig_proj"].mask(is_day, "DAY2")
xl.orig_proj.unique()

## For each T1w scan, add `orig_proj` to `StudyDescription` metadata field
This is needed in order to assign BIDS filenames that carry the original project in the acquisition field. Only certain metadata fields are exposed in the heuristic file, `StudyDescription` being one of them.

In [187]:
# Scan ids whose file info has been updated; starts out empty
done = list()

In [189]:
# For each session in the project, write the scan's orig_proj from the xl
# dataframe into the StudyDescription info field of its file on Flywheel
for session in project.sessions():

    # Get t1w file object (taken to be the first file of the first acquisition)
    try:
        acq = session.acquisitions()[0].reload()
        t1w = acq.files[0]
    except (IndexError, TypeError):
        print(f"FAILED for scan: {session.label}")
        # Skip this session. Falling through would reuse the t1w left over
        # from the previous iteration and write this session's project onto
        # the wrong file (or raise NameError on the first iteration).
        continue

    # Session labels are the zero-padded scanids; match them to the csv rows
    scanid = int(session.label)
    matches = xl.loc[xl.scanid == scanid, "orig_proj"]
    if matches.empty:
        # No csv row for this scan, so there is no project to record
        print(f"No csv row found for scan: {session.label}")
        continue

    # Change study_description to orig_proj
    t1w.update_info({"StudyDescription": matches.values[0]})
    done.append(scanid)

FAILED for scan: 02750
FAILED for scan: 04202


In [182]:
# Re-export cleaned csv to original filename.
# index=False keeps the row index out of the file; with the default, every
# read/write round trip adds another unnamed index column to the csv.
xl.to_csv(fname, index=False)

'DAY2'