# Creating the ExtraLong-2021 Project on Flywheel
### To build the Flywheel project for the new July 1st, 2021 data freeze, do the following for each scan listed in the ExtraLong-2021 CSV:
1. Download scan from original project
2. Upload to ExtraLong-2021 on Flywheel

**Project:** ExtraLong <br>
**Author:** Katja Zoner <br>
**Date:** 08/23/2021 <br>

# 0.  Setup

In [2]:
import os
import sys
import logging
import time
from dateutil import tz
from datetime import datetime
import string 
import numpy as np
import pandas as pd
import flywheel

In [2]:
# Configure logging for the notebook: INFO and above, each record timestamped
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
# Shared logger handle used by the upload helper further down
log = logging.getLogger('root')

In [4]:
# Get client
# No API key is passed here; presumably the SDK picks up the credentials stored by
# the Flywheel CLI login — TODO confirm.
fw = flywheel.Client()
# Sanity check that a usable client object came back
assert fw, "Your Flywheel CLI credentials aren't set!"

In [4]:
# Destination project on Flywheel
GROUP = "BBL"
PROJECT_LABEL = "ExtraLong_2021"

# Look the destination project up by label and stop early if it is missing
project = fw.projects.find_first(f'label="{PROJECT_LABEL}"')
assert project, "Project not found!"



In [9]:
# Load the ExtraLong 2021 datafreeze csv, ordered by subject and then timepoint.
# Earlier version of the file: "./csv/ExtraLong-Datafreeze-20210831.csv"
fname = "./csv/ExtraLong-Datafreeze-2021-Updated-20210920.csv"
xl = pd.read_csv(fname).sort_values(by=['bblid', 'timepoint'])

In [None]:
# data = '''
# bblid	scanid	sesid	scan_protocol	project	filename	acqid	doscan	timepoint	ntimepoints	scanage_months	sex	race	ethnic
# 0	13473	8170	CONTE1	815814 - Conte	ExtraLong	sub-13473_ses-CONTE1_acq-moco_T1w.nii.gz	5da8c99e6cbeb00078783a0a	2013-06-27	1	3	331.0	2.0	4.0	2.0
# 1	13473	8679	NaN	818028 - Effort	NEFF_818028	MPRAGE_TI1100_ipat2_2.nii.gz	5c9e6634f546b60039efc766	2013-12-13	2	3	336.0	2.0	4.0	2.0
# 2	13473	9923	CONTE1	815814 - Conte	ExtraLong	sub-13473_ses-CONTE1_acq-moco_T1w.nii.gz	5da8c99e6cbeb00078783a0a	2015-11-02	3	3	359.0	2.0	4.0	2.0
# 3	13550	9093	CONTE1	815814 - Conte	ExtraLong	sub-13550_ses-CONTE1_acq-moco_T1w.nii.gz	5d9ca3a3a54d350040b973b1	2014-10-06	1	2	288.0	1.0	1.0	1.0
# 4	13550	9239	CONTE2	815814 - Conte	ExtraLong	sub-13550_ses-CONTE2_acq-moco_T1w.nii.gz	5d9ca39da54d350040b9739e	2015-01-15	2	2	291.0	1.0	1.0	1.0
# 5	139490	8461	PNC1	810336 - Big GO	ExtraLong	sub-139490_ses-PNC1_T1w.nii.gz	5d9cd114a54d350039b28ece	2013-08-30	1	2	105.0	1.0	2.0	2.0
# 6	139490	10564	CONTE1	815814 - Conte	ExtraLong	sub-139490_ses-CONTE1_acq-moco_T1w.nii.gz	5d9ca3cfa54d350028b04b1d	2017-04-29	2	2	149.0	1.0	2.0	2.0
# 7	139553	8410	PNC1	810336 - Big GO	ExtraLong	sub-139553_ses-PNC1_T1w.nii.gz	5d9d14b1a54d350042bcbddd	2013-08-23	1	2	107.0	2.0	2.0	2.0
# 8	139553	9822	PNC2	810336 - Go3	ExtraLong	sub-139553_ses-PNC2_T1w.nii.gz	5d9f6460ccb6a50054468422	2015-09-12	2	2	132.0	2.0	2.0	2.0
# '''

# lines = data.split("\n")
# lines.remove("")
# lines.remove("")

# for i in range(len(lines)):
#     lines[i] = lines[i].split("\t")
#     if i > 0:
#         lines[i].pop(0)

# data = pd.DataFrame(lines[1:],columns = lines[0])

In [None]:
# Maps a value of the datafreeze `project` column to a session label.
# The sesid-cleaning cell uses it as the fallback label for rows whose `sesid` is NaN.
SESSION_LABEL_DICT = {
    "22q_Midline_834246": "22QMID",
    "AGGY_808689": "AGGY",
    "CONTE_815814": "CONTE",
    "EONSX_810366": "EONSX",
    "GRMPY_822831": "GRMPY",
    "MOTIVE": "MOTIVE",
    "NEFF_818028": "NEFF",
    "PNC_CS_810336": "PNC",
    "PNC_LG_810336": "PNC",
    "SYRP_818621": "SYRP",
    "Evolution_833922": "EVOL"
}

In [None]:
def _clean_sesid(sesid, project_label):
    """Collapse one raw `sesid` value to its cleaned session label.

    Args:
        sesid: Raw value from the `sesid` column (string, number or NaN).
        project_label (str): Value of the row's `project` column; only used
            when `sesid` is missing.

    Returns:
        str: The cleaned session label.

    Raises:
        KeyError: If `sesid` is missing and `project_label` has no entry in
            SESSION_LABEL_DICT.
    """
    sesid = str(sesid)

    # All numeric sesid's from old ExtraLong should be GRMPY
    if sesid.isdecimal():
        return "GRMPY"

    # Change motive to all caps to match
    if sesid == 'motive1':
        return "MOTIVE"

    # Missing sesid (NaN stringifies to 'nan'): fall back to the project mapping
    if sesid.lower() == 'nan':
        try:
            return SESSION_LABEL_DICT[project_label]
        except KeyError:
            raise KeyError(
                f'No session label known for project "{project_label}"; '
                'add it to SESSION_LABEL_DICT'
            ) from None

    # Otherwise strip the trailing digits, e.g. "CONTE1" -> "CONTE"
    return sesid.rstrip(string.digits)


# Clean the sesid field for the entire dataframe with one column assignment
# (instead of writing through .loc once per row while iterating).
xl["sesid"] = [
    _clean_sesid(sesid, project_label)
    for sesid, project_label in zip(xl["sesid"], xl["project"])
]
xl

In [16]:
from pandas.api.types import CategoricalDtype

# Recode the sex column as a labelled categorical: 1.0 -> 'male', 2.0 -> 'female'.
# (The float64 casts for race / ethnic remain disabled.)
xl["sex"] = (
    xl["sex"]
    .astype("category")
    .cat.rename_categories({1.0: 'male', 2.0: 'female'})
)
xl

Unnamed: 0,bblid,scanid,sesid,scan_protocol,project,filename,acqid,doscan,timepoint,ntimepoints,scanage_months,sex,race,ethnic
0,11399,3468,DAY,808799 - DAY2,ExtraLong,sub-11399_ses-DAY21_T1w.nii.gz,5d9dfbaaa54d350032b1708a,2010-06-29,1,2,414.0,female,2.0,2.0
1,11399,3592,DAY,808799 - DAY2,ExtraLong,sub-11399_ses-DAY22_T1w.nii.gz,5d9dfbb3a54d350044c10aef,2010-07-29,2,2,415.0,female,2.0,2.0
2,11801,5145,DAY,808799 - DAY2,ExtraLong,sub-11801_ses-DAY21_T1w.nii.gz,5d9dfc1aa54d350040ba6838,2011-06-06,1,3,370.0,male,1.0,2.0
3,11801,5200,FNDM,810211 - FNDM,ExtraLong,sub-11801_ses-FNDM21_T1w.nii.gz,5d9e0cdea54d35003cb5bf19,2011-06-10,2,3,370.0,male,1.0,2.0
4,11801,8591,NEFF,818028 - Effort,NEFF_818028,MPRAGE_TI1100_ipat2_2.nii.gz,5c9e6639f546b60028eee5e1,2013-10-23,3,3,399.0,male,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2448,139272,10109,GRMPY,822831 - GRMPY,ExtraLong,sub-139272_ses-10109_T1w.nii.gz,5d9cac7ea54d35003eb749f4,2016-04-07,6,6,259.0,female,2.0,2.0
2449,139490,8461,PNC,810336 - Big GO,ExtraLong,sub-139490_ses-PNC1_T1w.nii.gz,5d9cd114a54d350039b28ece,2013-08-30,1,2,105.0,male,2.0,2.0
2450,139490,10564,CONTE,815814 - Conte,ExtraLong,sub-139490_ses-CONTE1_acq-moco_T1w.nii.gz,5d9ca3cfa54d350028b04b1d,2017-04-29,2,2,149.0,male,2.0,2.0
2451,139553,8410,PNC,810336 - Big GO,ExtraLong,sub-139553_ses-PNC1_T1w.nii.gz,5d9d14b1a54d350042bcbddd,2013-08-23,1,2,107.0,female,2.0,2.0


In [41]:
def getT1w(scan):
    """Fetch a scan's file contents and file entry from its source acquisition.

    Args:
        scan: A datafreeze row exposing `acqid` and `filename` attributes.

    Returns:
        tuple: `(t1w, metadata)` - the result of `read_file` and the result of
        `get_file` for `scan.filename` on the acquisition with id `scan.acqid`.
    """
    source_acq = fw.get_acquisition(scan.acqid)
    contents = source_acq.read_file(scan.filename)
    file_entry = source_acq.get_file(scan.filename)
    return contents, file_entry

def get_or_create_subject(project, label, update=True, **kwargs):
    """Return the Subject labelled `label` in `project`, creating it when absent.

    Args:
        project (flywheel.Project): Project to search and, if needed, add the subject to.
        label (str): Subject label; must be non-empty.
        update (bool): When true and kwargs were given, apply them via `subject.update`.
        kwargs (dict): Subject properties to set.

    Returns:
        (flywheel.Subject): The subject container, reloaded before it is returned.

    Raises:
        ValueError: If `label` is empty.
    """
    if not label:
        raise ValueError(f'label is required (currently "{label}"")')

    found = project.subjects.find_first(f'label="{label}"')
    subject = found if found else project.add_subject(label=label)

    if kwargs and update:
        subject.update(**kwargs)

    # Reload so the returned object reflects any update made above
    return subject.reload() if subject else subject

def get_or_create_session(subject, label, update=True, scanid=None, **kwargs):
    """Return the Session labelled `label` under `subject`, creating it when absent.

    Args:
        subject (flywheel.Subject): Subject to search and, if needed, add the session to.
        label (str): Session label; must be non-empty.
        update (bool): When true, apply `kwargs` and `scanid` to the session.
        scanid: If truthy (and `update` is true), stored under the "scanid" key via
            `session.update_info`.
        kwargs (dict): Session properties to set via `session.update`.

    Returns:
        (flywheel.Session): The session container, reloaded before it is returned.

    Raises:
        ValueError: If `label` is empty.
    """
    if not label:
        raise ValueError(f'label is required (currently "{label}"")')

    session = subject.sessions.find_first(f'label="{label}"') or subject.add_session(label=label)

    if update:
        if kwargs:
            session.update(**kwargs)
        if scanid:
            session.update_info({"scanid": scanid})

    # Reload so the returned object reflects any update made above
    return session.reload() if session else session

def get_or_create_acquisition(session, label, update=True, **kwargs):
    """Get the Acquisition container if it exists, else create a new Acquisition container.

    Args:
        session (flywheel.Session): A Flywheel Session.
        label (str): The Acquisition label; must be non-empty.
        update (bool): If true, update container with key/value passed as kwargs.
        kwargs (dict): Any key/value properties of Acquisition you would like to update.

    Returns:
        (flywheel.Acquisition): A Flywheel Acquisition container, reloaded before
        it is returned.

    Raises:
        ValueError: If `label` is empty.
    """

    if not label:
        raise ValueError(f'label is required (currently {label})')

    # Quote the label in the filter, exactly as the subject and session helpers do,
    # so labels containing spaces or special characters are matched as one value.
    acquisition = session.acquisitions.find_first(f'label="{label}"')
    if not acquisition:
        acquisition = session.add_acquisition(label=label)

    if update and kwargs:
        acquisition.update(**kwargs)

    # Reload so the returned object reflects any update made above
    if acquisition:
        acquisition = acquisition.reload()

    return acquisition

def upload_file_to_acquistion(acquistion, f, filename, update=True, metadata=None, **kwargs):
    """Upload a file to an Acquisition container (unless it is already there) and
    update the file entry if `update=True`.

    Args:
        acquistion (flywheel.Acquisition): Destination acquisition. The misspelled
            parameter name is kept so existing keyword callers keep working.
        f: File contents, handed to `flywheel.FileSpec` together with `filename`.
        filename (str): Name of the file inside the acquisition.
        update (bool): If true, apply `kwargs` and `metadata` to the file entry.
        metadata (dict): Passed to the file entry's `update_info`.
        kwargs (dict): Key/value properties passed to the file entry's `update`.

    Note:
        After uploading, the function polls once per second until the file shows up
        on the reloaded acquisition; there is no timeout.
    """
    if acquistion.get_file(filename):
        log.info(f'File {filename} already exists in container. Skipping.')

    else:
        log.info(f'Uploading {filename} to acquisition {acquistion.id}')
        file_spec = flywheel.FileSpec(filename, f)
        acquistion.upload_file(file_spec)
        while not acquistion.get_file(filename):   # to make sure the file is available before performing an update
            acquistion = acquistion.reload()
            time.sleep(1)

    if update and (kwargs or metadata):
        # Use the (possibly reloaded) container passed in, not a notebook-level
        # global that merely happens to be called `acquisition`.
        file_entry = acquistion.get_file(filename)
        if kwargs:
            file_entry.update(**kwargs)
        if metadata:
            file_entry.update_info(metadata)


In [None]:
def isUploaded(scan):
    """Report whether a scan's session already exists in the destination project.

    The lookup uses the same labels the upload loop in this notebook creates:
    the bblid zero-padded to 6 characters for the subject and the scanid
    zero-padded to 5 characters for the session.

    Args:
        scan: A datafreeze row exposing `bblid` and `scanid` attributes.

    Returns:
        bool: True if both the subject and the session are found, else False.
        Only the containers are checked, not the presence of the T1w file.
    """
    subject_label = str(scan.bblid).zfill(6)
    session_label = str(scan.scanid).zfill(5)

    sub = project.subjects.find_first(f'label="{subject_label}"')
    if not sub:
        return False

    ses = sub.sessions.find_first(f'label="{session_label}"')
    return bool(ses)

In [None]:
# uploaded = []

# for scan in xl.itertuples():
#     if isUploaded(scan):
#         uploaded.append(scan.scanid)

In [None]:
# NOTE(review): `uploaded` is only assigned in the previous cell, which is currently
# commented out, so on a fresh kernel this line raises NameError until that cell is restored.
len(uploaded)

In [25]:
# Scan ids kept for reference only; the selection below is made per subject.
retry_scanids = [2750, 4202]

# Select every scan that belongs to the subjects being retried.
# (A scanid-based selection used to be computed first and was immediately overwritten.)
retry_bblids = [81043, 110166]
retry = xl[xl.bblid.isin(retry_bblids)]
retry

Unnamed: 0,bblid,scanid,sesid,scan_protocol,project,filename,acqid,doscan,timepoint,ntimepoints,scanage_months,sex,race,ethnic
323,81043,2750,PNC,810336 - Big GO,ExtraLong,sub-81043_ses-PNC1_T1w.nii.gz,5d9d158ca54d350039b29fe3,2010-01-16,1,2,249.0,female,2.0,2.0
324,81043,7244,PNC,810336 - Go2 Supplement,ExtraLong,sub-81043_ses-PNC2_T1w.nii.gz,5d9f6506ccb6a500544685a5,2012-09-17,2,2,281.0,female,2.0,2.0
1649,110166,4202,PNC,810336 - Big GO,ExtraLong,sub-110166_ses-PNC1_T1w.nii.gz,5d9cca4ca54d350036b1eb71,2010-12-02,1,3,164.0,male,1.0,2.0
1650,110166,7525,PNC,810336 - Go2 Supplement,ExtraLong,sub-110166_ses-PNC2_T1w.nii.gz,5d9eda08ccb6a5003f45cfcf,2012-12-08,2,3,189.0,male,1.0,2.0
1651,110166,8968,CONTE,815814 - Conte,ExtraLong,sub-110166_ses-CONTE1_acq-moco_T1w.nii.gz,5d9ca169a54d350032b14609,2014-07-14,3,3,208.0,male,1.0,2.0


In [42]:
# For each scan in the dataframe:
# - load t1w nifti file into memory from fw
# - upload to extralong2021
#
# NOTE: the loop variables stay defined as notebook globals after the loop finishes;
# a later cell reads `scan`, so keep these names as they are.

# for scan in xl[~xl.scanid.isin(uploaded)].itertuples():
for scan in retry.itertuples():

    # Fetch the file contents and the file entry from the source acquisition
    t1w, metadata = getT1w(scan)
    
    # Parse the scan date and attach the US/Eastern time zone to it
    timestamp = datetime.fromisoformat(scan.doscan)
    timestamp = timestamp.replace(tzinfo=tz.gettz("US/Eastern"))    

    # Subject label: bblid left-padded with zeros to 6 characters
    label = str(scan.bblid).zfill(6)
    # Get or create subject object on Flywheel
    subject = get_or_create_subject(
        project, 
        label, 
        
        update=True, 
        type='human', 
        sex=scan.sex
    )
    
    # Session label: scanid left-padded with zeros to 5 characters;
    # the raw scanid is also stored in the session info
    session = get_or_create_session(
        subject, 
        str(scan.scanid).zfill(5),
        update=True,
        scanid=scan.scanid,
        timestamp=timestamp
    )

    # Every scan goes into an acquisition labelled "T1w"
    acquisition = get_or_create_acquisition(
        session, 
        "T1w"
    )
    
    # Upload the file under its original filename (skipped if it is already present)
    upload_file_to_acquistion(
        acquisition,
        f=t1w,
        filename=scan.filename
    )

    # Replace the uploaded file's info with the `info` of the source file entry
    acquisition.replace_file_info(
        scan.filename, 
        metadata['info']
    )

    # Set the file's classification and modality
    acquisition.replace_file_classification(
        scan.filename, 
        classification={'Intent': ['Structural'], 'Measurement': ['T1']}, 
        modality='MR'
    )

    # uploaded.append(scan.scanid)
    print(f'Finished uploading scan: {scan.scanid}')
    

2021-09-27 09:50:06,476 INFO File sub-81043_ses-PNC1_T1w.nii.gz already exists in container. Skipping.


Finished uploading scan: 2750


2021-09-27 09:50:13,037 INFO Uploading sub-81043_ses-PNC2_T1w.nii.gz to acquisition 6151cc234dcec15ab168d55e


Finished uploading scan: 7244


2021-09-27 09:50:25,736 INFO Uploading sub-110166_ses-PNC1_T1w.nii.gz to acquisition 6151cc2f381369a64ea80ff4


Finished uploading scan: 4202


2021-09-27 09:50:36,489 INFO Uploading sub-110166_ses-PNC2_T1w.nii.gz to acquisition 6151cc3abde5fce5f3490f7e


Finished uploading scan: 7525


2021-09-27 09:50:47,241 INFO Uploading sub-110166_ses-CONTE1_acq-moco_T1w.nii.gz to acquisition 6151cc45087944cbf7525eca


Finished uploading scan: 8968


In [30]:
# Scratch check of the subject label format; relies on `scan` being left over
# from a previously executed loop over the dataframe.
label = str(scan.bblid).zfill(6)
label

'081043'

In [40]:
# Spot-check: look up one uploaded subject by its zero-padded label
project.subjects.find_first('label="081043"')


{'age': None,
 'analyses': None,
 'code': '081043',
 'cohort': None,
 'created': datetime.datetime(2021, 9, 27, 13, 43, 26, 463000, tzinfo=tzutc()),
 'ethnicity': None,
 'files': [],
 'firstname': None,
 'id': '6151ca7e8b9f5fb1eeeb3f3d',
 'info': {},
 'info_exists': False,
 'label': '081043',
 'lastname': None,
 'master_code': None,
 'modified': datetime.datetime(2021, 9, 27, 13, 43, 29, 96000, tzinfo=tzutc()),
 'notes': [],
 'parents': {'acquisition': None,
             'analysis': None,
             'group': 'bbl',
             'project': '6127c4eb96cd532e0ef68b60',
             'session': None,
             'subject': None},
 'permissions': [{'access': None,
                  'id': 'kzoner@upenn.edu',
                  'role_ids': ['5ef07972374bc20010a37aa3',
                               '5ef07972374bc20010a37aa4']}],
 'project': '6127c4eb96cd532e0ef68b60',
 'race': None,
 'revision': 4,
 'sex': None,
 'species': None,
 'strain': None,
 'tags': [],
 'type': 'human'}