# Populating the LongitudinalDiffusion Project on Flywheel
### For each scan listed in the LongitudinalDiffusion `scans_for_download_upload_*.csv` file:
1. Download scan from original project
2. Upload to LongitudinalDiffusion on Flywheel, creating new subject/session/acquisition where necessary.

**Project:** LongitudinalDiffusion <br>
**Author:** Katja Zoner <br>
**Date:** 02/24/2022 <br>

# 0.  Setup

In [13]:
import os
import time
import glob
import logging
import flywheel
import numpy as np
import pandas as pd
from dateutil import tz
from datetime import datetime

In [17]:
# Configure logging for the notebook (INFO and above, timestamped lines)
# and grab the logger used by the helper functions below.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
log = logging.getLogger('root')

In [11]:
# Get a Flywheel client (uses the credentials stored by the Flywheel CLI login)
fw = flywheel.Client()
# Explicit check instead of `assert`, which is stripped when Python runs with -O
if not fw:
    raise RuntimeError("Your Flywheel CLI credentials aren't set!")

In [12]:
# Metadata for the destination project
GROUP = "BBL"  # NOTE(review): not used by the lookup below, which matches on label only
PROJECT_LABEL = "LongitudinalDiffusion"

# Locate the destination project by its label
project = fw.projects.find_first(f'label="{PROJECT_LABEL}"')
# Explicit check instead of `assert`, which is stripped when Python runs with -O
if not project:
    raise RuntimeError("Project not found!")

In [31]:
# Read in LongitudinalDiffusion csv
data_dir = "/Users/kzoner/BBL/projects/LongitudinalDiffusion/data/organize/"

# glob makes no ordering guarantee, so sort the matches before taking the last
# one (presumably the newest file, if the suffix is a sortable date -- confirm).
csv_paths = sorted(glob.glob(data_dir + "scans_for_download_upload_*.csv"))
if not csv_paths:
    raise FileNotFoundError(
        f"No scans_for_download_upload_*.csv file found in {data_dir}"
    )
fname = csv_paths[-1]

df = pd.read_csv(fname)
# Order rows by subject, then by timepoint within each subject
df.sort_values(by=['bblid','timepoint'],inplace=True)
df

Unnamed: 0,fileid,project,bblid,sesid,acqid,folder,modality,filename,ntimepoints,timepoint
0,265b5c62-ce67-4726-94c5-ace6bc842391,22Q_812481,15305,8932,5c8fb164f546b6002cbd9f9a,anat,T1w,MPRAGE_TI1100_ipat2_2.nii.gz,2,1
1,35346e48-ef41-4051-a701-6926ad04f709,22Q_812481,15305,8932,5c8fb164f546b6002fbdafeb,dwi,dwi,DTI_2x32_36_11.nii.gz,2,1
2,4a8dd468-3aae-4b66-a3a9-50cc35edfaf8,22Q_812481,15305,8932,5c8fb164f546b6002fbdafeb,dwi,dwi,DTI_2x32_36_11.bvec,2,1
3,0576830c-8bfe-42c3-8413-bf5c077b4860,22Q_812481,15305,8932,5c8fb164f546b6002fbdafeb,dwi,dwi,DTI_2x32_36_11.bval,2,1
4,e7357eb0-48ce-480d-9377-690fc687b962,22Q_812481,15305,8932,5c8fb164f546b6002fbdafec,dwi,dwi,DTI_2x32_35_10.nii.gz,2,1
...,...,...,...,...,...,...,...,...,...,...
14578,2540c846-df7a-47c4-ba9f-039cb95a77a2,PNC_LG_810336,139272,10040,5c76ef2dba2580003839f9fe,dwi,dwi,DTI_2x32_35_13.bvec,2,2
14579,a8036e95-7521-4472-90cf-b0df2bd3e689,PNC_LG_810336,139272,10040,5c76ef2dba2580003839f9fe,dwi,dwi,DTI_2x32_35_13.bval,2,2
14580,d3ff6180-1138-454c-a950-a2d2364125c3,PNC_LG_810336,139272,10040,5c76ef2dba2580002938f366,fmap,phasediff,B0map_v4_9_ph.nii.gz,2,2
14581,5ac4cb6e-6777-456b-bf39-72a5c6f494ce,PNC_LG_810336,139272,10040,5c76ef2dba2580003839f9f7,fmap,magnitude2,B0map_v4_8_e2.nii.gz,2,2


In [50]:
def getFile(row):
    """Fetch a file's contents, its metadata, and its acquisition's timestamp.

    Args:
        row: A record exposing `acqid` (ID of the source acquisition) and
            `filename` (name of the file inside that acquisition).

    Returns:
        tuple: `(scan, metadata, timestamp)` where `scan` is the result of
            `acq.read_file(row.filename)`, `metadata` is the result of
            `acq.get_file(row.filename)`, and `timestamp` is the acquisition's
            `timestamp` attribute, or None when the attribute is missing.
    """
    # Get the source acquisition container by ID
    acq = fw.get_acquisition(row.acqid)

    # Tolerate only a missing attribute; any other failure should surface
    # instead of being silently turned into "no timestamp".
    timestamp = getattr(acq, "timestamp", None)

    # Read the file from the acquisition
    scan = acq.read_file(row.filename)

    # Get the file's metadata entry
    metadata = acq.get_file(row.filename)

    return scan, metadata, timestamp

def get_or_create_subject(project, label, update=True, **kwargs):
    """Get the Subject container if it exists, else create a new Subject container.

    Args:
        project (flywheel.Project): A Flywheel Project.
        label (str): The subject label.
        update (bool): If true, update container with key/value passed as kwargs.
        kwargs (dict): Any key/value properties of subject you would like to update.

    Returns:
        (flywheel.Subject): A Flywheel Subject container.

    Raises:
        ValueError: If `label` is empty or None.
    """
    if not label:
        raise ValueError(f'label is required (currently "{label}")')

    # Look the subject up by label and create it only when the search finds nothing
    subject = project.subjects.find_first(f'label="{label}"')
    if not subject:
        subject = project.add_subject(label=label)

    if update and kwargs:
        subject.update(**kwargs)

    # Reload so the returned container reflects any update applied above
    if subject:
        subject = subject.reload()

    return subject

def get_or_create_session(subject, label, update=True, scanid=None, **kwargs):
    """Get the Session container if it exists, else create a new Session container.

    Args:
        subject (flywheel.Subject): A Flywheel Subject.
        label (str): The session label.
        update (bool): If true, update container with key/value passed as kwargs
            and, when given, record `scanid`.
        scanid: Optional value written to the session via `update_info` under
            the "scanid" key.
        kwargs (dict): Any key/value properties of Session you would like to update.

    Returns:
        (flywheel.Session): A flywheel Session container.

    Raises:
        ValueError: If `label` is empty or None.
    """
    if not label:
        raise ValueError(f'label is required (currently "{label}")')

    # Look the session up by label and create it only when the search finds nothing
    session = subject.sessions.find_first(f'label="{label}"')
    if not session:
        session = subject.add_session(label=label)

    if update:
        if kwargs:
            session.update(**kwargs)
        if scanid:
            session.update_info({"scanid": scanid})

    # Reload so the returned container reflects any update applied above
    if session:
        session = session.reload()

    return session

def get_or_create_acquisition(session, label, update=True, **kwargs):
    """Get the Acquisition container if it exists, else create a new Acquisition container.

    Args:
        session (flywheel.Session): A Flywheel Session.
        label (str): The Acquisition label.
        update (bool): If true, update container with key/value passed as kwargs.
        kwargs (dict): Any key/value properties of Acquisition you would like to update.

    Returns:
        (flywheel.Acquisition): A Flywheel Acquisition container.

    Raises:
        ValueError: If `label` is empty or None.
    """
    if not label:
        raise ValueError(f'label is required (currently {label})')

    # Quote the label in the filter, as the subject/session helpers do, so
    # labels containing spaces or special characters are matched as one value.
    acquisition = session.acquisitions.find_first(f'label="{label}"')
    if not acquisition:
        acquisition = session.add_acquisition(label=label)

    if update and kwargs:
        acquisition.update(**kwargs)

    # Reload so the returned container reflects any update applied above
    if acquisition:
        acquisition = acquisition.reload()

    return acquisition

def upload_file_to_acquistion(acquistion, f, filename, update=True, metadata=None, **kwargs):
    """Upload a file to an Acquisition container and update the file if `update=True`.

    If a file named `filename` already exists in the acquisition, the upload
    is skipped; the optional update step still runs.

    Args:
        acquistion (flywheel.Acquisition): A Flywheel Acquisition. (The
            parameter keeps its original spelling so keyword callers still work.)
        f: Data passed to `flywheel.FileSpec` as the second argument
            (presumably the file's raw contents -- confirm against callers).
        filename (str): Name of the file inside the acquisition.
        update (bool): If true, apply `kwargs` and `metadata` to the file.
        metadata (dict): If given, passed to the file's `update_info`.
        kwargs (dict): Any key/value properties of the file you would like to update.
    """
    if acquistion.get_file(filename):
        log.info(f'File {filename} already exists in container. Skipping.')

    else:
        log.info(f'Uploading {filename} to acquisition {acquistion.id}')
        file_spec = flywheel.FileSpec(filename, f)
        acquistion.upload_file(file_spec)
        # Poll until the file is visible on the container, so the update
        # below has something to act on.
        while not acquistion.get_file(filename):
            time.sleep(1)
            acquistion = acquistion.reload()

    if update:
        # Use the container passed in, not a same-sounding global name
        file_entry = acquistion.get_file(filename)
        if kwargs:
            file_entry.update(**kwargs)
        if metadata:
            file_entry.update_info(metadata)


In [25]:
def isUploaded(scan):
    """Report whether the scan's subject and session both exist in `project`.

    Args:
        scan: A record exposing `bblid` and `sesid`.

    Returns:
        bool: True when a subject labelled `sub-<bblid>` is found and it has a
            session labelled `ses-<sesid>`; False otherwise. Individual files
            are not checked.
    """
    # NOTE(review): the upload loop in this notebook creates subjects/sessions
    # with zero-padded labels and no `sub-`/`ses-` prefix -- confirm that these
    # filters match the labels actually present in the project.
    subject = project.subjects.find_first(f"label=sub-{scan.bblid}")
    if not subject:
        return False

    session = subject.sessions.find_first(f"label=ses-{scan.sesid}")
    return bool(session)

In [None]:
# Identifiers of scans treated as already uploaded; later cells append to this
# list and use it to filter the dataframe.
uploaded = []

In [26]:
# Record every file whose subject/session already exists in the destination
# project. `isUploaded` only looks at bblid and sesid, so its answer is cached
# per (bblid, sesid) pair rather than repeating the remote lookups per file.
session_uploaded = {}
for scan in df.itertuples():
    key = (scan.bblid, scan.sesid)
    if key not in session_uploaded:
        session_uploaded[key] = isUploaded(scan)
    if session_uploaded[key]:
        # Track by fileid: the dataframe has no `scanid` column, and the
        # upload loop filters on `fileid`.
        uploaded.append(scan.fileid)

KeyboardInterrupt: 

In [55]:
# Total number of rows (one per file) in the dataframe
len(df)

14583

In [56]:
# Number of entries recorded in `uploaded` so far
len(uploaded)

969

In [57]:
# Transfer every file not yet recorded in `uploaded`:
#   1. Read the file and its metadata from the original Flywheel project
#   2. Get or create the destination subject / session / acquisition
#   3. Upload the file to LongitudinalDiffusion and copy its info across

pending = df[~df.fileid.isin(uploaded)]

for row in pending.itertuples():

    # File contents are read into memory, not written to disk
    scan, metadata, timestamp = getFile(row)

    # Destination labels are zero-padded: subjects to 6 characters, sessions to 5
    subject_label = str(row.bblid).zfill(6)
    session_label = str(row.sesid).zfill(5)

    # Get or create the destination containers, top-down
    subject = get_or_create_subject(project, label=subject_label, update=True, type='human')
    session = get_or_create_session(subject, label=session_label, timestamp=timestamp)
    acquisition = get_or_create_acquisition(session, label=row.folder, timestamp=timestamp)

    upload_file_to_acquistion(acquisition, f=scan, filename=row.filename)

    # Copy the source file's info onto the uploaded file
    acquisition.replace_file_info(row.filename, metadata['info'])

    # Disabled: file classification is not set by this loop.
    # acquisition.replace_file_classification(
    #     row.filename,
    #     classification={'Intent': ['Structural'], 'Measurement': ['T1']},
    #     modality='MR'
    # )

    # Remember the file so it is skipped if this cell is run again
    uploaded.append(row.fileid)
    print(f'Finished uploading {row.filename} from sub-{row.bblid} ses-{row.sesid}')
    

2022-03-07 11:26:33,025 INFO Uploading B0map_onesizefitsall_v3_6_e1.nii.gz to acquisition 6222b95fbbe49a5b571d5026


Finished uploading B0map_onesizefitsall_v3_6_e1.nii.gz from sub-81903 ses-7482
