In [95]:
import logging
import os

import numpy as np
import pandas as pd
import flywheel

In [77]:
# Set up timestamped INFO-level logging and grab the logger used by the helpers below.
LOG_FORMAT = '%(asctime)s %(levelname)s %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
log = logging.getLogger('root')

In [8]:
# Read the API key from the environment rather than hardcoding it in the notebook.
# SECURITY: a key was previously committed in this cell; it should be revoked and regenerated.
API_KEY = os.environ.get("FW_API_KEY")

# Get client. Without an explicit key, the SDK uses the credentials stored by `fw login`.
fw = flywheel.Client(API_KEY) if API_KEY else flywheel.Client()
assert fw, "Your Flywheel CLI credentials aren't set!"

In [9]:
# Identify the Flywheel project this notebook operates on and fetch it by label.
GROUP = "BBL"
PROJECT_LABEL = "ExtraLong_2021"
project = fw.projects.find_first(f'label="{PROJECT_LABEL}"')
assert project, "Project not found!"



In [21]:
def filterBySessionCount(df, thresh):
    """Keep only subjects with at least `thresh` scans and renumber their timepoints.

    Args:
        df (pd.DataFrame): One row per scan; must contain `bblid` and `doscan` columns.
        thresh (int): Minimum number of rows a subject (bblid) needs to be kept.

    Returns:
        pd.DataFrame: A filtered copy of `df`, sorted by `bblid` then `doscan`, where
        `ntimepoints` holds the subject's row count in `df` and `timepoint` numbers
        each subject's rows 1..n in `doscan` order. The input frame is not modified.
    """
    # Number of scans per subject, broadcast back onto every row of df.
    counts_by_subject = df["bblid"].value_counts()
    row_counts = df["bblid"].map(counts_by_subject)

    # Filter dataframe to only include subjects that meet session requirements.
    meets_threshold = row_counts >= thresh
    kept = df[meets_threshold].copy()

    # Update ntimepoints to the number of sessions each remaining subject has.
    kept["ntimepoints"] = row_counts[meets_threshold].astype(int)

    # Sort BEFORE numbering so that timepoint follows scan-date order rather than
    # whatever row order the input happened to be in.
    kept = kept.sort_values(["bblid", "doscan"])
    kept["timepoint"] = kept.groupby("bblid").cumcount() + 1

    return kept

In [48]:
def get_row_diff(bigger, smaller):
    """Return the rows of `bigger` whose `scanid` does not appear in `smaller`.

    Both arguments are DataFrames with a `scanid` column; the result keeps the
    original index and columns of `bigger`.
    """
    present_in_smaller = bigger.scanid.isin(smaller.scanid)
    missing_from_smaller = ~present_in_smaller
    return bigger[missing_from_smaller]

In [57]:
def getSession(scan):
    """Look up the Flywheel session that corresponds to one scan row.

    Args:
        scan: A row object exposing `bblid`, `sesid` and `timepoint` attributes
            (e.g. a tuple produced by `DataFrame.itertuples()`).

    Returns:
        The first session labelled "ses-<sesid><timepoint>" under the subject
        labelled "sub-<bblid>" in `project`, or None (after printing a message)
        when either the subject or the session is not found.
    """
    subject = project.subjects.find_first(f"label=sub-{scan.bblid}")
    if not subject:
        print(f"Subject {scan.bblid} not found on Flywheel!")
        return None

    session = subject.sessions.find_first(f"label=ses-{scan.sesid}{scan.timepoint}")
    if not session:
        print(f"Subject {scan.bblid} Session {scan.sesid} not found on Flywheel!")
        return None

    return session

In [72]:
def getSubject(bblid):
    """Return the subject labelled "sub-<bblid>" in `project`.

    Prints a message and returns None when no such subject is found.
    """
    subject = project.subjects.find_first(f"label=sub-{bblid}")
    if not subject:
        print(f"Subject sub-{bblid} not found on Flywheel!")
        return None
    return subject

In [63]:
def delete_session(session, dry_run=True):
    """Log the deletion of a session and, unless dry-running, delete it from Flywheel.

    Args:
        session (object): A Flywheel Session.
        dry_run (bool): If true, the deletion is only logged and nothing is removed.

    Returns:
        bool: True if the delete call was made, False on a dry run.
    """
    log.info(f'Deleting session "{session.label}" from subject {session.subject.label}')
    if dry_run:
        return False
    fw.delete_session(session.id)
    return True

In [82]:
def delete_empty_subject(subject, dry_run=True):
    """Delete a subject from Flywheel if it has no sessions.

    Args:
        subject (object): A Flywheel Subject.
        dry_run (bool): If true, the would-be deletion is only logged and the
            container is not deleted.

    Returns:
        bool: True only if the subject was empty and actually got deleted;
        False if it still has sessions or if this was a dry run.
    """
    log.info(f'Checking if subject "{subject.label}" is empty')
    num_sessions = len(subject.sessions())
    log.info(f'Found {num_sessions} sessions')
    if num_sessions > 0:
        return False

    log.info(f'Deleting subject "{subject.label}"')
    if dry_run:
        # Nothing was removed, so report False, matching the documented contract
        # and the behaviour of delete_session.
        return False
    fw.delete_subject(subject.id)
    return True

## Dataset should only include subjects with 2+ timepoints. Subjects should be < 35 yrs old at all timepoints.
### 1. Filter the CSV to find the sessions to keep and the sessions to remove

In [49]:
# Read in the ExtraLong 2021 datafreeze csv into `xl` (the unfiltered table that the
# filtering cell below works from) and display it for inspection.
fname = "./csv/ExtraLong-Datafreeze-20210831.csv"
xl = pd.read_csv(fname)
#xl.sort_values(by=['bblid','timepoint'],inplace=True)
xl

Unnamed: 0,bblid,scanid,sesid,scan_protocol,project,filename,acqid,doscan,timepoint,ntimepoints,scanage_months,sex,race,ethnic
0,10410,6776,FNDM,810211 - FNDM,ExtraLong,sub-10410_ses-FNDM11_T1w.nii.gz,5d9e0666a54d350038b39e11,2012-04-24,1,2,556.0,2.0,2.0,2.0
1,10410,6843,FNDM,810211 - FNDM,ExtraLong,sub-10410_ses-FNDM21_T1w.nii.gz,5d9e0972a54d350044c11e33,2012-05-11,2,2,557.0,2.0,2.0,2.0
2,11186,3395,DAY,808799 - DAY2,ExtraLong,sub-11186_ses-DAY21_T1w.nii.gz,5d9dfb8ba54d350042bd8142,2010-06-10,1,2,534.0,1.0,1.0,2.0
3,11186,6378,FNDM,810211 - FNDM,ExtraLong,sub-11186_ses-FNDM21_T1w.nii.gz,5d9e0c47a54d350042bd95b4,2012-02-03,2,2,554.0,1.0,1.0,2.0
4,11242,3360,DAY,808799 - DAY2,ExtraLong,sub-11242_ses-DAY21_T1w.nii.gz,5d9dfb97a54d35003eb7f374,2010-06-02,1,2,694.0,1.0,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2587,139272,10109,GRMPY,822831 - GRMPY,ExtraLong,sub-139272_ses-10109_T1w.nii.gz,5d9cac7ea54d35003eb749f4,2016-04-07,6,6,259.0,2.0,2.0,2.0
2588,139490,8461,PNC,810336 - Big GO,ExtraLong,sub-139490_ses-PNC1_T1w.nii.gz,5d9cd114a54d350039b28ece,2013-08-30,1,2,105.0,1.0,2.0,2.0
2589,139490,10564,CONTE,815814 - Conte,ExtraLong,sub-139490_ses-CONTE1_acq-moco_T1w.nii.gz,5d9ca3cfa54d350028b04b1d,2017-04-29,2,2,149.0,1.0,2.0,2.0
2590,139553,8410,PNC,810336 - Big GO,ExtraLong,sub-139553_ses-PNC1_T1w.nii.gz,5d9d14b1a54d350042bcbddd,2013-08-23,1,2,107.0,2.0,2.0,2.0


In [54]:
# Get age maximum in months: 35 years old * 12 months
max_age = 35 * 12

# Filter xl to only include sessions scanned before age 35, then keep only the
# subjects that still have 2+ such sessions.
keep = xl[xl.scanage_months < max_age].copy()
keep_filtered = filterBySessionCount(keep, 2)

# Sessions to remove: those at or over the age limit, plus under-limit sessions
# whose subject no longer meets the session count.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): rows with a missing scanage_months land in neither keep nor remove.
remove = pd.concat([
    xl[xl.scanage_months >= max_age],
    get_row_diff(keep, keep_filtered),
])
remove

Unnamed: 0,bblid,scanid,sesid,scan_protocol,project,filename,acqid,doscan,timepoint,ntimepoints,scanage_months,sex,race,ethnic
0,10410,6776,FNDM,810211 - FNDM,ExtraLong,sub-10410_ses-FNDM11_T1w.nii.gz,5d9e0666a54d350038b39e11,2012-04-24,1,2,556.0,2.0,2.0,2.0
1,10410,6843,FNDM,810211 - FNDM,ExtraLong,sub-10410_ses-FNDM21_T1w.nii.gz,5d9e0972a54d350044c11e33,2012-05-11,2,2,557.0,2.0,2.0,2.0
2,11186,3395,DAY,808799 - DAY2,ExtraLong,sub-11186_ses-DAY21_T1w.nii.gz,5d9dfb8ba54d350042bd8142,2010-06-10,1,2,534.0,1.0,1.0,2.0
3,11186,6378,FNDM,810211 - FNDM,ExtraLong,sub-11186_ses-FNDM21_T1w.nii.gz,5d9e0c47a54d350042bd95b4,2012-02-03,2,2,554.0,1.0,1.0,2.0
4,11242,3360,DAY,808799 - DAY2,ExtraLong,sub-11242_ses-DAY21_T1w.nii.gz,5d9dfb97a54d35003eb7f374,2010-06-02,1,2,694.0,1.0,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361,17962,8436,FNDM,810211 - FNDM,ExtraLong,sub-17962_ses-FNDM21_T1w.nii.gz,5d9e0c21a54d350044c12343,2013-08-26,2,2,434.0,2.0,,
364,18013,8532,FNDM,810211 - FNDM,ExtraLong,sub-18013_ses-FNDM11_T1w.nii.gz,5d9e08e0a54d35003cb5bcef,2013-09-30,1,2,593.0,2.0,1.0,2.0
365,18013,8549,FNDM,810211 - FNDM,ExtraLong,sub-18013_ses-FNDM21_T1w.nii.gz,5d9e0c37a54d350044c12376,2013-10-04,2,2,593.0,2.0,1.0,2.0
1808,110689,6027,PNC,810336 - Big GO,ExtraLong,sub-110689_ses-PNC1_T1w.nii.gz,5d9d026da54d350044c00c83,2011-11-12,1,2,521.0,1.0,,


# Delete the flagged sessions and any now-empty subjects from Flywheel

In [92]:
# 1. Walk the remove df and delete each matching session from Flywheel.
#    This is destructive: dry_run=False performs the actual deletion.
for row in remove.itertuples():
    ses = getSession(row)
    if not ses:
        # getSession already printed why the lookup failed.
        continue
    delete_session(ses, dry_run=False)

Subject 10410 not found on Flywheel!
Subject 10410 not found on Flywheel!


2021-09-20 14:20:56,768 INFO Deleting session "ses-DAY1" from subject sub-11186
2021-09-20 14:20:57,762 INFO Deleting session "ses-FNDM2" from subject sub-11186
2021-09-20 14:20:58,959 INFO Deleting session "ses-DAY1" from subject sub-11242
2021-09-20 14:20:59,845 INFO Deleting session "ses-FNDM2" from subject sub-11242
2021-09-20 14:21:00,660 INFO Deleting session "ses-FNDM3" from subject sub-11399
2021-09-20 14:21:01,426 INFO Deleting session "ses-NEFF4" from subject sub-11399
2021-09-20 14:21:02,459 INFO Deleting session "ses-DAY1" from subject sub-11419
2021-09-20 14:21:03,208 INFO Deleting session "ses-FNDM2" from subject sub-11419
2021-09-20 14:21:04,012 INFO Deleting session "ses-DAY1" from subject sub-11569
2021-09-20 14:21:05,131 INFO Deleting session "ses-FNDM2" from subject sub-11569
2021-09-20 14:21:07,408 INFO Deleting session "ses-FNDM3" from subject sub-11569
2021-09-20 14:21:10,098 INFO Deleting session "ses-DAY1" from subject sub-11599
2021-09-20 14:21:11,157 INFO Dele

In [93]:
# 2. For every subject that lost a session, delete the subject if it is now empty.
#    This is destructive: dry_run=False performs the actual deletion.
check_for_empty = remove.bblid.unique()
for bblid in check_for_empty:
    sub = getSubject(bblid)
    if not sub:
        # getSubject already printed why the lookup failed.
        continue
    delete_empty_subject(sub, dry_run=False)


Subject sub-10410 not found on Flywheel!


2021-09-20 14:24:12,096 INFO Checking if subject "sub-11186" is empty
2021-09-20 14:24:12,306 INFO Found 0 sessions
2021-09-20 14:24:12,337 INFO Deleting subject "sub-11186"
2021-09-20 14:24:12,916 INFO Checking if subject "sub-11242" is empty
2021-09-20 14:24:13,221 INFO Found 0 sessions
2021-09-20 14:24:13,222 INFO Deleting subject "sub-11242"
2021-09-20 14:24:13,751 INFO Checking if subject "sub-11399" is empty
2021-09-20 14:24:14,047 INFO Found 2 sessions
2021-09-20 14:24:14,218 INFO Checking if subject "sub-11419" is empty
2021-09-20 14:24:14,377 INFO Found 0 sessions
2021-09-20 14:24:14,382 INFO Deleting subject "sub-11419"
2021-09-20 14:24:14,864 INFO Checking if subject "sub-11569" is empty
2021-09-20 14:24:15,046 INFO Found 0 sessions
2021-09-20 14:24:15,048 INFO Deleting subject "sub-11569"
2021-09-20 14:24:15,558 INFO Checking if subject "sub-11599" is empty
2021-09-20 14:24:15,782 INFO Found 0 sessions
2021-09-20 14:24:15,783 INFO Deleting subject "sub-11599"
2021-09-20 14:

# Save new ExtraLong csv of scans we're keeping

In [94]:
# Write the retained scans, ordered by subject and scan date, to the updated datafreeze.
# sort_values returns a new frame, so keep_filtered is no longer mutated through the
# xl alias by an in-place sort.
xl = keep_filtered.sort_values(by=['bblid', 'doscan'])
fname = "./csv/ExtraLong-Datafreeze-2021-Updated-20210920.csv"
xl.to_csv(fname, index=False)