# How to Process Acquisitions

Here, using knowledge from our [bids querying notebook](./Querying_BIDS_Validity.ipynb) we demo how to process acquisitions

In [8]:
# modules
import flywheel
import pandas as pd
from pandas.io.json.normalize import nested_to_record
# add the script to the path
import sys
import os
sys.path.append(os.path.abspath("/Users/ttapera/Documents/BBL/Projects/bids-on-flywheel/flywheel_bids_tools/"))
import query_bids
from tqdm import tqdm
import math
import re

In [9]:
fw = flywheel.Client()

In [10]:
result = query_bids.query_fw("Q7 DSI", fw)

here
Could not find a project in flywheel with that name!


In [4]:
view = fw.View(columns='subject')
subject_df = fw.read_view_dataframe(view, result.id)

sessions = []
view = fw.View(columns='acquisition')
pbar = tqdm(total=100)
for ind, row in subject_df.iterrows():
    session = fw.read_view_dataframe(view, row["subject.id"])
    if(session.shape[0] > 0):
        sessions.append(session)
    pbar.update(10)
pbar.close()

acquisitions = pd.concat(sessions)

160it [00:00, 175.10it/s]                         


In [111]:
acquisitions.head()

Unnamed: 0,acquisition.id,acquisition.label,acquisition.timestamp,acquisition.timezone,project.id,project.label,session.id,session.label,subject.id,subject.label
0,5c8011f0df93e3002efeb11e,Localizer,2018-02-10 19:36:23,America/New_York,5c7d7616df93e30028fc5227,Q7 DSI,5c8011f0df93e3002efeb11c,2018-02-10 14:21:55,5c8011f0df93e3002dfd44b8,18.02.10-13:38:26-STD-1.3.12.2.1107.5.2.43.66044
1,5c8011f0df93e3002efeb11f,t1w_mprage_0.9mm,2018-02-10 19:44:45,America/New_York,5c7d7616df93e30028fc5227,Q7 DSI,5c8011f0df93e3002efeb11c,2018-02-10 14:21:55,5c8011f0df93e3002dfd44b8,18.02.10-13:38:26-STD-1.3.12.2.1107.5.2.43.66044
2,5c8011f0df93e3002efeb120,t2w_space_0.9mm,2018-02-10 19:50:24,America/New_York,5c7d7616df93e30028fc5227,Q7 DSI,5c8011f0df93e3002efeb11c,2018-02-10 14:21:55,5c8011f0df93e3002dfd44b8,18.02.10-13:38:26-STD-1.3.12.2.1107.5.2.43.66044
3,5c8011f0df93e30029fce04e,DSI_730dir_b5000_mb3,2018-02-10 19:52:59,America/New_York,5c7d7616df93e30028fc5227,Q7 DSI,5c8011f0df93e3002efeb11c,2018-02-10 14:21:55,5c8011f0df93e3002dfd44b8,18.02.10-13:38:26-STD-1.3.12.2.1107.5.2.43.66044
4,5c8011f0df93e3002bfcf5ee,restingBOLD_mb6_1200,2018-02-10 20:45:51,America/New_York,5c7d7616df93e30028fc5227,Q7 DSI,5c8011f0df93e3002efeb11c,2018-02-10 14:21:55,5c8011f0df93e3002dfd44b8,18.02.10-13:38:26-STD-1.3.12.2.1107.5.2.43.66044


First, fetch one of the files:

In [6]:
acquisition = fw.get("5c8011f0df93e3002efeb11e")
acquisition

{'analyses': [],
 'collections': None,
 'created': datetime.datetime(2019, 3, 6, 18, 31, 12, 422000, tzinfo=tzutc()),
 'files': [{'classification': {'Intent': ['Localizer'], 'Measurement': ['T2']},
            'created': datetime.datetime(2019, 3, 6, 18, 31, 14, 691000, tzinfo=tzutc()),
            'hash': '',
            'id': '97167590-f830-4300-9d96-fdec3ed47c67',
            'info': {'AcquisitionDate': '20180210',
                     'AcquisitionMatrix': [256, 0, 0, 192],
                     'AcquisitionNumber': 1,
                     'AcquisitionTime': '143615.282500',
                     'AngioFlag': 'N',
                     'BitsAllocated': 16,
                     'BitsStored': 12,
                     'BodyPartExamined': 'BRAIN',
                     'Columns': 256,
                     'ContentDate': '20180210',
                     'ContentTime': '143623.607000',
                     'DateOfLastCalibration': [20090304, 20090304],
                     'DeviceSerialNumber

Then, convert the file object to dictionary and flatten with `pandas.io.json`:

In [7]:
files = [x.to_dict() for x in acquisition.files]

In [8]:
from pandas.io.json.normalize import nested_to_record

flat_files = [nested_to_record(my_dict, sep='_') for my_dict in files]

Keep the acquisition ID:

In [9]:
for x in flat_files:
    x.update( {'acquisition.id': acquisition.id})

Now, filter out the keys in the flattened dictionary using regex:

In [10]:
string = 'classification'


flat_files = [{k:v for k,v in x.items() if re.match(string, k)} for x in flat_files]

In [11]:
flat_files[0]

{'classification_Intent': ['Localizer'], 'classification_Measurement': ['T2']}

As a function, this looks like:

In [12]:
def unlist_item(ls):
    
    if type(ls) is list:
        ls.sort()
        return(', '.join(x for x in ls))
    else:
        return float('nan')

def process_acquisition(acq_id):
    
    acq = fw.get(acq_id)
    files = [x.to_dict() for x in acq.files]
    flat_files = [nested_to_record(my_dict, sep='_') for my_dict in files]
    cols = r'(classification)|(^type$)|(^modality$)|(BIDS)|(RepetitionTime)|(SequenceName)|(SeriesDescription)'
    flat_files = [{k:v for k,v in my_dict.items() if re.search(cols, k)} for my_dict in flat_files]
    for x in flat_files:
        x.update( {'acquisition.id': acquisition.id})
    df = pd.DataFrame(flat_files)
    list_cols = (df.applymap(type) == list).all()
    df.loc[:,list_cols] = df.loc[:,list_cols].applymap(unlist_item)
    return df

In [13]:
acq1 = process_acquisition("5c8011f0df93e3002efeb11e")
acq1

Unnamed: 0,acquisition.id,classification_Intent,classification_Measurement,info_RepetitionTime,info_SequenceName,info_SeriesDescription,modality,type
0,5c8011f0df93e3002efeb11e,Localizer,T2,40.0,*fl2d1,Localizer,MR,dicom
1,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
2,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
3,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti


In [14]:
acq2 = process_acquisition("5c8011f0df93e3002efeb120")
acq2

Unnamed: 0,acquisition.id,classification_Intent,classification_Measurement,info_RepetitionTime,info_SequenceName,info_SeriesDescription,modality,type
0,5c8011f0df93e3002efeb11e,Structural,T2,3200.0,*spc_314ns,t2w_space_0.9mm,MR,dicom
1,5c8011f0df93e3002efeb11e,Structural,T2,3.2,_spc_314ns,t2w_space_0.9mm,MR,nifti


In [15]:
pd.concat([acq1, acq2], ignore_index=True)

Unnamed: 0,acquisition.id,classification_Intent,classification_Measurement,info_RepetitionTime,info_SequenceName,info_SeriesDescription,modality,type
0,5c8011f0df93e3002efeb11e,Localizer,T2,40.0,*fl2d1,Localizer,MR,dicom
1,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
2,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
3,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
4,5c8011f0df93e3002efeb11e,Structural,T2,3200.0,*spc_314ns,t2w_space_0.9mm,MR,dicom
5,5c8011f0df93e3002efeb11e,Structural,T2,3.2,_spc_314ns,t2w_space_0.9mm,MR,nifti


In [16]:
%%time
acq_dfs = []
#temp = acquisitions.sample(n=100)
for index, row in acquisitions.iterrows():
    
    try:
        temp = process_acquisition(row["acquisition.id"])
        acq_dfs.append(temp)
    except:
        continue

CPU times: user 1.24 s, sys: 12 ms, total: 1.25 s
Wall time: 4.51 s


In [113]:
pd.concat(acq_dfs).head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,acquisition.id,classification_Intent,classification_Measurement,info_RepetitionTime,info_SequenceName,info_SeriesDescription,modality,type
0,5c8011f0df93e3002efeb11e,Localizer,T2,40.0,*fl2d1,Localizer,MR,dicom
1,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
2,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
3,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti
0,5c8011f0df93e3002efeb11e,Structural,T1,2500.0,*tfl3d1_16ns,t1w_mprage_0.9mm,MR,dicom


In [86]:
grouped_data = bids_data2.groupby(groups_list, as_index=False).nth(1).reset_index(drop=True)

In [87]:
grouped_data

Unnamed: 0,acquisition.id,classification_Intent,classification_Measurement,info_RepetitionTime,info_SequenceName,info_SeriesDescription,modality,type,group_id
0,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti,2
1,5c8011f0df93e3002efeb11e,Structural,T1,2.5,_tfl3d1_16ns,t1w_mprage_0.9mm,MR,nifti,3
2,5c8011f0df93e3002efeb11e,Structural,T2,3.2,_spc_314ns,t2w_space_0.9mm,MR,nifti,4
3,5c8011f0df93e3002efeb11e,Functional,T2*,0.5,epfid2d1_64,restingBOLD_mb6_1200,MR,nifti,1
4,5c8011f0df93e3002efeb11e,,,4300.0,ep_b3735#487,DSI_730dir_b5000_mb3,MR,dicom,0


In [88]:
grouped_data_modified = grouped_data.copy()
grouped_data_modified.loc[grouped_data_modified['classification_Measurement'].isnull(), 'classification_Measurement'] = "Diffusion"

In [89]:
grouped_data_modified

Unnamed: 0,acquisition.id,classification_Intent,classification_Measurement,info_RepetitionTime,info_SequenceName,info_SeriesDescription,modality,type,group_id
0,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti,2
1,5c8011f0df93e3002efeb11e,Structural,T1,2.5,_tfl3d1_16ns,t1w_mprage_0.9mm,MR,nifti,3
2,5c8011f0df93e3002efeb11e,Structural,T2,3.2,_spc_314ns,t2w_space_0.9mm,MR,nifti,4
3,5c8011f0df93e3002efeb11e,Functional,T2*,0.5,epfid2d1_64,restingBOLD_mb6_1200,MR,nifti,1
4,5c8011f0df93e3002efeb11e,,Diffusion,4300.0,ep_b3735#487,DSI_730dir_b5000_mb3,MR,dicom,0


In [90]:
diff = upload_bids.get_unequal_cells(grouped_data_modified, grouped_data)
diff

[[4, 2]]

In [107]:
changes = {}

for x in diff:
    
    key = grouped_data_modified.loc[x[0], 'group_id']
    val = (grouped_data_modified.columns[x[1]], grouped_data_modified.iloc[x[0], x[1]])
    changes.update({key: val})
    
    
    
    

In [108]:
changes

{0: ('classification_Measurement', 'Diffusion')}

In [109]:
for group, change in changes.items():
    
    bids_data2.loc[bids_data2['group_id'] == group, change[0]] = change[1]

In [110]:
bids_data2

Unnamed: 0,acquisition.id,classification_Intent,classification_Measurement,info_RepetitionTime,info_SequenceName,info_SeriesDescription,modality,type,group_id
0,5c8011f0df93e3002efeb11e,Localizer,T2,40.00,*fl2d1,Localizer,MR,dicom,2
1,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti,2
2,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti,2
3,5c8011f0df93e3002efeb11e,Localizer,T2,0.04,_fl2d1,Localizer,MR,nifti,2
4,5c8011f0df93e3002efeb11e,Structural,T1,2500.00,*tfl3d1_16ns,t1w_mprage_0.9mm,MR,dicom,3
5,5c8011f0df93e3002efeb11e,Structural,T1,2.50,_tfl3d1_16ns,t1w_mprage_0.9mm,MR,nifti,3
6,5c8011f0df93e3002efeb11e,Structural,T2,3200.00,*spc_314ns,t2w_space_0.9mm,MR,dicom,4
7,5c8011f0df93e3002efeb11e,Structural,T2,3.20,_spc_314ns,t2w_space_0.9mm,MR,nifti,4
8,5c8011f0df93e3002efeb11e,,Diffusion,4300.00,ep_b610#41,DSI_730dir_b5000_mb3,MR,dicom,0
9,5c8011f0df93e3002efeb11e,Functional,T2*,500.00,epfid2d1_64,restingBOLD_mb6_1200,MR,dicom,1
