# Center TBI demographics & statistics extractor
Please edit `login.cfg` (a JSON file with `username` and `password` keys) with your credentials before executing this notebook.

In [None]:
#
# Last update: Friday: 07/04/2017 by Stephen Larroque
#

import json
from pyxnat import Interface

In [None]:
# Read the XNAT credentials (JSON with 'username' and 'password' keys)
with open('/home/brain/neuro-csg-pipelines/Projects/CTBI/centertbi-mri/login.cfg') as cfg_file:
    login_infos = json.load(cfg_file)

# Open the connection to the XNAT database
central = Interface(
    server="http://tbixnat.incf.org:8080",
    user=login_infos['username'],
    password=login_infos['password'],
    cachedir='/tmp'
)
# Register the XNAT schema (this is what makes .attrs() able to list attributes)
central.manage.schemas.add('xnat.xsd')

# Fetch the collection of all centers (one XNAT project per center) and print it
centers = central.select.projects()
print(centers.get())

# Selecting a single center (kept for reference, currently disabled)
# TODO: loop over all centers
#cULgData_Liege_project = central.select.project('LIE')

# Display the structure of the projects
central.inspect.structure()

In [None]:
import pandas as pd

# Debug switches for the extraction loop below: when True, the loop stops
# after the first subject (single_subject) / the first center (single_center)
# instead of walking through all of them.
single_subject = True
single_center = True

def add_dict_noerr(d, key, obj, attr):
    """Copy ``obj.attrs.get(attr)`` into ``d[key]``, ignoring any failure.

    Best-effort helper: if reading the attribute (or storing it) raises,
    the error is discarded and ``d`` is left as it was.

    Args:
        d: mapping to update in place.
        key: key under which the value is stored in ``d``.
        obj: object exposing an ``attrs.get(name)`` accessor.
        attr: name of the attribute to read from ``obj``.

    Returns:
        The same ``d`` object, updated or not.
    """
    try:
        fetched = obj.attrs.get(attr)
        d[key] = fetched
    except Exception:
        # Deliberately silent: a missing attribute must not stop the caller
        pass
    return d

# Columns of the demographics table, in display order. 'quality_checks' is
# listed explicitly because every extracted row carries it.
subject_columns = ['id', 'label', 'center', 'age', 'gender', 'dob', 'yob', 'mri', 'fmri', 'dti', 'mri_scandate', 'mri_insertdate', 'subject_insertdate', 'scanner', 'acquisition_site', 'quality_checks']


def _set_first_match(row, key, matches):
    """Store the first element of ``matches`` under ``row[key]``.

    ``matches`` is the list returned by an xpath query. When it is empty the
    row is left untouched (any value already stored under ``key`` is kept),
    so a field missing for one subject does not abort the whole extraction.

    Returns:
        The same ``row`` object.
    """
    if matches:
        row[key] = matches[0]
    return row


def _series_flag(series_descriptions, *keywords):
    """Return 'True' if any description contains any keyword, else 'False'.

    The result is the string 'True'/'False' (not a bool), which is the form
    stored in the table.
    """
    return str(any(keyword in description
                   for description in series_descriptions
                   for keyword in keywords))


# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append() was removed in pandas 2.0 and copied the whole
# table at every call.
subject_rows = []

centers = central.select.projects()
# For each center id
for center in centers:
    center_id = center.id()
    # Select subjects for one center
    center_data = central.select.projects(id_filter=center_id)

    # For each subject id
    for subject in center_data.subjects():
        subject_id = subject.id()
        experiments_path = "/project/%s/subject/%s/experiments" % (center_id, subject_id)
        print(experiments_path)
        # Only the first experiment of the subject is inspected.
        # NOTE(review): for a subject without any experiment, `exp` is
        # presumably empty/None and the exp.xpath() calls below would fail -
        # confirm against pyxnat's first() and guard if needed.
        exp = central.select(experiments_path).first()

        # Extract subject's demographics
        # Fields that always exist (no chance of failure)
        sbj_tbl = {
            'id': subject_id,
            'label': subject.label(),
            'center': center_id,
        }

        # Attributes that can fail (inexistent for some subjects)
        # TODO: complete with more attributes
        add_dict_noerr(sbj_tbl, 'age', exp, 'age')  # xnat:mrSessionData/AGE
        add_dict_noerr(sbj_tbl, 'gender', subject, 'gender_text')  # xnat:subjectData/GENDER_TEXT
        add_dict_noerr(sbj_tbl, 'dob', subject, 'dob')  # xnat:subjectData/DOB
        add_dict_noerr(sbj_tbl, 'mri_scandate', exp, 'date')  # xnat:mrSessionData/DATE
        add_dict_noerr(sbj_tbl, 'mri_insertdate', exp, 'insert_date')  # xnat:mrSessionData/INSERT_DATE
        add_dict_noerr(sbj_tbl, 'subject_insertdate', subject, 'insert_date')  # xnat:subjectData/INSERT_DATE

        # Values read straight from the XML. A missing element yields an empty
        # xpath result; the field is then simply left out of the row instead
        # of raising IndexError.
        manufacturer = exp.xpath('xnat:scanner/@manufacturer')
        model = exp.xpath('xnat:scanner/@model')
        if manufacturer and model:
            sbj_tbl['scanner'] = '%s %s' % (manufacturer[0], model[0])
        _set_first_match(sbj_tbl, 'yob', subject.xpath('//xnat:yob/text()'))
        # Overrides the 'gender_text' value stored above when the XML has one
        _set_first_match(sbj_tbl, 'gender', subject.xpath('//xnat:gender/text()'))
        _set_first_match(sbj_tbl, 'acquisition_site', subject.xpath('//xnat:acquisition_site/text()'))

        # Aggregated attributes
        # quality checks: "<scans rated 'usable'>/<scans with a rating>"
        quality_checks = exp.xpath('//xnat:scan/xnat:quality/text()')
        usable_count = sum(1 for rating in quality_checks if rating == 'usable')
        sbj_tbl['quality_checks'] = "%i/%i" % (usable_count, len(quality_checks))
        # mri types, guessed from keywords in the series descriptions
        mri_series = exp.xpath('//xnat:scan/xnat:series_description/text()')
        mri_series = [s.lower() for s in mri_series]  # lowercase to ease comparison
        sbj_tbl['fmri'] = _series_flag(mri_series, 'epi')
        sbj_tbl['mri'] = _series_flag(mri_series, 'mpr', 't1')
        sbj_tbl['dti'] = _series_flag(mri_series, 'dti')

        # Add to the list of all subjects
        subject_rows.append(sbj_tbl)
        if single_subject: break # TODO: remove this break to work on all subjects for one center
    if single_center: break # TODO: remove this break to work on all centers

# Build the big table of all subjects in one go (absent fields become NaN)
subjects_table = pd.DataFrame(subject_rows, columns=subject_columns)

# Display the table (one row per extracted subject)
subjects_table

## Exploring XNAT DB
-----------

In [None]:
# Equivalences: each pair below is the same selection written twice, first
# with chained selector methods, then as a single path string given to select().

# A project
central.select.project("LIE")
central.select("/project/LIE/")  # path-string form

# A subject of that project
central.select.project("LIE").subject("CTBI_S00239")
central.select("/project/LIE/subject/CTBI_S00239")

# The experiments of that subject
central.select("/project/LIE/subject/CTBI_S00239").experiments().get()
central.select("/project/LIE/subject/CTBI_S00239/experiments").get()

# The scans of one experiment
central.select("/project/LIE/subject/CTBI_S00239/experiment/CTBI_E00469").scans().get()
central.select("/project/LIE/subject/CTBI_S00239/experiment/CTBI_E00469/scans").get()

# One scan of that experiment
central.select("/project/LIE/subject/CTBI_S00239/experiment/CTBI_E00469").scan('1')
central.select("/project/LIE/subject/CTBI_S00239/experiment/CTBI_E00469/scan/1")

In [None]:
# Print what get() returns for one experiment.
# In the output, an experiment appears as an MRSession (experiment == MRSession)
print(central.select("/project/LIE/subject/CTBI_S00239/experiment/CTBI_E00469").get())
# also note xnat:scans and xnat:scan instead of MRScan when using select()

### Example of XML subtag vs attribute
```xml
<tag attr1="something">
    <subtag>something else</subtag>
</tag>
```

In [None]:
# Select one experiment to demonstrate attribute vs subtag access
a = central.select("/project/LIE/subject/CTBI_S00239/experiment/CTBI_E00469")
# How to access attributes? Either through attrs.get() or with an '@name' xpath
print(a.attrs.get("UID"))
print(a.xpath("/xnat:MRSession/@UID")[0])
# How to access subtags? With a 'text()' xpath, using the full path or the '//' shortcut
print(a.xpath("/xnat:MRSession/xnat:scans/xnat:scan/xnat:quality/text()"))
print(a.xpath("//xnat:quality/text()"))
# If lost, print the XML
print(a.get())

In [None]:
# Get list of all subjects ('//subjects' is not restricted to one project);
# `subjects` is reused by the totals cell
subjects = central.select('//subjects')
print(subjects.get())

In [None]:
# Totals, counted from `subjects` and `centers` which are defined in earlier cells
print("Total number of subjects: %i" % len(subjects.get()))
print("Total number of centers: %i" % len(centers.get()))

In [None]:
# Get all datatypes (no argument: not restricted to one datatype)
central.inspect.datatypes()

In [None]:
# Get all subjects data fields (the fields of the xnat:subjectData datatype)
print('BE CAREFUL: attributes (such as INSERT_DATE) can only be accessed in lower case (eg, insert_date)! Even if datatypes() display them in uppercase!')
central.inspect.datatypes('xnat:subjectData')

In [None]:
# Same for the MRI session fields (the fields of the xnat:mrSessionData datatype)
central.inspect.datatypes('xnat:mrSessionData')

In [None]:
# To show all possible values of any field, pass its 'datatype/FIELD' path
central.inspect.field_values('xnat:subjectData/GENDER_TEXT')

In [None]:
# Construct a nested dict {datatype: {field: possible values}} covering every
# field of every datatype
all_vals = {}
for category in central.inspect.datatypes():
    all_vals[category] = {}
    for field in central.inspect.datatypes(category):
        try:
            all_vals[category][field] = central.inspect.field_values(field)
        except Exception as exc:
            # Only DatabaseError failures are skipped (the field is left out of
            # all_vals); anything else is re-raised instead of being hidden.
            # The test goes through the class name and the message because an
            # exception object does not support `in` on Python 3.
            is_database_error = ('DatabaseError' in type(exc).__name__
                                 or 'DatabaseError' in str(exc))
            if not is_database_error:
                raise
all_vals

In [None]:
# Show all values of the SESSION_ID field of xnat:mrSessionData
central.inspect.field_values('xnat:mrSessionData/SESSION_ID')

In [None]:
# Presumably lists the experiment types present on the server - see pyxnat's inspect documentation
central.inspect.experiment_types()

In [None]:
# Presumably lists the scan types present on the server - see pyxnat's inspect documentation
central.inspect.scan_types()

In [None]:
# Presumably lists the scan values found for xnat:mrSessionData - see pyxnat's inspect documentation
central.inspect.scan_values('xnat:mrSessionData')

In [None]:
# Test to get attributes values (via XML elements direct access)
# NOTE(review): center_data and subj are assigned here but not used again in this cell
center_data = central.select.projects(id_filter='LIE')
subj = center_data.subjects()
# Disabled draft of a loop over every subject of the project:
#subjdata = central.select("/project/LIE")
#for subj in central.select("/project/LIE/subjects").get():
    #subjdata = central.select("/project/LIE/subject/%s" % subj)
# Work on one hard-coded subject instead
subjdata = central.select("/project/LIE/subject/CTBI_S00239")
# Get full access to xml elements (and values): '/*' selects the root element
root_elt = subjdata.xpath('/*')
print(root_elt)
# Children and attribute names of the root element
print(root_elt[0].getchildren())
print(root_elt[0].keys())
# Text of the demographics subtags (year of birth, gender)
print(subjdata.xpath('/xnat:Subject/xnat:demographics/xnat:yob')[0].text)
print(subjdata.xpath('/xnat:Subject/xnat:demographics/xnat:gender')[0].text)
# Attribute names, then attribute values, of the first experiment element
print(subjdata.xpath('/xnat:Subject/xnat:experiments/xnat:experiment')[0].keys())
print(subjdata.xpath('/xnat:Subject/xnat:experiments/xnat:experiment')[0].values())

# Other calls tried during exploration (disabled):
#print(subjdata.get())
#print(subjdata.id())
#subjdata.resources().get()
#project.resource('NIFTI').file('image.nii').content()
#'T1'
# SEE: https://wiki.humanconnectome.org/display/DataUse/Exploring+ConnectomeDB+with+Python


In [None]:
# Fetch the experiment values for the xnat:mrSessionData datatype,
# print how many there are, then display them
mri_subjects = central.inspect.experiment_values('xnat:mrSessionData')
print(len(mri_subjects))
mri_subjects

In [None]:
# Exploring relationship between elements and datatypes
# and access attributes via PyXNAT
# see also XNAT Power User slides
#
# For each level (project, subject, experiment, scans): print the datatype of
# the first element, one of its attributes, and the fields of that datatype.
# The sections are separated with print(""): a bare `print` is a no-op
# expression on Python 3, while print("") emits the blank line on both
# Python 2 and Python 3.
pr = central.select("/projects").first()
print(pr.datatype())
print(pr.attrs.get('name'))
print('All possible fields: %s' % str(central.inspect.datatypes(pr.datatype())))
print("")

subj = central.select("/project/LIE/subjects").first()
print(subj.datatype())
print(subj.attrs.get('insert_date'))  # ALWAYS lowercase!
print('All possible fields: %s' % str(central.inspect.datatypes(subj.datatype())))
print("")

exp = central.select("/project/LIE/subject/CTBI_S00239/experiments").first()
print(exp.datatype())
print(exp.attrs.get('project'))
print('All possible fields: %s' % str(central.inspect.datatypes(exp.datatype())))
print("")

# '*' matches any experiment of the subject
scans = central.select("/project/LIE/subject/CTBI_S00239/experiments/*/scans").first()
print(scans.datatype())
#print(scans.attrs.get('project'))
print('No attributes for this one!!! Here is the listing: %s' % str(central.inspect.datatypes(scans.datatype())))

In [None]:
# How to access attributes:
# Get experiment (or any other xml element); `exp` is reused by the next cell
exp = central.select('/project/CAMTRIO/subject/CTBI_S00638/experiments').first()
# Two ways of accessing simple attributes
print(exp.attrs.get('date'))  # via pyxnat
print(exp.xpath('xnat:date/text()')[0])  # via xpath
# Only one way to access complex attributes (the attribute of a sub-element)
print(exp.xpath('xnat:scanner/@manufacturer')[0]) # via xpath
# Print the whole XML for this element (the experiment) so you can see other attributes you can use
print(exp.get())

In [None]:
# Quality ratings of every scan of the experiment selected in the previous cell,
# summarised as "<scans rated 'usable'>/<scans with a rating>"
quality_checks = exp.xpath('//xnat:scan/xnat:quality/text()')
usable_count = sum(1 for rating in quality_checks if rating == 'usable')
print("%i/%i" % (usable_count, len(quality_checks)))
# Series description of every scan
series_descriptions = exp.xpath('//xnat:scan/xnat:series_description/text()')
print(series_descriptions)

In [None]:
# TODO: save subjects_table into a csv (use sep=';' to be compatible with Excel)

In [None]:
# TODO: download files, see http://xnat.bigr.nl/index.php/Xnat:Pyxnat
#allscans = xnat.select.project("PROJECTNAME").subject("SUBJECT").experiment("SESSION").scans()
#for scan in allscans:
    #allscans.download("DOWNLOADDIR", type='ALL', extract=True)