# Center TBI demographics using search
Get all available demographics data using the XNAT search engine via the REST API.

This returns different fields from the website's search export (in particular, the JSON fields are missing).

Please edit login.cfg with your credentials before executing this script.

### Init and helper functions

In [None]:
#
# Creation: 04/2017 by Aldo Camargo & Stephen Larroque
#
%load_ext autoreload
%autoreload 2

import json
import os
import lxml
import xml.etree.ElementTree as ET
import pyxnat

In [None]:
#### HELPER FUNCTIONS
from copy import deepcopy
from libs.xmlpp import get_pprint as xml_pprint
def get_raw_xml(elements_list):
    '''Get the source xml of a list of lxml elements or pyxnat objects

    elements_list -- a single object or a list of objects. A non-list
                     argument is wrapped into a one-item list.

    Returns one string made of a '=== Element <index>' header per item,
    followed by:
      * the pretty-printed XML for an lxml element (comments stripped),
      * the result of .get() for a pyxnat EObject,
      * repr() for any other type.
    An empty list yields an empty string.
    '''
    # Convert to a list of elements if it's a single element (to ease looping)
    if not isinstance(elements_list, list):
        elements_list = [elements_list]

    out = ''
    for i, element in enumerate(elements_list):
        out += '\n=== Element %i\n' % i
        # If this is an XML element
        if isinstance(element, lxml.etree._Element):
            # Make a copy of the element because we will modify it
            e = deepcopy(element)
            # Strip comments, else lxml does not know how to print the XML
            lxml.etree.strip_tags(e, lxml.etree.Comment)
            # Add the XML of this element to the output
            out += xml_pprint(lxml.etree.tostring(e, pretty_print=True))
        # pyxnat object, we just fetch the xml from the server
        # (elif, not if: the three cases are mutually exclusive, otherwise an
        # lxml element would also fall into the final else and get its repr appended)
        elif isinstance(element, pyxnat.core.resources.EObject):
            out += element.get()
        # Print differently if this is any other type
        else:
            out += repr(element)
    return out

def pprint_xml(obj):
    '''Print the pretty-printed XML source of obj (same inputs as get_raw_xml)'''
    raw_source = get_raw_xml(obj)
    formatted = xml_pprint(raw_source)
    print(formatted)

#### HELPER GLOBALS
# Mapping of XML prefix -> namespace URI for XNAT documents
# (meant to be given as the namespaces argument of lxml xpath queries)
xnatns = dict(
    arc='http://nrg.wustl.edu/arc',
    cat='http://nrg.wustl.edu/catalog',
    ext='http://nrg.wustl.edu/ext',
    pipe='http://nrg.wustl.edu/pipe',
    prov='http://www.nbirn.net/prov',
    scr='http://nrg.wustl.edu/scr',
    val='http://nrg.wustl.edu/val',
    wrk='http://nrg.wustl.edu/workflow',
    xdat='http://nrg.wustl.edu/security',
    xnat='http://nrg.wustl.edu/xnat',
    xnat_a='http://nrg.wustl.edu/xnat_assessments',
    xsi='http://www.w3.org/2001/XMLSchema-instance'
)

### Connection and pandas database building

In [None]:
# Read the credentials from login.cfg (a JSON file in the current working directory)
cfgpath = os.path.join(os.getcwd(), 'login.cfg')
with open(cfgpath) as cfg_file:
    login_infos = json.load(cfg_file)

# Open the connection to the XNAT database
central = pyxnat.Interface(
    server="http://tbixnat.incf.org:8080",
    user=login_infos['username'],
    password=login_infos['password'],
    cachedir='/tmp'
)
# Register the XNAT schema (makes .attrs() usable to list attributes)
central.manage.schemas.add('xnat/xnat.xsd')

# Fetch and print the list of all centers (XNAT projects)
centers = central.select.projects()
print(centers.get())

# Select center (constraining to one center for the moment)
# TODO: loop over all centers
#cULgData_Liege_project = central.select.project('LIE')

# Show the structure of the projects
central.inspect.structure()

In [None]:
# Display the datatypes pyxnat reports for this connection
# (each of them is later passed to central.select() to export its search table)
central.inspect.datatypes()

In [None]:
import io

# Run a search returning all rows of the xnat:mrSessionData datatype
mrtable = central.select('xnat:mrSessionData').all()

# Serialize to a string first, then write it as UTF-8 text.
# json.dump() into a file opened in 'wb' mode raises TypeError on Python 3
# (json emits text, not bytes) and, with ensure_ascii=False, can raise
# UnicodeEncodeError on Python 2 as soon as the data contains non-ASCII text.
mrjson = json.dumps(mrtable.data, ensure_ascii=False, indent=4, sort_keys=True)
if isinstance(mrjson, bytes):
    # Python 2 may return a byte string: decode it so io.open can write it as text
    mrjson = mrjson.decode('utf-8')
# newline='\n' keeps line endings untranslated, as the binary mode used to do
with io.open('test.json', 'w', encoding='utf-8', newline='\n') as f:
    f.write(mrjson)

In [None]:
import io
from tqdm import tqdm

# Export the full search table of every datatype to its own JSON file.
# The datatype list is fetched once and reused for the progress-bar total and the loop.
datatypes = central.inspect.datatypes()
count = len(datatypes)
for datatype in tqdm(datatypes, total=count):
    # One file per datatype; ':' is replaced by '-' in the filename
    # (presumably because ':' is not a valid filename character on every system)
    filename = ('ctbi_search_' + datatype + '.json').replace(':', '-')

    # Only the query itself is guarded, so that errors raised while
    # serializing or writing are never mistaken for a permission problem
    try:
        rows = central.select(datatype).all().data
    except Exception as exc:
        if 'refusing to fulfill it' not in str(exc):
            raise
        # This is usually because we don't have the permission, so we just skip...
        print('Unauthorized access for datatype: %s' % datatype)
        with open(filename, 'w') as f:
            f.write('Unauthorized access')
        continue

    # Serialize to a string first, then write it as UTF-8 text: json.dump()
    # into a 'wb' file raises TypeError on Python 3 and can raise
    # UnicodeEncodeError on Python 2 with non-ASCII data (ensure_ascii=False)
    payload = json.dumps(rows, ensure_ascii=False, indent=4, sort_keys=True)
    if isinstance(payload, bytes):
        # Python 2 may return a byte string: decode it so io.open can write it as text
        payload = payload.decode('utf-8')
    # newline='\n' keeps line endings untranslated, as the binary mode used to do
    with io.open(filename, 'w', encoding='utf-8', newline='\n') as f:
        f.write(payload)