### Include


You will need the following python library
### fasp-scripts

Clone the **fasp-client** branch of fasp-scripts

```git clone -b fasp-client --single-branch https://github.com/ga4gh/fasp-scripts.git```

change directory to local copy of fasp-scripts

Install into your favorite python environment

```pip install .```


In [1]:
import os
import sys
import json
import requests
import pandas as pd 
import numpy as np
#import seaborn as sns
from pathlib import Path
from datetime import datetime
import pprint
#from lifelines import KaplanMeierFitter

pprint = pprint.PrettyPrinter(indent=1).pprint
# pprint() is now available to pretty-print any JSON

FHIR_SERVER = 'https://include-api-fhir-service.includedcc.org'
# Optional: Turn off SSL verification. Useful when dealing with a corporate proxy with self-signed certificates.
# This should be set to True unless you actually see certificate errors.
VERIFY_SSL = False

if not VERIFY_SSL:
    requests.packages.urllib3.disable_warnings()



# Kids First uses cookie-based authentication, so we've pre-fetched a cookie and made it available here.
#kf_cookie = requests.get("https://raw.githubusercontent.com/mitre/fhir-exercises/main/kf_cookie.txt", verify=VERIFY_SSL).text.rstrip()

full_cookie_path = os.path.expanduser('~/.keys/include_prod_fhir_cookie.json')

with open(full_cookie_path) as f:
    cookies = json.load(f)
kf_cookie = cookies['Cookie']


# We make a requests.Session to ensure consistent headers/cookie across all the requests we make
s = requests.Session()
s.headers.update({'Accept': 'application/fhir+json'})
s.verify = VERIFY_SSL
s.cookies['AWSELBAuthSessionCookie-0'] = kf_cookie


# Test out the cookie by querying the server metadata
r = s.get(f"{FHIR_SERVER}/metadata")

if "<!DOCTYPE html>" in r.text:
    sys.stderr.write('ERROR: Could not authenticate with Kids First. The cookie may need to be updated')
    

# This helper method allows us to easily switch between printing an entire Bundle, or just the first 20 lines.
# Set truncate_for_github = False for actual use,
# or just replace the function with a `return bundle`

def print_bundle(bundle, truncate_for_github = False):
    if not truncate_for_github:
        return bundle
    
    lines = json.dumps(bundle, indent=2).split('\n')
    if len(lines) <= 20:
        return bundle
    else:   
        print('\n'.join(lines[:20]))
        print('...\nBundle truncated. Change the "print_bundle" function above to print the full content.')

In [38]:
# Resolves all pages for the bundle. Returns an array with all Bundles, including the original Bundle.
def resolve_pages(bundle):
    next_page_link = next(filter(lambda link: link['relation'] == 'next', bundle['link']), None)
    if next_page_link:
        next_page = s.get(next_page_link['url']).json()
        return [bundle] + resolve_pages(next_page)
    else:
        return [bundle]

# NOTE: No cell output.

def runQuery(query, verbose=True):
    r = s.get(f"{FHIR_SERVER}/{query}")
    first_bundle = r.json()
    all_bundles = resolve_pages(first_bundle)

    resources = [entry['resource'] for bundle in all_bundles for entry in bundle['entry']]
    if verbose:
        print(f"Total  Resources: {len(resources)}")
    return resources

In [3]:
study_id = 19908
studies = runQuery(f"ResearchStudy?_id={study_id}")
id_system = 'https://include.org/htp/fhir/researchstudy'
include_study_id = [id['value'] for id in studies[0]['identifier'] if id['system']== id_system][0]
print (include_study_id)


print(study_id)
print(studies[0]['title'])
studies 

Total  Resources: 1
HTP
19908
Crnic Institute Human Trisome Project


[{'resourceType': 'ResearchStudy',
  'id': '19908',
  'meta': {'versionId': '3',
   'lastUpdated': '2022-03-17T22:09:38.053+00:00',
   'source': '#PQXswYZi3FTYGhdS',
   'tag': [{'system': 'https://include.org/htp/fhir/researchstudy',
     'code': 'HTP'}]},
  'identifier': [{'use': 'official',
    'system': 'https://include.org/htp/fhir/researchstudy',
    'value': 'HTP'},
   {'system': 'https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=',
    'value': 'phs002330'}],
  'title': 'Crnic Institute Human Trisome Project',
  'status': 'completed',
  'relatedArtifact': [{'type': 'documentation',
    'url': 'https://includedcc.org/studies/human-trisome-project'}],
  'keyword': [{'coding': [{'system': 'https://includedcc.org/fhir/code-systems/programs',
      'version': 'v1',
      'code': 'INCLUDE-KF',
      'display': 'INCLUDE/KF'}]},
   {'coding': [{'code': 'HTP'}]}],
  'description': 'TBD',
  'enrollment': [{'reference': 'Group/15367'}]}]

In [4]:
patients = runQuery(f"Patient?_has:ResearchSubject:individual:study={study_id}")

Total  Resources: 686


In [5]:
import json
from collections import Counter
import pandas as pd

rTypes = Counter()

for p in patients:
    rTypes[p['resourceType']] += 1
    extCounter = Counter()
    if 'extension' in p:
        for e in p['extension']:
            extCounter[e['url']] +=1
    #print(extCounter)
print (json.dumps(rTypes, indent=3))

{
   "Patient": 686
}


In [6]:
n=53
patient_id = patients[n]['id']
patients[n]


{'resourceType': 'Patient',
 'id': '5561',
 'meta': {'versionId': '2',
  'lastUpdated': '2022-03-15T15:19:22.125+00:00',
  'source': '#3Sr5wARGaK4xf0f8',
  'tag': [{'system': 'https://include.org/htp/fhir/researchstudy',
    'code': 'HTP'}]},
 'extension': [{'url': 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-race',
   'extension': [{'url': 'ombCategory',
     'valueCoding': {'system': 'urn:oid:2.16.840.1.113883.6.238',
      'version': 'v1',
      'code': '2028-9',
      'display': 'White'}},
    {'url': 'text', 'valueString': 'White'}]},
  {'url': 'http://hl7.org/fhir/us/core/StructureDefinition/us-core-ethnicity',
   'extension': [{'url': 'ombCategory',
     'valueCoding': {'system': 'urn:oid:2.16.840.1.113883.6.238',
      'version': 'v1',
      'code': '2186-5',
      'display': 'Not Hispanic or Latino'}},
    {'url': 'text', 'valueString': 'Not Hispanic or Latino'}]}],
 'identifier': [{'use': 'official',
   'system': 'https://include.org/htp/fhir/patient',
   'value':

We can see the data in more compact form via a DataFrame. This shows that the same set of three attributes are available for all patients.

In [7]:
patient_dict = {}
patient_list = []
patient_ids = []
std_attributes = {'gender':'','birthDate':'','maritalStatus':'','multipleBirthBoolean':''}
for p in patients:
    rTypes[p['resourceType']] += 1
    extCounter = Counter()
    if 'extension' in p:
        for e in p['extension']:
            #print(e)
            ext = e['url'].split('/')[-1]
            ee0 = e['extension'][0]
            if 'valueCoding' in ee0:
                extval = ee0['valueCoding']['display']
            else:
                #print (ee0)
                extval=ee0["valueString"]

            if p['id'] not in patient_dict:
                patient_dict[p['id']] = {ext:extval}
            else:
                patient_dict[p['id']][ext] = extval
    for att in std_attributes.keys():
        if att in p:
            if p['id'] not in patient_dict:
                patient_dict[p['id']] = {att:p[att]}
            else:
                patient_dict[p['id']][att] = p[att]

    patient_list.append(extCounter)
    patient_ids.append(p['id'])


pd.set_option("display.max_rows", 30, "display.max_columns", None)
#pdf = pd.DataFrame(patient_list)    
pdf = pd.DataFrame.from_dict(patient_dict, orient="index")
pdf

Unnamed: 0,us-core-race,gender,us-core-ethnicity
5612,White,male,
5616,White,female,
5609,White,female,
5606,White,female,Not Hispanic or Latino
5607,White,female,
...,...,...,...
5480,,female,
5472,,female,
5463,,male,
5457,,female,


The attributes above are informative but are unlikely to part of a meaningful query in building an asthma cohort.

What else might we look at? Let's try Observations.

In [8]:
import pandas as pd

resources = runQuery(f"Patient?_has:ResearchSubject:individual:study={study_id}&_revinclude=Observation:subject")

observations = []
obsCounter  = Counter()
codeCounter = Counter()
vccCounter = Counter()
printObsCounts = False
for r in resources:

    if r['resourceType'] == 'Observation':
        obsCounter[r['subject']['reference']] +=1
        coding0 = r['code']['coding'][0]
        if 'display' in coding0:
            obs_display_name = coding0['display']
        else:
            print(obs_display_name)
            obs_display_name = coding0
        if 'valueCodeableConcept' in r:
            vcc_text = r['valueCodeableConcept']['text']
        else:
            print(f'Obs {r}')
            vcc_text = r
        codeCounter[obs_display_name] +=1
        vccCounter[vcc_text] +=1
        observations.append(r)



#Summarize
print(f"Number of patients with observations {len(obsCounter.keys())}")

if printObsCounts:
    print("Observation count per patient")
    print(json.dumps(obsCounter, indent=3))
print("Coding counts")
#print(json.dumps(codeCounter, indent=3))
df = pd.DataFrame.from_dict(codeCounter,  orient='index')
pd.set_option("display.max_rows", 30, "display.max_columns", None)
display(df)
vccdf= pd.DataFrame.from_dict(vccCounter,  orient='index')
display(vccdf)

Total  Resources: 10034
family member
Obs {'resourceType': 'Observation', 'id': '19740', 'meta': {'versionId': '1', 'lastUpdated': '2022-03-11T01:43:58.779+00:00', 'source': '#Cr4lrjGSsbbO7lgH', 'tag': [{'system': 'https://include.org/htp/fhir/researchstudy', 'code': 'HTP'}]}, 'identifier': [{'use': 'official', 'system': 'https://include.org/htp/fhir/observation', 'value': 'HTP.HTP0665.Visit 1.BMI'}], 'status': 'final', 'category': [{'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/observation-category', 'code': 'vital-signs', 'display': 'Vital Signs'}]}], 'code': {'coding': [{'system': 'https://nih-ncpi.github.io/ncpi-fhir-ig/data-dictionary/HTP/encounter', 'code': 'BMI'}]}, 'subject': {'reference': 'Patient/5557'}, 'focus': [{'reference': 'ObservationDefinition/4881'}], 'encounter': {'reference': 'Encounter/11846'}, '_effectiveDateTime': {'extension': [{'url': 'http://hl7.org/fhir/StructureDefinition/cqf-relativeDateTime', 'extension': [{'url': 'target', 'valueReference':

TypeError: unhashable type: 'dict'

In [9]:
observations[1]

{'resourceType': 'Observation',
 'id': '310358',
 'meta': {'versionId': '1',
  'lastUpdated': '2022-03-18T20:01:53.760+00:00',
  'source': '#iop20zugEhA5BJdK',
  'profile': ['https://ncpi-fhir.github.io/ncpi-fhir-ig/StructureDefinition/family-relationship'],
  'tag': [{'system': 'https://include.org/htp/fhir/researchstudy',
    'code': 'HTP'}]},
 'identifier': [{'use': 'official',
   'system': 'https://include.org/htp/fhir/observation',
   'value': 'HTP.HTP0701.FAMMEMB.HTP0420'}],
 'status': 'final',
 'code': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/v3-RoleCode',
    'version': 'v1',
    'code': 'FAMMEMB',
    'display': 'family member'}],
  'text': 'Family Relationship'},
 'subject': {'reference': 'Patient/5592'},
 'focus': [{'reference': 'Patient/5318'}],
 'valueCodeableConcept': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/v3-RoleCode',
    'version': 'v1',
    'code': 'FAMMEMB',
    'display': 'family member'}],
  'text': 'family member'}}

In [11]:
def download(url, file_path):
    '''Download a file from a URL to a local file path'''
    with open(os.path.expanduser(file_path), "wb") as file:
        response = requests.get(url)
        file.write(response.content)

In [12]:
# Set up drs client
from fasp.loc import kfDRSClient

cl = kfDRSClient("~/.keys/kf_credentials.json")

### This is the filter

In [13]:
documentQuery = "DocumentReference?&category=RNA-Seq&type=Gene-Expression,Gene-Expression-Quantifications&location:missing=false"
exp_docs = runQuery(documentQuery)

Total  Resources: 1155


In [26]:
def download_study_files(documents, folder):
    for d in exp_docs:
        url = d['content'][0]['attachment']['url']
        print(d['type']['text'])

        print(url)
        drs_id = url.split('/')[-1]
        drs_response = cl.get_object(drs_id)
        #print(json.dumps(drs_response, indent=3))
        file_name = drs_response["name"]
        print(file_name)
        d_url= cl.get_access_url(drs_id, 's3')
        #download(d_url,f'{folder}/{file_name}')



    print(d['content'][1]['format']['display'])

    print('_'*80)

In [27]:
folder = '/Users/yourpath/yourfolder'
download_study_files(exp_docs, folder)

Gene Expression
drs://data.kidsfirstdrc.org/65ee3d14-a471-42f2-857f-e75dbd5e8756
fe12f0da-e170-4345-b55c-889c27333ab9.rsem.genes.results.gz
Gene Expression
drs://data.kidsfirstdrc.org/b841fc14-0769-4a95-9824-4d70b75cd5d6
808baf95-9a60-4ecc-a393-5910d054b581.rsem.genes.results.gz
Gene Expression
drs://data.kidsfirstdrc.org/8b8f6e3b-65be-4f93-a259-75fc306ba14a
9ece8e3a-6e4d-4fb4-b12f-c5ab2000fa38.kallisto.abundance.tsv.gz
Gene Expression
drs://data.kidsfirstdrc.org/eb3d5066-5af5-4935-82aa-13fe9f68c3d5
0524d200-76d3-4929-87d1-d68784df44f1.rsem.genes.results.gz
Gene Expression
drs://data.kidsfirstdrc.org/d4541867-d8f1-45a0-b889-4229b53e1ab9
fe12f0da-e170-4345-b55c-889c27333ab9.kallisto.abundance.tsv.gz
Gene Expression
drs://data.kidsfirstdrc.org/bff411a0-b356-4042-b3eb-283556935472
9ece8e3a-6e4d-4fb4-b12f-c5ab2000fa38.rsem.genes.results.gz
Gene Expression
drs://data.kidsfirstdrc.org/38d738d1-c8ec-4be8-be1d-1d88c29e16e0
487ba87c-053d-47dc-a5bc-2e7e77733ace.kallisto.abundance.tsv.gz
Gene Exp

Gene Expression
drs://data.kidsfirstdrc.org/c88afa7f-5249-4ef8-8fe5-f05499eb0c9f
740540f5-54e6-419e-938b-e99604c733c7.rsem.genes.results.gz
Gene Expression
drs://data.kidsfirstdrc.org/d267d9e9-90be-4425-9d16-51f923587f6f
c9f21f29-7c3c-47ee-94ea-c4a7559d251d.kallisto.abundance.tsv.gz
Gene Expression
drs://data.kidsfirstdrc.org/26648053-fd97-4066-bfa1-199af74d470a
701f0e2f-49de-4429-81c8-d5162cdc0990.rsem.genes.results.gz
Gene Expression
drs://data.kidsfirstdrc.org/4c493052-c2db-47ef-9eb1-8b247f2e2b27
701f0e2f-49de-4429-81c8-d5162cdc0990.kallisto.abundance.tsv.gz
Gene Expression
drs://data.kidsfirstdrc.org/6fc6b676-342f-4d31-b3cb-7be9456ddd6b
0fc85294-4ec8-43e9-bc03-b4e4cf894d2f.kallisto.abundance.tsv.gz
Gene Expression
drs://data.kidsfirstdrc.org/2316cb76-a032-474a-865a-b8220d284348
c9f21f29-7c3c-47ee-94ea-c4a7559d251d.rsem.genes.results.gz
tsv
________________________________________________________________________________


In [None]:
def unpack_specimen(specimen_id, specimenList=None, verbose = False): 
    if specimenList == None:
        specimenList = []
        
    specimens = runQuery(f"Specimen?_id={specimen_id}", verbose = verbose)
    specimenList.insert(0, specimens[0])
    specString = ""
    if 'parent' in specimens[0]:
        parent_specimen = specimens[0]['parent'][0]['reference']
        pid = parent_specimen.split('/')[-1]
        specString = unpack_specimen(pid, specimenList)
    else:
        indent=""
        specString = ""
        for spec in specimenList:
            sid = f"Specimen/{spec['id']}"
            stype = spec['type']['text']
            if specString != "" :
                specString = specString + '->'
            specString = specString +  stype
            if verbose:
                print(f"{indent}{stype}\t{sid}")
            indent +="\t"
    return specString

In [82]:
# dataframe looks like
#   columns: 
#       document_reference_attachment_uri (either drs:// or gs://), 
#       drs_uri, (if it exists)
#       document_reference_reference, ( DocumentReference/1234 )
#       file_path,  (downloaded document_reference_attachment_uri on local file system)
#       specimen_bodySite, 
#       condition_code,
#       research_study_reference, (full uri of research_study https:/example.com/fhir/ResearchStudy/1234)
#       patient_reference, (full uri  https:/example.com/fhir/Patient/123)
#       specimen_reference, (full uri  https:/example.com/fhir/Specimen/123)
#       ... extra columns (eg. observations) allowed
#   index:
#       document_reference_reference

import pandas as pd

def query_include_fhir(query, verbose=True):
    if verbose:
        print("running query")
    documents = runQuery(query)
    records = []
    if verbose:
        print("building dataframe")
    base = FHIR_SERVER+'/'
    for d in documents:
        url = d['content'][0]['attachment']['url']
        title = d['content'][0]['attachment']['title']
        type = d['type']['text']
        drs_id = url.split('/')[-1]
        drs_response = cl.get_object(drs_id)
        drs_file_name = drs_response["name"]
        subject_id = d['subject']['reference']
        specimen_reference = d['context']['related'][0]['reference']
        body_site = unpack_specimen(specimen_reference.split('/')[-1])

        #format = "xyz"
        doc_reference = f"DocumentReference/{d['id']}"
        
        if url.startswith('drs'):
            drs_uri = url
        else:
            drs_uri = ""
        records.append({"document_reference_attachment_uri": url,
                        "drs_uri":drs_uri,
                        "document_reference_reference":base+doc_reference,
                        "file_path":title,
                        "drs_file_name":drs_file_name,
                        "specimen_type": body_site,
                        "condition_code":"per study",
                        "research_study_reference":d['meta']['tag'][0]['code'],
                        "patient_reference":base+subject_id,
                        "specimen_reference":base+specimen_reference,                        
                        #"format":format,
                        "type":type,
                        "drs_id":drs_id})

    df = pd.DataFrame(records)
    return df

In [83]:
df3 = query_include_fhir(documentQuery)
df3

running query
Total  Resources: 1155
building dataframe


Unnamed: 0,document_reference_attachment_uri,drs_uri,document_reference_reference,file_path,drs_file_name,specimen_type,condition_code,research_study_reference,patient_reference,specimen_reference,type,drs_id
0,drs://data.kidsfirstdrc.org/755bb5b4-87fe-4d1f...,drs://data.kidsfirstdrc.org/755bb5b4-87fe-4d1f...,https://include-api-fhir-service.includedcc.or...,3e85f5ef-ef23-4ad1-b858-b58cfab3dad7.rsem.gene...,3e85f5ef-ef23-4ad1-b858-b58cfab3dad7.rsem.gene...,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,755bb5b4-87fe-4d1f-98ca-8f940f8b6d3d
1,drs://data.kidsfirstdrc.org/245d53d8-495b-411a...,drs://data.kidsfirstdrc.org/245d53d8-495b-411a...,https://include-api-fhir-service.includedcc.or...,62e4f4e3-bd88-41b0-820f-7105e6b81c53.kallisto....,62e4f4e3-bd88-41b0-820f-7105e6b81c53.kallisto....,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,245d53d8-495b-411a-b048-14041ec7fdd2
2,drs://data.kidsfirstdrc.org/3101d68b-ac4f-4425...,drs://data.kidsfirstdrc.org/3101d68b-ac4f-4425...,https://include-api-fhir-service.includedcc.or...,e26776ad-db1a-4a02-9157-93999f2fd798.rsem.isof...,e26776ad-db1a-4a02-9157-93999f2fd798.rsem.isof...,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,3101d68b-ac4f-4425-a103-f8ca1dc7c955
3,drs://data.kidsfirstdrc.org/fbb132a4-ef4e-4959...,drs://data.kidsfirstdrc.org/fbb132a4-ef4e-4959...,https://include-api-fhir-service.includedcc.or...,85b1c4ca-ab0e-451d-8e53-72893cde65c2.kallisto....,85b1c4ca-ab0e-451d-8e53-72893cde65c2.kallisto....,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,fbb132a4-ef4e-4959-9707-7c622464d6f0
4,drs://data.kidsfirstdrc.org/5eb1f446-9abf-45d4...,drs://data.kidsfirstdrc.org/5eb1f446-9abf-45d4...,https://include-api-fhir-service.includedcc.or...,3e39c08e-301e-4de4-b102-509af82a7c02.rsem.isof...,3e39c08e-301e-4de4-b102-509af82a7c02.rsem.isof...,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,5eb1f446-9abf-45d4-9d48-e234cfa17eb6
...,...,...,...,...,...,...,...,...,...,...,...,...
1150,drs://data.kidsfirstdrc.org/aed40326-c0ff-4846...,drs://data.kidsfirstdrc.org/aed40326-c0ff-4846...,https://include-api-fhir-service.includedcc.or...,de78590f-ecec-41d4-82d5-8d20431b8481.kallisto....,de78590f-ecec-41d4-82d5-8d20431b8481.kallisto....,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,aed40326-c0ff-4846-9130-9f2494f87735
1151,drs://data.kidsfirstdrc.org/73425691-f12c-4528...,drs://data.kidsfirstdrc.org/73425691-f12c-4528...,https://include-api-fhir-service.includedcc.or...,92ef5872-5d8b-42b5-b782-04f3d5c5a413.rsem.isof...,92ef5872-5d8b-42b5-b782-04f3d5c5a413.rsem.isof...,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,73425691-f12c-4528-ad9f-af2dd76aaf14
1152,drs://data.kidsfirstdrc.org/35fba390-f592-4191...,drs://data.kidsfirstdrc.org/35fba390-f592-4191...,https://include-api-fhir-service.includedcc.or...,54580cb8-9215-403b-9335-bee05be91550.rsem.gene...,54580cb8-9215-403b-9335-bee05be91550.rsem.gene...,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,35fba390-f592-4191-b596-1ec3125c339e
1153,drs://data.kidsfirstdrc.org/64c85338-936f-42a0...,drs://data.kidsfirstdrc.org/64c85338-936f-42a0...,https://include-api-fhir-service.includedcc.or...,0a60970e-026c-417c-a75f-981f50cf7321.rsem.isof...,0a60970e-026c-417c-a75f-981f50cf7321.rsem.isof...,Whole blood->RNA,per study,HTP,https://include-api-fhir-service.includedcc.or...,https://include-api-fhir-service.includedcc.or...,Gene Expression,64c85338-936f-42a0-bc32-dbf7f21e0bac


In [84]:
df3.to_csv(f"{include_study_id}_files.txt", sep='\t')

## Examine a Specimen

In [18]:
spec = runQuery("Specimen?_id=299259")
spec

Total  Resources: 1


[{'resourceType': 'Specimen',
  'id': '299259',
  'meta': {'versionId': '1',
   'lastUpdated': '2022-03-12T14:04:57.262+00:00',
   'source': '#9iNqUjR2rBGOmwCb',
   'tag': [{'system': 'https://include.org/htp/fhir/researchstudy',
     'code': 'HTP'}]},
  'identifier': [{'use': 'official',
    'system': 'https://include.org/htp/fhir/specimen',
    'value': 'HTP0349B_RNA_paxgene'}],
  'status': 'available',
  'type': {'coding': [{'system': 'http://purl.obolibrary.org/obo/obi.owl',
     'version': 'v1',
     'code': 'OBI:0000880',
     'display': 'RNA extract'}],
   'text': 'RNA'},
  'subject': {'reference': 'Patient/5242'},
  'parent': [{'reference': 'Specimen/296834'}],
  'collection': {'_collectedDateTime': {'extension': [{'url': 'http://hl7.org/fhir/StructureDefinition/cqf-relativeDateTime',
      'extension': [{'url': 'target',
        'valueReference': {'reference': 'Patient/5242'}},
       {'url': 'targetPath', 'valueString': 'birthDate'},
       {'url': 'relationship', 'valueCode'

In [19]:
parent_spec = runQuery("Specimen?_id=296834")
parent_spec

Total  Resources: 1


[{'resourceType': 'Specimen',
  'id': '296834',
  'meta': {'versionId': '1',
   'lastUpdated': '2022-03-12T14:03:23.525+00:00',
   'source': '#fTylC4gbeWUaaYbC',
   'tag': [{'system': 'https://include.org/htp/fhir/researchstudy',
     'code': 'HTP'}]},
  'identifier': [{'use': 'official',
    'system': 'https://include.org/htp/fhir/specimen',
    'value': 'HTP0349B_Whole blood'}],
  'status': 'unavailable',
  'type': {'coding': [{'system': 'http://purl.obolibrary.org/obo/ncit.owl',
     'version': 'v1',
     'code': 'C17610',
     'display': 'Blood Sample'}],
   'text': 'Whole blood'},
  'subject': {'reference': 'Patient/5242'},
  'collection': {'_collectedDateTime': {'extension': [{'url': 'http://hl7.org/fhir/StructureDefinition/cqf-relativeDateTime',
      'extension': [{'url': 'target',
        'valueReference': {'reference': 'Patient/5242'}},
       {'url': 'targetPath', 'valueString': 'birthDate'},
       {'url': 'relationship', 'valueCode': 'after'},
       {'url': 'offset',
    

In [76]:
def unpack_specimen(specimen_id, specimenList=None, verbose = False): 
    if specimenList == None:
        specimenList = []
        
    specimens = runQuery(f"Specimen?_id={specimen_id}", verbose = verbose)
    specimenList.insert(0, specimens[0])
    specString = ""
    if 'parent' in specimens[0]:
        parent_specimen = specimens[0]['parent'][0]['reference']
        pid = parent_specimen.split('/')[-1]
        specString = unpack_specimen(pid, specimenList)
    else:
        indent=""
        specString = ""
        for spec in specimenList:
            sid = f"Specimen/{spec['id']}"
            stype = spec['type']['text']
            if specString != "" :
                specString = specString + '->'
            specString = specString +  stype
            if verbose:
                print(f"{indent}{stype}\t{sid}")
            indent +="\t"
    return specString

In [77]:
unpack_specimen('299259')

'Whole blood->RNA'