# FHIR-Aggregator
## Explore data in the test Google FHIR service

### install and test dependencies

In [1]:
# Install D-Tale (interactive dataframe viewer) into the kernel's environment.
# `%pip` is used explicitly instead of a bare `pip`, so the install does not
# depend on IPython's automagic setting and always targets this kernel.
# The version is pinned to the one the recorded run resolved, for reproducibility.
%pip install dtale==3.16.1

Collecting dtale
  Downloading dtale-3.16.1-py2.py3-none-any.whl.metadata (16 kB)
Collecting dash-colorscales (from dtale)
  Downloading dash_colorscales-0.0.4.tar.gz (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dash-daq (from dtale)
  Downloading dash_daq-0.5.0.tar.gz (642 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m642.7/642.7 kB[0m [31m20.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting squarify (from dtale)
  Downloading squarify-0.4.4-py3-none-any.whl.metadata (600 bytes)
Collecting strsimpy (from dtale)
  Downloading strsimpy-0.2.1-py3-none-any.whl.metadata (20 kB)
Collecting dash-bootstrap-components (from dtale)
  Downloading dash_bootstrap_components-1.7.1-py3-none-any.whl.metadata (17 kB)
Collecting lz4 (from dtale)
  Downloading lz4-4.4.3-cp311-cp

# install the query tool

In [3]:
# Install the fhir-query CLI (`fq`) straight from GitHub into the kernel's environment.
# `%pip` is explicit and does not rely on IPython's automagic setting.
# NOTE(review): this tracks the repository's default branch; the recorded run
# resolved commit 0358fc754a44d9ae55fd16575e5a314dc8c1917d. Append `@<commit>`
# to the URL if a fully reproducible install is required.
%pip install git+https://github.com/FHIR-Aggregator/fhir-query.git

Collecting git+https://github.com/FHIR-Aggregator/fhir-query.git
  Cloning https://github.com/FHIR-Aggregator/fhir-query.git to /tmp/pip-req-build-fbiwv6ak
  Running command git clone --filter=blob:none --quiet https://github.com/FHIR-Aggregator/fhir-query.git /tmp/pip-req-build-fbiwv6ak
  Resolved https://github.com/FHIR-Aggregator/fhir-query.git to commit 0358fc754a44d9ae55fd16575e5a314dc8c1917d
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting halo (from fhir_query==0.1.0)
  Downloading halo-0.0.31.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting fhir.resources==8.0.0b4 (from fhir_query==0.1.0)
  Downloading fhir.resources-8.0.0b4-py2.py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.4/47.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting dotty-dict (from fhir

# verify the tool was installed

In [4]:
# Smoke test: invoking `fq` with no arguments prints its usage text and the
# list of sub-commands, which confirms the CLI is installed and on PATH.
!fq

[0mUsage: fq [OPTIONS] COMMAND [ARGS]...

  Run FHIR GraphDefinition traversal.

Options:
  --help  Show this message and exit.

Commands:
  main*       Run FHIR GraphDefinition traversal.
  dataframe   Create dataframes from the local db.
  summarize   Summarize the aggregation results.
  visualize   Visualize the aggregation results.
  vocabulary  Retrieve Vocabulary Observation and ResearchStudy resources...
[0m[0m

# Retrieve the vocabularies used by commonly used resources

In [5]:
# Store the FHIR server base URL in an environment variable so that shell
# commands in this notebook can refer to it as $FHIR_BASE.
%env FHIR_BASE=https://google-fhir.test-fhir-aggregator.org
# Query that server for its vocabularies and write them to vocabulary.tsv
# in the current working directory.
!fq vocabulary vocabulary.tsv --fhir-base-url $FHIR_BASE

env: FHIR_BASE=https://google-fhir.test-fhir-aggregator.org
[0m[K[0m[?25h[0m[32m✔[39m Wrote 155 vocabularies to vocabulary.tsv
[0m[K[0m[?25h[0m[0m

### show vocabularies

In [10]:
# Browse the vocabulary table written by `fq vocabulary` in D-Tale.
import dtale
import dtale.app as dtale_app
import pandas as pd

# Switch D-Tale into Colab mode before starting the viewer
# (presumably so the returned link is reachable from the Colab front end).
dtale_app.USE_COLAB = True

# read_table defaults to a tab delimiter, which matches the .tsv file.
df = pd.read_table('vocabulary.tsv')
dtale.show(df)

https://zuqdi8bb94-496ff2e9c6d22116-40000-colab.googleusercontent.com/dtale/main/2

# Retrieve a predefined set of queries (a FHIR GraphDefinition)
## In this case, the graph retrieves an entire study

In [27]:
# Download the ResearchStudy GraphDefinition consumed by the export step.
# `-O` fixes the output filename: without it, re-running this cell makes wget
# save numbered copies (ResearchStudyGraph.yaml.1, .2, ...) while the export
# cell keeps reading the original, possibly stale, ResearchStudyGraph.yaml.
!wget -O ResearchStudyGraph.yaml https://raw.githubusercontent.com/FHIR-Aggregator/fhir-query/refs/heads/main/graph-definitions/R5/ResearchStudyGraph.yaml


--2025-02-11 23:24:35--  https://raw.githubusercontent.com/FHIR-Aggregator/fhir-query/refs/heads/main/graph-definitions/R5/ResearchStudyGraph.yaml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1840 (1.8K) [text/plain]
Saving to: ‘ResearchStudyGraph.yaml.2’


2025-02-11 23:24:36 (25.0 MB/s) - ‘ResearchStudyGraph.yaml.2’ saved [1840/1840]



# export the data to a local database

In [28]:
# Set the FHIR server base URL again so this cell can run on its own;
# it is the same value used when retrieving the vocabularies.
%env  FHIR_BASE=https://google-fhir.test-fhir-aggregator.org
# Export one study: start from the ResearchStudy whose identifier is TCGA-KIRC
# and follow the links (stored queries) defined in ResearchStudyGraph.yaml.
!fq --fhir-base-url $FHIR_BASE  --graph-definition-file-path  ResearchStudyGraph.yaml  --path '/ResearchStudy?identifier=TCGA-KIRC'

env: FHIR_BASE=https://google-fhir.test-fhir-aggregator.org
research-study-graph is valid FHIR R5 GraphDefinition
[0m[K[0m[?25h[0m[32m✔[39m Fetching https://google-fhir.test-fhir-aggregator.org/ResearchStudy?identifier=TCGA-KIRC
[0m[K[0m[?25h[0m[32m✔[39m Processing link: ResearchSubject/study={path} with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[32m✔[39m Processing link: Group/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[32m✔[39m Processing link: Patient/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[32m✔[39m Processing link: Specimen/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[32m✔[39m Processing link: Observation/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[32m✔[39m Processing link: Procedure/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)



pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.



[1m[36m⠹[0m Processing link: ServiceRequest/part-of-study={path}&_count=1000&_total=accurate with 1 Rese (...)


pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.



[1m[36m⠋[0m Processing link: ServiceRequest/part-of-study={path}&_count=1000&_total=accurate with 1 Rese (...)


pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.



[1m[36m⠧[0m Processing link: ServiceRequest/part-of-study={path}&_count=1000&_total=accurate with 1 Rese (...)


pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.



[0m[K[0m[?25h[0m[32m✔[39m Processing link: ServiceRequest/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[32m✔[39m Processing link: ImagingStudy/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[32m✔[39m Processing link: Condition/part-of-study={path}&_count=1000&_total=accurate with 1 ResearchStudy(s)
[0m[K[0m[?25h[0m[31m✖[39m Could not find any resources for MedicationAdministration->Medication link: {'params': '_id={path}&_count=1000&_total=accurate', 'path': 'MedicationAdministration.medication.reference.reference', 'sourceId': 'MedicationAdministration', 'targetId': 'Medication'}
[0m[K[0m[?25h[0mAggregated Results: {'Condition': 537, 'DocumentReference': 29352, 'Group': 16, 'ImagingStudy': 2177, 'MedicationAdministration': 1074, 'Observation': 27286, 'Patient': 537, 'Procedure': 1616, 'ResearchStudy': 1, 'ResearchSubject': 537, 'ServiceRequest': 27163, 'Specimen': 171

In [29]:
# Summarize the extracted data: for each resource type, print its count and
# the counts of the resource types it references.
!fq summarize

Condition[0m:[0m
[0m  [0mcount[0m:[0m [0m537[0m
[0m  [0mreferences[0m:[0m
[0m    [0mEncounter[0m:[0m
[0m      [0mcount[0m:[0m [0m537[0m
[0m    [0mObservation[0m:[0m
[0m      [0mcount[0m:[0m [0m4889[0m
[0m    [0mPatient[0m:[0m
[0m      [0mcount[0m:[0m [0m537[0m
[0m    [0mResearchStudy[0m:[0m
[0m      [0mcount[0m:[0m [0m537[0m
[0mDocumentReference[0m:[0m
[0m  [0mcount[0m:[0m [0m29352[0m
[0m  [0mreferences[0m:[0m
[0m    [0mGroup[0m:[0m
[0m      [0mcount[0m:[0m [0m16[0m
[0m    [0mPatient[0m:[0m
[0m      [0mcount[0m:[0m [0m29336[0m
[0m    [0mResearchStudy[0m:[0m
[0m      [0mcount[0m:[0m [0m29352[0m
[0m    [0mServiceRequest[0m:[0m
[0m      [0mcount[0m:[0m [0m27163[0m
[0m    [0mSpecimen[0m:[0m
[0m      [0mcount[0m:[0m [0m35332[0m
[0mGroup[0m:[0m
[0m  [0mcount[0m:[0m [0m16[0m
[0m  [0mreferences[0m:[0m
[0m    [0mPatient[0m:[0m
[0m      [0mcount[0m:[0m [0m8592

In [31]:
# Create a dataframe from the extracted data; the command reports the file it
# saved (/tmp/fhir-graph.tsv in the recorded run), which the next cell loads.
!fq dataframe


Saved /tmp/fhir-graph.tsv[0m
[0mSaved /tmp/fhir-graph.tsv[0m
[0m[0m

In [4]:
# Open the dataframe exported by `fq dataframe` in D-Tale.
import dtale
import dtale.app as dtale_app
import pandas as pd

# Switch D-Tale into Colab mode before starting the viewer.
dtale_app.USE_COLAB = True

# NOTE(review): the file has a .tsv extension but is parsed with read_csv's
# default comma delimiter — confirm which delimiter `fq dataframe` writes.
df = pd.read_csv(filepath_or_buffer='/tmp/fhir-graph.tsv')
dtale.show(df)



In [None]:
# Install lifelines into the kernel's environment.
# `%pip` is used explicitly instead of a bare `pip`, so the install does not
# depend on IPython's automagic setting and always targets this kernel.
%pip install lifelines