# Generate Report From the ELLA REST API

Let's load in the data we fetched from `Creating-Custom-Reports/Parse-Analysis-From-REST-API.ipynb`.

In [7]:
import json
import pandas as pd
import os
import copy
from pprint import pprint

## All data will be loaded in asyncronously from REST APIs

with open('/data/analysis_rest_api_data.json', 'r') as reader:
    analysis_data = json.load(reader)

with open('/data/mock_patient_data.json', 'r') as reader:
    mock_patient_data = json.load(reader)
    
acmg_codes = {
    '1': 'Benign',
    '2': 'Likely Benign',
    '3': 'Variant of Uncertain Significance',
    '4': 'Likely Pathogenic',
    '5': 'Pathogenic',
    'U': 'Unknown'
}
ranks = ['U', '1', '2', '3', '4', '5']

In [None]:
! pip install tabulate

In [48]:
filtered_transcripts_report_data = []
for allele_assessment in analysis_data['allele_assessments']:
    classification_int_code = allele_assessment['allele_assessment']['classification']
    allele_report = allele_assessment['allele_report']
    transcripts = allele_assessment['annotation']['transcripts']
    # This just gives the name of the filtered transcripts
    filtered_transcripts_ids = allele_assessment['annotation']['filtered_transcripts']
    filtered_transcripts_data =  list(filter(lambda n : n['transcript'] in filtered_transcripts_ids, transcripts))
    for n in filtered_transcripts_data:
        filtered_transcripts_report_data.append(n)
    allele_assessment['filterd_transcripts'] = filtered_transcripts_report_data
    
analysis_data['allele_assessments'][0]

{'allele_assessment': {'allele_id': 3,
  'analysis_id': 2,
  'annotation_id': 3,
  'attachment_ids': [],
  'classification': '4',
  'custom_annotation_id': None,
  'date_created': '2020-10-19T10:49:44.630277+00:00',
  'date_superceeded': None,
  'evaluation': {'acmg': {'included': [{'code': 'PVS1',
      'comment': '',
      'match': None,
      'op': None,
      'source': 'suggested',
      'uuid': '5104d287-683d-4391-a75b-5694c65f2b88'},
     {'code': 'PPxPM2',
      'comment': '',
      'match': None,
      'op': None,
      'source': 'suggested',
      'uuid': 'b44315f8-2db9-4605-b824-acf51e50ac31'}],
    'suggested': [{'code': 'REQ_GP_last_exon_not_important',
      'match': ['LENI'],
      'op': '$in',
      'source': 'genepanel.last_exon_important'},
     {'code': 'REQ_GP_LOF_missense',
      'match': ['ANY'],
      'op': '$in',
      'source': 'genepanel.disease_mode'},
     {'code': 'REQ_not_in_last_exon',
      'match': ['no'],
      'op': '$in',
      'source': 'transcript.i

In [87]:
# We are assuming we only need data for the first sample!
columns = ['Gene', 'Variant', 'Zygosity', 'Variant Classification']

# We also need to rank the variants and go for the most conservative classification
gene_data = []
for allele_assessment in analysis_data['allele_assessments']:
    transcripts = allele_assessment['filterd_transcripts']
    for transcript in transcripts:
        d = {
            'Gene': transcript['symbol'],
            'Variant': transcript['HGVSc'],
            'Zygosity': allele_assessment['samples'][0]['genotype']['type'],
            'Variant Classification': acmg_codes[str(allele_assessment['allele_assessment']['classification'])],
            'Variant Classification Code': allele_assessment['allele_assessment']['classification']
        }
        gene_data.append(d)

#print(gene_data)
import random
random.shuffle(gene_data)
gene_data
df = pd.DataFrame.from_records(gene_data)

df = df.drop_duplicates()
df
#df['Variant Classififica']

Unnamed: 0,Gene,Variant,Zygosity,Variant Classification,Variant Classification Code
0,BRCA2,c.10G>T,Heterozygous,Likely Pathogenic,4
1,BRCA2,c.198A>G,Heterozygous,Likely Pathogenic,4
2,BRCA2,c.97G>T,Heterozygous,Likely Pathogenic,4
3,BRCA2,c.72A>T,Heterozygous,Unknown,U
4,BRCA2,c.67+2T>A,Heterozygous,Unknown,U
5,BRCA2,c.72A>T,Heterozygous,Likely Pathogenic,4
7,BRCA2,c.10G>T,Heterozygous,Unknown,U
9,BRCA2,c.51_52delAC,Heterozygous,Unknown,U
15,BRCA2,c.97G>T,Heterozygous,Unknown,U
17,BRCA2,c.51_52delAC,Heterozygous,Likely Pathogenic,4


In [92]:
# We also need to rank the variants and go for the most conservative classification
# I think this will work, but it should be tested more

df = df.sort_values(by=['Variant Classification Code'], key=lambda x: x.map(acmg_codes))
df = df.drop_duplicates(subset=['Variant'])
df.drop(columns=['Variant Classification Code'])
df

Unnamed: 0,Gene,Variant,Zygosity,Variant Classification,Variant Classification Code
0,BRCA2,c.10G>T,Heterozygous,Likely Pathogenic,4
1,BRCA2,c.198A>G,Heterozygous,Likely Pathogenic,4
2,BRCA2,c.97G>T,Heterozygous,Likely Pathogenic,4
5,BRCA2,c.72A>T,Heterozygous,Likely Pathogenic,4
17,BRCA2,c.51_52delAC,Heterozygous,Likely Pathogenic,4
18,BRCA2,c.67+2T>A,Heterozygous,Likely Pathogenic,4


In [76]:
from tabulate import tabulate
markdown_table_str = tabulate(df, headers='keys', tablefmt='pipe', showindex=False)
mock_patient_data['markdown_table_str'] = markdown_table_str

In [93]:
from jinja2 import Environment, BaseLoader


report_header_table = """
# Report

## Sample-1

---

|  |  |  |  |
| --- | --- | --- | --- |
| **Patient Name:** | {{ patient_data.patient_last_name }}, {{ patient_data.patient_first_name }} | **Ordered By:** | {{test_data.test_requesting_physician}} |
| **Date of Birth:** | {{patient_data.patient_dob}} | **Sample Collection Date:** | {{ sample_data.sample_collection_date }} | 
| **Gender**: | {{patient_data.patient_gender}} | **Sample Accession Date:** | {{ sample_data.sample_accession_date }} |
| **Sample Type:** | {{sample_data.sample_type}} | **Report Data:** | {{report_data.report_date}} |
| **Sample ID/MRN:** | {{ sample_data.sample_id }} | | |
| **Test Accession:** | {{ test_data.test_accession }} | | | 
| **Test Ordered:** | {{test_data.test_ordered}} | | | 
| **Test Code:** | {{ test_data.test_code }} | | |
| **Test Indictation** | {{ test_data.test_indication }} | | |

---

## Result: Postitive

---

{{ markdown_table_str }}

"""

rtemplate = Environment(loader=BaseLoader()).from_string(report_header_table)
rendered_report_header_table = rtemplate.render(**mock_patient_data)
print(rendered_report_header_table)




# Report

## Sample-1

---

|  |  |  |  |
| --- | --- | --- | --- |
| **Patient Name:** | Smith, Todd | **Ordered By:** | Dr. Who |
| **Date of Birth:** | 12/11/1977 | **Sample Collection Date:** | 12/9/2020 | 
| **Gender**: | Male | **Sample Accession Date:** | 12/9/2020 |
| **Sample Type:** | blood | **Report Data:** | 12/9/2020 |
| **Sample ID/MRN:** | 1234567 | | |
| **Test Accession:** | BRCA122345 | | | 
| **Test Ordered:** | BRCA2 sequencing & Deletion/Duplication analysis | | | 
| **Test Code:** | ABCD | | |
| **Test Indictation** | Patient has family history of breast cancer. | | |

---

## Result: Postitive

---

| Gene   | Variant      | Zygosity     | Variant Classification   |
|:-------|:-------------|:-------------|:-------------------------|
| BRCA2  | c.10G>T      | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.51_52delAC | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.67+2T>A    | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.72A>T      | He

In [94]:
# This doesn't work in the jupyterhub output
from IPython.display import display, Markdown, Latex, HTML
display(Markdown(rendered_report_header_table))



# Report

## Sample-1

---

|  |  |  |  |
| --- | --- | --- | --- |
| **Patient Name:** | Smith, Todd | **Ordered By:** | Dr. Who |
| **Date of Birth:** | 12/11/1977 | **Sample Collection Date:** | 12/9/2020 | 
| **Gender**: | Male | **Sample Accession Date:** | 12/9/2020 |
| **Sample Type:** | blood | **Report Data:** | 12/9/2020 |
| **Sample ID/MRN:** | 1234567 | | |
| **Test Accession:** | BRCA122345 | | | 
| **Test Ordered:** | BRCA2 sequencing & Deletion/Duplication analysis | | | 
| **Test Code:** | ABCD | | |
| **Test Indictation** | Patient has family history of breast cancer. | | |

---

## Result: Postitive

---

| Gene   | Variant      | Zygosity     | Variant Classification   |
|:-------|:-------------|:-------------|:-------------------------|
| BRCA2  | c.10G>T      | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.51_52delAC | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.67+2T>A    | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.72A>T      | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.97G>T      | Heterozygous | Likely Pathogenic        |
| BRCA2  | c.198A>G     | Heterozygous | Likely Pathogenic        |
