In [1]:
from collections import Counter
import sys

# Put the parent directory on the import path; presumably that is where the local
# filter_clinvar_xml and clinvar_xml_io modules imported below live — confirm.
sys.path.append('..')

In [2]:
from filter_clinvar_xml import filter_xml, pprint, iterate_cvs_from_xml
from clinvar_xml_io.clinvar_xml_io import *

## Filter dataset

In [3]:
def drug_response(x: ClinVarRecord):
    """Filter predicate selecting drug-response records.

    A record is kept when 'drug response' appears in its clinical significance
    list, or when its trait set type is 'DrugResponse'.
    """
    if 'drug response' in x.clinical_significance_list:
        return True
    return x.trait_set_type == 'DrugResponse'

In [4]:
# Write the records accepted by drug_response from the full release into their own file.
full_release_xml = '/home/april/projects/opentargets/ClinVarFullRelease_00-latest.xml.gz'
drug_response_xml = '/home/april/projects/opentargets/drug-response.xml.gz'
# max_num=None presumably means "no cap on the number of records" — confirm against filter_xml.
filter_xml(input_xml=full_release_xml, output_xml=drug_response_xml, filter_fct=drug_response, max_num=None)

INFO:filter_clinvar_xml:Records written: 4970


In [3]:
# Filtered drug-response file; same path as the output_xml passed to filter_xml above.
drug_xml = '/home/april/projects/opentargets/drug-response.xml.gz'

## Submitters of drug response data

In [4]:
# ClinVarRecord can't be used here: it only covers the RCV, whereas the submitter
# is recorded on the SCV element, so read it straight from the raw XML.
all_submitters = [
    submission_id.attrib.get('submitter')
    for record_xml in iterate_cvs_from_xml(drug_xml)
    for submission_id in find_elements(record_xml, './ClinVarAssertion/ClinVarSubmissionID')
]

In [5]:
# Tally submissions per submitter (all_submitters has one entry per ClinVarSubmissionID element).
submitter_counts = Counter(all_submitters)

In [6]:
# Number of distinct submitter values found.
len(submitter_counts)

37

In [7]:
# Show the per-submitter submission counts.
submitter_counts

Counter({'PharmGKB': 401,
         'OMIM': 84,
         'Institute of Microbiology; University Hospital and Univeristy of Lausanne': 3,
         'Neurology IV Unit; Fondazione Istituto Neurologico C. Besta': 1,
         "Center for Pediatric Genomic Medicine,Children's Mercy Hospital and Clinics": 5,
         'Laboratory for Molecular Medicine,Mass General Brigham Personalized Medicine': 58,
         "Center for Advanced Molecular Diagnostics, Cytogenetics Laboratory,Brigham and Women's Hospital": 2,
         'Center for Personalized Medicine, Roswell Park Cancer Institute': 1,
         'Albrecht-Kossel-Institute,Medical University Rostock': 46,
         'Genetic Testing Lab, Ashok and Rita Patel Institute of Integrated Study and Research in Biotechnology and Allied Sciences': 66,
         'Oxford Haemato-Oncology Service,Oxford University Hospitals NHS Foundation Trust': 30,
         'Kopetz Lab,MD Anderson Cancer Center': 2,
         'Division of Hematology/Oncology, Florida,Mayo Cli

## Drug response attributes provided

In [8]:
# Wrap the filtered file in a ClinVarDataset so its records can be iterated over.
dataset = ClinVarDataset(drug_xml)

In [9]:
# Collect the distinct names of every trait whose XML is typed 'DrugResponse'.
all_trait_names = {
    trait.preferred_or_other_valid_name
    for record in dataset
    for trait in record.traits
    if trait.trait_xml.attrib['Type'] == 'DrugResponse'
}

In [10]:
# Show every distinct DrugResponse trait name collected above.
all_trait_names

{'AKT1 Inhibitor response',
 'Aminoglycoside-induced deafness',
 'Androgen deprivation therapy response',
 'Anti-SEMA4D Monoclonal Antibody VX15/2503',
 'Azathioprine response',
 'CYP2C19: decreased function',
 'CYP2C19: increased function',
 'CYP2C19: no function',
 'CYP2C19: normal function',
 'CYP2C19: uncertain function',
 'Cabozantinib resistance',
 'Carbamazepine hypersensitivity',
 'Citalopram response',
 'Clopidogrel response',
 'Codeine response',
 'Corticosteroids response',
 'Dabrafenib response',
 'Debrisoquine, poor metabolism of',
 'Debrisoquine, ultrarapid metabolism of',
 'Deutetrabenazine response',
 'Dopamine agonists response',
 'Doxorubicin response',
 'Efavirenz response',
 'Entrectinib resistance',
 'Erlotinib response',
 'Escitalopram response',
 'Everolimus response',
 'Ezetimibe response',
 'Fluorouracil response',
 'Flurbiprofen response',
 'Gefitinib response',
 'Gemcitabine response',
 'Gentamicin response',
 'Glipizide response',
 'Histone Methylation Thera

## Summary

* 4970 drug response records in June 2022 data
* 37 different submitters - not just PharmGKB!
* Not much drug response information in the ClinVar XML besides the trait name itself
    * According to [clinical significance docs](https://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/) they "anticipate adding more specific drug response terms based on a recommendation by CPIC."
* Trait names vary in how informative they are
    * Often just the drug name followed by "response" (e.g. `Clopidogrel response`)
    * Can include PharmGKB categories like `Efficacy`, `Dosage`, `Toxicity`, etc. (possibly these are just PharmGKB submissions?)
    * Can occasionally include indication of direction - e.g. `Cabozantinib resistance`, `Carbamazepine hypersensitivity`, `Debrisoquine, ultrarapid metabolism of`, `Suxamethonium response - slow metabolism`
