### Variant & clinical annotations overview

In [1]:
import numpy as np
import pandas as pd

#### Part 1: PharmGKB variant annotations overview

In [2]:
# Study parameters for the variant annotations: study population size,
# biogeographical group and statistics. This file is cross-referenced against
# the 3 variant annotation files (presumably via "Variant Annotation ID" -
# TODO confirm).
# pd.read_table uses a tab separator by default, matching the .tsv input.
study_parameter_df = pd.read_table(
    "../datasets/rawData-21-Dec-2022/pharmgkb/variantAnnotations/study_parameters.tsv"
)
print(study_parameter_df)
study_parameter_df.head()

       Study Parameters ID  Variant Annotation ID     Study Type  Study Cases  \
0               1449169927             1449169911   case/control         15.0   
1                982022171              982022165   case/control         99.0   
2               1451833880             1451833860         cohort          1.0   
3                982022155              982022148   case/control         99.0   
4               1451283504             1451283480  meta-analysis          NaN   
...                    ...                    ...            ...          ...   
32021           1451293460             1451293440    case series         39.0   
32022           1451293520             1451293500    case series         39.0   
32023           1451293504             1451293521    case series         39.0   
32024           1451293420             1451293404    case series         39.0   
32025           1451566940             1451566922            NaN          NaN   

       Study Controls      

Unnamed: 0,Study Parameters ID,Variant Annotation ID,Study Type,Study Cases,Study Controls,Characteristics,Characteristics Type,Frequency In Cases,Allele Of Frequency In Cases,Frequency In Controls,Allele Of Frequency In Controls,P Value,Ratio Stat Type,Ratio Stat,Confidence Interval Start,Confidence Interval Stop,Biogeographical Groups
0,1449169927,1449169911,case/control,15.0,50.0,"SCAR, MPE, SJS (cases and controls)",Disease,0.06,*35:08:01,0.001,*35:08:01,= 0.231,OR,10.45,0.4,270.41,East Asian
1,982022171,982022165,case/control,99.0,99.0,Immunocompromised patients were excluded. Pers...,Study Cohort,0.0,T,0.005,T,,OR,,,,"Multiple groups, Majority Caucasian, several N..."
2,1451833880,1451833860,cohort,1.0,,,,,,,,,,,,,Unknown
3,982022155,982022148,case/control,99.0,99.0,Immunocompromised patients were excluded. Pers...,Study Cohort,0.252,A,0.308,A,= 0.46,OR,0.74,0.4,1.38,"Multiple groups, Majority Caucasian, several N..."
4,1451283504,1451283480,meta-analysis,,,"meta-analysis of 27 studies, median minor alle...",Study Cohort,0.35,A,,,< 0.001,,,,,Multiple groups


In [3]:
# Variant-phenotype annotations: associations in which the variant affects a
# phenotype, with or without drug information.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
var_pheno_ann_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/pharmgkb/variantAnnotations/var_pheno_ann.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(var_pheno_ann_df)
var_pheno_ann_df.head()

       Variant Annotation ID                Variant/Haplotypes    Gene  \
0                 1449169911                    HLA-B*35:08:01   HLA-B   
1                  982022165                        rs45607939    NAT2   
2                 1451833860  CYP3A poor metabolizer phenotype   CYP3A   
3                  982022148                         rs1799930    NAT2   
4                 1451283480                        rs16969968  CHRNA5   
...                      ...                               ...     ...   
12820              827695506                         rs3745274  CYP2B6   
12821             1447984440                         rs2472677   NR1I2   
12822             1444936025                         rs2654754    DRD3   
12823             1450807601                         rs2472677   NR1I2   
12824             1450807610                         rs2472677   NR1I2   

                                             Drug(s)      PMID  \
0                                        lamo

Unnamed: 0,Variant Annotation ID,Variant/Haplotypes,Gene,Drug(s),PMID,Phenotype Category,Significance,Notes,Sentence,Alleles,Specialty Population
0,1449169911,HLA-B*35:08:01,HLA-B,lamotrigine,29238301,Toxicity,no,The allele was not significant when comparing ...,HLA-B *35:08:01 is not associated with likelih...,*35:08:01,
1,982022165,rs45607939,NAT2,sulfamethoxazole / trimethoprim,22850190,Toxicity,no,Minor allele frequencies were compared between...,Allele T is not associated with increased risk...,T,
2,1451833860,CYP3A poor metabolizer phenotype,CYP3A,loperamide,35815036,Toxicity,not stated,A patient with opioid use disorder showed slow...,CYP3A poor metabolizer phenotype is associated...,,
3,982022148,rs1799930,NAT2,sulfamethoxazole / trimethoprim,22850190,Toxicity,no,Minor allele frequencies were compared between...,Allele A is not associated with increased risk...,A,
4,1451283480,rs16969968,CHRNA5,,22071378,Other,yes,this was from meta-analysis of 27 studies but ...,Allele A is associated with increased severity...,A,


In [5]:
# Variant-drug annotations: associations in which the variant affects a drug
# dose, response, metabolism, etc.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
var_drug_ann_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/pharmgkb/variantAnnotations/var_drug_ann.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(var_drug_ann_df)
var_drug_ann_df.head()

       Variant Annotation ID                      Variant/Haplotypes    Gene  \
0                 1451834452                     CYP3A4*1, CYP3A4*17  CYP3A4   
1                 1451159680                               rs5031016  CYP2A6   
2                 1183684657  CYP2D6 ultrarapid metabolizer genotype  CYP2D6   
3                 1451306860                               CYP2C9*11  CYP2C9   
4                 1448997750                     CYP2B6*1, CYP2B6*18  CYP2B6   
...                      ...                                     ...     ...   
11192             1448997209                               rs3745274  CYP2B6   
11193             1448617505                               rs3745274  CYP2B6   
11194             1448998426                               rs3745274  CYP2B6   
11195             1448107499                               rs3745274  CYP2B6   
11196             1450807575                               rs2472677   NR1I2   

          Drug(s)      PMID     Phenoty

Unnamed: 0,Variant Annotation ID,Variant/Haplotypes,Gene,Drug(s),PMID,Phenotype Category,Significance,Notes,Sentence,Alleles,Specialty Population
0,1451834452,"CYP3A4*1, CYP3A4*17",CYP3A4,nifedipine,15634941,"Other,""Metabolism/PK""",not stated,in vitro expression of the recombinant CYP3A4*...,CYP3A4 *17 is associated with decreased metabo...,*17,
1,1451159680,rs5031016,CYP2A6,warfarin,22248286,Dosage,no,No association was found between this variant ...,Allele G is not associated with increased dose...,G,
2,1183684657,CYP2D6 ultrarapid metabolizer genotype,CYP2D6,tramadol,18204346,Metabolism/PK,yes,"Median (+)R,R-tramadol area under the curve wa...",CYP2D6 ultra-metabolizer genotype is associate...,,
3,1451306860,CYP2C9*11,CYP2C9,warfarin,33350885,Dosage,not stated,This case suggests that CYP2C9 *11/*11 carrier...,CYP2C9 *11/*11 is associated with decreased do...,*11/*11,
4,1448997750,"CYP2B6*1, CYP2B6*18",CYP2B6,efavirenz,16495778,Metabolism/PK,yes,Please note that in the paper the allele was r...,CYP2B6 *1/*18 is associated with increased con...,*1/*18,


In [6]:
# Functional-assay annotations: in vitro and functional analysis-type
# associations.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
var_fa_ann_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/pharmgkb/variantAnnotations/var_fa_ann.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(var_fa_ann_df)
var_fa_ann_df.head()

      Variant Annotation ID  \
0                1451148445   
1                1447814273   
2                1447814277   
3                1447990384   
4                1448281185   
...                     ...   
1805             1451914664   
1806             1451914660   
1807             1451914680   
1808             1451566922   
1809             1451566980   

                                     Variant/Haplotypes     Gene  \
0                                 CYP2C19*1, CYP2C19*17  CYP2C19   
1                                             rs9923231   VKORC1   
2                                            rs56314408   VKORC1   
3                                             rs1065852   CYP2D6   
4                                    CYP2B6*1, CYP2B6*6   CYP2B6   
...                                                 ...      ...   
1805                                            rs11615    ERCC1   
1806                                            rs13181    ERCC2   
1807           

Unnamed: 0,Variant Annotation ID,Variant/Haplotypes,Gene,Drug(s),PMID,Phenotype Category,Significance,Notes,Sentence,Alleles,Specialty Population
0,1451148445,"CYP2C19*1, CYP2C19*17",CYP2C19,normeperidine,30902024,,not stated,"In other in vitro experiments, normeperidine f...",CYP2C19 *17/*17 is associated with increased f...,*17/*17,
1,1447814273,rs9923231,VKORC1,,26847243,Other,no,,Allele T is not associated with transcription ...,T,
2,1447814277,rs56314408,VKORC1,,26847243,Other,yes,"In the European population, this SNPs is in hi...",Allele C is associated with increased transcri...,C,
3,1447990384,rs1065852,CYP2D6,bufuralol,2211621,Metabolism/PK,not stated,In vitro experiments showed a significant decr...,Allele A is associated with decreased activity...,A,
4,1448281185,"CYP2B6*1, CYP2B6*6",CYP2B6,bupropion,27439448,Efficacy,yes,The ratio of hydroxybupropion versus bupropion...,CYP2B6 *1/*1 is associated with increased acti...,*1/*1,


#### Part 2: PharmGKB clinical annotations overview

In [4]:
# Clinical annotations: all of the metadata about each clinical annotation.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
clinical_annotations_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/pharmgkb/clinicalAnnotations/clinical_annotations.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(clinical_annotations_df)
clinical_annotations_df.head()

      Clinical Annotation ID  \
0                  981755803   
1                 1449311190   
2                  981204774   
3                 1449191690   
4                 1449191746   
...                      ...   
5028              1451553326   
5029              1451553595   
5030              1451553580   
5031              1451566760   
5032              1451567040   

                                     Variant/Haplotypes           Gene  \
0                                            rs75527207           CFTR   
1                                             rs4149056        SLCO1B1   
2                                             rs1799971          OPRM1   
3                                           rs141033578           CFTR   
4                                            rs78769542           CFTR   
...                                                 ...            ...   
5028                                         rs45445694  C18orf56;TYMS   
5029                   

Unnamed: 0,Clinical Annotation ID,Variant/Haplotypes,Gene,Level of Evidence,Level Override,Level Modifiers,Score,Phenotype Category,PMID Count,Evidence Count,Drug(s),Phenotype(s),Latest History Date (YYYY-MM-DD),URL,Specialty Population
0,981755803,rs75527207,CFTR,1A,,Rare Variant; Tier 1 VIP,234.875,Efficacy,28,30,ivacaftor,Cystic Fibrosis,2021-03-24,https://www.pharmgkb.org/clinicalAnnotation/98...,Pediatric
1,1449311190,rs4149056,SLCO1B1,3,,Tier 1 VIP,2.0,Dosage,1,1,mercaptopurine;methotrexate,Precursor Cell Lymphoblastic Leukemia-Lymphoma,2021-03-24,https://www.pharmgkb.org/clinicalAnnotation/14...,Pediatric
2,981204774,rs1799971,OPRM1,4,,,-2.0,Efficacy,2,3,Drugs used in nicotine dependence;nicotine,Tobacco Use Disorder,2021-03-24,https://www.pharmgkb.org/clinicalAnnotation/98...,
3,1449191690,rs141033578,CFTR,1A,,Rare Variant; Tier 1 VIP,200.0,Efficacy,1,3,ivacaftor,Cystic Fibrosis,2021-03-24,https://www.pharmgkb.org/clinicalAnnotation/14...,
4,1449191746,rs78769542,CFTR,1A,,Rare Variant; Tier 1 VIP,200.0,Efficacy,1,3,ivacaftor,Cystic Fibrosis,2021-03-24,https://www.pharmgkb.org/clinicalAnnotation/14...,


In [5]:
# Clinical annotation alleles: the genotype- or allele-based annotation text
# and the CPIC-assigned allele function, if available.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
clinical_ann_alleles_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/pharmgkb/clinicalAnnotations/clinical_ann_alleles.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(clinical_ann_alleles_df)
clinical_ann_alleles_df.head()

       Clinical Annotation ID Genotype/Allele  \
0                   981755803              AA   
1                   981755803              AG   
2                   981755803              GG   
3                  1449311190              CC   
4                  1449311190              CT   
...                       ...             ...   
15540              1451567040             *30   
15541              1451567040             *31   
15542              1451567040             *32   
15543              1451567040             *33   
15544              1451567040             *34   

                                         Annotation Text Allele Function  
0      Patients with the rs75527207 AA genotype (two ...             NaN  
1      Patients with the rs75527207 AG genotype (one ...             NaN  
2      Patients with the rs75527207 GG genotype (do n...             NaN  
3      Patients with the CC genotype and Precursor Ce...             NaN  
4      Patients with the CT genotype

Unnamed: 0,Clinical Annotation ID,Genotype/Allele,Annotation Text,Allele Function
0,981755803,AA,Patients with the rs75527207 AA genotype (two ...,
1,981755803,AG,Patients with the rs75527207 AG genotype (one ...,
2,981755803,GG,Patients with the rs75527207 GG genotype (do n...,
3,1449311190,CC,Patients with the CC genotype and Precursor Ce...,
4,1449311190,CT,Patients with the CT genotype and Precursor Ce...,


In [6]:
# Clinical annotation evidence: information about each supporting annotation
# (variant annotation, guideline annotation, label annotation) for every
# clinical annotation.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
clinical_ann_evidence_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/pharmgkb/clinicalAnnotations/clinical_ann_evidence.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(clinical_ann_evidence_df)
clinical_ann_evidence_df.head()

       Clinical Annotation ID  Evidence ID  \
0                   981755803  PA166114461   
1                   981755803  PA166104890   
2                   981755803    981755665   
3                   981755803    981755678   
4                   981755803    982009991   
...                       ...          ...   
14761              1451553580   1451614946   
14762              1451566760   1451566720   
14763              1451567040   1451566980   
14764              1451567040   1451647340   
14765              1451567040   1451566922   

                             Evidence Type  \
0                     Guideline Annotation   
1                         Label Annotation   
2                  Variant Drug Annotation   
3                  Variant Drug Annotation   
4                  Variant Drug Annotation   
...                                    ...   
14761              Variant Drug Annotation   
14762              Variant Drug Annotation   
14763  Variant Functional Assay A

Unnamed: 0,Clinical Annotation ID,Evidence ID,Evidence Type,Evidence URL,PMID,Summary,Score
0,981755803,PA166114461,Guideline Annotation,https://www.pharmgkb.org/guidelineAnnotation/P...,,Annotation of CPIC Guideline for ivacaftor and...,100.0
1,981755803,PA166104890,Label Annotation,https://www.pharmgkb.org/labelAnnotation/PA166...,,Annotation of FDA Label for ivacaftor and CFTR,100.0
2,981755803,981755665,Variant Drug Annotation,https://www.pharmgkb.org/variantAnnotation/981...,21083385.0,Genotypes AA + AG are associated with response...,0.25
3,981755803,981755678,Variant Drug Annotation,https://www.pharmgkb.org/variantAnnotation/981...,22047557.0,Genotypes AA + AG are associated with response...,2.0
4,981755803,982009991,Variant Drug Annotation,https://www.pharmgkb.org/variantAnnotation/982...,23590265.0,Allele A is associated with response to ivacaf...,2.25


In [7]:
# Clinical annotation history: the creation date and the dates of changes or
# updates for each clinical annotation.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
clinical_ann_history_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/pharmgkb/clinicalAnnotations/clinical_ann_history.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(clinical_ann_history_df)
clinical_ann_history_df.head()

       Clinical Annotation ID Date (YYYY-MM-DD)    Type  \
0                   981755803        2018-03-28  Update   
1                   981755803        2018-11-28  Update   
2                   981755803        2020-11-04  Update   
3                   981755803        2021-02-02  Update   
4                   981755803        2021-02-02  Update   
...                       ...               ...     ...   
13188              1451553580        2021-10-21  Create   
13189              1451553580        2021-12-13  Update   
13190              1451566760        2021-10-29  Create   
13191              1451567040        2021-10-29  Create   
13192              1451567040        2022-01-13  Update   

                                                 Comment  
0                        Added PMID 25145599 to evidence  
1                        Added PMID 23628510 to evidence  
2      Updated text to match new format. Added guidel...  
3      Attached CPIC guideline and FDA label for ivac..

Unnamed: 0,Clinical Annotation ID,Date (YYYY-MM-DD),Type,Comment
0,981755803,2018-03-28,Update,Added PMID 25145599 to evidence
1,981755803,2018-11-28,Update,Added PMID 23628510 to evidence
2,981755803,2020-11-04,Update,Updated text to match new format. Added guidel...
3,981755803,2021-02-02,Update,Attached CPIC guideline and FDA label for ivac...
4,981755803,2021-02-02,Update,Small edit to text to match template.


#### Part 3: CIViC annotations overview

In [8]:
# CIViC nightly export of clinical evidence summaries.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
ClinicalEvidence_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/civic/nightly-ClinicalEvidenceSummaries.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(ClinicalEvidence_df)
ClinicalEvidence_df.head()

        gene  entrez_id variant                         disease    doid  \
0       JAK2       3717   V617F               Lymphoid Leukemia  1037.0   
1     PDGFRA       5156   D842V  Gastrointestinal Stromal Tumor  9253.0   
2     DNMT3A       1788    R882          Acute Myeloid Leukemia  9119.0   
3     DNMT3A       1788    R882          Acute Myeloid Leukemia  9119.0   
4       JAK2       3717   V617F        Chronic Myeloid Leukemia  8552.0   
...      ...        ...     ...                             ...     ...   
4030    TP53       7157   G266V                             NaN     NaN   
4031    TP53       7157   G266E                             NaN     NaN   
4032    TP53       7157   F270S                             NaN     NaN   
4033    TP53       7157   V272G                             NaN     NaN   
4034    IDH1       3417    R132              Cholangiocarcinoma  4947.0   

     phenotypes       drugs drug_interaction_type evidence_type  \
0           NaN         NaN     

Unnamed: 0,gene,entrez_id,variant,disease,doid,phenotypes,drugs,drug_interaction_type,evidence_type,evidence_direction,...,representative_transcript2,ensembl_version,reference_build,variant_summary,variant_origin,last_review_date,evidence_civic_url,variant_civic_url,gene_civic_url,is_flagged
0,JAK2,3717,V617F,Lymphoid Leukemia,1037.0,,,,Diagnostic,Supports,...,,75.0,GRCh37,JAK2 V617F is a highly recurrent mutation in m...,Somatic,2020-11-20 16:18:21 UTC,https://civicdb.org/links/evidence_items/1,https://civicdb.org/links/variants/64,https://civicdb.org/links/genes/28,False
1,PDGFRA,5156,D842V,Gastrointestinal Stromal Tumor,9253.0,,,,Diagnostic,Supports,...,,75.0,GRCh37,PDGFRA D842 mutations are characterized broadl...,Somatic,2015-06-21 16:49:38 UTC,https://civicdb.org/links/evidence_items/2,https://civicdb.org/links/variants/99,https://civicdb.org/links/genes/38,False
2,DNMT3A,1788,R882,Acute Myeloid Leukemia,9119.0,,,,Diagnostic,Supports,...,,75.0,GRCh37,DNMT3A R882 mutations are associated with cyto...,Somatic,2016-03-26 22:10:59 UTC,https://civicdb.org/links/evidence_items/3,https://civicdb.org/links/variants/32,https://civicdb.org/links/genes/18,False
3,DNMT3A,1788,R882,Acute Myeloid Leukemia,9119.0,,,,Diagnostic,Supports,...,,75.0,GRCh37,DNMT3A R882 mutations are associated with cyto...,Somatic,2016-03-16 22:09:27 UTC,https://civicdb.org/links/evidence_items/4,https://civicdb.org/links/variants/32,https://civicdb.org/links/genes/18,False
4,JAK2,3717,V617F,Chronic Myeloid Leukemia,8552.0,,,,Diagnostic,Supports,...,,75.0,GRCh37,JAK2 V617F is a highly recurrent mutation in m...,Somatic,2015-06-21 16:49:39 UTC,https://civicdb.org/links/evidence_items/5,https://civicdb.org/links/variants/64,https://civicdb.org/links/genes/28,False


In [9]:
# CIViC nightly export of gene summaries.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
GeneSummaries_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/civic/nightly-GeneSummaries.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(GeneSummaries_df)
GeneSummaries_df.head()

     gene_id                         gene_civic_url    name  entrez_id  \
0          1      https://civicdb.org/links/genes/1     ALK        238   
1          2      https://civicdb.org/links/genes/2    AKT1        207   
2          3      https://civicdb.org/links/genes/3    ARAF        369   
3          4      https://civicdb.org/links/genes/4    ABL1         25   
4          5      https://civicdb.org/links/genes/5    BRAF        673   
..       ...                                    ...     ...        ...   
484    26599  https://civicdb.org/links/genes/26599   ECSCR     641700   
485    32083  https://civicdb.org/links/genes/32083    KLLN  100144748   
486    34321  https://civicdb.org/links/genes/34321    DUX4  100288687   
487    41522  https://civicdb.org/links/genes/41522  PRNCR1  101867536   
488    56400  https://civicdb.org/links/genes/56400    COX2       4513   

                                           description  \
0    ALK amplifications, fusions and mutations have..

Unnamed: 0,gene_id,gene_civic_url,name,entrez_id,description,last_review_date,is_flagged
0,1,https://civicdb.org/links/genes/1,ALK,238,"ALK amplifications, fusions and mutations have...",2017-03-06 00:00:15 UTC,False
1,2,https://civicdb.org/links/genes/2,AKT1,207,"AKT1, also referred to as protein kinase B, is...",2017-02-09 21:58:06 UTC,False
2,3,https://civicdb.org/links/genes/3,ARAF,369,ARAF has recently become increasingly consider...,2015-06-21 16:49:19 UTC,False
3,4,https://civicdb.org/links/genes/4,ABL1,25,ABL1 is most relevant to cancer in its role in...,2015-06-21 16:49:20 UTC,False
4,5,https://civicdb.org/links/genes/5,BRAF,673,BRAF mutations are found to be recurrent in ma...,2019-09-19 19:32:44 UTC,False


In [10]:
# CIViC nightly export of variant group summaries.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
VariantGroupSummaries_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/civic/nightly-VariantGroupSummaries.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(VariantGroupSummaries_df)
VariantGroupSummaries_df.head()

    variant_group_id                      variant_group_civic_url  \
0                  1   https://civicdb.org/links/variant_groups/1   
1                  2   https://civicdb.org/links/variant_groups/2   
2                  3   https://civicdb.org/links/variant_groups/3   
3                  4   https://civicdb.org/links/variant_groups/4   
4                  5   https://civicdb.org/links/variant_groups/5   
5                  6   https://civicdb.org/links/variant_groups/6   
6                  7   https://civicdb.org/links/variant_groups/7   
7                  8   https://civicdb.org/links/variant_groups/8   
8                  9   https://civicdb.org/links/variant_groups/9   
9                 10  https://civicdb.org/links/variant_groups/10   
10                11  https://civicdb.org/links/variant_groups/11   
11                12  https://civicdb.org/links/variant_groups/12   
12                13  https://civicdb.org/links/variant_groups/13   
13                14  https://civi

Unnamed: 0,variant_group_id,variant_group_civic_url,variant_group,description,last_review_date,is_flagged
0,1,https://civicdb.org/links/variant_groups/1,Imatinib Resistance,While imatinib has shown to be incredibly succ...,2015-06-21 16:49:55 UTC,False
1,2,https://civicdb.org/links/variant_groups/2,KIT Exon 17,,2016-04-15 17:15:01 UTC,False
2,3,https://civicdb.org/links/variant_groups/3,Crizotinib Resistance,The ALK oncogene has long been considered a dr...,2015-06-21 16:49:55 UTC,False
3,4,https://civicdb.org/links/variant_groups/4,KIT Exon 11,,2016-04-15 17:15:01 UTC,False
4,5,https://civicdb.org/links/variant_groups/5,Other V600's,While BRAF V600E is nearly ubiquitous in many ...,2015-06-21 16:49:55 UTC,False


In [11]:
# CIViC nightly export of variant summaries.
# on_bad_lines="warn" still drops malformed rows (same resulting frame as
# "skip") but emits a warning for each one, so data loss is no longer silent.
VariantSummaries_df = pd.read_csv(
    "../datasets/rawData-21-Dec-2022/civic/nightly-VariantSummaries.tsv",
    sep="\t",
    on_bad_lines="warn",
)
print(VariantSummaries_df)
VariantSummaries_df.head()

      variant_id                        variant_civic_url  gene  entrez_id  \
0              1     https://civicdb.org/links/variants/1  ABL1         25   
1              2     https://civicdb.org/links/variants/2  ABL1         25   
2              3     https://civicdb.org/links/variants/3  ABL1         25   
3              4     https://civicdb.org/links/variants/4  AKT1        207   
4              5     https://civicdb.org/links/variants/5   ALK        238   
...          ...                                      ...   ...        ...   
1610        4113  https://civicdb.org/links/variants/4113  TP53       7157   
1611        4114  https://civicdb.org/links/variants/4114  TP53       7157   
1612        4115  https://civicdb.org/links/variants/4115  TP53       7157   
1613        4122  https://civicdb.org/links/variants/4122  TP53       7157   
1614        4123  https://civicdb.org/links/variants/4123  TP53       7157   

             variant                                           

Unnamed: 0,variant_id,variant_civic_url,gene,entrez_id,variant,summary,variant_groups,chromosome,start,stop,...,variant_types,hgvs_expressions,last_review_date,civic_variant_evidence_score,allele_registry_id,clinvar_ids,variant_aliases,assertion_ids,assertion_civic_urls,is_flagged
0,1,https://civicdb.org/links/variants/1,ABL1,25,BCR::ABL,"The BCR-ABL fusion protein, commonly referred ...",ABL1 fusions in B-ALL,22,23522397.0,23632600.0,...,transcript_fusion,,2022-03-10 22:27:25 UTC,255.5,,,"T(9;22)(Q34;Q11),BCR-ABL1,BCR-ABL",,,False
1,2,https://civicdb.org/links/variants/2,ABL1,25,BCR::ABL T315I,While the efficacy of imatinib has revolutioni...,Imatinib Resistance,9,133748283.0,133748283.0,...,"missense_variant,transcript_fusion","NM_007313.2:c.1001C>T,NP_005148.2:p.Thr315Ile,...",2022-03-10 19:55:19 UTC,146.0,CA122575,12624.0,"THR334ILE,RS121913459,BCR-ABL THR315ILE,BCR-AB...",,,False
2,3,https://civicdb.org/links/variants/3,ABL1,25,BCR::ABL E255K,While the efficacy of imatinib has revolutioni...,Imatinib Resistance,9,133738363.0,133738363.0,...,"missense_variant,transcript_fusion","NC_000009.11:g.133738363G>A,NM_005157.5:c.763G...",2022-03-11 04:36:22 UTC,83.0,CA16602551,376090.0,"E274K,RS121913448,BCR-ABL GLU255LYS,BCR-ABL1 E...",,,False
3,4,https://civicdb.org/links/variants/4,AKT1,207,E17K,AKT1 E17K is a recurrent mutation that has bee...,,14,105246551.0,105246551.0,...,missense_variant,"NC_000014.8:g.105246551C>T,NM_001014432.1:c.49...",2021-11-12 18:53:34 UTC,33.5,CA123660,13983.0,"GLU17LYS,RS34409589",,,False
4,5,https://civicdb.org/links/variants/5,ALK,238,EML4::ALK,The EML4-ALK fusion variant 1 consisting of AL...,ALK Fusions,2,42396490.0,42522656.0,...,transcript_fusion,,2022-03-10 21:08:52 UTC,38.0,,,EML4-ALK,,,False
