## Import available `cdapython` functions

In [1]:
import numpy as np
import pandas as pd

import itables.options as opt
from itables import init_notebook_mode, show

# Switch on itables' notebook integration for all outputs in this notebook.
init_notebook_mode(all_interactive=True)

# Global itables display options, applied to every table rendered below.
# NOTE(review): a value of 0 for maxBytes / maxColumns presumably lifts
# itables' size-based truncation -- confirm against the itables docs.
opt.maxBytes = 0
# NOTE(review): "200px" looks like a height, which DataTables normally takes
# via scrollY (scrollX is documented as a boolean) -- confirm this is intended.
opt.scrollX = "200px"
opt.scrollCollapse = True
opt.paging = True
opt.maxColumns = 0

In [2]:
from cdapython.data_exploration import tables, columns, column_values, column_data_types
from cdapython.fetch import fetch_rows
from cdapython.query import summary_counts

## Get a list of searchable CDA tables

In [3]:
# List the table names available for searching (the output below shows seven).
tables()

['diagnosis',
 'file',
 'researchsubject',
 'somatic_mutation',
 'specimen',
 'subject',
 'treatment']

## Explore CDA tables' columns in detail

In [4]:
# Show column metadata; the rendered header lists table, column, data_type,
# nullable and description fields.
columns()

table,column,data_type,nullable,description
Loading... (need help?),,,,


## See what values are populated in a given column

In [5]:
# Show the values found in `primary_diagnosis_site`; the rendered header
# pairs each value with a count.
column_values('primary_diagnosis_site')

primary_diagnosis_site,count
Loading... (need help?),


## Fetch subject row summary information for a column value

In [6]:
# Summarise subject rows matching the filter below.
# NOTE(review): `kid*` presumably acts as a wildcard matching values that
# start with "kid" (e.g. kidney) -- confirm against the cdapython docs.
summary_counts(
    table='subject',
    match_all='primary_diagnosis_site = kid*',
)

{'total_subject_matches': 3806,
 'total_related_files': 515873,
 'subject_identifier_system': {'CDS': 81,
  'GDC': 3506,
  'IDC': 1375,
  'PDC': 228},
 'sex': {'F': 88,
  'female': 1262,
  'Female': 39,
  'M': 122,
  'male': 2239,
  'Male': 51,
  'not reported': 1,
  'unknown': 1,
  None: 3},
 'race': {'american indian or alaska native': 2,
  'asian': 67,
  'Asian': 1,
  'black or african american': 327,
  'Black or African American': 10,
  'native hawaiian or other pacific islander': 1,
  'not allowed to collect': 25,
  'not reported': 499,
  'other': 84,
  'unknown': 3,
  'Unknown': 103,
  'white': 2395,
  'White': 59,
  None: 230},
 'ethnicity': {'1': 4,
  'C': 6,
  'H': 1,
  'hispanic or latino': 179,
  'Hispanic or Latino': 1,
  'not allowed to collect': 43,
  'not hispanic or latino': 2151,
  'Not Hispanic or Latino': 71,
  'not reported': 1073,
  'Not Reported': 1,
  'UNK': 2,
  'unknown': 2,
  'Unknown': 55,
  'W': 3,
  None: 214},
 'cause_of_death': {'Cancer Related': 151,
  '

## Fetch rows from any table for a column value

## Fetch somatic mutation rows for a column value

In [7]:
# Fetch rows from the `somatic_mutation` table, filtered to a single
# `subject_id` value.
fetch_rows(
    table='somatic_mutation',
    match_all='subject_id = TCGA.TCGA-04-1369',
)

case_id,cda_subject_id,entrez_gene_id,file_gdc_id,hgnc_id,transcript_id,aliquot_barcode_normal,aliquot_barcode_tumor,all_effects,allele,amino_acids,appris,bam_file,biotype,callers,canonical,case_barcode,ccds,cdna_position,cds_position,center,chromosome,clin_sig,codons,consequence,context,cosmic,dbsnp_rs,dbsnp_val_status,distance,domains,end_position,ensp,esp_aa_af,esp_ea_af,existing_variation,exon,exon_number,feature,feature_type,flags,gdc_filter,gene,gene_pheno,gnomad_af,gnomad_afr_af,gnomad_amr_af,gnomad_asj_af,gnomad_eas_af,gnomad_fin_af,gnomad_nfe_af,gnomad_non_cancer_af,gnomad_non_cancer_afr_af,gnomad_non_cancer_ami_af,gnomad_non_cancer_amr_af,gnomad_non_cancer_asj_af,gnomad_non_cancer_eas_af,gnomad_non_cancer_fin_af,gnomad_non_cancer_max_af_adj,gnomad_non_cancer_max_af_pops_adj,gnomad_non_cancer_mid_af,gnomad_non_cancer_nfe_af,gnomad_non_cancer_oth_af,gnomad_non_cancer_sas_af,gnomad_oth_af,gnomad_sas_af,hgvs_offset,hgvsc,hgvsp,hgvsp_short,high_inf_pos,hotspot,hugo_symbol,impact,intron,mane,match_norm_seq_allele1,match_norm_seq_allele2,match_norm_validation_allele1,match_norm_validation_allele2,matched_norm_aliquot_barcode,matched_norm_aliquot_uuid,max_af,max_af_pops,mirna,motif_name,motif_pos,motif_score_change,muse,mutation_status,mutect2,n_alt_count,n_depth,n_ref_count,ncbi_build,normal_submitter_uuid,one_consequence,pheno,pick,pindel,polyphen,primary_site,project_short_name,protein_position,pubmed,reference_allele,refseq,rna_alt_count,rna_depth,rna_ref_count,rna_support,sample_barcode_normal,sample_barcode_tumor,score,sequence_source,sequencer,sequencing_phase,sift,somatic,start_position,strand,swissprot,symbol,symbol_source,t_alt_count,t_depth,t_ref_count,thousg_af,thousg_afr_af,thousg_amr_af,thousg_eas_af,thousg_eur_af,thousg_sas_af,transcript_strand,transcription_factors,trembl,tsl,tumor_aliquot_barcode,tumor_aliquot_uuid,tumor_seq_allele1,tumor_seq_allele2,tumor_submitter_uuid,tumor_validation_allele1,tumor_validation_allele2,uniparc,uniprot_isoform,val
idation_method,validation_status,variant_class,variant_classification,variant_type,varscan2,verification_status
Loading... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
