# Make counts table

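This notebook counts, for each cancer type, how many patients have a CNV event (amplification or deletion) at each gene on the selected chromosome. A gene counts as amplified in a patient when its CNV log ratio is above `CUTOFF`, and as deleted when it is below `-CUTOFF`.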

In [1]:
import pandas as pd
import numpy as np
import cptac
import cnvutils
import cptac.utils as ut
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

In [2]:
# Chromosome whose genes are tabulated. Kept as a string because it is compared
# against the "chromosome" column of the gene locations table.
CHROMOSOME = "8"

# Log ratio threshold for calling a CNV event: a value counts as a gain when it is
# strictly above CUTOFF and as a loss when it is strictly below -CUTOFF.
CUTOFF = 0.2

In [3]:
# Short cancer-type label -> cptac dataset class. The classes are stored
# uninstantiated; each one is only called when its data is actually needed.
datasets = dict(
    brca=cptac.Brca,
    ccrcc=cptac.Ccrcc,
    colon=cptac.Colon,
    endo=cptac.Endometrial,
    gbm=cptac.Gbm,
    hnscc=cptac.Hnscc,
    lscc=cptac.Lscc,
    luad=cptac.Luad,
    ovarian=cptac.Ovarian,
)

## Load tables and get counts for genes on our chromosome

In [4]:
# Load the gene location table, then keep only the rows whose "chromosome"
# entry matches the chromosome selected in the config cell.
gene_locations = cnvutils.get_gene_locations()
on_chromosome = gene_locations["chromosome"] == CHROMOSOME
chr_gene_locations = gene_locations[on_chromosome]

In [5]:
def get_gain_counts(row, cutoff=None):
    """Count the values in a row that exceed the gain threshold.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to test (in this notebook, the CNV log ratios of one
        gene across patients). Missing values never compare greater than the
        threshold, so they are not counted.
    cutoff : float, optional
        Threshold a value must strictly exceed to count as a gain. When
        omitted, the notebook-level CUTOFF is used (looked up at call time).

    Returns
    -------
    int
        Number of entries in `row` that are strictly greater than `cutoff`.
    """
    if cutoff is None:
        # Fall back to the notebook-wide setting so existing calls such as
        # df.apply(get_gain_counts, axis=1) keep working unchanged.
        cutoff = CUTOFF
    return len(row[row > cutoff])

In [6]:
def get_loss_counts(row, cutoff=None):
    """Count the values in a row that fall below the loss threshold.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to test (in this notebook, the CNV log ratios of one
        gene across patients). Missing values never compare less than the
        threshold, so they are not counted.
    cutoff : float, optional
        Magnitude of the threshold; a value counts as a loss when it is
        strictly less than `-cutoff`. When omitted, the notebook-level CUTOFF
        is used (looked up at call time).

    Returns
    -------
    int
        Number of entries in `row` that are strictly less than `-cutoff`.
    """
    if cutoff is None:
        # Fall back to the notebook-wide setting so existing calls such as
        # df.apply(get_loss_counts, axis=1) keep working unchanged.
        cutoff = CUTOFF
    return len(row[row < -cutoff])

In [7]:
# Build one long-format table of per-gene gain/loss counts covering every cancer type.
#
# The per-cancer tables are collected in a list and concatenated once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and it
# re-copied the whole accumulated table on every iteration.

# Gene names on the selected chromosome (first index level of the location table).
# This does not change between cancer types, so it is computed once.
chr_gene_names = chr_gene_locations.index.get_level_values(0)

cnv_frames = []
for cancer_type, load_dataset in datasets.items():

    # After transposing, the columns are counted as patients below
    cnv = load_dataset().get_CNV().transpose()
    num_patients = cnv.shape[1]

    # Get just our chromosome
    chr_cnv = cnv[cnv.index.get_level_values(0).isin(chr_gene_names)]

    # Calculate both counts from the CNV values alone and store them in a new frame,
    # so neither count can see the other's column and the filtered slice is never
    # modified in place
    counts = pd.DataFrame({
        'gain': chr_cnv.apply(get_gain_counts, axis=1),
        'loss': chr_cnv.apply(get_loss_counts, axis=1),
    })

    # Join in locations, then reshape to one row per (gene, gain/loss) pair
    counts_long = counts.join(chr_gene_locations).melt(
        id_vars=['start_bp', 'end_bp'],
        value_vars=['gain', 'loss'],
        ignore_index=False
    )

    cnv_frames.append(
        counts_long.assign(
            cancer_type_total_patients=num_patients,
            cancer=cancer_type
        )
    )

# pd.concat raises on an empty list, so fall back to an empty table when there are
# no datasets (this matches what the append-based loop produced in that case)
cnv_long = pd.concat(cnv_frames) if cnv_frames else pd.DataFrame()

                                                



                                            

In [8]:
# Display the combined long-format table as a sanity check before saving
cnv_long

Unnamed: 0_level_0,Unnamed: 1_level_0,start_bp,end_bp,variable,value,cancer_type_total_patients,cancer
Name,Database_ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AARD,ENSG00000205002.3,116938207.0,116944487.0,gain,79,122,brca
ABRA,ENSG00000174429.3,106759483.0,106770244.0,gain,79,122,brca
ADAM18,ENSG00000168619.11,39584489.0,39730065.0,gain,49,122,brca
ADAM2,ENSG00000104755.10,39743735.0,39838289.0,gain,44,122,brca
ADAM28,ENSG00000042980.8,24294069.0,24359014.0,gain,22,122,brca
ADAM32,ENSG00000197140.10,39106990.0,39284917.0,gain,49,122,brca
ADAM3A,ENSG00000197475.7,39451045.0,39522852.0,gain,48,122,brca
ADAM5,ENSG00000196115.8,39314591.0,39417378.0,gain,48,122,brca
ADAM7,ENSG00000069206.11,24440930.0,24526970.0,gain,20,122,brca
ADAM9,ENSG00000168615.7,38996869.0,39105261.0,gain,42,122,brca


## Save combined table

In [9]:
# Order rows by cancer type and then by gene start position, and move the gene
# index levels into ordinary columns so they are written out as data.
cnv_long = (
    cnv_long
    .sort_values(['cancer', 'start_bp'])
    .reset_index()
)

In [10]:
# Write the combined table as a tab-separated file named after the chromosome
output_path = f'chr{CHROMOSOME}_cnv_counts.tsv'
cnv_long.to_csv(output_path, sep='\t')