# Create Counts Tables

Several of our analyses need a table of how many patients have a copy number gain or loss for each gene in each cancer type. This notebook builds that table and saves it to `cnv_counts.tsv` for use in those analyses.

## Setup

In [1]:
import pandas as pd
import cptac
import numpy as np
import cnvutils



In [2]:
# Thresholds for calling a copy number event from a single CNV value.
# Values strictly greater than COPY_NUMBER_GAIN are counted as a gain.
COPY_NUMBER_GAIN = 0.2
# Values strictly less than COPY_NUMBER_LOSS are counted as a loss.
COPY_NUMBER_LOSS = -0.2

In [3]:
# Load the CNV table for every cancer type, keyed by a short cancer label.
# Each dataset class is instantiated and queried in the order listed here.
cnv = {
    cancer_label: dataset_class().get_CNV()
    for cancer_label, dataset_class in (
        ('BRCA', cptac.Brca),
        ('CCRCC', cptac.Ccrcc),
        ('COLON', cptac.Colon),
        ('ENDO', cptac.Endometrial),
        ('GBM', cptac.Gbm),
        ('HNSCC', cptac.Hnscc),
        ('LSCC', cptac.Lscc),
        ('LUAD', cptac.Luad),
        ('OVARIAN', cptac.Ovarian),
    )
}

Checking that hnscc index is up-to-date...      



Checking that lscc index is up-to-date... 



Checking that luad index is up-to-date...



                                            

## Create Table

In [4]:
def get_gain_counts(row, threshold=None):
    """Count the values in ``row`` that qualify as a copy number gain.

    Parameters
    ----------
    row : pandas.Series or numpy.ndarray
        CNV values to inspect.
    threshold : float, optional
        Values strictly greater than this are counted. When omitted, the
        notebook-level ``COPY_NUMBER_GAIN`` constant is used.

    Returns
    -------
    integer
        Number of entries above the threshold. Missing values (NaN) never
        compare greater than the threshold, so they are not counted.
    """
    if threshold is None:
        # Resolve the default at call time so the notebook constant stays
        # the single source of truth when no explicit threshold is given.
        threshold = COPY_NUMBER_GAIN
    return np.sum(row > threshold)

In [5]:
def get_loss_counts(row, threshold=None):
    """Count the values in ``row`` that qualify as a copy number loss.

    Parameters
    ----------
    row : pandas.Series or numpy.ndarray
        CNV values to inspect.
    threshold : float, optional
        Values strictly less than this are counted. When omitted, the
        notebook-level ``COPY_NUMBER_LOSS`` constant is used.

    Returns
    -------
    integer
        Number of entries below the threshold. Missing values (NaN) never
        compare less than the threshold, so they are not counted.
    """
    if threshold is None:
        # Resolve the default at call time so the notebook constant stays
        # the single source of truth when no explicit threshold is given.
        threshold = COPY_NUMBER_LOSS
    return np.sum(row < threshold)

In [6]:
# Build one gain/loss summary per cancer type; the pieces are stacked later.
counts_list = []
summary_columns = ['gain', 'loss', 'cancer']
for cancer_type, cnv_table in cnv.items():
    # Transpose so that each row is a gene and each column a patient.
    by_gene = cnv_table.T
    # Both counts are computed from the untouched transposed table before
    # any summary column is attached to it.
    by_gene = by_gene.assign(
        gain=by_gene.apply(get_gain_counts, axis=1),
        loss=by_gene.apply(get_loss_counts, axis=1),
        cancer=cancer_type,
    )
    # Keep only the summary columns and turn the gene index into columns.
    counts_list.append(by_gene.loc[:, summary_columns].reset_index())

In [7]:
# Stack the per-cancer summaries row-wise and renumber the rows from zero.
counts = pd.concat(counts_list, axis=0, ignore_index=True)

In [8]:
# Preview the first five rows of the combined counts table.
counts.head(n=5)

Unnamed: 0,Name,Database_ID,gain,loss,cancer
0,7SK,ENSG00000232512.2,15,18,BRCA
1,7SK,ENSG00000249352.3,11,31,BRCA
2,7SK,ENSG00000254144.2,66,7,BRCA
3,7SK,ENSG00000260682.2,9,63,BRCA
4,7SK,ENSG00000271765.1,13,23,BRCA


## Append Gene Location

In [9]:
# Load gene locations from the project helper module. The reshape step further
# down expects this table to supply the start_bp, end_bp, chromosome and arm
# columns, since the counts table does not contain them.
locations = cnvutils.get_gene_locations()

In [10]:
# Attach location columns to every counts row. No keys are given, so pandas
# joins on the column names the two tables share, using an inner join.
# NOTE(review): presumably the shared columns are Name and Database_ID - confirm
# against the locations table. The previewed result rows have empty location
# fields, which is worth checking too.
counts_with_loc = pd.merge(left=counts, right=locations.reset_index())

In [11]:
# Reshape to long format: the 'gain' and 'loss' columns are folded into a
# 'variable' column (event type) and a 'value' column (patient count), while
# the gene, location and cancer columns are repeated for each event type.
id_columns = ['Name', 'Database_ID', 'start_bp', 'end_bp', 'cancer', 'chromosome', 'arm']
cnv_counts = pd.melt(
    counts_with_loc,
    id_vars=id_columns,
    value_vars=['gain', 'loss'],
)

In [12]:
# Save the long-format counts as a tab-separated file in the working directory.
# The row index is written as well (pandas default), as an unnamed first column.
cnv_counts.to_csv(path_or_buf="cnv_counts.tsv", sep='\t')

In [13]:
# Preview the first five rows of the final long-format table.
cnv_counts.head(n=5)

Unnamed: 0,Name,Database_ID,start_bp,end_bp,cancer,chromosome,arm,variable,value
0,7SK,ENSG00000232512.2,,,BRCA,,,gain,15
1,7SK,ENSG00000249352.3,,,BRCA,,,gain,11
2,7SK,ENSG00000254144.2,,,BRCA,,,gain,66
3,7SK,ENSG00000260682.2,,,BRCA,,,gain,9
4,7SK,ENSG00000271765.1,,,BRCA,,,gain,13
