# Make counts table

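This notebook counts, for each cancer type, how many patients have a copy number variation (CNV) at each gene on the selected chromosome. A gene counts as a gain (amplification) or a loss (deletion) for a patient when the magnitude of its CNV value exceeds the cutoff loaded from the parameters file.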

In [1]:
import cnvutils
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import numpy as np
import os
import pandas as pd
import seaborn as sns

## Import parameters

These were set in the set_parameters notebook.

In [2]:
# Read the shared parameter file and pull out the settings this notebook uses.
params_path = os.path.join("data", "params.json")
params = cnvutils.load_params(params_path)

# Chromosome whose genes are counted.
CHROMOSOME = params["CHROMOSOME"]

# Magnitude a CNV value must exceed to count as a gain (> CUTOFF) or a
# loss (< -CUTOFF).
CUTOFF = params["GENE_CNV_MAGNITUDE_CUTOFF"]

# Cancer types to load and tabulate.
CANCER_TYPES = params["CANCER_TYPES"]

## Load tables

In [3]:
# Only the CNV tables are needed for the counts.
# NOTE(review): the saved run of this cell raised DataVersionNotInstalledError,
# so the required cptac datasets presumably have to be downloaded before this
# notebook can run -- confirm the setup steps.
data_types = ["CNV"]
tables = cnvutils.load_tables(
    CANCER_TYPES,
    data_types,
    pancan=True,
)

# `cnv` is indexed by cancer type further down the notebook.
cnv = tables["CNV"]

                                              

DataVersionNotInstalledError: broadbrca data version 1.0 is not installed. To install, call the download function (either 'cptac.download' or 'cptac.pancan.download', depending on which module you're using), passing 'broadbrca' to the 'dataset' parameter and '1.0' to the 'version' parameter.

In [None]:
# Same assignment as the last line of the load cell above; running it again
# simply rebinds `cnv` to the CNV tables.
cnv = tables["CNV"]

## Get counts for genes on our chromosome

In [None]:
# Look up gene locations, then keep only the genes on the chromosome of
# interest.
gene_locations = cnvutils.get_gene_locations()
chr_gene_locations = gene_locations.loc[
    gene_locations["chromosome"] == CHROMOSOME
]

In [None]:
def get_gain_counts(row, cutoff=None):
    """Count the entries of `row` that are strictly greater than the cutoff.

    Parameters:
    row (pandas.Series): The values to test.
    cutoff (float, optional): Threshold a value must exceed to be counted.
        Defaults to the notebook-level CUTOFF, so the function can still be
        passed directly to DataFrame.apply.

    Returns:
    int: The number of entries above the cutoff. Missing values compare as
        False and are therefore never counted.
    """
    if cutoff is None:
        cutoff = CUTOFF

    # Summing the boolean mask counts the True entries without building the
    # filtered Series first.
    return int((row > cutoff).sum())

In [None]:
def get_loss_counts(row, cutoff=None):
    """Count the entries of `row` that are strictly less than the negated cutoff.

    Parameters:
    row (pandas.Series): The values to test.
    cutoff (float, optional): Magnitude threshold; a value is counted when it
        is below -cutoff. Defaults to the notebook-level CUTOFF, so the
        function can still be passed directly to DataFrame.apply.

    Returns:
    int: The number of entries below -cutoff. Missing values compare as
        False and are therefore never counted.
    """
    if cutoff is None:
        cutoff = CUTOFF

    # Summing the boolean mask counts the True entries without building the
    # filtered Series first.
    return int((row < -cutoff).sum())

In [None]:
# Build one long-format frame per cancer type and concatenate them once at the
# end. DataFrame.append was removed in pandas 2.0, and growing a frame inside
# the loop re-copies every accumulated row on each iteration.
cnv_long_parts = []
for cancer_type in CANCER_TYPES:

    # After the transpose the columns are counted as patients and the rows
    # are matched against the gene locations index.
    df = cnv[cancer_type].transpose()
    num_patients = df.shape[1]

    # Get just our chromosome
    on_chromosome = df.index.get_level_values(0).isin(
        chr_gene_locations.index.get_level_values(0)
    )
    df = df[on_chromosome]

    # Calculate both counts from the patient columns alone, before either
    # count column is attached, so neither count can see the other. Using
    # assign also avoids writing into a filtered slice.
    gain_counts = df.apply(get_gain_counts, axis=1)
    loss_counts = df.apply(get_loss_counts, axis=1)
    df = df.assign(gain=gain_counts, loss=loss_counts)

    # Join in locations
    df = df.join(chr_gene_locations)

    # Reshape to one row per (gene, gain/loss); ignore_index=False keeps the
    # gene index on the melted rows.
    df = df.melt(
        id_vars=['start_bp', 'end_bp'],
        value_vars=['gain', 'loss'],
        ignore_index=False
    )

    df = df.assign(
        cancer_type_total_patients=num_patients,
        cancer=cancer_type
    )

    cnv_long_parts.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# cancer types were configured.
cnv_long = pd.concat(cnv_long_parts) if cnv_long_parts else pd.DataFrame()

In [None]:
# Display the combined long-format counts table.
cnv_long

## Save combined table

In [None]:
# Order rows by cancer type and gene start position, then move the index into
# ordinary columns so it is written out with the rest of the table.
# Caution: this cell rebinds `cnv_long`, so running it a second time resets
# the index again and adds an extra index column.
cnv_long = (
    cnv_long
    .sort_values(['cancer', 'start_bp'])
    .reset_index()
)

In [None]:
# Write the combined table as a tab-separated file; the row index is omitted.
output_path = os.path.join("data", "cnv_counts_pancan.tsv")
cnv_long.to_csv(output_path, sep='\t', index=False)