# Create Event Tables

The first thing we need to do is create tables that indicate whether or not each patient has the given event. A table should be created for each event.

## Setup

In [1]:
import cptac
import pandas as pd
import cnvutils



In [2]:
# Minimum fraction of the values in an event region that must pass the
# gain/loss threshold for the event to be called.
CUTOFF = 0.8

# Chromosome on which the events below are located.
CHROMOSOME = '5'

# Event boundaries as (start, end), compared against start_bp / end_bp.
GAIN_EVENT = (92_151, 50_396_192)
LOSS_EVENT = (51_383_448, 148_169_733)


In [3]:
# Cancer types to examine for this event, each mapped to the cptac dataset
# class that provides it. Edit this mapping to change which cancers are used.
dataset_classes = {
    "brca": cptac.Brca,
    "colon": cptac.Colon,
    "ccrcc": cptac.Ccrcc,
    "endo": cptac.Endometrial,
    "gbm": cptac.Gbm,
    "hnscc": cptac.Hnscc,
    "lscc": cptac.Lscc,
    "luad": cptac.Luad,
    "ovarian": cptac.Ovarian,
}

# Instantiate each dataset in turn and keep its CNV table, keyed by cancer type.
cnv_tables = {
    cancer_type: dataset_class().get_CNV()
    for cancer_type, dataset_class in dataset_classes.items()
}

Checking that hnscc index is up-to-date...      



Checking that lscc index is up-to-date... 



Checking that luad index is up-to-date...



                                            

## Append Gene location data

In [4]:
# Gene location table from the project's cnvutils helper; it is joined onto the
# transposed CNV tables in the next cell. Presumably it is indexed by two levels
# (the join drops level 1 for single-index tables) and supplies the chromosome,
# start_bp, end_bp and arm columns used later — TODO confirm against cnvutils.
locations = cnvutils.get_gene_locations()

In [5]:
# Attach the gene location columns to every cancer type's CNV table.
for cancer_type, df in cnv_tables.items():
    # Transpose the table so its index can be joined against `locations`.
    df = df.transpose()
    # When the table's index is not a MultiIndex, drop the second index level
    # of `locations` before joining; otherwise join on `locations` as is.
    if isinstance(df.index, pd.MultiIndex):
        gene_locations = locations
    else:
        gene_locations = locations.droplevel(1)
    new_df = df.join(gene_locations)
    # Discard every row left with a missing value after the join.
    cnv_tables[cancer_type] = new_df.dropna()

## Get percentage with event

In [6]:
def has_gain_event(row, cutoff, threshold=0.2):
    """Report whether enough of a row's values reach the gain threshold.

    Parameters
    ----------
    row : iterable of numbers
        The values to inspect.
    cutoff : float
        Minimum fraction of values that must be ``>= threshold`` for the
        event to be called.
    threshold : float, optional
        Value at or above which a single entry counts as a gain.
        Defaults to 0.2.

    Returns
    -------
    bool
        True when the fraction of values at or above ``threshold`` is at
        least ``cutoff``; False for an empty row.
    """
    values = list(row)
    # An empty row carries no evidence of an event and would otherwise
    # cause a division by zero.
    if not values:
        return False
    gained = sum(1 for x in values if x >= threshold)
    # Compare against the caller-supplied cutoff, not a module-level constant.
    return gained / len(values) >= cutoff

In [7]:
def has_loss_event(row, cutoff, threshold=-0.2):
    """Report whether enough of a row's values reach the loss threshold.

    Parameters
    ----------
    row : iterable of numbers
        The values to inspect.
    cutoff : float
        Minimum fraction of values that must be ``<= threshold`` for the
        event to be called.
    threshold : float, optional
        Value at or below which a single entry counts as a loss.
        Defaults to -0.2.

    Returns
    -------
    bool
        True when the fraction of values at or below ``threshold`` is at
        least ``cutoff``; False for an empty row.
    """
    values = list(row)
    # An empty row carries no evidence of an event and would otherwise
    # cause a division by zero.
    if not values:
        return False
    lost = sum(1 for x in values if x <= threshold)
    # Compare against the caller-supplied cutoff, not a module-level constant.
    return lost / len(values) >= cutoff

In [8]:
# For every cancer type, flag which columns of the CNV table show each event
# and save the result as a tab-separated file.
event_specs = (
    ('gain_event', GAIN_EVENT, has_gain_event),
    ('loss_event', LOSS_EVENT, has_loss_event),
)
location_columns = ['chromosome', 'start_bp', 'end_bp', 'arm']

for cancer_type, df in cnv_tables.items():
    # Restrict the table to rows on the chromosome of interest.
    df_chromosome = df[df.chromosome == CHROMOSOME]
    event_dict = {}
    for label, bounds, has_event in event_specs:
        # An unset (falsy) event is skipped and contributes no column.
        if not bounds:
            continue
        # Keep rows lying strictly inside the event's (start, end) window.
        in_window = (df_chromosome.start_bp > bounds[0]) & (df_chromosome.end_bp < bounds[1])
        region = df_chromosome[in_window].drop(columns=location_columns)
        # One boolean per remaining column: does it show the event?
        event_dict[label] = region.apply(lambda column: has_event(column, CUTOFF))
    # One column per evaluated event.
    event_table = pd.DataFrame(event_dict)
    event_table.to_csv(f'{cancer_type}_has_event.tsv', sep='\t')