# Create Event Tables

The first thing we need to do is create tables that indicate whether or not each patient has a given event. A table should be created for each event.

## Setup

In [4]:
import cptac
import pandas as pd
import cnvutils

In [5]:
# Fraction of the values examined for a sample that must pass the gain/loss
# threshold before that sample is flagged as having the event.
CUTOFF = 0.8

# Chromosome on which the event is located.
CHROMOSOME = '20'

# Event regions given as (start_bp, end_bp); a value of None skips that event type.
GAIN_EVENT = (0, 64_290_385)
LOSS_EVENT = None

# Event ranges used previously, kept here in case they are needed again:
#   GAIN_EVENT_START = 80794385    GAIN_EVENT_END = 130794385
#   LOSS_EVENT_START = 0           LOSS_EVENT_END = 30794385
#
# Event ranges recorded as "new" on 30 Sep 2020:
#   GAIN_EVENT_START = 52110839    GAIN_EVENT_END = 145052465
#   LOSS_EVENT_START = 202660      LOSS_EVENT_END = 37421341

In [6]:
# Cancer types to examine for this event, each paired with the cptac dataset
# class whose CNV table is loaded. Edit this listing to change the cohort.
cancer_datasets = (
    ("brca", cptac.Brca),
    ("colon", cptac.Colon),
    ("hnscc", cptac.Hnscc),
    ("lscc", cptac.Lscc),
    ("luad", cptac.Luad),
    ("ovarian", cptac.Ovarian),
    ("gbm", cptac.Gbm),
    ("endo", cptac.Endometrial),
    ("ccrcc", cptac.Ccrcc),
)

# Instantiate each dataset in the listed order and keep only its CNV table.
cnv_tables = {
    cancer_name: dataset_cls().get_CNV()
    for cancer_name, dataset_cls in cancer_datasets
}

Formatting dataframes...                  



Checking that luad index is up-to-date...



Checking that endometrial index is up-to-date...



                                                

## Append Gene location data

In [7]:
# Gene location table from cnvutils; it is joined onto every CNV table in the next step.
# NOTE(review): presumably this supplies the chromosome, start_bp, end_bp and arm
# columns used later in the notebook, indexed by gene with at least two index
# levels (the join code calls droplevel(1) on it) — confirm against cnvutils.
locations = cnvutils.get_gene_locations()

In [8]:
# Attach the gene location columns to every CNV table so that genes can later
# be filtered by chromosome and base-pair position.
for cancer_type, cnv in cnv_tables.items():
    # Idempotency guard: this cell overwrites cnv_tables in place, so running it
    # a second time would transpose and join tables that were already processed.
    # Processed tables carry a `chromosome` column (it is read when the
    # chromosome is subset later), so its presence marks a table as done.
    if "chromosome" in cnv.columns:
        continue

    # Transpose so the original columns become the index that is joined
    # against the location table.
    cnv_by_gene = cnv.transpose()

    if isinstance(cnv_by_gene.index, pd.MultiIndex):
        gene_locations = locations
    else:
        # Single-level index: drop the second index level of the location
        # table so the two indexes can be matched.
        gene_locations = locations.droplevel(1)

    # Rows left with any missing value after the join are discarded.
    cnv_tables[cancer_type] = cnv_by_gene.join(gene_locations).dropna()

## Get percentage with event

In [9]:
def has_gain_event(row, cutoff, threshold=0.2):
    """Report whether enough of the values in ``row`` indicate a gain.

    Parameters
    ----------
    row : iterable of numbers
        The values to examine.
    cutoff : float
        Minimum fraction of values that must be at or above ``threshold``
        for the event to be reported.
    threshold : float, default 0.2
        A value counts as a gain when it is greater than or equal to this.

    Returns
    -------
    bool
        True when the fraction of gained values is at least ``cutoff``.
        An empty ``row`` yields False.
    """
    values = list(row)
    if not values:
        # Nothing to evaluate; also avoids dividing by zero below.
        return False
    gained = sum(1 for value in values if value >= threshold)
    # Compare against the caller-supplied cutoff, not a module-level global.
    return gained / len(values) >= cutoff

In [10]:
def has_loss_event(row, cutoff, threshold=-0.2):
    """Report whether enough of the values in ``row`` indicate a loss.

    Parameters
    ----------
    row : iterable of numbers
        The values to examine.
    cutoff : float
        Minimum fraction of values that must be at or below ``threshold``
        for the event to be reported.
    threshold : float, default -0.2
        A value counts as a loss when it is less than or equal to this.

    Returns
    -------
    bool
        True when the fraction of lost values is at least ``cutoff``.
        An empty ``row`` yields False.
    """
    values = list(row)
    if not values:
        # Nothing to evaluate; also avoids dividing by zero below.
        return False
    lost = sum(1 for value in values if value <= threshold)
    # Compare against the caller-supplied cutoff, not a module-level global.
    return lost / len(values) >= cutoff

In [11]:
def _flag_columns(chromosome_cnv, bounds, detector):
    """Apply ``detector`` to each non-location column for rows inside ``bounds``.

    Rows are kept when start_bp is strictly greater than bounds[0] and end_bp
    is strictly less than bounds[1]. The location columns are then removed and
    ``detector(column, CUTOFF)`` is evaluated for every remaining column.
    """
    inside = (chromosome_cnv.start_bp > bounds[0]) & (chromosome_cnv.end_bp < bounds[1])
    values_only = chromosome_cnv[inside].drop(columns=['chromosome', 'start_bp', 'end_bp', 'arm'])
    return values_only.apply(lambda column: detector(column, CUTOFF))


# Build and save one event table per cancer type.
for cancer_type, cnv in cnv_tables.items():
    # Restrict the table to the chromosome of interest.
    on_chromosome = cnv[cnv.chromosome == CHROMOSOME]

    # Only the event types that have a configured range get a column.
    event_calls = {}
    if GAIN_EVENT:
        event_calls['gain_event'] = _flag_columns(on_chromosome, GAIN_EVENT, has_gain_event)
    if LOSS_EVENT:
        event_calls['loss_event'] = _flag_columns(on_chromosome, LOSS_EVENT, has_loss_event)

    # Assemble the table and write it out as a tab-separated file.
    pd.DataFrame(event_calls).to_csv(f'{cancer_type}_has_event.tsv', sep='\t')