# Create Event Tables

The first thing we need to do is create tables that indicate whether a patient has the given event or not. A table should be created for each cancer type.

## Setup

In [1]:
import cptac
import pandas as pd
from functions import *

In [2]:
# Cancer types to include for this event. Each name is mapped to the cptac
# dataset class whose CNV table is loaded for it; add or remove pairs here.
cnv_tables = {
    cancer_type: dataset_class().get_CNV()
    for cancer_type, dataset_class in [
        ("brca", cptac.Brca),
        ("colon", cptac.Colon),
        ("hnscc", cptac.Hnscc),
        ("lscc", cptac.Lscc),
        ("luad", cptac.Luad),
        ("ovarian", cptac.Ovarian),
    ]
}

Checking that lscc index is up-to-date... 



version 3scc v3.2.......                 
Checking that luad index is up-to-date...



                                            

## Append Gene Location Data

In [3]:
# Load the gene-location table that is joined onto each CNV table in the next cell.
# NOTE(review): get_gene_locations is not defined in this notebook; presumably it
# comes from the star import of the local `functions` module -- confirm.
locations = get_gene_locations()

In [6]:
# Attach the gene-location columns to every CNV table and drop rows that end up
# with any missing value.
# NOTE: this overwrites the entries of cnv_tables, so the cell is not idempotent;
# re-run the loading cell before running this one a second time.
for cancer_type, cnv in list(cnv_tables.items()):
    by_gene = cnv.transpose()
    if isinstance(by_gene.index, pd.MultiIndex):
        # The transposed table already has a MultiIndex: join on `locations` as-is.
        gene_locations = locations
    else:
        # Single-level index: drop the second index level of `locations` first
        # (presumably so the join keys match -- confirm).
        gene_locations = locations.droplevel(1)
    cnv_tables[cancer_type] = by_gene.join(gene_locations).dropna()

## Get percentage with event

In [7]:
# Minimum fraction of values inside an event window that must pass the
# gain/loss threshold for the event to be called.
CUTOFF = 0.8

# Windows on chromosome 8, compared against the start_bp / end_bp columns when
# selecting the rows that belong to each event.
GAIN_EVENT_START, GAIN_EVENT_END = 80_794_385, 130_794_385
LOSS_EVENT_START, LOSS_EVENT_END = 0, 30_794_385

In [8]:
def has_gain_event(row, cutoff, threshold=0.2):
    """Report whether enough of the values in ``row`` indicate a gain.

    Args:
        row: Iterable of numeric values. The notebook passes one column of
            the event-window table at a time via ``DataFrame.apply``.
        cutoff: Minimum fraction (0-1) of values that must be at or above
            ``threshold`` for the event to be called.
        threshold: Value at or above which a single entry counts as a gain.
            Defaults to 0.2, the value that was previously hard-coded.

    Returns:
        True when the fraction of values ``>= threshold`` is at least
        ``cutoff``; False otherwise, including when ``row`` is empty.
    """
    values = list(row)
    if not values:
        # No values means no evidence for an event; also avoids dividing by zero.
        return False
    # NaN compares False here, so missing values count against the event
    # while still contributing to the denominator.
    gained = sum(1 for value in values if value >= threshold)
    return gained / len(values) >= cutoff

In [9]:
def has_loss_event(row, cutoff, threshold=-0.2):
    """Report whether enough of the values in ``row`` indicate a loss.

    Args:
        row: Iterable of numeric values. The notebook passes one column of
            the event-window table at a time via ``DataFrame.apply``.
        cutoff: Minimum fraction (0-1) of values that must be at or below
            ``threshold`` for the event to be called.
        threshold: Value at or below which a single entry counts as a loss.
            Defaults to -0.2, the value that was previously hard-coded.

    Returns:
        True when the fraction of values ``<= threshold`` is at least
        ``cutoff``; False otherwise, including when ``row`` is empty.
    """
    values = list(row)
    if not values:
        # No values means no evidence for an event; also avoids dividing by zero.
        return False
    # NaN compares False here, so missing values count against the event
    # while still contributing to the denominator.
    lost = sum(1 for value in values if value <= threshold)
    return lost / len(values) >= cutoff

In [11]:
# Columns added by the gene-location join; they are removed before the
# detectors run so that only the CNV value columns are evaluated.
location_columns = ['chromosome', 'start_bp', 'end_bp', 'arm']

# One entry per output column: (column name, window start, window end, detector).
event_specs = [
    ('gain_event', GAIN_EVENT_START, GAIN_EVENT_END, has_gain_event),
    ('loss_event', LOSS_EVENT_START, LOSS_EVENT_END, has_loss_event),
]

for cancer_type, cnv_with_locations in cnv_tables.items():
    # Restrict to chromosome 8 (stored as the string '8').
    chr8 = cnv_with_locations[cnv_with_locations['chromosome'] == '8']

    event_columns = {}
    for column_name, window_start, window_end, detector in event_specs:
        # Keep rows lying strictly inside the window.
        inside_window = (chr8['start_bp'] > window_start) & (chr8['end_bp'] < window_end)
        window_values = chr8[inside_window].drop(columns=location_columns)
        # apply() runs column-wise, giving one True/False per remaining column
        # (presumably one per patient/sample -- confirm).
        event_columns[column_name] = window_values.apply(
            lambda column: detector(column, CUTOFF)
        )

    # Assemble the event table and write it out as a tab-separated file.
    event_table = pd.DataFrame(event_columns)
    event_table.to_csv(f'{cancer_type}_has_event.tsv', sep='\t')