# SNCA analysis step 6: identify common events


Identify significant events by setting MIN_SAMPLES_WITH_EVENT and MIN_CANCER_TYPES. Get a list of events with several cancer types that have several samples with that event. Create a table for each type of cancer with each significant event as a column (ex: 8p_gain, 20_loss, etc.) and boolean values indicating if a patient has the event.

In [78]:
MIN_SAMPLES_WITH_EVENT = 25
MIN_CANCER_TYPES = 4

## Setup

In [79]:
import pandas as pd
import numpy as np
import os

## Load data

In [80]:
cancer_types = [
    "brca",
    "ccrcc",
    "colon",
    "endometrial",
    "gbm",
    "hnscc",
    "lscc",
    "luad",
    "ovarian"
]

In [81]:
events_dir = "event_tables"
event_info = dict()

for cancer_type in cancer_types:
    file_path = os.path.join(events_dir, f'{cancer_type}_event_table.tsv.gz')
    df = pd.read_csv(file_path, sep='\t').\
        assign(cancer_type=cancer_type)
    df.rename(columns={'chromosome_gain': '_gain', 'chromosome_loss': '_loss'}, inplace=True)
    df = df.pivot_table(index='Patient_ID', columns='chromosome', values=['p_gain', 'q_gain', 'p_loss', 'q_loss', '_gain', '_loss'])
    df.columns = [f'{x[1]}{x[0]}' for x in df.columns.values]
    event_info[cancer_type] = df

## Get events to look at

In [82]:
all_events = pd.DataFrame()
for cancer_type in cancer_types:
    df = event_info[cancer_type]
    events = df.apply(lambda x: list(x).count(True) > MIN_SAMPLES_WITH_EVENT)
    events.name=cancer_type
    if all_events.empty:
        all_events = pd.DataFrame(events)
    else:
        all_events[cancer_type] = events
#     print(all_events)
all_events[all_events.apply(lambda x: list(x).count(True) > MIN_CANCER_TYPES, axis=1)]

Unnamed: 0,brca,ccrcc,colon,endometrial,gbm,hnscc,lscc,luad,ovarian
7p_gain,False,True,True,False,True,False,True,True,False
8q_gain,True,False,True,False,False,True,True,False,True


In [57]:
from collections import Counter


In [61]:
results = pd.DataFrame.from_dict(Counter(all_events), orient='index')

In [65]:
results[results[0] >= MIN_CANCER_TYPES]

Unnamed: 0,0
20_gain,5
20p_gain,5
5p_gain,5
7p_gain,7
17p_loss,5
21p_loss,5
4p_loss,4
8p_loss,6
9p_loss,5
1q_gain,4


In [48]:
events = df.apply(lambda x: list(x).count(True) > MIN_SAMPLES_WITH_EVENT)
results = df.loc[:,list(events)]
results

Unnamed: 0_level_0,20_gain,16_loss,17_loss,5p_gain,6p_gain,12p_gain,20p_gain,4p_loss,8p_loss,11p_loss,...,9q_loss,13q_loss,14q_loss,15q_loss,16q_loss,17q_loss,18q_loss,19q_loss,21q_loss,22q_loss
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01OV002,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
01OV007,False,False,False,True,False,False,True,,True,True,...,True,True,False,False,False,False,True,False,False,True
01OV008,False,True,False,True,True,True,True,True,True,True,...,True,False,False,True,True,False,False,True,False,False
01OV010,True,False,True,False,False,False,True,True,True,False,...,False,False,True,True,True,True,True,True,True,True
01OV013,False,False,False,False,True,True,False,True,True,False,...,False,False,False,False,True,False,True,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26OV008,False,False,True,False,False,True,False,False,True,True,...,False,True,False,True,False,True,False,False,True,True
26OV009,False,False,True,True,True,False,False,True,True,False,...,True,False,True,True,True,True,False,False,False,True
26OV010,False,False,True,False,False,False,False,False,True,True,...,False,True,False,False,True,True,False,True,False,False
26OV011,False,False,False,True,False,False,False,False,False,True,...,False,True,True,False,True,False,True,False,False,False
