# SCNA analysis step 4: Create event tables

We will create tables for each cancer type with boolean columns for each event type (p_gain, q_gain, p_loss, q_loss, chromosome_gain, chromosome_loss).

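We first define an arm-level event: an arm (p or q) is called gained or lost when the proportion of that arm showing the gain or loss is at least `EVENT_CUTOFF`. A whole-chromosome event (gain or loss) is then called when both the p arm and the q arm of that chromosome show the same event.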

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Minimum proportion of an arm that must be gained (or lost) for that arm to be
# called as an event. It is passed to create_event_table as event_cutoff, where
# the comparison is inclusive (proportion >= cutoff).
EVENT_CUTOFF = 0.95

In [57]:
def create_event_table(
    cancer_type: str,
    event_cutoff: float,
    input_dir: str,
    output_dir: str
) -> pd.DataFrame:
    """Build and save the boolean arm/chromosome event table for one cancer type.

    Reads ``<input_dir>/<cancer_type>_cna_summary.tsv.gz``, which must contain
    the columns ``Patient_ID``, ``chromosome``, ``arm``, ``prop_arm_amplified``
    and ``prop_arm_deleted``, with both ``p`` and ``q`` present in ``arm``.

    Args:
        cancer_type: Identifier used as the prefix of the input and output
            file names.
        event_cutoff: Minimum proportion (inclusive) of an arm that must be
            amplified/deleted for the arm to count as gained/lost.
        input_dir: Directory holding the summary table.
        output_dir: Directory the event table is written to. It is created
            if it does not exist yet.

    Returns:
        A DataFrame with one row per (``Patient_ID``, ``chromosome``) and the
        boolean columns ``p_gain``, ``q_gain``, ``p_loss``, ``q_loss``,
        ``chromosome_gain`` and ``chromosome_loss``. The same table is written
        to ``<output_dir>/<cancer_type>_event_table.tsv.gz``.

    Raises:
        FileNotFoundError: If the input summary file does not exist.
        KeyError: If the summary has no rows for the ``p`` or the ``q`` arm.
    """
    input_file = os.path.join(input_dir, f"{cancer_type}_cna_summary.tsv.gz")
    # dtype "O" keeps the chromosome labels as read instead of letting pandas
    # infer a numeric dtype for them.
    summary = pd.read_csv(input_file, sep='\t', dtype={"chromosome": "O"})
    summary = summary.rename(
        columns={'prop_arm_amplified': 'gain', 'prop_arm_deleted': 'loss'}
    )

    # Wide layout: one row per (patient, chromosome), one column per
    # (event, arm) pair. pivot_table's default aggregation (mean) applies if a
    # patient/chromosome/arm combination occurs more than once.
    event_summary = pd.pivot_table(
        summary,
        index=['Patient_ID', 'chromosome'],
        columns='arm',
        values=['gain', 'loss']
    )
    # Flatten the (event, arm) column MultiIndex into names such as "p_gain".
    event_summary.columns = [f'{arm}_{event}' for event, arm in event_summary.columns]

    # An arm has an event when its proportion reaches the cutoff. Missing
    # values (NaN) compare as False, i.e. they are treated as "no event".
    event_summary = event_summary >= event_cutoff

    # Define whole chromosome event when both arms are either gained or lost
    event_summary['chromosome_gain'] = event_summary['p_gain'] & event_summary['q_gain']
    event_summary['chromosome_loss'] = event_summary['p_loss'] & event_summary['q_loss']
    event_summary = event_summary.reset_index()

    # to_csv does not create missing directories, so make sure the target
    # exists. An empty output_dir means the current directory; makedirs('')
    # would raise, hence the guard.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"{cancer_type}_event_table.tsv.gz")
    event_summary.to_csv(output_file, index=False, compression="gzip", sep='\t')
    return event_summary

In [4]:
# Cancer-type identifiers to process. Each one is used as the file-name prefix
# of its summary table (input) and its event table (output).
cancer_types = [
    "brca",
    "ccrcc",
    "colon",
    "endometrial",
    "gbm",
    "hnscc",
    "lscc",
    "luad",
    "ovarian"
]

In [59]:
# Build and save one event table per cancer type; the cutoff and the
# input/output directories are the same for every cancer type.
shared_kwargs = dict(
    event_cutoff=EVENT_CUTOFF,
    input_dir='summary_tables',
    output_dir='event_tables',
)
for cancer_type in cancer_types:
    create_event_table(cancer_type=cancer_type, **shared_kwargs)