# Get tables identifying samples with arm-level events on chromosome arm 7p

These tables support Lindsey's analysis of EGFR, which lies on chromosome arm 7p.

In [1]:
import pandas as pd

In [2]:
# Cohorts to process. Each name is used as the prefix of that cohort's
# "<name>_cna_summary.tsv.gz" input table.
cancer_types = [
    "brca", "ccrcc", "colon",
    "endometrial", "gbm", "hnscc",
    "lscc", "luad", "ovarian",
]

In [3]:
# Build one combined table of chr 7p rows across all cancer types.
#
# The per-cohort tables are collected in a list and concatenated once at the
# end. DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# and calling it inside a loop re-copies the accumulated table every iteration.
chr7p_frames = []

for cancer_type in cancer_types:

    # Read in the table. The chromosome column is forced to str so that the
    # comparison against "7" below works regardless of how pandas would
    # otherwise infer the column's type.
    cancer_df = pd.read_csv(
        f"{cancer_type}_cna_summary.tsv.gz",
        sep="\t",
        dtype={"chromosome": str},
    )

    # Select data for chr 7p. Take an explicit copy so the in-place insert
    # below operates on an independent table rather than on a filtered slice.
    is_chr7p = (cancer_df["chromosome"] == "7") & (cancer_df["arm"] == "p")
    cancer_df = cancer_df[is_chr7p].copy()

    # Mark the cancer type. We use the insert function instead of assign
    # so we can specify to insert the column at the front of the table.
    cancer_df.insert(0, "cancer_type", cancer_type)

    # Add columns indicating whether each sample has an arm level
    # amplification or deletion, using a 0.95 cutoff on the prop_arm_* columns.
    cancer_df = cancer_df.assign(
        has_arm_amplification=cancer_df["prop_arm_amplified"] >= 0.95,
        has_arm_deletion=cancer_df["prop_arm_deleted"] >= 0.95,
    )

    # Collect for the single concatenation below
    chr7p_frames.append(cancer_df)

# Row labels from the source tables are kept as-is (no ignore_index), matching
# what repeated append produced. pd.concat raises on an empty list, so fall
# back to an empty table when there are no cancer types to process.
chr7p_df = pd.concat(chr7p_frames) if chr7p_frames else pd.DataFrame()

In [4]:
# Write the combined chr 7p table as a gzip-compressed, tab-separated file,
# leaving the row index out of the output.
chr7p_df.to_csv(
    "7p_summary_cutoff_95_percent.tsv.gz",
    sep="\t",
    index=False,
    compression="gzip",
)