# SCNA analysis step 1: Make long format CNA table for each cancer type

For each cancer type, convert the CNA table to long format (i.e., the columns are [Patient_ID, gene, cna_val], giving one row per gene per sample), and save each long table as a gzipped TSV file in the `long_cna_tables` directory.

In [1]:
import cptac
import pandas as pd
import numpy as np
import datetime
import os

In [2]:
# Map each cancer-type key to the cptac class that is called to load that dataset.
# Classes are stored un-instantiated; make_long_table calls them on demand.
dss = dict(
    brca=cptac.Brca,
    ccrcc=cptac.Ccrcc,
    colon=cptac.Colon,
    endometrial=cptac.Endometrial,
    gbm=cptac.Gbm,
    hnscc=cptac.Hnscc,
    lscc=cptac.Lscc,
    luad=cptac.Luad,
    ovarian=cptac.Ovarian,
)

In [3]:
def make_long_table(cancer_type, datasets_dict, long_cna_tables_dir):
    """Build the long-format CNA table for one cancer type and optionally save it.

    Parameters
    ----------
    cancer_type : str
        Key into ``datasets_dict``; also used as the prefix of the output file name.
    datasets_dict : dict
        Maps cancer-type keys to zero-argument callables (e.g. cptac dataset
        classes) whose result exposes ``get_CNV()``.
    long_cna_tables_dir : str or None
        Directory in which to write ``<cancer_type>_cna_long.tsv.gz``. It is
        created (including missing parent directories) if needed. Pass None if
        you don't want to re-save the output.

    Returns
    -------
    str or None
        Path of the gzipped TSV that was written, or None when
        ``long_cna_tables_dir`` is None and nothing was saved.
    """
    # Use the dictionary that was passed in rather than a notebook-level global,
    # so the function works with whatever mapping the caller supplies.
    # NOTE(review): assumes get_CNV() returns a table indexed by "Patient_ID"
    # whose column axis is named "Name" -- confirm against the cptac version in use.
    cna = (
        datasets_dict[cancer_type]()
        .get_CNV()
        .reset_index()
        .melt(id_vars="Patient_ID", value_name="cna_val")
        .rename(columns={"Name": "gene"})
    )

    if "Database_ID" in cna.columns:
        # Parse the database IDs to remove version numbers from Ensembl IDs, as they interfere with lookup.
        cna = cna.assign(
            Database_ID=cna["Database_ID"].str.rsplit(".", n=1, expand=True)[0]
        )

    # Nothing to save: return explicitly instead of falling through to an
    # unbound ``file_path``.
    if long_cna_tables_dir is None:
        return None

    # makedirs handles nested paths and an already-existing directory.
    os.makedirs(long_cna_tables_dir, exist_ok=True)

    file_path = os.path.join(long_cna_tables_dir, f"{cancer_type}_cna_long.tsv.gz")
    cna.to_csv(file_path, index=False, compression="gzip", sep="\t")

    return file_path

In [4]:
# Build and save the long-format table for every cancer type, echoing each output path.
for cancer_type in dss:
    saved_path = make_long_table(cancer_type, dss, "long_cna_tables")
    print(saved_path)

long_cna_tables/brca_cna_long.tsv.gz     
long_cna_tables/ccrcc_cna_long.tsv.gz     
long_cna_tables/colon_cna_long.tsv.gz     
long_cna_tables/endometrial_cna_long.tsv.gz     
                                        



long_cna_tables/gbm_cna_long.tsv.gz
                                          



long_cna_tables/hnscc_cna_long.tsv.gz
                                         



long_cna_tables/lscc_cna_long.tsv.gz
long_cna_tables/luad_cna_long.tsv.gz     
long_cna_tables/ovarian_cna_long.tsv.gz     
