# Make arm level event table for chromosome 8

Here we look for genes IN the event discussed that are affected by the arm-level event. We will find these effects by performing a series of t-tests comparing the proteomic values of the patients with the event against those without the event.

## Setup (Install necessary packages)

We will start by importing necessary packages and collecting all of the proteomics data we will need to run the tests. The cancer types analyzed should have been determined in 01_event_basic_info where we determine which types of cancer seem to have the event we are looking at.

In [1]:
import pandas as pd
import numpy as np
import os
import sys
import operator
import cptac
import cnvutils

In [2]:
cancer_types = [
    "brca",
    "colon",
    "hnscc",
    "lscc",
    "luad",
    "ovarian"
]

## Make the arm level event tables

In [3]:
news = {}

for cancer_type in cancer_types:
    
    # Get the event table
    event_file_path = os.path.join("..", "chromosome_7", "01_event_tables", f"{cancer_type}_cna_summary.tsv.gz")
    
    event = pd.\
    read_csv(event_file_path, sep='\t', dtype={"chromosome": str})
    
    event = event[event["chromosome"] == "8"].\
    melt(
        id_vars=["Patient_ID", "chromosome", "arm"]
    )
    
    event = event[
        (
            (event["arm"] == "p") & (event["variable"] == "prop_arm_deleted") |
            (event["arm"] == "q") & (event["variable"] == "prop_arm_amplified")
        )
    ].\
    drop(columns=["chromosome", "arm"]).\
    replace({
        "prop_arm_amplified": "gain_event",
        "prop_arm_deleted": "loss_event"
    })
    
    event = event.\
    assign(value=event["value"] >= 0.95).\
    pivot(
        index="Patient_ID",
        columns="variable",
        values="value"
    )
    
    event.columns.name = None
    event.index.name = None
    
    news[cancer_type] = event

## Load the previously generated event tables, which are based on crossover location

In [4]:
olds = {}

for cancer_type in cancer_types:
    
    df = pd.read_csv(f"{cancer_type}_has_event.tsv", sep="\t", index_col=0)
    olds[cancer_type] = df

## Compare the two

In [5]:
for cancer_type in cancer_types:
    
    old = olds[cancer_type]
    new = news[cancer_type]
    
    print(f"{cancer_type}: \n{(~new.eq(old)).sum()}\n")

brca: 
gain_event    39
loss_event    63
dtype: int64

colon: 
gain_event    19
loss_event    44
dtype: int64

hnscc: 
gain_event    45
loss_event    23
dtype: int64

lscc: 
gain_event    21
loss_event    58
dtype: int64

luad: 
gain_event    59
loss_event    63
dtype: int64

ovarian: 
gain_event    18
loss_event    33
dtype: int64



In [6]:
for cancer_type in cancer_types:
    
    old = olds[cancer_type]
    new = news[cancer_type]
    
    print(f"""{cancer_type}
old gain: {old["gain_event"].sum()}
new gain: {new["gain_event"].sum()}

old loss: {old["loss_event"].sum()}
new loss: {new["loss_event"].sum()}
""")

brca
old gain: 63
new gain: 45

old loss: 57
new loss: 15

colon
old gain: 47
new gain: 35

old loss: 41
new loss: 7

hnscc
old gain: 41
new gain: 28

old loss: 30
new loss: 9

lscc
old gain: 64
new gain: 48

old loss: 69
new loss: 15

luad
old gain: 32
new gain: 15

old loss: 34
new loss: 13

ovarian
old gain: 41
new gain: 27

old loss: 51
new loss: 18



In [7]:
for cancer_type in cancer_types:
    
    old = olds[cancer_type]
    new = news[cancer_type]

    print(f"""{cancer_type}
new - old gain: {new["gain_event"].sum() - old["gain_event"].sum()}
new - old loss: {new["loss_event"].sum() - old["loss_event"].sum()}
""")

brca
new - old gain: -18
new - old loss: -42

colon
new - old gain: -12
new - old loss: -34

hnscc
new - old gain: -13
new - old loss: -21

lscc
new - old gain: -16
new - old loss: -54

luad
new - old gain: -17
new - old loss: -21

ovarian
new - old gain: -14
new - old loss: -33



In [8]:
# Make some lists
lists = {}

for cancer_type in cancer_types:
    
    old = olds[cancer_type]
    new = news[cancer_type]
    
    # loss only old
    loss_filter = old.loss_event.astype(float).combine(new.loss_event.astype(float), operator.sub)
    loo = loss_filter[loss_filter == 1].index
    
    # loss only new
    lon = loss_filter[loss_filter == -1].index    

    # gain only old
    gain_filter = old.gain_event.astype(float).combine(new.gain_event.astype(float), operator.sub)
    goo = gain_filter[gain_filter == 1].index
    
    # gain only new
    gon = gain_filter[gain_filter == -1].index
    
    # Save
    lists[cancer_type] = {}
    
    lists[cancer_type]["loo"] = loo
    lists[cancer_type]["lon"] = lon
    
    lists[cancer_type]["goo"] = goo
    lists[cancer_type]["gon"] = gon

In [9]:
for cancer_type in lists.keys():
    print(lists[cancer_type]["loo"])

Index(['CPT000814', 'X01BR001', 'X01BR018', 'X01BR020', 'X01BR023', 'X01BR026',
       'X01BR027', 'X01BR030', 'X01BR033', 'X01BR043', 'X03BR002', 'X03BR004',
       'X03BR005', 'X03BR013', 'X06BR005', 'X09BR004', 'X09BR007', 'X11BR006',
       'X11BR011', 'X11BR012', 'X11BR015', 'X11BR016', 'X11BR017', 'X11BR018',
       'X11BR019', 'X11BR020', 'X11BR024', 'X11BR027', 'X11BR044', 'X11BR047',
       'X11BR049', 'X11BR056', 'X11BR058', 'X11BR075', 'X14BR005', 'X14BR014',
       'X18BR006', 'X18BR007', 'X20BR002', 'X20BR007', 'X20BR008', 'X21BR001'],
      dtype='object')
Index(['01CO001', '01CO005', '01CO013', '05CO002', '05CO005', '05CO011',
       '05CO033', '05CO047', '05CO049', '06CO001', '09CO008', '09CO015',
       '11CO008', '11CO020', '11CO032', '11CO036', '11CO037', '11CO039',
       '11CO042', '11CO043', '11CO044', '11CO045', '11CO053', '11CO072',
       '14CO005', '15CO002', '16CO003', '16CO011', '20CO001', '20CO004',
       '21CO006', '21CO007', '22CO004', '27CO004'],
      

In [10]:
for cancer_type in lists.keys():
    print(lists[cancer_type]["lon"])

Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')


In [11]:
for cancer_type in lists.keys():
    print(lists[cancer_type]["goo"])

Index(['X01BR023', 'X01BR043', 'X05BR001', 'X05BR016', 'X05BR029', 'X05BR042',
       'X05BR043', 'X05BR045', 'X11BR004', 'X11BR011', 'X11BR028', 'X11BR073',
       'X11BR075', 'X14BR005', 'X18BR006', 'X20BR001', 'X20BR002', 'X21BR001'],
      dtype='object')
Index(['01CO019', '05CO007', '05CO039', '05CO041', '09CO008', '11CO008',
       '11CO037', '11CO053', '11CO054', '11CO072', '16CO002', '16CO003'],
      dtype='object')
Index(['C3L-00987', 'C3L-04791', 'C3L-04849', 'C3N-01338', 'C3N-01645',
       'C3N-01754', 'C3N-01859', 'C3N-01947', 'C3N-03028', 'C3N-03457',
       'C3N-03783', 'C3N-03876', 'C3N-04273'],
      dtype='object')
Index(['C3L-00081', 'C3L-01884', 'C3L-02163', 'C3L-02619', 'C3L-02646',
       'C3L-02963', 'C3L-03965', 'C3L-04071', 'C3N-00497', 'C3N-01411',
       'C3N-02426', 'C3N-02575', 'C3N-03441', 'C3N-03486', 'C3N-03877',
       'C3N-04124'],
      dtype='object')
Index(['C3L-00001', 'C3L-00893', 'C3L-01924', 'C3N-00175', 'C3N-00547',
       'C3N-00551', 'C3N-00

In [12]:
for cancer_type in lists.keys():
    print(lists[cancer_type]["gon"])

Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')


In [13]:
dss = {
    "brca": cptac.Brca,
    "colon": cptac.Colon,
    "hnscc": cptac.Hnscc,
    "lscc": cptac.Lscc,
    "luad": cptac.Luad,
    "ovarian": cptac.Ovarian
}

In [42]:
def make_samples_plot(samples, chrm, cancer_type, datasets_dict):
    
    # Get CNV table and transpose
    ds = datasets_dict[cancer_type](no_internet=True)
    cnv = ds.get_CNV().transpose()
    
    # Join in the locations
    locs = cnvutils.\
    get_gene_locations().\
    dropna(axis="index", how="all")

    if isinstance(cnv.index, pd.MultiIndex):
        cnv = cnv.\
        join(locs, how="inner")
        
    else:
        cnv = cnv.\
        join(locs.droplevel(1), how="inner").\
        drop_duplicates()
        
    cnv = cnv.assign(loc=cnv[["start_bp", "end_cp"]].max(axis="columns"))

    # For each sample, select the genes on the chromosome we're interested in, and plot
    

In [48]:
for cancer_type in dss.keys():
    
    b = make_samples_plot(None, None, cancer_type, dss)

                               



                            



                               

In [53]:
b.join(b[["start_bp"]].min(axis="columns").rename("min"))

Unnamed: 0_level_0,01OV002,01OV007,01OV008,01OV010,01OV013,01OV017,01OV018,01OV019,01OV023,01OV026,...,26OV008,26OV009,26OV010,26OV011,26OV013,chromosome,start_bp,end_bp,arm,min
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1BG,-0.124800,0.074800,-0.991900,-0.478500,-0.083000,-0.587100,-0.0770,0.223900,0.0825,0.383723,...,0.076300,0.205669,-0.602800,0.235100,-1.226100,19,58345178.0,58353492.0,q,58345178.0
A1CF,0.066700,0.136400,-0.003900,-0.123900,0.019500,0.013100,-0.1067,0.172400,-0.5631,-0.132200,...,0.154900,0.266400,0.089700,0.207300,0.140600,10,50799409.0,50885675.0,q,50799409.0
A3GALT2,0.081300,0.047500,-0.018100,0.806700,-0.161200,0.145500,-0.0401,0.220700,0.0888,0.102900,...,-0.189000,-0.201700,-0.418400,0.243300,-0.081000,1,33306766.0,33321098.0,p,33306766.0
A4GALT,-0.156200,-0.222500,-0.434800,-0.272900,-0.216300,-0.122600,-0.2866,-0.393200,-0.6430,-0.031600,...,-0.841500,-0.318800,0.130200,0.544000,-0.104100,22,42692121.0,42721298.0,q,42692121.0
A4GNT,0.081200,0.056000,-0.392100,0.165800,0.329000,0.787800,0.0284,0.461300,0.1775,-0.107800,...,-0.385200,0.173300,-0.006300,0.490700,0.300300,3,138123713.0,138132390.0,q,138123713.0
AAAS,-0.048600,0.143100,-0.000700,0.018500,-0.196300,0.179300,0.2417,-0.404600,0.1399,-0.112500,...,0.117000,0.158100,0.637400,0.096200,0.172000,12,53307456.0,53324864.0,q,53307456.0
AACS,-0.170700,0.049300,-0.030700,-0.106300,-0.277400,0.166900,0.0773,-0.456800,0.1296,-0.161800,...,-0.003000,0.096400,-0.464000,-0.312100,-0.222500,12,125065434.0,125143333.0,q,125065434.0
AADAC,-0.864400,0.204400,0.316600,0.165800,0.652900,0.730500,0.1513,0.241300,0.3698,-0.091900,...,-0.232000,0.276200,0.307200,0.669400,0.760100,3,151814073.0,151828488.0,q,151814073.0
AADACL2,0.267600,0.204400,0.316600,0.165800,0.652900,0.730500,0.1513,0.241300,0.3698,-0.091900,...,-0.232000,0.276200,0.307200,0.669400,0.760100,3,151733916.0,151761339.0,q,151733916.0
AADACL3,0.080900,0.064200,0.521600,-0.265000,-0.193814,-0.524100,0.0249,0.287600,0.3469,0.092200,...,-0.023970,-0.165400,0.350800,0.206500,-0.074900,1,12716110.0,12728760.0,p,12716110.0
