# Chromosome 8 Cis-Effects

Here we run t-tests on all the proteins on chromosome 8 against the 2 events (gain of 8q and loss of 8p)

## Import Libraries and download data

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import cptac
from scipy import stats

In [6]:
brca = cptac.Brca()
hnscc = cptac.Hnscc()
luad = cptac.Luad()
ovarian = cptac.Ovarian()
colon = cptac.Colon()
lscc = cptac.Lscc()

Checking that luad index is up-to-date... 



Checking that ovarian index is up-to-date...



version 3scc v3.2.......                    
                            



## Get proteomic and CNV Event data for each cancer type

In [7]:
brca_has_event = pd.read_csv("../brca_has_event.csv")
hnscc_has_event = pd.read_csv("../hnscc_has_event.csv")
luad_has_event = pd.read_csv("../luad_has_event.csv")
ovarian_has_event = pd.read_csv("../ovarian_has_event.csv")
colon_has_event = pd.read_csv("../colon_has_event.csv")
lscc_has_event = pd.read_csv("../lscc_has_event.csv")

In [8]:
brca_prot = brca.get_proteomics()
hnscc_prot = hnscc.get_proteomics()
luad_prot = luad.get_proteomics()
ovarian_prot = ovarian.get_proteomics()
colon_prot = colon.get_proteomics()
lscc_prot = lscc.get_proteomics()

## Get Genes on Chromosome 8

In [9]:
import pyensembl
ensembl = pyensembl.EnsemblRelease()

In [10]:
def add_chromo_and_loc(df):
    chromo = list()
    not_found = list()
    count = 0
    for gene in list(df.index.get_level_values(0)):
        count += 1
        try:
            e_gene = ensembl.genes_by_name(gene)
            chromo.append(e_gene[0].contig)
        except:
            not_found.append(gene)
            chromo.append(None)
    df['chromo'] = chromo
    print(len(not_found)/count)
    return df

In [11]:
brca_prot_loc = add_chromo_and_loc(brca_prot.transpose())
hnscc_prot_loc = add_chromo_and_loc(hnscc_prot.transpose())
luad_prot_loc = add_chromo_and_loc(luad_prot.transpose())
ovarian_prot_loc = add_chromo_and_loc(ovarian_prot.transpose())
colon_prot_loc = add_chromo_and_loc(colon_prot.transpose())
lscc_prot_loc = add_chromo_and_loc(lscc_prot.transpose())

0.03225487286039379
0.01353882833787466
0.014861201981493597
0.019932622122403144
0.027271600347093094
0.013995680345572354


In [12]:
# Get just those on chromosome 8
brca_prot_8 = brca_prot_loc[brca_prot_loc.chromo == '8']
brca_prot_8.drop('chromo', axis=1, inplace=True)

hnscc_prot_8 = hnscc_prot_loc[hnscc_prot_loc.chromo == '8']
hnscc_prot_8.drop('chromo', axis=1, inplace=True)

luad_prot_8 = luad_prot_loc[luad_prot_loc.chromo == '8']
luad_prot_8.drop('chromo', axis=1, inplace=True)

ovarian_prot_8 = ovarian_prot_loc[ovarian_prot_loc.chromo == '8']
ovarian_prot_8.drop('chromo', axis=1, inplace=True)

colon_prot_8 = colon_prot_loc[colon_prot_loc.chromo == '8']
colon_prot_8.drop('chromo', axis=1, inplace=True)

lscc_prot_8 = lscc_prot_loc[lscc_prot_loc.chromo == '8']
lscc_prot_8.drop('chromo', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


## Append Event Info

In [13]:
brca_prot_8 = brca_prot_8.transpose()
hnscc_prot_8 = hnscc_prot_8.transpose()
luad_prot_8 = luad_prot_8.transpose()
ovarian_prot_8 = ovarian_prot_8.transpose()
colon_prot_8 = colon_prot_8.transpose()
lscc_prot_8 = lscc_prot_8.transpose()

In [14]:
brca_has_event.set_index('Patient_ID', inplace=True)
hnscc_has_event.set_index('Patient_ID', inplace=True)
luad_has_event.set_index('Patient_ID', inplace=True)
ovarian_has_event.set_index('Patient_ID', inplace=True)
colon_has_event.set_index('Patient_ID', inplace=True)
lscc_has_event.set_index('Patient_ID', inplace=True)

In [15]:
brca_prot_and_event = brca_prot_8.join(brca_has_event)
hnscc_prot_and_event = hnscc_prot_8.join(hnscc_has_event)
luad_prot_and_event = luad_prot_8.join(luad_has_event)
ovarian_prot_and_event = ovarian_prot_8.join(ovarian_has_event)
colon_prot_and_event = colon_prot_8.join(colon_has_event)
lscc_prot_and_event = lscc_prot_8.join(lscc_has_event)



In [16]:
brca_prot_8 = brca_prot_and_event.dropna(subset=['gain_event', 'loss_event'], how='all')
hnscc_prot_8 = hnscc_prot_and_event.dropna(subset=['gain_event', 'loss_event'], how='all')
luad_prot_8 = luad_prot_and_event.dropna(subset=['gain_event', 'loss_event'], how='all')
ovarian_prot_8 = ovarian_prot_and_event.dropna(subset=['gain_event', 'loss_event'], how='all')
colon_prot_8 = colon_prot_and_event.dropna(subset=['gain_event', 'loss_event'], how='all')
lscc_prot_8 = lscc_prot_and_event.dropna(subset=['gain_event', 'loss_event'], how='all')

## Get Gain and Loss Events

In [17]:
brca_gain_event = list(map(bool, brca_prot_8['gain_event']))
hnscc_gain_event = list(map(bool, hnscc_prot_8['gain_event']))
luad_gain_event = list(map(bool, luad_prot_8['gain_event']))
ovarian_gain_event = list(map(bool, ovarian_prot_8['gain_event']))
colon_gain_event = list(map(bool, colon_prot_8['gain_event']))
lscc_gain_event = list(map(bool, lscc_prot_8['gain_event']))

In [18]:
brca_loss_event = list(map(bool, brca_prot_8['loss_event']))
hnscc_loss_event = list(map(bool, hnscc_prot_8['loss_event']))
luad_loss_event = list(map(bool, luad_prot_8['loss_event']))
ovarian_loss_event = list(map(bool, ovarian_prot_8['loss_event']))
colon_loss_event = list(map(bool, colon_prot_8['loss_event']))
lscc_loss_event = list(map(bool, lscc_prot_8['loss_event']))

In [19]:
brca_prot_8.drop(['gain_event', 'loss_event'], inplace=True, axis=1)
hnscc_prot_8.drop(['gain_event', 'loss_event'], inplace=True, axis=1)
luad_prot_8.drop(['gain_event', 'loss_event'], inplace=True, axis=1)
ovarian_prot_8.drop(['gain_event', 'loss_event'], inplace=True, axis=1)
colon_prot_8.drop(['gain_event', 'loss_event'], inplace=True, axis=1)
lscc_prot_8.drop(['gain_event', 'loss_event'], inplace=True, axis=1)

## Run T-tests

In [20]:
def t_test(col, event):
    has_event = col[event]
    no_event = col[[not i for i in event]]
    return stats.ttest_ind(has_event, no_event)
    

In [21]:
brca_t_test_gain = brca_prot_8.apply(lambda x: t_test(x, brca_gain_event), result_type='expand').transpose()
hnscc_t_test_gain = hnscc_prot_8.apply(lambda x: t_test(x, hnscc_gain_event), result_type='expand').transpose()
luad_t_test_gain = luad_prot_8.apply(lambda x: t_test(x, luad_gain_event), result_type='expand').transpose()
ovarian_t_test_gain = ovarian_prot_8.apply(lambda x: t_test(x, ovarian_gain_event), result_type='expand').transpose()
colon_t_test_gain = colon_prot_8.apply(lambda x: t_test(x, colon_gain_event), result_type='expand').transpose()
lscc_t_test_gain = lscc_prot_8.apply(lambda x: t_test(x, lscc_gain_event), result_type='expand').transpose()

  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


In [22]:
brca_t_test_loss = brca_prot_8.apply(lambda x: t_test(x, brca_loss_event), result_type='expand').transpose()
hnscc_t_test_loss = hnscc_prot_8.apply(lambda x: t_test(x, hnscc_loss_event), result_type='expand').transpose()
luad_t_test_loss = luad_prot_8.apply(lambda x: t_test(x, luad_loss_event), result_type='expand').transpose()
ovarian_t_test_loss = ovarian_prot_8.apply(lambda x: t_test(x, ovarian_loss_event), result_type='expand').transpose()
colon_t_test_loss = colon_prot_8.apply(lambda x: t_test(x, colon_loss_event), result_type='expand').transpose()
lscc_t_test_loss = lscc_prot_8.apply(lambda x: t_test(x, lscc_loss_event), result_type='expand').transpose()

In [23]:
brca_t_test_gain.index = [x[0] for x in list(brca_t_test_gain.index)]
luad_t_test_gain.index = [x[0] for x in list(luad_t_test_gain.index)]
ovarian_t_test_gain.index = [x[0] for x in list(ovarian_t_test_gain.index)]
lscc_t_test_gain.index = [x[0] for x in list(lscc_t_test_gain.index)]

In [24]:
brca_t_test_loss.index = [x[0] for x in list(brca_t_test_loss.index)]
luad_t_test_loss.index = [x[0] for x in list(luad_t_test_loss.index)]
ovarian_t_test_loss.index = [x[0] for x in list(ovarian_t_test_loss.index)]
lscc_t_test_loss.index = [x[0] for x in list(lscc_t_test_loss.index)]

In [25]:
luad_t_test_gain = luad_t_test_gain.rename(columns={0:"luad_stat", 1:"luad_pvalue"})
ovarian_t_test_gain = ovarian_t_test_gain.rename(columns={0:"ovarian_stat", 1:"ovarian_pvalue"})
brca_t_test_gain = brca_t_test_gain.rename(columns={0:"brca_stat", 1:"brca_pvalue"})
lscc_t_test_gain = lscc_t_test_gain.rename(columns={0:"lscc_stat", 1:"lscc_pvalue"})
hnscc_t_test_gain = hnscc_t_test_gain.rename(columns={0:"hnscc_stat", 1:"hnscc_pvalue"})
colon_t_test_gain = colon_t_test_gain.rename(columns={0:"colon_stat", 1:"colon_pvalue"})

In [26]:
luad_t_test_loss = luad_t_test_loss.rename(columns={0:"luad_stat", 1:"luad_pvalue"})
ovarian_t_test_loss = ovarian_t_test_loss.rename(columns={0:"ovarian_stat", 1:"ovarian_pvalue"})
brca_t_test_loss = brca_t_test_loss.rename(columns={0:"brca_stat", 1:"brca_pvalue"})
lscc_t_test_loss = lscc_t_test_loss.rename(columns={0:"lscc_stat", 1:"lscc_pvalue"})
hnscc_t_test_loss = hnscc_t_test_loss.rename(columns={0:"hnscc_stat", 1:"hnscc_pvalue"})
colon_t_test_loss = colon_t_test_loss.rename(columns={0:"colon_stat", 1:"colon_pvalue"})

In [27]:
results_gain = colon_t_test_gain.join(brca_t_test_gain).join(luad_t_test_gain).join(hnscc_t_test_gain).join(ovarian_t_test_gain).join(lscc_t_test_gain)

In [28]:
results_loss = colon_t_test_loss.join(brca_t_test_loss).join(luad_t_test_loss).join(hnscc_t_test_loss).join(ovarian_t_test_loss).join(lscc_t_test_loss) 

## Subset the rows where all cancer types have sig pvalue

In [29]:
def all_low_pvalues(row):
    return ((row['luad_pvalue'] < 0.1) & (row['ovarian_pvalue'] < 0.05) & (row['hnscc_pvalue'] < 0.1) & (row['colon_pvalue'] < 0.1) & (row['brca_pvalue'] < 0.1) & (row['lscc_pvalue'] < 0.1))

In [30]:
common_low_pvalues_gain = results_gain[results_gain.apply(lambda x: all_low_pvalues(x), axis=1)]

In [31]:
common_low_pvalues_loss = results_loss[results_loss.apply(lambda x: all_low_pvalues(x), axis=1)]

In [32]:
gain_proteins = list(common_low_pvalues_gain.index)
loss_proteins = list(common_low_pvalues_loss.index)

In [33]:
print("Gain Proteins: ")
print(gain_proteins)

Gain Proteins: 
['AGO2', 'C8orf82', 'CPNE3', 'DECR1', 'ESRP1', 'ESRP1', 'GGH', 'HGH1', 'LACTB2', 'MRPL15', 'NDRG1', 'NUDCD1', 'POP1', 'PRKDC', 'PTK2', 'PUF60', 'PUF60', 'PUF60', 'PUF60', 'PUF60', 'PUF60', 'RBM12B', 'RIDA', 'RMDN1', 'RRS1', 'SCRIB', 'SCRIB', 'TATDN1', 'TATDN1', 'YTHDF3', 'YWHAZ']


In [34]:
print("Loss Proteins")
print(loss_proteins)

Loss Proteins
['AGPAT5', 'ATP6V1B2', 'ATP6V1H', 'CCAR2', 'CCDC25', 'CHMP7', 'KIF13B', 'NAT1', 'PCM1', 'PPP2R2A', 'XPO7']
