# Contingency Tests

Which events occur together? We will use Fisher's exact test to determine which (if any) pairs of events co-occur.

In [1]:
import pandas as pd
import scipy.stats as stats
import statsmodels.stats.multitest
from statsmodels.graphics.mosaicplot import mosaic
import matplotlib.pyplot as plt

In [2]:
# Identifiers used to locate each per-cancer input file ("<name>_all_events.tsv").
CANCER_TYPES = [
    'brca',
    'ccrcc',
    'colon',
    'endo',
    'gbm',
    'hnscc',
    'lscc',
    'luad',
    'ovarian',
]

In [3]:
# Run Fisher's exact test on every unordered pair of event columns, per cancer type.
# The five lists below are parallel: exactly one entry is appended to each of them
# for every (cancer, column pair), and they are combined into `results` at the end.
odds_list = list()
pvalues = list()
cancer_types = list()
event1 = list()
event2 = list()
for cancer in CANCER_TYPES:
    df = pd.read_csv(f"{cancer}_all_events.tsv", sep='\t', index_col=0)
    columns = list(df.columns)
    for x, col1 in enumerate(columns[:-1]):
        for col2 in columns[x + 1:]:
            event = pd.crosstab(df[col1], df[col2])
            # Only a 2x2 contingency table is tested. When either column does not
            # contribute exactly two distinct values the table has another shape;
            # such pairs are recorded with missing odds/p-value (they become NaN in
            # `results`) instead of relying on a catch-all exception handler.
            if event.shape == (2, 2):
                odds, pvalue = stats.fisher_exact(event)
            else:
                odds, pvalue = None, None
            odds_list.append(odds)
            pvalues.append(pvalue)
            cancer_types.append(cancer)
            event1.append(col1)
            event2.append(col2)
# One row per tested pair; "pvalues" holds the raw (uncorrected) p-values here.
results = pd.DataFrame({"odds": odds_list, "pvalues": pvalues, "cancer": cancer_types, "event1": event1, "event2": event2})

In [4]:
# Inspect the p-values before any correction; NaN marks pairs with no test result.
list(results.pvalues)

[0.7427821082360245,
 0.10841977524678842,
 0.8196542748668726,
 1.0,
 0.008952969303426074,
 0.11803996351355131,
 0.036088832952280075,
 0.14503129713030422,
 0.5198042605055643,
 0.27531514896034803,
 0.4102254167295094,
 0.0061298160378585095,
 0.4102254167295094,
 1.0,
 0.2146876444357845,
 nan,
 0.19205874972539136,
 0.0494025353509711,
 0.12072149151364649,
 8.610549860704719e-05,
 nan,
 nan,
 nan,
 nan,
 0.03373455695101811,
 0.0016984396630398578,
 0.1223598741648887,
 0.0014092497421768718,
 0.21255391433556267,
 0.061004572201791334,
 0.12877638672542235,
 0.008618667366069387,
 0.3743380719475296,
 nan,
 0.5591244398331112,
 0.7497479040377537,
 0.4942335941839887,
 nan,
 1.0,
 0.02752778760031328,
 nan,
 0.23248664288263118,
 nan,
 0.0987318219193987,
 nan,
 0.022974388613950673,
 0.04116605859191566,
 1.0,
 0.009789401437705448,
 0.17953978976267032,
 0.0842162326006529,
 0.069074666038114,
 1.2564609578316005e-05,
 0.4590370893985266,
 0.013643940607435727,
 0.0252730045

In [5]:
## Apply Correction Values
# Drop pairs without a p-value, then replace the raw p-values with their
# 'fdr_bh'-adjusted counterparts (second element of the multipletests result).
# NOTE: this rebinds `results` with adjusted values under the same "pvalues" column,
# so re-running this cell without re-running the cell that builds `results`
# would apply the correction a second time.
tested = results.dropna(subset=['pvalues'])
print(list(tested.pvalues))
adjusted = statsmodels.stats.multitest.multipletests(tested['pvalues'], method='fdr_bh')[1]
# .assign builds a new frame rather than writing a column into the dropna result,
# which avoids pandas' SettingWithCopyWarning on that assignment.
results = tested.assign(pvalues=adjusted)

[0.7427821082360245, 0.10841977524678842, 0.8196542748668726, 1.0, 0.008952969303426074, 0.11803996351355131, 0.036088832952280075, 0.14503129713030422, 0.5198042605055643, 0.27531514896034803, 0.4102254167295094, 0.0061298160378585095, 0.4102254167295094, 1.0, 0.2146876444357845, 0.19205874972539136, 0.0494025353509711, 0.12072149151364649, 8.610549860704719e-05, 0.03373455695101811, 0.0016984396630398578, 0.1223598741648887, 0.0014092497421768718, 0.21255391433556267, 0.061004572201791334, 0.12877638672542235, 0.008618667366069387, 0.3743380719475296, 0.5591244398331112, 0.7497479040377537, 0.4942335941839887, 1.0, 0.02752778760031328, 0.23248664288263118, 0.0987318219193987, 0.022974388613950673, 0.04116605859191566, 1.0, 0.009789401437705448, 0.17953978976267032, 0.0842162326006529, 0.069074666038114, 1.2564609578316005e-05, 0.4590370893985266, 0.013643940607435727, 0.02527300454453992, 0.16482177178979013, 0.030775026367989615, 0.030775026367989615, 0.3313752355813965, 0.174337260

In [6]:
# Keep pairs whose "pvalues" entry is below 0.1, ordered by the two event names.
results.loc[results['pvalues'] < 0.1].sort_values(by=['event1', 'event2'])

Unnamed: 0,odds,pvalues,cancer,event1,event2
48,44.0,0.073421,endo,5_gain_event,13_loss_event
4,3.952381,0.072323,brca,5_gain_event,20_gain_event
19,19.529412,0.003444,ccrcc,5_gain_event,20_gain_event
75,5.789474,0.010938,hnscc,5_gain_event,5_loss_event
105,5.632184,0.094735,luad,5_gain_event,5_loss_event
31,5.601852,0.072323,colon,5_gain_event,8_gain_event
52,106.25,0.000754,endo,5_loss_event,13_loss_event
82,3.948718,0.068697,hnscc,5_loss_event,13_loss_event
97,8.870588,8.2e-05,lscc,5_loss_event,13_loss_event
112,9.655172,0.037993,luad,5_loss_event,13_loss_event


In [7]:
# Wide view: one row per (event1, event2) pair, one column per cancer type for each
# remaining value column (odds and pvalues).
results.pivot(index=['event1', 'event2'], columns="cancer")

Unnamed: 0_level_0,Unnamed: 1_level_0,odds,odds,odds,odds,odds,odds,odds,odds,odds,pvalues,pvalues,pvalues,pvalues,pvalues,pvalues,pvalues,pvalues,pvalues
Unnamed: 0_level_1,cancer,brca,ccrcc,colon,endo,gbm,hnscc,lscc,luad,ovarian,brca,ccrcc,colon,endo,gbm,hnscc,lscc,luad,ovarian
event1,event2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
13_loss_event,20_gain_event,1.809783,4.928571,,3.36,0.784615,0.0,0.942308,2.029412,0.321429,0.396346,0.221656,,0.546236,0.990226,0.546236,1.0,0.615338,0.234478
5_gain_event,13_loss_event,0.923077,3.291667,,44.0,8.823529,1.228235,0.831933,2.842105,0.826446,1.0,0.284441,,0.073421,0.103867,0.990226,0.905982,0.103867,0.990226
5_gain_event,20_gain_event,3.952381,19.529412,1.6,8.7,0.393548,1.109375,1.773333,2.121212,1.888889,0.072323,0.003444,0.771206,0.353193,0.890417,1.0,0.545223,0.614581,0.545223
5_gain_event,5_loss_event,1.307692,,3.272727,24.285714,,5.789474,2.142857,5.632184,2.527778,0.967417,,0.291569,0.125315,,0.010938,0.238914,0.094735,0.284441
5_gain_event,8_gain_event,2.317073,2.761364,5.601852,16.4,9.0,1.591837,2.04,2.185507,1.195804,0.282834,0.365826,0.072323,0.164664,0.349808,0.59106,0.311623,0.275922,0.990226
5_gain_event,8_loss_event,0.807453,2.852679,1.84375,0.0,0.0,2.282738,0.933333,1.808612,1.463415,0.992301,0.18526,0.59106,1.0,1.0,0.272911,1.0,0.36539,0.814047
5_loss_event,13_loss_event,2.24,,,106.25,,3.948718,8.870588,9.655172,0.667932,0.316432,,,0.000754,,0.068697,8.2e-05,0.037993,0.688983
5_loss_event,20_gain_event,0.475362,,1.0,2.025,,3.904762,1.127764,5.9375,1.0,0.725308,,1.0,0.671762,,0.284441,0.990226,0.272911,1.0
5_loss_event,8_gain_event,2.444444,,1.261905,4.277778,,2.280936,1.551724,5.25,1.0,0.284441,,0.967417,0.259127,,0.263057,0.528481,0.103867,1.0
5_loss_event,8_loss_event,3.327273,,1.794118,7.904762,,1.52381,2.363636,2.535714,1.054825,0.149333,,0.697742,0.230249,,0.647925,0.144577,0.349808,1.0
