# Analyze Phospho Results for MAPK Signaling

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u
import plot_utils as p

import warnings
warnings.filterwarnings('ignore')

Get tables with MAPK signaling phosphoproteomics.

In [2]:
df = pd.read_csv("csv/phospho_MAPK.csv")
luad_df = pd.read_csv("csv/Single_Cancer/Luad_phospho_MAPK.csv")
e_df = pd.read_csv("csv/Single_Cancer/Endo_phospho_MAPK.csv")
c_df = pd.read_csv("csv/Single_Cancer/Colon_phospho_MAPK.csv")

# Get Count of Significant Sites

In [3]:
print('LUAD')
luad_sig = luad_df.loc[luad_df['Luad_P_Value'] < 0.05]
print('p-val < 0.05 : ', len(luad_sig))
luad_sig_2 = luad_df.loc[luad_df['Luad_P_Value'] < 0.1]
print('p-val < 0.1 : ', len(luad_sig_2))

LUAD
p-val < 0.05 :  20
p-val < 0.1 :  40


In [4]:
print('ENDO')
e_sig = e_df.loc[e_df['Endo_P_Value'] < 0.05]
print('p-val < 0.05 : ', len(e_sig))
e_sig_2 = e_df.loc[e_df['Endo_P_Value'] < 0.1]
print('p-val < 0.1 : ', len(e_sig_2))

ENDO
p-val < 0.05 :  17
p-val < 0.1 :  26


In [5]:
print('sig in both\n')
pval = [0.05, 0.1]
for p in pval:
    sig = df.loc[df['Endo_P_Value'] < p]
    sig = sig.loc[sig['Luad_P_Value'] < p]
    print('p-val < ',p,' : ', len(sig))
    print(list(sig.Phospho), '\n')

sig in both

p-val <  0.05  :  3
['SOS1_S1161', 'MKNK2_S220', 'NFKB2_S858'] 

p-val <  0.1  :  4
['SOS1_S1161', 'MKNK2_S220', 'NFKB2_S858', 'SOS1_S1229'] 



# Get df with colors of legend from Figure

In [6]:
def check_status_color(site, df, cancer):
    df = df.loc[df['Phospho'] == site]
    if float(df[cancer+'_P_Value']) < .05:
        color = 'dark_'
    else:
        color = ''
    if float(df[cancer+'_Median']) < 0:
        color += 'blue'
    elif float(df[cancer+'_Median']) > 0:
        color += 'red'
       
    
    return color
    

In [7]:
e_sig_2['color'] = e_sig_2['Phospho'].apply(lambda x: check_status_color(x, e_sig_2, 'Endo'))
#e_sig_2.loc[e_sig_2['Phospho'] == 'DAXX_S714'] #MKNK1_S401, DAXX_S714
e_sig_2

Unnamed: 0,Phospho,Endo_P_Value,Endo_Median,color
0,MKNK2_S220,0.001348,0.947,dark_red
1,NFKB2_S858,0.001348,0.69285,dark_red
2,RAF1_T330,0.002399,1.12066,dark_red
3,TAB1_S378,0.002916,0.4635,dark_red
4,MKNK1_S401,0.00352,0.478,dark_red
5,MAP3K11_S35,0.00352,0.7352,dark_red
6,STK3_S413,0.008592,0.61475,dark_red
7,CACNB3_S393,0.010474,0.698,dark_red
8,ARAF_S260,0.01137,0.32061,dark_red
9,DAXX_S714,0.012088,0.2689,dark_red


In [8]:
luad_sig_2['color'] = luad_sig_2['Phospho'].apply(lambda x: check_status_color(x, luad_sig_2, 'Luad'))
#luad_sig_2.loc[luad_sig_2['Phospho'] == 'PRKCD_S683']
luad_sig_2

Unnamed: 0,Phospho,Luad_P_Value,Luad_Median,color
0,SOS1_S1161,5.5e-05,1.3025,dark_red
1,MKNK1_S209S214,0.015097,1.49,dark_red
2,MKNK2_S220,0.015097,1.3387,dark_red
3,FLNB_S316,0.015097,0.98345,dark_red
4,FLNB_T2585,0.015097,1.00035,dark_red
5,NFKB2_S222,0.015097,1.2369,dark_red
6,NFKB2_S858,0.015097,1.40475,dark_red
7,MAP3K2_S514,0.015097,0.82415,dark_red
8,FLNB_T519,0.016391,0.7862,dark_red
9,MAP2K2_S222,0.01679,0.69985,dark_red


# Get counts of sites affected for each protein

In [9]:
# proteins affected multiple times p < .1
luad_sig_2['Protein'], luad_sig_2['Site'] = luad_sig_2['Phospho'].str.split('_', 1).str
luad = luad_sig_2.Protein.value_counts().to_frame('Luad_Sig_Phosphosites')

e_sig_2['Protein'], e_sig_2['Site'] = e_sig_2['Phospho'].str.split('_', 1).str
ec = e_sig_2.Protein.value_counts().to_frame('Endo_Sig_Phosphosites')

m = luad.join(ec, how = 'outer').sort_values(by = ['Luad_Sig_Phosphosites', 'Endo_Sig_Phosphosites'], ascending = False)
m.head(7)

m.to_csv('KRAS_Table_1.csv')

In [10]:
# Check - Get all sites for a protein
df['Protein'], df['Site'] = df['Phospho'].str.split('_', 1).str
#df.loc[df['Protein'] == 'NFKB2']

# Check non-NaN measurements in Colon in the MAPK pathway

In [11]:
# does colon have data for proteins sig in other cancers?
e_list = list(e_sig_2.Protein)
luad_list = list(luad_sig_2.Protein)
prot = list(set(e_list + luad_list))
print('Num proteins with sig sites in endo or luad:', len(prot), 'proteins')

prot_sig = c_df[c_df.Protein.isin(prot)].dropna()
print(len(prot_sig.Protein.value_counts()), 'of those proteins found in colon')


Num proteins with sig sites in endo or luad: 36 proteins


AttributeError: 'DataFrame' object has no attribute 'Protein'

In [None]:
# does colon have data for sites sig in other cancers?
e_list = list(e_sig_2.Phospho)
luad_list = list(luad_sig_2.Phospho)
sites = list(set(e_list + luad_list))
print('Num sig sites in endo or luad:', len(sites) ,'sites')

sites_sig = c_df[c_df.Phospho.isin(sites)]
print(len(sites_sig.Phospho.value_counts()), 'found in colon')