# Using pandas to build the tables of dysregulated proteins from the core table of each cell line

In [3]:
import numpy as np
import pandas as pd

<b>
Constant definitions for the following values:
    
- inhibitors of some kinases
- time points
- stimuli
</b>

In [40]:
# Cell lines processed by this notebook; each one has its own
# '<cell_line>_main.csv' file under `input_files_path`.
cell_lines = [
    'BT20',
    'BT549',
    'MCF7',
    'UACC812',
]

# Kinase inhibitors, keyed by a short label.
inhibitor_dict = {
    'AKTi': 'GSK690693',
    'FGFR1': 'PD173074',
}

# Stimuli, as they are spelled in the 'Stimulus' column once cleaned.
list_of_activators = [
    'EGF',
    'FGF1',
    'HGF',
    'IGF1',
    'INS',
    'NRG1',
    'PBS',
    'Serum',
]

# Value of the 'Inhibitor' column selected when building the stimulus tables.
DMSO = 'DMSO'

# Time points.
T = [
    '0min',
    '5min',
    '15min',
    '30min',
    '60min',
    '2hr',
    '4hr',
]

# Directory holding the per-cell-line input CSV files.
input_files_path = '../00_InputData/experimental/CSV/'

In [63]:
def read_clean(cell_line):
    """Load and tidy the main CSV table of one cell line.

    The file '<cell_line>_main.csv' is read from `input_files_path`, skipping
    the first 2 lines (3 for 'UACC812'). The names of the first four columns
    are taken from the first data row, which is then discarded, and the
    'Cell Line' column is removed. In the 'Stimulus' column, missing values
    become the string 'None' and 'Insulin' is renamed to 'INS'.

    Parameters
    ----------
    cell_line : str
        Cell line name used as the file name prefix.

    Returns
    -------
    pandas.DataFrame
        The cleaned table.
    """
    # The UACC812 file carries one more leading line than the others.
    n_skipped = 3 if cell_line == 'UACC812' else 2
    table = pd.read_csv(input_files_path + cell_line + '_main.csv', skiprows=n_skipped)

    # First four names live in the first data row; the rest are already in the header.
    table.columns = [*table.iloc[0, :4], *table.columns[4:]]
    table = table.drop(index=0).drop(columns='Cell Line')

    stimulus = table['Stimulus'].fillna('None')
    table['Stimulus'] = stimulus.mask(stimulus == 'Insulin', 'INS')
    return table

In [59]:
def get_map_antibody_genes(cell_line):
    """Build a mapping from upper-cased antibody names to gene names.

    The mapping is read from the 'BT20_main.csv' file under
    `input_files_path`: the first data row (from the fifth column on)
    provides the antibody names and the second data row the gene names.

    NOTE(review): `cell_line` is not used; the BT20 file is always read.
    Presumably the antibody panel is shared across cell lines - confirm.

    Parameters
    ----------
    cell_line : str
        Currently ignored.

    Returns
    -------
    dict
        Upper-cased antibody name -> gene name.
    """
    raw = pd.read_csv(input_files_path + 'BT20' + '_main.csv')
    # The first four columns are skipped; only the remaining ones are mapped.
    antibodies = (name.upper() for name in raw.iloc[0, 4:])
    genes = raw.iloc[1, 4:]
    return dict(zip(antibodies, genes))

In [60]:
def from_col_to_row(df_tmp):
    """Expand each row of ``df_tmp`` into one row per site named in its label.

    Every index label is split on '.' into entries, and every entry on '_'
    into a leading protein name followed by site tokens. For each token:

    * a token starting with 'p' or 'P' whose second character is not a digit
      yields the label '<protein>_<token without its first character>';
    * otherwise, a token that starts with neither a digit nor a lower-case
      'p' yields the label '<protein>_<token>';
    * any other token is ignored, as are empty tokens and a bare 'p'/'P'
      (which previously raised ``IndexError``).

    The source row is repeated once per label it yields. Rows are selected by
    position in a single pass, so duplicated index labels in ``df_tmp`` are
    handled and no per-row concatenation is needed.

    Parameters
    ----------
    df_tmp : pandas.DataFrame
        Table whose index holds string labels.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns as ``df_tmp``, indexed by the
        generated '<protein>_<site>' labels, in order of appearance.
    """
    labels = []
    positions = []
    for position, el in enumerate(df_tmp.index):
        for prot in el.split('.'):
            protein, *sites = prot.split('_')
            for site in sites:
                # Nothing to extract from an empty token or a lone prefix.
                if site in ('', 'p', 'P'):
                    continue
                if site[0] in ('p', 'P') and not site[1].isdigit():
                    # Strip the one-letter prefix from the site name.
                    labels.append(protein + '_' + site[1:])
                    positions.append(position)
                elif not site[0].isdigit() and site[0] != 'p':
                    labels.append(protein + '_' + site)
                    positions.append(position)
    # One positional selection instead of growing a frame row by row.
    df_tmpc = df_tmp.iloc[positions]
    df_tmpc.index = labels
    return df_tmpc

## <b> Construct tables for the different cell lines and activators under DMSO</b>
These tables will be used to call the differentially phosphorylated proteins using limma's t-test.

The networks are then built using both OBRWR and PHONEMeS.

In [64]:
# For every cell line and stimulus, write a tab-separated table with one row
# per '<protein>_<site>' label and one column per '<stimulus>.<timepoint>' sample.
for cell_line in cell_lines:
    df = read_clean(cell_line)
    # Only samples recorded under DMSO are used in this cell.
    is_dmso = df['Inhibitor'] == DMSO
    df_control = df[is_dmso & (df['Timepoint'] == '0min')]
    for activator in list_of_activators:
        # Extracting DMSO vs Stimuli: control rows first, then the stimulated ones.
        df_tmp = pd.concat([df_control, df[is_dmso & (df['Stimulus'] == activator)]])
        # Each sample is labelled '<stimulus>.<timepoint>'.
        df_tmp['Stimulus'] = df_tmp['Stimulus'] + '.' + df_tmp['Timepoint']
        df_tmp = (
            df_tmp.drop(columns=['Inhibitor', 'Timepoint'])
            .set_index('Stimulus')
            .rename_axis(None)
            .transpose()
        )
        # Making the protein names match with PHONEMeS PKN IDs.
        df_act = from_col_to_row(df_tmp)
        out_path = '../00_InputData/stimuli/' + cell_line + '_' + activator + '.tsv'
        df_act.to_csv(out_path, sep='\t')