# Introduction
This notebook sets new preferred names in the globalKinaseMap, or changes the kinase names in the prediction data to their preferred names.

In [1]:
import pandas as pd

In [2]:
# Root directory of the data files.
#   local:  '../../'
#   sammas: '/Volumes/naegle_lab/Kinase Predictions/'
base = "/Volumes/naegle_lab/Kinase Predictions/"

# Prediction data type to process:
#   'all'  = all human phosphosites
#   '2exp' = with > 2 experimental evidence
#   '3exp' = with > 3 experimental evidence
d_type = "3exp"


In [18]:
# Location of the globalKinaseMap
KinaseMap = f'{base}Data/Map/globalKinaseMap.csv'

# Input: prediction matrices, one per predictor, for the selected data type
PhosphoPICK = f'{base}Data/Final/PhosphoPICK/PhosphoPICK_2020-02-26_{d_type}_matrix.csv'
NetworKIN = f'{base}Data/Final/NetworKIN/NetworKIN_2020-02-26_{d_type}_matrix.csv'
GPS = f'{base}Data/Final/GPS5.0/GPS5_2020-02-26_{d_type}_matrix.csv'

# Output: where the matrices with changed names are written
pp_out = f'{base}Data/comparison/Thresh/PhosphoPICK/{d_type}/PhosphoPICK.csv'
nw_out = f'{base}Data/comparison/Thresh/NetworKIN/{d_type}/NetworKIN.csv'
gps_out = f'{base}Data/comparison/Thresh/GPS5.0/{d_type}/GPS.csv'


In [13]:
def change_name(file, output, type, mapping=None):
    """
    Set new preferred names in the globalKinaseMap, or change the preferred names in the prediction data

    Reads the CSV at `file`, applies the name mapping and writes the result to
    `output` without the index. Passing the same path for `file` and `output`
    overwrites the input file.

    Parameters
    ----------
    file: input file location
    output: output file location
    type:
        'change' = change the preferred names in the prediction data for downstream analysis
                   (columns whose header is a key of the mapping are renamed to its value)
        'set' = set new preferred names
                (rows whose 'Kinase Name' is a key of the mapping get its value
                written to 'Preferred Name')
    mapping: optional dict of {Kinase Name: Preferred Name}
        When omitted, the notebook-level `name_change` dictionary is used, so
        existing three-argument calls keep working.

    Raises
    ------
    ValueError
        If `type` is neither 'set' nor 'change'.
    """
    # Reject a mistyped mode before any file is read or written; otherwise the
    # input would be silently copied to `output` unchanged.
    if type not in ('set', 'change'):
        raise ValueError("type must be 'set' or 'change', got {!r}".format(type))

    if mapping is None:
        # Fall back to the dictionary defined in the notebook; it has to exist
        # by the time the function is called.
        mapping = name_change

    df = pd.read_csv(file)

    if type == 'set':
        # set the Preferred Names in the globalKinaseMap, the default Preferred Name = Kinase Name
        for kinase, preferred in mapping.items():
            df.loc[df['Kinase Name'] == kinase, ["Preferred Name"]] = preferred
    else:
        # change the Kinase Name in the prediction data sets to Preferred Names.
        # A single rename call avoids cascading renames (e.g. A->B followed by B->C)
        # and leaves columns that are not in the mapping untouched.
        df = df.rename(columns=mapping)

    df.to_csv(output, chunksize = 1000000, index = False)

### Set Preferred Names that are different from the common Kinase Name
| Kinase Name | Preferred Name | 
|-------------|----------------|
| EGFR        | ERBB1          | 
| TEK         | TIE2           | 
| FLT1        | VEGFR1         | 
| KDR         | VEGFR2         | 
| FLT4        | VEGFR3         | 

In [5]:
# Kinase Name -> Preferred Name, for the kinases whose preferred name
# differs from the common kinase name
name_change = dict(
    EGFR='ERBB1',
    TEK='TIE2',
    FLT1='VEGFR1',
    KDR='VEGFR2',
    FLT4='VEGFR3',
)

In [6]:
# Write the preferred names into the globalKinaseMap.
# Input and output are the same path, so the map file is overwritten in place.
change_name(file=KinaseMap, output=KinaseMap, type='set')

In [20]:
# Switch each predictor's data to the preferred names for downstream analysis.
# Every pair is (input matrix, output file); the order matches the predictors below.
for matrix_file, renamed_file in (
    (PhosphoPICK, pp_out),   # PhosphoPICK
    (NetworKIN, nw_out),     # NetworKIN
    (GPS, gps_out),          # GPS5.0
):
    change_name(matrix_file, renamed_file, 'change')
