# KSTAR Example Analysis using BCR-ABL

## Way 1 : Use run_star_analysis to do the following
1. Map data
2. Run KSTAR analysis 
3. Generate random experiments
4. Find FPR p-value
5. Normalize data
6. Save all data

In [1]:
import pandas as pd
from kstar import kstar_runner, helpers

odir = '/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL'
name = 'BCR-ABL'
run_log = helpers.get_logger(f"{name}_run", f"{odir}/{name}_run.log")

experiment = pd.read_csv('/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/PRE-MAPPED/experiment.1704.tsv', sep = '\t')
experiment.rename(columns={'average:data:treated_to_untreated:EOE(drug washout)' : 'EOE',
       'average:data:treated_to_untreated:HDP3(3hrs post treatment)' :'HDP3',
       'average:data:treated_to_untreated:HDP6(6hrs post treatment)' :'HDP6',
       'average:data:treated_to_untreated:pre-treatment' : 'PRE'}, inplace=True)

map_columns = {'peptide':'aligned_peptides', 'accession_id':'query_accession'}
data_columns = ['EOE','HDP3','HDP6','PRE']
window = 7
phospho_types = ['Y']
activity_agg = 'mean'
threshold = 0.5
greater = True
normalize = True
num_random_experiments = 2
target_alpha = 0.05

kstar_runner.run_kstar_analysis(
    run_log, 
    odir, 
    name, 
    experiment, 
    data_columns, 
    map_columns, 
    window, 
    phospho_types, 
    activity_agg, 
    threshold, 
    greater, 
    normalize,
    num_random_experiments, 
    target_alpha
)



## Way 2 : Manually Run STAR analysis

In [1]:
import pandas as pd
from kstar import kstar_runner

from kstar.activity import kstar_activity
from kstar.mapper import experiment_mapper
from kstar import helpers, config

import pickle

In [2]:
odir = '/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL'
name = 'BCR-ABL'

### 1. Load pre-mapped experiment and map experiment


In [3]:
# Load Data
pre_mapped = pd.read_csv('/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/PRE-MAPPED/experiment.1704.tsv', sep = '\t')
pre_mapped.rename(columns={'average:data:treated_to_untreated:EOE(drug washout)' : 'EOE',
       'average:data:treated_to_untreated:HDP3(3hrs post treatment)' :'HDP3',
       'average:data:treated_to_untreated:HDP6(6hrs post treatment)' :'HDP6',
       'average:data:treated_to_untreated:pre-treatment' : 'PRE'}, inplace=True)

In [4]:
#set column names dictionary, need at least a site and accession or peptide/accession, or all three
map_dict = {'peptide':'aligned_peptides', 'accession_id':'query_accession'}
data_columns = ['EOE','HDP3','HDP6','PRE']

if not os.path.exists(f"{odir}/MAPPED_DATA"): 
    os.mkdir(f"{odir}/MAPPED_DATA")   
mapping_log = helpers.get_logger(f"mapping_{name}", f"{odir}/MAPPED_DATA/mapping_{name}.log")
exp_mapper = experiment_mapper.ExperimentMapper(experiment = pre_mapped,
                                                columns = map_dict, 
                                                logger = mapping_log,
                                                data_columns = data_columns)
exp_mapper.experiment.to_csv(f"{odir}/MAPPED_DATA/{name}_mapped.tsv", sep = '\t', index = False)



### 2. Run Kinase Activity Analysis on Experiment

In [5]:
# experiment = pd.read_csv('/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/MAPPED_DATA/BCR-ABL_mapped.tsv', sep = '\t')
experiment = exp_mapper.experiment

In [6]:
#get logger for analysis
if not os.path.exists(f"{odir}/RESULTS"): 
    os.mkdir(f"{odir}/RESULTS") 
activity_log = helpers.get_logger(name, f'{odir}/RESULTS/{name}_kstar_activity.log')

In [7]:
phospho_types = ['Y'] #running on this type of kinase/substrate network

#preamble, setup the network dictionary. Here, using the default pickles from config
# only have to load one of these if running analysis on only one substrate type
networks = {}
networks['Y'] = pickle.load(open(config.NETWORK_Y_PICKLE, "rb" ) )
#networks['ST'] = pickle.load(open(config.NETWORK_ST_PICKLE, "rb" ) )

In [8]:
agg = 'mean'
threshold = 0.5
greater = True
kinact_dict = kstar_activity.run_kstar_analysis(experiment, activity_log, networks, phospho_types = phospho_types, data_columns = None, agg =agg, threshold = threshold,  greater = greater)

### 3. Normalize experiment activity results

Unnamed: 0_level_0,data:EOE,data:HDP3,data:HDP6,data:PRE
Kinase Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABL1,0.000536,0.003023,0.00122,1.324631e-06
ABL2,0.000788,0.001762,0.001137,6.324278e-07
BLK,0.051136,0.078854,0.026532,0.01095167
BMX,0.073528,0.156551,0.031621,0.008024622
BTK,0.032667,0.265317,0.004215,0.002742062
CSF1R,0.065534,0.160495,0.026521,0.0002546213
EGFR,0.069633,0.171836,0.126768,0.003029107
EPHA1,0.071883,0.069989,0.056011,0.01614127
EPHA2,0.066034,0.031628,0.027317,0.002986685
EPHA3,0.031664,0.033446,0.012608,0.005133009


In [10]:
num_random_experiments=2
target_alpha=0.05
kstar_activity.normalize_analysis(kinact_dict, activity_log, num_random_experiments, target_alpha)

### 4. Save Results

In [None]:
kstar_activity.save_kstar(kinact_dict, name, odir)

In [11]:
kinact = kinact_dict['Y']
kinact.random_experiments.sum()

KSTAR_ACCESSION    Q96IY4Q96IY4Q96IY4Q8NCR9Q8IUK8Q9BX69Q9BX69P313...
KSTAR_SITE         Y156Y159Y380Y218Y116Y169Y178Y140Y162Y529Y590Y6...
data:EOE:0                                                        94
data:EOE:1                                                        94
data:HDP3:0                                                       81
data:HDP3:1                                                       81
data:HDP6:0                                                       92
data:HDP6:1                                                       92
data:PRE:0                                                       161
data:PRE:1                                                       161
dtype: object

In [13]:
experiment = kinact.evidence.groupby(['KSTAR_ACCESSION', 'KSTAR_SITE']).agg(kinact.aggregate)
for col in kinact.data_columns:
    print(f"{col}: {len(experiment[experiment[col] >= kinact.threshold])}")

data:EOE: 94
data:HDP3: 81
data:HDP6: 92
data:PRE: 161
