# KSTAR Example Analysis using BCR-ABL

## Way 1: Use `run_kstar_analysis` to do the following
1. Map data
2. Run KSTAR analysis 
3. Generate random experiments
4. Find FPR p-value
5. Normalize data
6. Save all data

In [1]:
import pandas as pd
from kstar import kstar_runner, helpers

# Output directory and run name; both are reused to build the logger name and log path.
odir = '/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL'
name = 'BCR-ABL'
run_log = helpers.get_logger(f"{name}_run", f"{odir}/{name}_run.log")

# Short labels for the verbose ratio columns; these labels are the data columns used below.
column_renames = {
    'average:data:treated_to_untreated:EOE(drug washout)': 'EOE',
    'average:data:treated_to_untreated:HDP3(3hrs post treatment)': 'HDP3',
    'average:data:treated_to_untreated:HDP6(6hrs post treatment)': 'HDP6',
    'average:data:treated_to_untreated:pre-treatment': 'PRE',
}
experiment = pd.read_csv(
    '/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/PRE-MAPPED/experiment.1704.tsv',
    sep='\t',
).rename(columns=column_renames)

# Column mapping and data columns handed to the runner.
map_columns = {'peptide': 'aligned_peptides', 'accession_id': 'query_accession'}
data_columns = ['EOE', 'HDP3', 'HDP6', 'PRE']

# Analysis settings, passed positionally to run_kstar_analysis in this order.
# NOTE(review): the meaning of each setting is defined by kstar_runner, not here — confirm there.
window = 7
phospho_types = ['Y']
activity_agg = 'mean'
threshold = 0.5
greater = True
normalize = True
num_random_experiments = 2
target_alpha = 0.05

# Single entry point covering the steps listed in the heading of this section.
kstar_runner.run_kstar_analysis(
    run_log, odir, name, experiment,
    data_columns, map_columns,
    window, phospho_types, activity_agg, threshold, greater,
    normalize, num_random_experiments, target_alpha,
)



AttributeError: 'KinaseActivity' object has no attribute 'rand_kinact'

In [3]:
import pickle
# Load the pickled activity results (presumably written by the run above — confirm).
# The context manager closes the file handle as soon as the object has been read.
# NOTE: unpickling can execute arbitrary code; only load pickle files you trust.
with open('/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/RESULTS/BCR-ABL_kinact.p', "rb") as kinact_file:
    kinact_dict = pickle.load(kinact_file)

In [8]:
# Pick the entry stored under the 'Y' key of the loaded results dictionary.
kinact = kinact_dict['Y']
# Trailing bare expression: the notebook renders the normalized summary as the cell output.
kinact.normalized_summary

## Way 2: Manually run KSTAR analysis

In [1]:
import pandas as pd
from kstar import kstar_runner

from kstar.activity import kstar_activity
from kstar.mapper import experiment_mapper
from kstar import helpers, config

import pickle

In [2]:
# Base directory of this analysis; later cells build their output paths from it.
odir = '/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL'
# Experiment name; later cells use it in logger names and output file names.
name = 'BCR-ABL'

### 1. Load pre-mapped experiment and map experiment


In [3]:
# Read the tab-separated pre-mapped experiment and shorten the verbose
# ratio column headers to the labels used as data columns later on.
column_renames = {
    'average:data:treated_to_untreated:EOE(drug washout)': 'EOE',
    'average:data:treated_to_untreated:HDP3(3hrs post treatment)': 'HDP3',
    'average:data:treated_to_untreated:HDP6(6hrs post treatment)': 'HDP6',
    'average:data:treated_to_untreated:pre-treatment': 'PRE',
}
pre_mapped = pd.read_csv(
    '/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/PRE-MAPPED/experiment.1704.tsv',
    sep='\t',
).rename(columns=column_renames)

In [4]:
# `os` is not imported by the import cell of this walkthrough, so bring it in here.
import os

# Column-name dictionary for the mapper: it needs at least a site and an accession,
# or a peptide and an accession, or all three.
map_dict = {'peptide': 'aligned_peptides', 'accession_id': 'query_accession'}
data_columns = ['EOE', 'HDP3', 'HDP6', 'PRE']

# Make sure the output directory exists before the log file and the mapped
# table are written into it; exist_ok avoids the separate existence check.
mapped_dir = f"{odir}/MAPPED_DATA"
os.makedirs(mapped_dir, exist_ok=True)

mapping_log = helpers.get_logger(f"mapping_{name}", f"{mapped_dir}/mapping_{name}.log")
exp_mapper = experiment_mapper.ExperimentMapper(
    experiment=pre_mapped,
    columns=map_dict,
    logger=mapping_log,
    data_columns=data_columns,
)

# Persist the mapped experiment as a tab-separated file without the index column.
exp_mapper.experiment.to_csv(f"{mapped_dir}/{name}_mapped.tsv", sep='\t', index=False)



### 2. Run Kinase Activity Analysis on Experiment

In [5]:
# Alternative: reload a previously saved mapped experiment from disk instead of
# reusing the in-memory result of the mapping step.
# experiment = pd.read_csv('/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/MAPPED_DATA/BCR-ABL_mapped.tsv', sep = '\t')
# Use the mapped experiment held by the mapper object created in the mapping step.
experiment = exp_mapper.experiment

In [6]:
import os

# The activity log is written under RESULTS; make sure that directory exists
# before a log file is opened inside it.
# NOTE(review): assumes helpers.get_logger does not create missing directories — confirm.
os.makedirs(f"{odir}/RESULTS", exist_ok=True)
activity_log = helpers.get_logger(name, f'{odir}/RESULTS/{name}_kstar_activity.log')

In [7]:
phospho_types = ['Y']  # running on this type of kinase/substrate network

# Preamble: set up the network dictionary, here using the default pickles from config.
# Only one of these has to be loaded when running the analysis on a single substrate type.
# NOTE: unpickling can execute arbitrary code; only load pickle files you trust.
networks = {}
# The context manager closes the pickle file as soon as the network has been read.
with open(config.NETWORK_Y_PICKLE, "rb") as network_file:
    networks['Y'] = pickle.load(network_file)
# with open(config.NETWORK_ST_PICKLE, "rb") as network_file:
#     networks['ST'] = pickle.load(network_file)

In [9]:
# Settings forwarded as keyword arguments to the analysis call below.
agg = 'mean'
threshold = 0.5
greater = True

# Keyword options collected in one place so the call itself stays short.
analysis_options = {
    'phospho_types': phospho_types,
    'data_columns': None,
    'agg': agg,
    'threshold': threshold,
    'greater': greater,
}
kinact_dict = kstar_activity.run_kstar_analysis(
    experiment, activity_log, networks, **analysis_options
)

### 3. Normalize experiment activity results

In [10]:
# Settings passed to every normalization call below.
num_random_experiments = 2
target_alpha = 0.05

# Only the activity objects are used, so iterate the dictionary values directly
# instead of unpacking keys that are never read.
for kinact in kinact_dict.values():
    kinact.run_normalization(num_random_experiments, target_alpha)

In [8]:
# Module-level normalization call on the whole results dictionary; its result is
# unpacked into two values.
# NOTE(review): this cell's execution counter ([8]) is lower than the preceding
# cell's ([10]), so it was run earlier in the session — confirm whether it is still needed.
# NOTE(review): the literal 2 is presumably the number of random experiments — confirm.
rand_experiments, rand_kinact = kstar_activity.normalize_analysis(kinact_dict, activity_log, 2)

### 4. Save Results