# KSTAR Example Analysis using BCR-ABL

In [1]:
import pandas as pd

from kstar.activity import kstar_activity
from kstar.mapper import experiment_mapper
from kstar import helpers, config

import pickle

In [2]:
odir = '/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL'
name = 'BCR-ABL'

## 1. Load pre-mapped experiment and map experiment


In [3]:
# Load Data
pre_mapped = pd.read_csv('/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/PRE-MAPPED/experiment.1704.tsv', sep = '\t')
pre_mapped.rename(columns={'average:data:treated_to_untreated:EOE(drug washout)' : 'EOE',
       'average:data:treated_to_untreated:HDP3(3hrs post treatment)' :'HDP3',
       'average:data:treated_to_untreated:HDP6(6hrs post treatment)' :'HDP6',
       'average:data:treated_to_untreated:pre-treatment' : 'PRE'}, inplace=True)

In [4]:
#set column names dictionary, need at least a site and accession or peptide/accession, or all three
map_dict = {'peptide':'aligned_peptides', 'accession_id':'query_accession'}

if not os.path.exists(f"{odir}/MAPPED_DATA"): 
    os.mkdir(f"{odir}/MAPPED_DATA")   
mapping_log = helpers.get_logger(f"mapping_{name}", f"{odir}/MAPPED_DATA/mapping_{name}.log")
exp_mapper = experiment_mapper.ExperimentMapper(experiment = pre_mapped,
                                                columns = map_dict, 
                                                logger = mapping_log)
exp_mapper.experiment.to_csv(f"{odir}/MAPPED_DATA/{name}_mapped.tsv", sep = '\t', index = False)



## 2. Run Kinase Activity Analysis on Experiment

In [5]:
# experiment = pd.read_csv('/Users/bj8th/Documents/GitHub/KSTAR/analysis/BCR-ABL/MAPPED_DATA/BCR-ABL_mapped.tsv', sep = '\t')
experiment = exp_mapper.experiment

In [6]:
activity_log = helpers.get_logger(name, f'{odir}/RESULTS/{name}_kstar_activity.log')

In [7]:
phospho_types = ['Y'] #running on this type of kinase/substrate network

#preamble, setup the network dictionary. Here, using the default pickles from config
# only have to load one of these if running analysis on only one substrate type
networks = {}
networks['Y'] = pickle.load(open(config.NETWORK_Y_PICKLE, "rb" ) )
#networks['ST'] = pickle.load(open(config.NETWORK_ST_PICKLE, "rb" ) )

In [10]:
experiment

Unnamed: 0,MS_id,query_accession,gene,locus,protein_name,species,peptide,mod_sites,gene_site,aligned_peptides,modification_types,EOE,HDP3,HDP6,PRE,KSTAR_ACCESSION,KSTAR_PEPTIDE,KSTAR_SITE,KSTAR_NUM_COMPENDIA,KSTAR_NUM_COMPENDIA_CLASS
0,6086967,P18433,PTPRA,,Receptor-type tyrosine-protein phosphatase alpha,homo sapiens,Y798,Y798,PTPRA_Y798,YIDAFSDyANFK,Phosphotyrosine,0.1468,0.0000,0.0000,1.0,P18433,YIDAFSDyANFK,Y798,4,2
1,6086968,P68363,TUBA1B,,Tubulin alpha-1B chain,homo sapiens,Y357,Y357,TUBA1B_Y357,GFKVGINyQPPTVVP,Phosphotyrosine,1.1824,0.9501,0.9253,1.0,P68363,GFKVGINyQPPTVVP,Y357,2,1
2,6086969,P50395,GDI2,,Rab GDP dissociation inhibitor beta,homo sapiens,Y203,Y203,GDI2_Y203,DYLDQPCyETINRIK,Phosphotyrosine,0.9973,0.8615,0.9303,1.0,P50395,DYLDQPCyETINRIK,Y203,4,2
3,6086970,Q96HC4,PDLIM5,,PDZ and LIM domain protein 5,homo sapiens,Y251,Y251,PDLIM5_Y251,VERYTEFyHVPTHSD,Phosphotyrosine,0.0010,0.0269,0.0680,1.0,Q96HC4,VERYTEFyHVPTHSD,Y251,4,2
4,6086971,P57075,UBASH3A,,Ubiquitin-associated and SH3 domain-containing...,homo sapiens,Y9,Y9,UBASH3A_Y9,AAGETQLyAKVSNKL,Phosphotyrosine,0.0481,0.0025,0.0038,1.0,P57075,AAGETQLyAKVSNKL,Y9,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,6087125,P60900,PSMA6,,Proteasome subunit alpha type-6,homo sapiens,Y159,Y159,PSMA6_Y159,YKCDPAGyYCGFKAT,Phosphotyrosine,0.1595,3.3971,2.8981,1.0,P60900,YKCDPAGyYCGFKAT,Y159,3,1
157,6087126,P21980,TGM2,,Protein-glutamine gamma-glutamyltransferase 2,homo sapiens,Y369,Y369,TGM2_Y369,QEKSEGTyCCGPVPV,Phosphotyrosine,0.8797,0.8897,0.8507,1.0,P21980,QEKSEGTyCCGPVPV,Y369,4,2
158,6087127,P49840,GSK3A,,Glycogen synthase kinase-3 alpha,homo sapiens,Y279,Y279,GSK3A_Y279,RGEPNVSyICSRYYR,Phosphotyrosine,0.9997,0.9837,0.9561,1.0,P49840,RGEPNVSyICSRYYR,Y279,5,2
159,6087128,P51692,STAT5B,,Signal transducer and activator of transcripti...,homo sapiens,yYTPVPCESATAK,Y682,STAT5B_Y682,KDEVYSKyYTPVPCE,Phosphotyrosine,1.2137,0.7546,0.0329,1.0,P51692,KDEVYSKyYTPVPCE,Y682,2,1


In [11]:
agg = 'mean'
threshold = 0.5
greater = True
kinact_dict = kstar_activity.run_kstar_analysis(experiment, activity_log, networks, phospho_types = phospho_types, data_columns = None, agg =agg, threshold = threshold,  greater = greater)

ValueError: No objects to concatenate

## 3. Normalize experiment activity results

In [8]:
rand_experiments, rand_kinact = kstar_activity.normalize_analysis(kinact_dict, activity_log, 2)

# 4. Save Results