PhosphoPICK input

In [1]:
import pandas as pd
from kstar.prune import run_pruning, save_pruning, save_run_information, Pruner
from kstar import config, helpers

# Logger handed to the pruning routines (second argument is presumably the log file path -- confirm in kstar.helpers).
log = helpers.get_logger('kstar_log', "pruning.log")

# --- Pruning parameters for the PhosphoPICK network ---
phospho_type = 'ST'        # type of phosphomodification to generate networks for ('Y', 'ST')
kinase_size = 2000         # minimum number of substrates all kinases must be connected to
site_limit = 10            # maximum number of kinases a particular substrate can be connected to
num_networks = 50          # number of pruned networks to generate
# Identifier for this pruning run. Previously misspelled 'PhsphoPICK_pruning';
# NOTE: outputs from earlier runs may still carry the old spelling.
network_id = 'PhosphoPICK_pruning'
use_compendia = True       # whether kinases are forced to be connected to the same substrate study bias distribution
acc_col = 'substrate_acc'  # column that contains Uniprot Accession ID of each substrate in the weighted network
site_col = 'site'          # column that contains residue and site number of each substrate in the weighted network
netcols_todrop = None      # no additional non-weight columns are named explicitly
odir = './PRUNED NETWORKS/PhosphoPICK'  # where the final pruned networks should be saved
PROCESSES = 4              # number of cores to use (> 1 for multiprocessing)


# --- Load the weighted kinase-substrate network ---
# '-' entries in the file are read as missing values.
network_file = './PRUNED NETWORKS/PhosphoPICK/PhosphoPICK_2020-02-26_all_matrix.csv'
network = pd.read_csv(network_file, na_values=['-'])

In [2]:
# Run the pruning with the parameters configured in the previous cell.
pruner = run_pruning(
    network,
    log,
    phospho_type,
    kinase_size,
    site_limit,
    num_networks,
    network_id,
    odir,
    use_compendia,
    acc_col=acc_col,
    site_col=site_col,
    netcols_todrop=netcols_todrop,
    PROCESSES=PROCESSES,
)

# Save the pruned networks, then the information describing this run.
pruner.save_networks()
pruner.save_run_information()

Following non-numeric columns were identified: substrate_id, substrate_name, substrate_acc, site, pep
It is assumed that these columns do not contain kinase-substrate weights and were removed. If this incorrect, please make sure weight containing columns only contain numeric values
Duplicate entries found in network, removing repeat rows


KSDB input

In [2]:
import pandas as pd
from kstar.prune import run_pruning, save_pruning, save_run_information, Pruner
from kstar import config, helpers

# Logger handed to the pruning routines (second argument is presumably the log file path -- confirm in kstar.helpers).
log = helpers.get_logger('kstar_log', "pruning.log")

# --- Pruning parameters for the KSDB network ---
phospho_type = 'ST'        # type of phosphomodification to generate networks for ('Y', 'ST')
kinase_size = 30           # minimum number of substrates all kinases must be connected to
site_limit = 15            # maximum number of kinases a particular substrate can be connected to
num_networks = 100         # number of pruned networks to generate
network_id = 'KSDB_pruning'
use_compendia = False      # whether kinases are forced to be connected to the same substrate study bias distribution
acc_col = 'substrate_acc'  # column that contains Uniprot Accession ID of each substrate in the weighted network
site_col = 'site'          # column that contains residue and site number of each substrate in the weighted network
netcols_todrop = None      # no additional non-weight columns are named explicitly
odir = './PRUNED NETWORKS/KSDB'  # where the final pruned networks should be saved
PROCESSES = 4              # number of cores to use (> 1 for multiprocessing)
# A kinase column is kept only if at least this many rows have a weight > 0.
min_kinase_substrates = 50


# --- Load the weighted kinase-substrate network (tab-separated) ---
network_file = './KSDB_prune_input_for_KSTAR.txt'
network = pd.read_csv(network_file, sep="\t", keep_default_na=True)

# A Pruner is built here only to obtain its list of kinase columns and its
# processed copy of the network; the actual pruning run happens in a later cell.
pruner = Pruner(network, log, phospho_type, acc_col=acc_col, site_col=site_col, nonweight_cols=netcols_todrop)

# Keep the two substrate identifier columns plus every kinase that has enough
# positive-weight entries. The boolean mask is summed directly rather than
# being converted to a Python list first.
filtered_kinases = [acc_col, site_col] + [
    kinase
    for kinase in pruner.kinases
    if (pruner.network[kinase] > 0).sum() >= min_kinase_substrates
]

# Restrict the loaded network to the retained columns for the pruning run.
network = network[filtered_kinases]

Following non-numeric columns were identified: substrate_acc, site
It is assumed that these columns do not contain kinase-substrate weights and were removed. If this incorrect, please make sure weight containing columns only contain numeric values
Duplicate entries found in network, removing repeat rows


In [5]:
# Run the pruning on the filtered KSDB network with the configured parameters.
pruner = run_pruning(
    network,
    log,
    phospho_type,
    kinase_size,
    site_limit,
    num_networks,
    network_id,
    odir,
    use_compendia,
    acc_col=acc_col,
    site_col=site_col,
    netcols_todrop=netcols_todrop,
    PROCESSES=PROCESSES,
)

# Save the pruned networks, then the information describing this run.
pruner.save_networks()
pruner.save_run_information()

Following non-numeric columns were identified: substrate_acc, site
It is assumed that these columns do not contain kinase-substrate weights and were removed. If this incorrect, please make sure weight containing columns only contain numeric values
Duplicate entries found in network, removing repeat rows


In [6]:
import pandas as pd
from kstar.prune import run_pruning, save_pruning, save_run_information, Pruner
from kstar import config, helpers

# Logger handed to the pruning routines (second argument is presumably the log file path -- confirm in kstar.helpers).
log = helpers.get_logger('kstar_log', "pruning.log")

# --- Pruning parameters for the KSDB network ---
phospho_type = 'ST'        # type of phosphomodification to generate networks for ('Y', 'ST')
kinase_size = 50           # minimum number of substrates all kinases must be connected to
site_limit = 15            # maximum number of kinases a particular substrate can be connected to
num_networks = 20          # number of pruned networks to generate
network_id = 'KSDB_pruning'
use_compendia = False      # whether kinases are forced to be connected to the same substrate study bias distribution
acc_col = 'substrate_acc'  # column that contains Uniprot Accession ID of each substrate in the weighted network
site_col = 'site'          # column that contains residue and site number of each substrate in the weighted network
netcols_todrop = None      # no additional non-weight columns are named explicitly
odir = './PRUNED NETWORKS/KSDB'  # where the final pruned networks should be saved
PROCESSES = 4              # number of cores to use (> 1 for multiprocessing)
# A kinase column is kept only if its count of weights > 0 reaches
# kinase_size plus this margin.
kinase_size_margin = 10


# --- Load the weighted kinase-substrate network (tab-separated) ---
network_file = './KSDB_prune_input_for_KSTAR.txt'
network = pd.read_csv(network_file, sep="\t", keep_default_na=True)

# A Pruner is built here only to obtain its list of kinase columns and its
# processed copy of the network; the actual pruning run happens in a later cell.
pruner = Pruner(network, log, phospho_type, acc_col=acc_col, site_col=site_col, nonweight_cols=netcols_todrop)

# Keep the two substrate identifier columns plus every kinase that has enough
# positive-weight entries. The boolean mask is summed directly rather than
# being converted to a Python list first.
min_kinase_substrates = kinase_size + kinase_size_margin
filtered_kinases = [acc_col, site_col] + [
    kinase
    for kinase in pruner.kinases
    if (pruner.network[kinase] > 0).sum() >= min_kinase_substrates
]

# Restrict the loaded network to the retained columns for the pruning run.
network = network[filtered_kinases]

Following non-numeric columns were identified: substrate_acc, site
It is assumed that these columns do not contain kinase-substrate weights and were removed. If this incorrect, please make sure weight containing columns only contain numeric values
Duplicate entries found in network, removing repeat rows


In [7]:
# Run the pruning on the filtered KSDB network with the configured parameters.
pruner = run_pruning(
    network,
    log,
    phospho_type,
    kinase_size,
    site_limit,
    num_networks,
    network_id,
    odir,
    use_compendia,
    acc_col=acc_col,
    site_col=site_col,
    netcols_todrop=netcols_todrop,
    PROCESSES=PROCESSES,
)

# Save the pruned networks, then the information describing this run.
pruner.save_networks()
pruner.save_run_information()

Following non-numeric columns were identified: substrate_acc, site
It is assumed that these columns do not contain kinase-substrate weights and were removed. If this incorrect, please make sure weight containing columns only contain numeric values
Duplicate entries found in network, removing repeat rows
