# Sensitive Model Search

by adjusting `o_random_seed` and `parameter_generation_seed`

## Init

In [6]:
import os

path = os.getcwd()
# find the string 'project' in the path, return index
index_project = path.find('project')
# slice the path from the index of 'project' to the end
project_path = path[:index_project+7]
# set the working directory
os.chdir(project_path+'\src')
print(f'Project path set to: {os.getcwd()}')

Project path set to: c:\Github\new-peak-project\src


In [7]:
from dotenv import dotenv_values
config = dotenv_values(".env")
print(config["DATA_PATH"])

I:\My Drive\DAWSON PHD PROJECT\Biomarker Data Repository\data\new-peak-project\experiments


In [8]:
from models.ModelBuilder import ModelBuilder
from models.Reaction import Reaction
from models.ReactionArchtype import ReactionArchtype
from models.ArchtypeCollections import *
from models.Utils import *

import matplotlib.pyplot as plt
import seaborn as sns
import roadrunner
import numpy as np
import pandas as pd

# import scikit-learn
from sklearn.linear_model import LinearRegression
# tree models and support vector machines
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# import pearson correlation
from scipy.stats import pearsonr
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from copy import deepcopy

## Analysis

In [None]:
import os 

### parameters 
notebook_name = 'sensitive_model_search'
sub_id = '1'

## Generation of ground truth model 
model_name = 'v3_small_model_52'
o_random_seed = 6
parameter_random_seed = 6
no_observable_species = 5
no_feedback_regulations = 2
specie_value_range = (5, 5000)
param_range = (0.1, 10)
param_multiplier_range = (0.7, 1.3)


## Simulation parameters 
simulation_time = 500 
simulation_step = 100

## Feature data generation 
feature_generation_method = 'uniform'
feature_generation_extra_params = {'min': 0.1, 'max': 10}
feature_generation_size = 1000 
feature_generation_seed = 50 # if -1 then 'o_random_seed' is used
if feature_generation_seed == -1:
    feature_generation_seed = o_random_seed
    
## Data engineering parameters

# Suboptimal Model Generation 
'''
Options: 
- 'feedback_prune': removes feedback regulations from the model 
'''

# Engineering Feature Data 
data_engineering_method = 'dynamic_feature'
''' 
Options: 
- 'last_time_point' : only the last time point of the phosphorylated species is used
- 'dynamic_feature': computes the characteristic 'ten' dynamic feature for each specie data 
'''

## General parameters
parallelise = True
save_figures = True 
experiment_id = notebook_name + '_' + sub_id
experiment_folder = config['DATA_PATH'] + '/' + experiment_id + '/'
if not os.path.exists(experiment_folder):
    os.makedirs(experiment_folder)
    
print(experiment_folder)

I:\My Drive\DAWSON PHD PROJECT\Biomarker Data Repository\data\new-peak-project\experiments/sensitive_model_search/


In [None]:
o_random_seeds = list(range(1, 11))
parameter_random_seeds = list(range(1, 11))
species_perturbation_range = np.arange(1, 5000, 100)

## Helper functions
import warnings

def sensitivity_analysis(builder: ModelBuilder, specie_name, specie_range, simulation_time, simulation_step):
    all_results = []
    for specie in specie_range:
        builder.set_state(specie_name, specie)
        runner = roadrunner.RoadRunner(builder.get_sbml_model())
        try: 
            res = runner.simulate(0, simulation_time, simulation_step)
            all_results.append(res)
        except Exception as e:
            warnings.warn(f"Simulation failed for specie {specie}: {e}")
            continue
    return all_results

def extract_states_from_results(results, state_name, time_point):
    all_states = []
    for result in results:
        state = result[f'[{state_name}]']
        all_states.append(state[time_point])
    return all_states

def get_sensitivity_score(states):
    state_sensitivity = []  
    for i, specific_states in enumerate(states):
        # get the max and min of the Cp final state list 
        max_state = max(specific_states)
        min_state = min(specific_states)
        # get the range of the Cp final state list 
        range_state = max_state - min_state
        # append to the list 
        state_sensitivity.append(range_state)
    # print the mean of the state sensitivity
    return sum(state_sensitivity) / len(state_sensitivity)

In [20]:
results = []
# use joblib to parallelise the code
from joblib import Parallel, delayed

def run_sensitivity_analysis(o_random_seed, parameter_random_seed, verbose=0):
    # Generate the model 
    model_spec = ModelSpecification()
    model_spec.generate_specifications(o_random_seed, no_observable_species, no_feedback_regulations, verbose=0)
    G0 = model_spec.generate_network(model_name, specie_value_range, param_range, param_multiplier_range, random_seed=parameter_random_seed, verbose=0)
    all_states = []
    for i in range(no_observable_species):
        all_states.append('A'+str(i))
    for i in range(no_observable_species):
        all_states.append('B'+str(i))
                
    all_init_species_results = []
    for init_species in all_states: 
        all_results = sensitivity_analysis(G0, init_species, species_perturbation_range, simulation_time, simulation_step)
        all_init_species_results.append(all_results)

    # extract the last time point of Cp for each init species
    Cp_final_states = []
    for init_species in all_init_species_results: 
        Cp_final_states.append(extract_states_from_results(init_species, 'Cp', -1))
        
    sens_score = get_sensitivity_score(Cp_final_states)
    if verbose: 
        print(f'Random seed: {o_random_seed}, Parameter random seed: {parameter_random_seed}, Sensitivity score: {sens_score}')
    return [o_random_seed, parameter_random_seed, sens_score]





In [21]:
res = run_sensitivity_analysis(5, 5, verbose=1)

Random seed: 5, Parameter random seed: 5, Sensitivity score: 19.519678553312183


In [None]:
results = Parallel(n_jobs=-1)(delayed(run_sensitivity_analysis)(o_random_seed, parameter_random_seed) for o_random_seed in o_random_seeds for parameter_random_seed in parameter_random_seeds)
# convert to pandas dataframe
df = pd.DataFrame(results, columns=['o_random_seed', 'parameter_random_seed', 'sensitivity_score'])

# sort the dataframe by sensitivity score
df = df.sort_values(by='sensitivity_score', ascending=False)
df

In [None]:
# save df to a pickle file
df.to_pickle(experiment_folder + 'sensitivity_analysis_results.pkl')