# Sensitive Model Search

Search for models with a high sensitivity score by varying `o_random_seed` and `parameter_random_seed`.

## Init

In [15]:
import os

# Locate the project root: find the first occurrence of the marker 'project'
# in the current working directory and keep everything up to the end of it.
path = os.getcwd()
project_marker = 'project'
index_project = path.find(project_marker)
if index_project == -1:
    # str.find returns -1 when the marker is absent; slicing with that value
    # would silently produce a meaningless prefix of the path.
    raise FileNotFoundError(
        f"Could not find '{project_marker}' in the current working directory: {path}"
    )
# slice the path from its start up to and including the marker
project_path = path[:index_project + len(project_marker)]
# set the working directory; os.path.join avoids the invalid '\s' escape
# sequence of the literal '\src' and does not hard-code the Windows separator
os.chdir(os.path.join(project_path, 'src'))
print(f'Project path set to: {os.getcwd()}')

Project path set to: c:\Github\new-peak-project\src


In [16]:
from dotenv import dotenv_values

# Load key/value settings from '.env'; the path is relative, so it is resolved
# against the current working directory.
config = dotenv_values(".env")
# Fail fast: later cells build output paths from DATA_PATH, so a missing or
# empty value would otherwise surface as a confusing error further down
# (or as a folder created in an unintended location).
if not config.get("DATA_PATH"):
    raise KeyError("DATA_PATH is missing or empty in the .env file")
print(config["DATA_PATH"])

I:\My Drive\DAWSON PHD PROJECT\Biomarker Data Repository\data\new-peak-project\experiments


In [17]:
from models.ModelBuilder import ModelBuilder
from models.Reaction import Reaction
from models.ReactionArchtype import ReactionArchtype
from models.ArchtypeCollections import *
from models.Utils import *

import matplotlib.pyplot as plt
import seaborn as sns
import roadrunner
import numpy as np
import pandas as pd

# import scikit-learn
from sklearn.linear_model import LinearRegression
# tree models and support vector machines
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# import pearson correlation
from scipy.stats import pearsonr
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from copy import deepcopy

## Analysis

In [18]:
import os

### parameters
# Identifiers used to name the experiment output folder
notebook_name = 'sensitive_model_search'
sub_id = '4'

## Model parameters
# These values are forwarded to the model specification / network generation
# calls inside run_sensitivity_analysis.
no_observable_species = 5
no_feedback_regulations = 2
specie_value_range = (5, 5000)
param_range = (0.1, 10)
param_multiplier_range = (0.7, 1.3)
model_name = 'sensitive_model_search'

## Sensitivity analysis parameters
# Every (o_random_seed, parameter_random_seed) pair is evaluated.
o_random_seeds = list(range(1, 5))          # [1, 2, 3, 4]
parameter_random_seeds = list(range(1, 2))  # [1]
# 10 perturbation values: 1, 501, ..., 4501
species_perturbation_range = np.arange(1, 5000, 500)

## Simulation parameters
simulation_time = 500
simulation_step = 100

## General parameters
parallelise = True
save_figures = True
experiment_id = notebook_name + '_' + sub_id
# The trailing '/' is kept on purpose: later cells append file names to
# experiment_folder by plain string concatenation.
experiment_folder = config['DATA_PATH'] + '/' + experiment_id + '/'
# exist_ok=True replaces the check-then-create pattern, which can fail if the
# folder appears between the existence check and the makedirs call.
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

I:\My Drive\DAWSON PHD PROJECT\Biomarker Data Repository\data\new-peak-project\experiments/sensitive_model_search_4/


In [19]:
## Helper functions
import warnings

from models.SensitivityAnalysis import sensitivity_analysis, get_sensitivity_score, extract_states_from_results

In [20]:
# Placeholder only: `results` is reassigned by the Parallel(...) sweep in a later cell
results = []
# use joblib to parallelise the code
from joblib import Parallel, delayed
# Solver used by run_sensitivity_analysis to compile the generated SBML model
from models.Solver.RoadrunnerSolver import RoadrunnerSolver

def run_sensitivity_analysis(o_random_seed, parameter_random_seed, verbose=0):
    """Generate one model and compute its mean sensitivity score for 'Cp'.

    The model is built from the notebook-level configuration
    (no_observable_species, no_feedback_regulations, specie_value_range,
    param_range, param_multiplier_range, model_name). Each species
    A0..A{n-1} and B0..B{n-1} is passed in turn to `sensitivity_analysis`
    together with `species_perturbation_range`, and the 'Cp' state at
    index -1 of each result set is scored.

    Args:
        o_random_seed: seed passed to `generate_specifications`.
        parameter_random_seed: seed passed to `generate_network` as `random_seed`.
        verbose: when truthy, print the seeds and the resulting score. Model
            generation itself is always called with verbose=0.

    Returns:
        A list `[o_random_seed, parameter_random_seed, sens_score]`, where
        `sens_score` is the arithmetic mean of the values returned by
        `get_sensitivity_score`.
    """
    # Build the specification and the concrete network for this seed pair
    spec = ModelSpecification()
    spec.generate_specifications(o_random_seed, no_observable_species, no_feedback_regulations, verbose=0)
    network = spec.generate_network(
        model_name,
        specie_value_range,
        param_range,
        param_multiplier_range,
        random_seed=parameter_random_seed,
        verbose=0,
    )

    rr_solver = RoadrunnerSolver()
    rr_solver.compile(network.get_sbml_model())

    # Species to perturb: all 'A' species first, then all 'B' species
    species_names = [
        prefix + str(idx)
        for prefix in ('A', 'B')
        for idx in range(no_observable_species)
    ]

    # One sensitivity-analysis result set per perturbed species
    perturbation_results = [
        sensitivity_analysis(
            network,
            rr_solver,
            species_name,
            species_perturbation_range,
            simulation_time,
            simulation_step,
        )
        for species_name in species_names
    ]

    # Take the 'Cp' state at index -1 from every result set
    cp_last_states = [
        extract_states_from_results(result_set, 'Cp', -1)
        for result_set in perturbation_results
    ]

    per_state_scores = get_sensitivity_score(cp_last_states)
    sens_score = sum(per_state_scores) / len(per_state_scores)
    if verbose:
        print(f'Random seed: {o_random_seed}, Parameter random seed: {parameter_random_seed}, Sensitivity score: {sens_score}')
    return [o_random_seed, parameter_random_seed, sens_score]





In [21]:
# Single run for one seed pair (5, 5) with verbose output, executed before the full sweep
res = run_sensitivity_analysis(5, 5, verbose=1)

Random seed: 5, Parameter random seed: 5, Sensitivity score: 19.478710770239523


In [22]:
# Honour the `parallelise` configuration flag instead of always using every
# core: n_jobs=-1 uses all available CPUs, n_jobs=1 runs sequentially.
n_jobs = -1 if parallelise else 1

# Evaluate every (o_random_seed, parameter_random_seed) combination; the
# outer loop is over o_random_seeds, the inner over parameter_random_seeds.
results = Parallel(n_jobs=n_jobs)(
    delayed(run_sensitivity_analysis)(o_random_seed, parameter_random_seed)
    for o_random_seed in o_random_seeds
    for parameter_random_seed in parameter_random_seeds
)
# convert to pandas dataframe
df = pd.DataFrame(results, columns=['o_random_seed', 'parameter_random_seed', 'sensitivity_score'])

# sort the dataframe by sensitivity score, highest first
df = df.sort_values(by='sensitivity_score', ascending=False)
df

Unnamed: 0,o_random_seed,parameter_random_seed,sensitivity_score
2,3,1,11.363381
3,4,1,0.660333
1,2,1,0.004891
0,1,1,0.001584


In [23]:
# save df to a pickle file
df.to_pickle(experiment_folder + 'sensitivity_analysis_results.pkl')
print(f'Saved results to {experiment_folder + "sensitivity_analysis_results.pkl"}')

Saved results to I:\My Drive\DAWSON PHD PROJECT\Biomarker Data Repository\data\new-peak-project\experiments/sensitive_model_search_4/sensitivity_analysis_results.pkl
