## Notebook Initialisation

In [None]:
import os

import numpy as np
import pandas as pd

# Move the working directory up to the project root so that the relative
# file names used by later cells resolve from a consistent location.
path = os.getcwd()
# The project root is identified by the first occurrence of this substring in
# the current path (a plain substring match, not a whole directory name).
project_marker = 'project'
index_project = path.find(project_marker)
if index_project == -1:
    # str.find returns -1 when the marker is absent; slicing with that value
    # would silently pick an unrelated prefix of the path, so stop here.
    raise RuntimeError(
        f"Cannot locate the project root: '{project_marker}' does not occur in "
        f"the current working directory ({path})."
    )
# Keep everything from the start of the path up to and including the marker.
project_path = path[:index_project + len(project_marker)]
# set the working directory
os.chdir(project_path)
print(f'Project path set to: {os.getcwd()}')

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Dimension reduction and clustering libraries
import umap
import hdbscan
import sklearn.cluster as cluster
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score

# Plotting 
import matplotlib.pyplot as plt
import seaborn as sns

# Configure seaborn once: white-grid axes style with the "talk" context scaling.
sns.set_theme(context="talk", style="whitegrid")

In [None]:
# Set up the project's data-access helpers; later cells use `data_link` to
# fetch datasets (including the CCLE-derived features) by loading code.
from PathLoader import PathLoader
from DataLink import DataLink
# PathLoader is constructed from two env files given as relative names.
# NOTE(review): presumably these are resolved against the working directory
# set in the first cell and hold data-root / per-user path settings — confirm.
path_loader = PathLoader('data_config.env', 'current_user.env')
# DataLink takes the path loader plus 'data_codes.csv'.
# NOTE(review): the CSV presumably maps loading codes to data files — confirm.
data_link = DataLink(path_loader, 'data_codes.csv')

In [None]:
# Load a dataset through DataLink using its loading code.

# `loading_code` is reassigned by the next cell, so it always holds the code of
# the most recently requested dataset.
# NOTE(review): judging by the code name this is the raw FGFR4 model
# simulation output; the returned object's type is defined by DataLink — confirm.
loading_code = 'fgfr4_model_raw_simulation'
simulation_data = data_link.get_data_from_code(loading_code)

In [None]:
# Load a second dataset through DataLink; this overwrites `loading_code` from
# the previous cell.
# NOTE(review): judging by the code name these are dynamic features for the
# FGFR4 model on CCLE data; the returned object's type is defined by DataLink — confirm.
loading_code = 'fgfr4_ccle_dynamic_features'
dynamic_features = data_link.get_data_from_code(loading_code)

## Analysis Chain

In [None]:
## CONFIG AREA
# Every tunable setting for the analysis chain is defined in this cell. The
# cell also validates the settings, creates the output folder and records the
# chosen parameters in a text file next to the results.
### OPTIONS
# aCbl vs. pSPRY2 vs. pMEK vs. SPRY2
# OR
# PTP vs aCbl

# folder output (created under '<data path>data/results/')
folder_name = "FGFR4_Dyn_Feats_Sensitivity_Analysis"

# MAIN DEFINITIONS
target_proteins = ['aCbl', 'PTP']
# must contain exactly one entry per target protein (checked below)
plot_colors = ['red', 'blue']
# file output options
# used as the prefix of the parameter file written at the end of this cell
exp_id = "PTP_control"
# plot options
dynamic_plot_cols = 2
ncluster_plot_cols = 2
normalise_simulation_dynamics = False
save_figure = True
projection_show_legend = True
## Technical parameters
fixed_random_seed = 42 # -1 for no seed, not implemented yet
umap_params = {
    # ADD YOUR OWN HERE
    'random_state': fixed_random_seed
}

hbd_params = {
    # ADD YOUR OWN HERE
    'min_cluster_size': 500,
    'min_samples': 10,
}

# validation scripts
assert len(target_proteins) >= dynamic_plot_cols, 'Not enough target proteins for the plot'
assert len(target_proteins) >= ncluster_plot_cols, 'Not enough target proteins for the plot'
# The check is an equality, so the message must also cover "too many colors".
assert len(target_proteins) == len(plot_colors), (
    'Number of plot colors must match the number of target proteins'
)

# Build the output location once and reuse it. The string concatenation relies
# on get_data_path() returning a path that already ends with a separator.
file_save_path = f'{path_loader.get_data_path()}data/results/{folder_name}/'
# exist_ok=True creates the folder when missing and is a no-op otherwise,
# without a separate existence check that could race with another process.
os.makedirs(file_save_path, exist_ok=True)

# save parameters in a text file
with open(f'{file_save_path}{exp_id}_parameters.txt', 'w') as f:
    f.write(f'Fixed random seed: {fixed_random_seed}\n')
    f.write(f'UMAP parameters: {umap_params}\n')
    f.write(f'HDBSCAN parameters: {hbd_params}\n')
    f.write(f'Target proteins: {target_proteins}\n')
    f.write(f'Plot colors: {plot_colors}\n')
    f.write(f'Normalise simulation dynamics: {normalise_simulation_dynamics}\n')
    f.write(f'Projection show legend: {projection_show_legend}\n')
        