# Run ThermalTracks pipeline

## Import functions

In [None]:
import pandas as pd
import numpy as np
import warnings
import sys
from pathlib import Path
import random

# Make the Thermal Tracks sources importable: the project root is taken to be
# the parent of the current working directory, and its `src` folder is put on
# the module search path.
project_root = Path().resolve().parent
src_path = project_root.joinpath('src')
sys.path.append(str(src_path))

from run_full_process import full_gp_process

# Silence every warning for the rest of the session
warnings.filterwarnings(action="ignore")

# Load example TPP-TR data from [Mateus et al. (2018)](https://www.embopress.org/doi/full/10.15252/msb.20188242)  

In [16]:
# Load example data (E. coli cell lysate, Mateus et al. (2018))
example_data_path = project_root / 'Example_data/input_dataframes'

# Read the scaled TPP-TR table (a single read; the path is built with pathlib)
csv_path = example_data_path / "Mateus_MgCl2_2018" / "ecoli_lysate_mgcl2_tpp_scaled_df.csv"
tpp_ecoli_df = pd.read_csv(csv_path)

# Keep the condition, protein ID, scaled fold-change and x columns, and
# expose the scaled fold change under the shorter name "y"
tpp_ecoli_df = tpp_ecoli_df[['condition', 'uniqueID', 'y_FC_Scaling', 'x']]
tpp_ecoli_df = tpp_ecoli_df.rename(columns={"y_FC_Scaling": "y"})

# For testing, draw up to 200 proteins at random. The sample size is capped at
# the number of available IDs because random.sample raises ValueError when asked
# for more items than the population holds. No seed is set in this cell, so the
# selected subset can differ between runs.
n_test_proteins = 200
unique_ids = tpp_ecoli_df['uniqueID'].unique().tolist()
random_ids = random.sample(unique_ids, min(n_test_proteins, len(unique_ids)))

# Produce a reduced TPP-TR dataset restricted to the sampled proteins
tpp_ecoli_df = tpp_ecoli_df[tpp_ecoli_df['uniqueID'].isin(random_ids)]
tpp_ecoli_df.head(5)

Unnamed: 0,condition,uniqueID,y,x
2100,lysate,P02413-(RPLO),1.0,37.0
2101,lysate,P02413-(RPLO),1.169526,40.4
2102,lysate,P02413-(RPLO),1.055569,46.9
2103,lysate,P02413-(RPLO),0.699179,52.9
2104,lysate,P02413-(RPLO),0.107548,58.6


# Run Thermal Tracks for *E. coli* lysate treated with MgCl$_2$

In [20]:
import gpytorch

# Make sure the output folder for this dataset exists
results_dir = example_data_path / "Mateus_MgCl2_2018" / "results"
results_dir.mkdir(parents=True, exist_ok=True)

# Settings handed to full_gp_process. The grouping below follows the key names
# only; the meaning of each option is defined by the run_full_process module.
parameters = {
    # Output location (string with a trailing slash)
    "result_dir": rf"{results_dir}/",
    "subset_test": False,

    # Lengthscale options
    "lengthscale_prior": gpytorch.priors.GammaPrior(10, 1),
    "lengthscale_minconstraint": 'max',
    "lengthscale_mult": 1,

    # Condition labels to compare
    "control_condition": "lysate",
    "perturbation": "lysate_MgCl2",

    # Training options
    "training_iterations": 150,
    "learningRate": 0.1,
    "amsgrad": False,

    # Prediction and reporting options
    "n_predictions": 50,
    "create_plots": False,
    "exclude_poor_fits": False,
    "samples_per_id": 10,
}
warnings.filterwarnings("ignore")

# Run the full pipeline on the reduced dataset
full_gp_process(tpp_ecoli_df, parameters)

- [x] 1. Build and fit full model
- [x] 2. Create a joint model and null dataset
- [x] 3. Evaluate and predict models
- [x] 4. Build and fit null model
- [x] 5. Compute likelihood ratio test statistics
- [x] 6. Combine and create result files
