**This script contains the figure-generating functions for Fig 4**

In [None]:
import os
from pathlib import Path
import json

# 1. Find the Repo Root dynamically
# Walks up the parent folders of the current working directory until it finds
# one containing README.md. `Path.parents` never includes the cwd itself, so the
# cwd is appended as a last-resort candidate: this keeps the original result
# whenever a parent matches, and additionally works when the notebook is
# launched directly from the repo root.
_cwd = Path.cwd()
_root = next(
    (p for p in (*_cwd.parents, _cwd) if (p / "README.md").exists()),
    None,
)
if _root is None:
    # A bare StopIteration from next() gives no hint about what went wrong.
    raise FileNotFoundError(
        f"Could not locate the repo root: no README.md found in {_cwd} "
        "or any of its parent folders"
    )
REPO_ROOT = str(_root)

# 2. Add to sys.path so standard 'import' statements work
import sys
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

# 3. Load the Data Root from the JSON file
# data_config.json must sit in the repo root and contain a "DATA_ROOT" key.
with open(Path(REPO_ROOT) / "data_config.json", "r", encoding="utf-8") as f:
    config = json.load(f)
    DATA_ROOT = config["DATA_ROOT"]

print(f"Data is being pulled from: {DATA_ROOT}")
print(f"Repo root identified as: {REPO_ROOT}")

In [None]:
############### LOADING DATA ###############

# Execute model_load.py inside this notebook session with the IPython %run
# magic; IPython expands $REPO_ROOT from the notebook variable of that name.
# The model-loading cell further down checks for `y_validation` and
# `predictions_validation` and asks the user to run model_load.py if they are
# missing, so this cell has to be executed before that one.
%run "$REPO_ROOT/config/predictions/model_load.py"

In [None]:
############### LOADING LATENCY INFERENCE FUNCTIONS ###############

# Execute model_latency_inference.py inside this notebook session with the
# IPython %run magic ($REPO_ROOT is expanded by IPython).
# NOTE(review): presumably this is what defines `run_benchmarks`, which the
# plotting cell calls without importing it — confirm against the script.
%run "$REPO_ROOT/config/latency/model_latency_inference.py"

In [None]:
############### LOADING IN DATA AND MODELS ###############

import joblib
import pandas as pd  # imported here so the cell does not rely on an earlier %run having defined `pd`
import torch
import sys

# Alternative to the %run cell for obtaining run_benchmarks (kept for reference):
#sys.path.append(f"{REPO_ROOT}/config/latency/")
#from model_latency_inference import run_benchmarks

# ============================================================================
# Validate prerequisites
# ============================================================================

# y_validation and predictions_validation are not created in this cell; they
# must already exist in the notebook namespace (the error message below points
# at model_load.py as their source).
try:
    _ = y_validation, predictions_validation
    print("✓ model_load.py data detected")
except NameError:
    raise SystemExit(
        "\nERROR: Please run model_load.py first!\n"
        "Usage:\n"
        "  %run model_load.py\n"
        "  %run benchmark_latency.py\n"
    ) from None

# ============================================================================
# Build x_validation (TF features) from external data
# ============================================================================

print("\nPreparing validation inputs...")

# All inputs live under DATA_ROOT (read from data_config.json in the first cell).
full_data_dir = f"{DATA_ROOT}/Full data files"
saved_models_dir = f"{DATA_ROOT}/Saved models"
# Used twice: once for feature selection, once when rebuilding the RNN.
network_path = f"{full_data_dir}/network(full).tsv"

# Load external validation data
validation_dataset = pd.read_csv(
    f"{full_data_dir}/Liver_bulk_external.tsv",
    sep='\t', header=0, index_col=0
)

# Load network and TF reference
net = pd.read_csv(network_path, sep='\t')
# NOTE(review): this path was previously hard-coded to
# '~/Zhang-Lab/Zhang Lab Data/Full data files/TF(full).tsv'; it now follows
# DATA_ROOT like every other input — confirm DATA_ROOT points at that folder.
tf_expression = pd.read_csv(
    f"{full_data_dir}/TF(full).tsv",
    sep='\t', header=0, index_col=0
)

# Determine features: keep only TF-table columns that appear in the network,
# either in its 'TF' column or in its 'Gene' column.
network_nodes = set(net['TF'].unique()) | set(net['Gene'].unique())
usable_features = [tf for tf in tf_expression.columns if tf in network_nodes]

# Build x_validation with zero-filling for missing features: start from an
# all-zero frame over every usable feature, then copy in the columns that the
# external dataset actually provides.
x_validation = pd.DataFrame(0, index=validation_dataset.index, columns=usable_features)
present_features = [f for f in usable_features if f in validation_dataset.columns]
x_validation[present_features] = validation_dataset[present_features]

print(f"  x_validation: {x_validation.shape}")
print(f"  y_validation: {y_validation.shape}")

# ============================================================================
# Load models
# ============================================================================

print("\nLoading models...")

# Make the LEMBAS-RNN scripts importable. REPO_ROOT has no trailing slash, so
# the separator before "run" is required; skip the append if already present
# so re-running the cell does not grow sys.path.
rnn_scripts_dir = f"{REPO_ROOT}/run/model scripts/LEMBAS-RNN/"
if rnn_scripts_dir not in sys.path:
    sys.path.append(rnn_scripts_dir)
from RNN_reconstructor import load_model_from_checkpoint

# MLR
mlr_loaded = joblib.load(
    f"{saved_models_dir}/MLR/MLR_v3/MLR_model_v4(uncentered[FINAL]).joblib"
)

# XGBRF
xgbrf_loaded = joblib.load(
    f"{saved_models_dir}/XGBRF/XGBRF_v5/all_models_batch_XGBRF[uncentered_REALFINAL].joblib"
)

# RNN (rebuilt from its checkpoint on CPU, using the validation frames above)
RNN_val = load_model_from_checkpoint(
    checkpoint_path=f"{saved_models_dir}/RNN/uncentered_data_RNN/signaling_model.v1.pt",
    net_path=network_path,
    X_in_df=x_validation,
    y_out_df=y_validation,
    device='cpu',
    use_exact_training_params=True
)

print("✓ All models loaded")

In [None]:
############### PLOTTING STEP ###############

# Hand the three loaded models and the validation feature frame to
# run_benchmarks and keep whatever it returns in `results`.
# NOTE(review): save_path is an absolute, user-specific location; presumably
# run_benchmarks writes the latency figure there — confirm and adjust per machine.
results = run_benchmarks(
    X_full=x_validation,
    mlr_model=mlr_loaded,
    xgbrf_models=xgbrf_loaded,
    rnn_model=RNN_val,
    save_path='/home/christianl/Zhang-Lab/Zhang Lab Figures/inference_latency_corrected.png',
)