**This script contains the figure-generating functions for Fig 4**

In [1]:
############### LOADING DATA ###############
# Runs model_load.py through IPython's %run magic; the names that script defines
# are then available to the cells below. The prerequisite check in the
# "LOADING IN DATA AND MODELS" cell expects this run to have defined
# `y_validation` and `predictions_validation`.

%run '/home/christianl/Zhang-Lab/Zhang Lab Code/Boilerplate_datahandling/Remote boilerplate/model_load.py'

Loading pre-computed predictions...
✓ Loaded predictions for 3 models
  Training samples: 12748, Genes: 16100
  Test samples: 3187, Genes: 16100
  Val samples: 262, Genes: 16100

✓ All functions loaded. Ready for analysis!


In [3]:
############### LOADING LATENCY INFERENCE FUNCTIONS ###############
# Runs model_latency_inference.py through IPython's %run magic so that the names
# it defines are available in the notebook.
# NOTE(review): the next cell also does
# `from model_latency_inference import run_benchmarks`, so this %run looks
# redundant -- confirm nothing else from the script is needed before removing it.

%run '/home/christianl/Zhang-Lab/Zhang Lab Code/Boilerplate_datahandling/Remote boilerplate/model_latency_inference.py'

In [4]:
############### LOADING IN DATA AND MODELS ###############
# Purpose of this cell:
#   1. Verify that model_load.py has been run (it must have defined
#      `y_validation` and `predictions_validation`).
#   2. Build `x_validation`, the TF-feature input matrix for the external
#      validation samples, zero-filling features absent from that dataset.
#   3. Load the three fitted models (MLR, XGBRF, RNN) used by the benchmark cell.
#
# Names defined for later cells: validation_dataset, net, tf_expression,
# network_nodes, usable_features, present_features, missing_features,
# x_validation, mlr_loaded, xgbrf_loaded, RNN_val, run_benchmarks.

import sys

import joblib
import pandas as pd  # imported explicitly so the cell does not rely on an earlier %run leaking `pd`
import torch  # not referenced in this cell; kept in case later cells rely on it

# ----------------------------------------------------------------------------
# Paths (single source of truth; every file below is derived from LAB_ROOT)
# ----------------------------------------------------------------------------

LAB_ROOT = '/home/christianl/Zhang-Lab'
BOILERPLATE_DIR = f'{LAB_ROOT}/Zhang Lab Code/Boilerplate_datahandling/Remote boilerplate/'
RNN_TUNING_DIR = f'{LAB_ROOT}/Zhang Lab Code/Tuning/uncentered_RNN_tuning'
FULL_DATA_DIR = f'{LAB_ROOT}/Zhang Lab Data/Full data files'
SAVED_MODELS_DIR = f'{LAB_ROOT}/Zhang Lab Data/Saved models'

EXTERNAL_VALIDATION_PATH = f'{FULL_DATA_DIR}/Liver_bulk_external.tsv'
NETWORK_PATH = f'{FULL_DATA_DIR}/network(full).tsv'
TF_REFERENCE_PATH = f'{FULL_DATA_DIR}/TF(full).tsv'

MLR_MODEL_PATH = f'{SAVED_MODELS_DIR}/MLR/MLR_v3/MLR_model_v4(uncentered[FINAL]).joblib'
XGBRF_MODEL_PATH = f'{SAVED_MODELS_DIR}/XGBRF/XGBRF_v5/all_models_batch_XGBRF[uncentered_REALFINAL].joblib'
RNN_CHECKPOINT_PATH = f'{SAVED_MODELS_DIR}/RNN/uncentered_data_RNN/signaling_model.v1.pt'

# Make the project modules importable. The membership test keeps sys.path from
# growing by one duplicate entry every time this cell is re-run.
for _module_dir in (BOILERPLATE_DIR, RNN_TUNING_DIR):
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)

from model_latency_inference import run_benchmarks

# ============================================================================
# Validate prerequisites
# ============================================================================

# Look the names up in the namespace instead of evaluating them into `_`:
# assigning to `_` in a notebook shadows IPython's "last output" variable.
_missing_prerequisites = [
    name for name in ("y_validation", "predictions_validation")
    if name not in globals()
]
if _missing_prerequisites:
    raise SystemExit(
        "\nERROR: Please run model_load.py first!\n"
        "Usage:\n"
        "  %run model_load.py\n"
        "  %run benchmark_latency.py\n"
    )
print("✓ model_load.py data detected")

# ============================================================================
# Build x_validation (TF features) from external data
# ============================================================================

print("\nPreparing validation inputs...")

# Load external validation data (first column is used as the row index)
validation_dataset = pd.read_csv(
    EXTERNAL_VALIDATION_PATH,
    sep='\t', header=0, index_col=0
)

# Load network and TF reference
net = pd.read_csv(NETWORK_PATH, sep='\t')
tf_expression = pd.read_csv(
    TF_REFERENCE_PATH,
    sep='\t', header=0, index_col=0
)

# Determine features: columns of the TF reference table that appear in the
# network, either in the 'TF' column or in the 'Gene' column.
network_nodes = set(net['TF'].unique()) | set(net['Gene'].unique())
usable_features = [tf for tf in tf_expression.columns if tf in network_nodes]

# Split the usable features by whether the external dataset provides them.
validation_columns = set(validation_dataset.columns)
present_features = [f for f in usable_features if f in validation_columns]
missing_features = [f for f in usable_features if f not in validation_columns]

# Build x_validation with zero-filling for missing features. reindex() selects
# the usable features in order and creates the absent ones as 0.0, so every
# filled column is float rather than int.
x_validation = validation_dataset.reindex(columns=usable_features, fill_value=0.0)

if missing_features:
    print(f"  WARNING: {len(missing_features)} of {len(usable_features)} features "
          f"absent from the validation data were zero-filled")
if len(x_validation) != len(y_validation):
    print(f"  WARNING: x_validation has {len(x_validation)} rows but "
          f"y_validation has {len(y_validation)}")

print(f"  x_validation: {x_validation.shape}")
print(f"  y_validation: {y_validation.shape}")

# ============================================================================
# Load models
# ============================================================================

print("\nLoading models...")

from RNN_reconstructor import load_model_from_checkpoint

# NOTE: joblib.load unpickles arbitrary Python objects; only load model files
# that come from a trusted location.

# MLR
mlr_loaded = joblib.load(MLR_MODEL_PATH)

# XGBRF
xgbrf_loaded = joblib.load(XGBRF_MODEL_PATH)

# RNN (rebuilt on CPU from the checkpoint; the loader is given the network file
# plus the validation input/output frames)
RNN_val = load_model_from_checkpoint(
    checkpoint_path=RNN_CHECKPOINT_PATH,
    net_path=NETWORK_PATH,
    X_in_df=x_validation,
    y_out_df=y_validation,
    device='cpu',
    use_exact_training_params=True
)

print("✓ All models loaded")

✓ model_load.py data detected

Preparing validation inputs...
  x_validation: (262, 1197)
  y_validation: (262, 16100)

Loading models...
LOADING MODEL - EXACT TRAINING SCRIPT SEQUENCE

1. Loading checkpoint from: /home/christianl/Zhang-Lab/Zhang Lab Data/Saved models/RNN/uncentered_data_RNN/signaling_model.v1.pt

2. Loading network from: /home/christianl/Zhang-Lab/Zhang Lab Data/Full data files/network(full).tsv
   Network shape: (1153904, 3)
   Network columns: ['TF', 'Gene', 'Interaction']

3. Formatting network...

4. Using EXACT benchmark.py parameters
   projection_amplitude_in: 1.2
   projection_amplitude_out: 1.2
   bionet_params: {'target_steps': 150, 'max_steps': 10, 'exp_factor': 50, 'tolerance': 1e-20, 'leak': 0.01}

5. Initializing model with DataFrames...
   Input X_in shape: (262, 1197)
   Input y_out shape: (262, 16100)
  Filtered X_in: 1197 → 1197 features
  Filtered y_out: 16100 → 16100 features
   ✓ Model initialized (data automatically filtered)

6. Converti

In [7]:
############### PLOTTING STEP ###############
# Hands the three loaded models and the validation inputs to run_benchmarks and
# keeps whatever it returns in `results`. The PNG path is passed as `save_path`
# (presumably where the latency figure is written -- confirm in
# model_latency_inference.py).

benchmark_kwargs = dict(
    mlr_model=mlr_loaded,
    xgbrf_models=xgbrf_loaded,
    rnn_model=RNN_val,
    X_full=x_validation,
    save_path='/home/christianl/Zhang-Lab/Zhang Lab Figures/inference_latency_corrected.png',
)
results = run_benchmarks(**benchmark_kwargs)


 🚀 STARTING DUAL-MODE BENCHMARK

[Mode A] TRUE LATENCY (Input shape: (1, 1197))
------------------------------------------------------------
  Testing MLR...
  Testing XGBRFRegressor (17 sub-models)...


KeyboardInterrupt: 