In [4]:
# testing uncentered-trained RNN 
# importing pytorch
import torch
import pandas as pd
from RNN_reconstructor import load_model_from_checkpoint
from scipy.stats import pearsonr


In [None]:
# Run the (uncentered) data config script inside this kernel: %run executes the file
# as a script and then loads its top-level names into the notebook namespace.
# NOTE(review): presumably this is what defines x_test_df / y_test_df, which the
# model-loading cell passes to load_model_from_checkpoint — confirm against the script.
%run '/home/christianl/Zhang-Lab/Zhang Lab Code/Remote boilerplate/uncentered_RNN_remote.py'


In [None]:
# Rebuild the trained RNN for evaluation from three ingredients:
#   - the checkpoint file holding the learned weights,
#   - the fixed network .tsv file,
#   - the class initialisation logic in the RNN_reconstructor script.
checkpoint_file = '/home/christianl/Zhang-Lab/Zhang Lab Data/Saved models/RNN/signaling_model.v1.pt'
network_file = '/home/christianl/Zhang-Lab/Zhang Lab Data/Full data files/network(full).tsv'

loaded_RNN = load_model_from_checkpoint(
    checkpoint_path=checkpoint_file,
    net_path=network_file,
    X_in_df=x_test_df,   # test inputs, handed over as a DataFrame rather than a tensor
    y_out_df=y_test_df,  # test targets, handed over as a DataFrame rather than a tensor
    device='cpu',
    use_exact_training_params=True,
)

LOADING MODEL - EXACT TRAINING SCRIPT SEQUENCE

1. Loading checkpoint from: /home/christianl/Zhang-Lab/Zhang Lab Data/Saved models/RNN/signaling_model.v1.pt

2. Loading network from: /home/christianl/Zhang-Lab/Zhang Lab Data/Full data files/network(full).tsv
   Network shape: (1153904, 3)
   Network columns: ['TF', 'Gene', 'Interaction']

3. Formatting network...

4. Using EXACT benchmark.py parameters
   projection_amplitude_in: 1.2
   projection_amplitude_out: 1.2
   bionet_params: {'target_steps': 150, 'max_steps': 10, 'exp_factor': 50, 'tolerance': 1e-20, 'leak': 0.01}

5. Initializing model with DataFrames...
   Input X_in shape: (3187, 1198)
   Input y_out shape: (3187, 16101)
  Filtered X_in: 1198 → 1197 features
  Filtered y_out: 16101 → 16100 features
   ✓ Model initialized (data automatically filtered)

6. Converting DataFrames to tensors...
   ✓ Tensors created

7. Applying training settings...
   ✓ Set input_layer.weights.requires_grad = False
   ✓ Applied prescale_weights(

In [6]:
# Run the reconstructed RNN over the test inputs to see how well it predicts on this data.
# Y_hat  -> predictions for the 16,100 target genes only (the final output we evaluate)
# Y_full -> values for all 16,371 network nodes in the .tsv file, including the hidden
#           states (intermediate calculations)
# torch.no_grad() -> disables gradient tracking for the forward pass, so no autograd graph
#                    is built (less memory); nothing in this cell updates the learned weights

with torch.no_grad():
    Y_hat, Y_full = loaded_RNN(loaded_RNN.X_in)
    
print(f"\nPredictions shape: {Y_hat.shape}")
print(f"Hidden states shape: {Y_full.shape}")


Predictions shape: torch.Size([3187, 16100])
Hidden states shape: torch.Size([3187, 16371])


In [None]:
# Aggregate Pearson correlation between the model's predictions and the ground truth.
# Y_hat             -> predictions restricted to the target genes kept by the model
# loaded_RNN.y_out  -> the test targets (from y_test_df) as stored on the model after its
#                      own filtering; used as the ground truth to compare against
# .detach().cpu().numpy() converts the PyTorch tensors to NumPy arrays
# .ravel() collapses each 2-D array into the 1-D vector that pearsonr expects
# (the recorded run of this cell printed an aggregate coefficient of 0.8587)

y_true = loaded_RNN.y_out.detach().cpu().numpy()
y_pred = Y_hat.detach().cpu().numpy()

# Flattening would silently pair up the wrong elements if the two arrays had the same
# number of entries but a different layout, so check the shapes before comparing.
assert y_true.shape == y_pred.shape, (
    f"Shape mismatch: ground truth {y_true.shape} vs predictions {y_pred.shape}"
)

pr, _ = pearsonr(y_true.ravel(), y_pred.ravel())

print("\n" + "=" * 70)
print("RESULTS")
print("=" * 70)
print(f"Pearson correlation coefficient: {pr:.4f}")



RESULTS
Pearson correlation coefficient: 0.8587


In [None]:
# Optional: save the predictions as a tab-separated file with no header row and no
# index column (rows and columns follow Y_hat's own order).
# `output_file` must be assigned in this cell: it previously existed only as a
# commented-out line, so running the cell raised NameError.
output_file = "/home/christianl/Zhang-Lab/Zhang Lab Data/predictions.tsv"
pd.DataFrame(Y_hat.detach().cpu().numpy()).to_csv(
    output_file, sep="\t", index=False, header=False
)
print(f"\nPredictions saved to: {output_file}")

**Retrying the evaluation with centered data to check for differences in the aggregate Pearson correlation coefficient (PCC)**

In [8]:
# Run the centered data config script inside this kernel (%run loads the script's
# top-level names into the notebook namespace).
# NOTE(review): the recorded output of the next cell is a NameError for
# `x_test_centered_df`, so this script did not define that name — check which
# variable names the script actually exports.
%run '/home/christianl/Zhang-Lab/Zhang Lab Code/Remote boilerplate/centered_RNN_remote.py'

In [9]:
# Rebuild the RNN from the same checkpoint and network file as before, but attach the
# centered test DataFrames so the aggregate PCC can be compared with the uncentered run.
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'x_test_centered_df' is not defined" — the DataFrame names used here
# do not match what centered_RNN_remote.py left in the namespace; confirm the names the
# script defines and update X_in_df / y_out_df accordingly.
centered_loaded_RNN = load_model_from_checkpoint(
                checkpoint_path='/home/christianl/Zhang-Lab/Zhang Lab Data/Saved models/RNN/signaling_model.v1.pt',
                net_path='/home/christianl/Zhang-Lab/Zhang Lab Data/Full data files/network(full).tsv',
                X_in_df=x_test_centered_df,  # passing as df not tensors (centered)
                y_out_df=y_test_centered_df,  # passing as df not tensors (centered)
                device='cpu',
                use_exact_training_params=True)

NameError: name 'x_test_centered_df' is not defined