# Jores et al 2021 Evaluation 
**Authorship:**
Adam Klie (last updated: *06/08/2023*)
***
**Description:**
Notebook to perform a brief evaluation of trained models on the Jores et al. (2021) dataset: it generates test-set predictions for every model architecture and trial and saves them to disk.
***

In [1]:
# General imports
import os
import sys
import glob
import torch
import numpy as np

# EUGENe imports and settings
import eugene as eu
from eugene import dataload as dl
from eugene import models
from eugene import evaluate
from eugene import settings
# Directory layout for this analysis, applied to the EUGENe settings object in one pass.
# Each key is the `settings` attribute that receives the path next to it.
_settings_paths = {
    "dataset_dir": "/cellar/users/aklie/data/eugene/revision/jores21",
    "output_dir": "/cellar/users/aklie/projects/ML4GLand/EUGENe_paper/output/fix_full/jores21",
    "logging_dir": "/cellar/users/aklie/projects/ML4GLand/EUGENe_paper/logs/fix_full/jores21",
    "config_dir": "/cellar/users/aklie/projects/ML4GLand/EUGENe_paper/configs/jores21",
}
for _attr_name, _dir_path in _settings_paths.items():
    setattr(settings, _attr_name, _dir_path)

# EUGENe packages
import seqdata as sd

# For illustrator editing
import matplotlib
# Use font type 42 for both PDF and PostScript output so exported text can be
# edited in Illustrator (see the matplotlib rcParams docs for `pdf.fonttype` / `ps.fonttype`).
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Record the interpreter and key package versions used to produce this notebook's outputs
version_report = [
    f"Python version: {sys.version}",
    f"NumPy version: {np.__version__}",
    f"Eugene version: {eu.__version__}",
    f"SeqData version: {sd.__version__}",
    f"PyTorch version: {torch.__version__}",
]
print("\n".join(version_report))

Python version: 3.9.16 | packaged by conda-forge | (main, Feb  1 2023, 21:39:03) 
[GCC 11.3.0]
NumPy version: 1.23.5
Eugene version: 0.0.8
SeqData version: 0.0.1
PyTorch version: 2.0.0


# Load in the `leaf`, `proto` and `combined` test `SeqData`s 

In [2]:
# Locate the preprocessed test-set stores inside the configured dataset directory
leaf_test_path = os.path.join(settings.dataset_dir, "jores21_leaf_test.zarr")
proto_test_path = os.path.join(settings.dataset_dir, "jores21_proto_test.zarr")

# Open the leaf and proto test sets
sdata_leaf = sd.open_zarr(leaf_test_path)
sdata_proto = sd.open_zarr(proto_test_path)

# Build the combined test set by concatenating the two, passing "leaf"/"proto" as the labels
# for the respective inputs
sdata_combined = dl.concat_seqdatas([sdata_leaf, sdata_proto], ["leaf", "proto"])

# Get test set predictions for each model

In [24]:
# Predict with each model that was trained, then save the predictions for each test set.
# For every test set (leaf, proto, combined), every architecture config and every trial,
# the checkpoint of the matching training run is loaded and used to predict on that test set.
test_sets = {"leaf": sdata_leaf, "proto": sdata_proto, "combined": sdata_combined}
configs = ["cnn.yaml", "hybrid.yaml", "jores21_cnn.yaml", "deepstarr.yaml"]
trials = 5
target_keys = ["enrichment"]

# Conversions handed to the prediction call; identical for every run, so they are built once.
# NOTE(review): transpose(1, 2) presumably turns (batch, length, channels) into
# (batch, channels, length) for the convolutional models -- confirm against the stored "ohe_seq" layout.
transforms = {
    "ohe_seq": lambda x: torch.tensor(x, dtype=torch.float32).transpose(1, 2),
    "target": lambda x: torch.tensor(x, dtype=torch.float32),
}

for test_set, sdata in test_sets.items():

    # Make an output directory for this dataset if it doesn't exist
    out_dir = os.path.join(settings.output_dir, test_set)
    os.makedirs(out_dir, exist_ok=True)

    # Iterate over the models
    for config in configs:
        model_name = config.split(".")[0]

        # Iterate over the trials
        for trial in range(1, trials + 1):

            # Print the model name
            print(f"{test_set} {model_name} trial {trial}")

            # Grab the best model from that training run. glob returns matches in arbitrary
            # order, so sort for a reproducible pick, and fail with a clear message (instead of
            # a bare IndexError) when the run left no checkpoint behind.
            version = f"{test_set}_trial_{trial}"
            checkpoint_dir = os.path.join(settings.logging_dir, model_name, version, "checkpoints")
            checkpoint_files = sorted(glob.glob(os.path.join(checkpoint_dir, "*")))
            if not checkpoint_files:
                raise FileNotFoundError(
                    f"No checkpoint found for {model_name} ({version}) in {checkpoint_dir}"
                )
            model_file = checkpoint_files[0]

            # Rebuild the architecture from its config and restore the trained weights into it.
            # NOTE(review): `config` is a bare filename; presumably resolved against
            # settings.config_dir inside eugene -- confirm.
            model = models.load_config(config_path=config)
            best_model = models.SequenceModule.load_from_checkpoint(model_file, arch=model.arch)

            # Predict on the current test set; the run log reports the predictions being added
            # to `sdata` as "<model_name>_trial_<trial>_enrichment_predictions".
            evaluate.predictions_sequence_module(
                model=best_model,
                sdata=sdata,
                seq_key="ohe_seq",
                target_keys="enrichment",
                gpus=1,
                batch_size=2048,
                num_workers=4,
                prefetch_factor=2,
                in_memory=True,
                transforms=transforms,
                file_label="test",
                name=model_name,
                version=version,
                prefix=f"{model_name}_trial_{trial}_",
            )

    # Save the predictions: a TSV with ids, targets and every prediction column, plus the
    # full dataset as a zarr store (overwriting any previous copy).
    pred_keys = [k for k in sdata.data_vars.keys() if "predictions" in k]
    sdata[["id", *target_keys, *pred_keys]].to_dataframe().to_csv(os.path.join(out_dir, f"jores21_{test_set}_test_predictions.tsv"), sep="\t", index=False)
    sd.to_zarr(sdata, os.path.join(out_dir, f"jores21_{test_set}_test_predictions.zarr"), load_first=True, mode="w")

[rank: 0] Global seed set to 1


leaf cnn trial 1
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 2


Adding cnn_trial_1_enrichment_predictions to sdata
leaf cnn trial 2
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 3


Adding cnn_trial_2_enrichment_predictions to sdata
leaf cnn trial 3
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 4


Adding cnn_trial_3_enrichment_predictions to sdata
leaf cnn trial 4
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 5


Adding cnn_trial_4_enrichment_predictions to sdata
leaf cnn trial 5
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 1


Adding cnn_trial_5_enrichment_predictions to sdata
leaf hybrid trial 1
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 2


Adding hybrid_trial_1_enrichment_predictions to sdata
leaf hybrid trial 2
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 3


Adding hybrid_trial_2_enrichment_predictions to sdata
leaf hybrid trial 3
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 4


Adding hybrid_trial_3_enrichment_predictions to sdata
leaf hybrid trial 4
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 5


Adding hybrid_trial_4_enrichment_predictions to sdata
leaf hybrid trial 5
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 1


Adding hybrid_trial_5_enrichment_predictions to sdata
leaf jores21_cnn trial 1
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 2


Adding jores21_cnn_trial_1_enrichment_predictions to sdata
leaf jores21_cnn trial 2
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 3


Adding jores21_cnn_trial_2_enrichment_predictions to sdata
leaf jores21_cnn trial 3
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 4


Adding jores21_cnn_trial_3_enrichment_predictions to sdata
leaf jores21_cnn trial 4
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 5


Adding jores21_cnn_trial_4_enrichment_predictions to sdata
leaf jores21_cnn trial 5
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 1


Adding jores21_cnn_trial_5_enrichment_predictions to sdata
leaf deepstarr trial 1
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 2


Adding deepstarr_trial_1_enrichment_predictions to sdata
leaf deepstarr trial 2
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 3


Adding deepstarr_trial_2_enrichment_predictions to sdata
leaf deepstarr trial 3
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 4


Adding deepstarr_trial_3_enrichment_predictions to sdata
leaf deepstarr trial 4
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 5


Adding deepstarr_trial_4_enrichment_predictions to sdata
leaf deepstarr trial 5
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_5_enrichment_predictions to sdata
proto cnn trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_1_enrichment_predictions to sdata
proto cnn trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_2_enrichment_predictions to sdata
proto cnn trial 3


[rank: 0] Global seed set to 3
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_3_enrichment_predictions to sdata
proto cnn trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_4_enrichment_predictions to sdata
proto cnn trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_5_enrichment_predictions to sdata
proto hybrid trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_1_enrichment_predictions to sdata
proto hybrid trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 3


Adding hybrid_trial_2_enrichment_predictions to sdata
proto hybrid trial 3
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_3_enrichment_predictions to sdata
proto hybrid trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_4_enrichment_predictions to sdata
proto hybrid trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_5_enrichment_predictions to sdata
proto jores21_cnn trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_1_enrichment_predictions to sdata
proto jores21_cnn trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_2_enrichment_predictions to sdata
proto jores21_cnn trial 3


[rank: 0] Global seed set to 3
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_3_enrichment_predictions to sdata
proto jores21_cnn trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_4_enrichment_predictions to sdata
proto jores21_cnn trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_5_enrichment_predictions to sdata
proto deepstarr trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_1_enrichment_predictions to sdata
proto deepstarr trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_2_enrichment_predictions to sdata
proto deepstarr trial 3


[rank: 0] Global seed set to 3
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_3_enrichment_predictions to sdata
proto deepstarr trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_4_enrichment_predictions to sdata
proto deepstarr trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_5_enrichment_predictions to sdata
combined cnn trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_1_enrichment_predictions to sdata
combined cnn trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_2_enrichment_predictions to sdata
combined cnn trial 3


[rank: 0] Global seed set to 3
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_3_enrichment_predictions to sdata
combined cnn trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_4_enrichment_predictions to sdata
combined cnn trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding cnn_trial_5_enrichment_predictions to sdata
combined hybrid trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_1_enrichment_predictions to sdata
combined hybrid trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

[rank: 0] Global seed set to 3


Adding hybrid_trial_2_enrichment_predictions to sdata
combined hybrid trial 3
Loading ohe_seq and ['enrichment'] into memory


  rank_zero_warn(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_3_enrichment_predictions to sdata
combined hybrid trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_4_enrichment_predictions to sdata
combined hybrid trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding hybrid_trial_5_enrichment_predictions to sdata
combined jores21_cnn trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_1_enrichment_predictions to sdata
combined jores21_cnn trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_2_enrichment_predictions to sdata
combined jores21_cnn trial 3


[rank: 0] Global seed set to 3
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_3_enrichment_predictions to sdata
combined jores21_cnn trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_4_enrichment_predictions to sdata
combined jores21_cnn trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding jores21_cnn_trial_5_enrichment_predictions to sdata
combined deepstarr trial 1


[rank: 0] Global seed set to 1
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_1_enrichment_predictions to sdata
combined deepstarr trial 2


[rank: 0] Global seed set to 2
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_2_enrichment_predictions to sdata
combined deepstarr trial 3


[rank: 0] Global seed set to 3
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_3_enrichment_predictions to sdata
combined deepstarr trial 4


[rank: 0] Global seed set to 4
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_4_enrichment_predictions to sdata
combined deepstarr trial 5


[rank: 0] Global seed set to 5
  rank_zero_warn(


Loading ohe_seq and ['enrichment'] into memory


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

Adding deepstarr_trial_5_enrichment_predictions to sdata


# DONE!

---

# Scratch

In [29]:
# Sanity-check each saved predictions store: sequence count, split labels, species,
# first few ids, batch labels, and how many prediction columns were written.
for zarr_name in ["jores21_leaf_test_predictions.zarr", "jores21_proto_test_predictions.zarr", "jores21_combined_test_predictions.zarr"]:
    print(zarr_name)

    # The second "_"-separated token of the file name is the test set, which is also
    # the name of the output subdirectory the store was written to.
    system = zarr_name.split("_")[1]
    sdata = sd.open_zarr(os.path.join(settings.output_dir, system, zarr_name))
    print(sdata.dims["_sequence"])
    print(np.unique(sdata["set"].values, return_counts=True))
    if "train_val" in sdata.data_vars:
        print(np.unique(sdata["train_val"].values, return_counts=True))
    else:
        print("No train_val column found")
    print(np.unique(sdata["sp"].values, return_counts=True))
    print(sdata["id"].values[:5])
    print(np.unique(sdata["batch"].values, return_counts=True))

    # Check to see if the predictions columns are there. The keys are looked up in the
    # store that was just opened rather than reusing `pred_keys` from the prediction cell,
    # so this cell runs on a fresh kernel and reports what is actually on disk.
    found_pred_keys = [k for k in sdata.data_vars if "predictions" in k]
    print(f"Found {len(found_pred_keys)} predictions columns")

jores21_leaf_test_predictions.zarr
7154
(array(['test'], dtype=object), array([7154]))
No train_val column found
(array(['At', 'Sb', 'Zm'], dtype=object), array([1686, 2467, 3001]))
['seq000000' 'seq000001' 'seq000002' 'seq000003' 'seq000004']
(array(['leaf'], dtype='<U4'), array([1]))
Found 20 predictions columns
jores21_proto_test_predictions.zarr
7595
(array(['test'], dtype=object), array([7595]))
No train_val column found
(array(['At', 'Sb', 'Zm'], dtype=object), array([1690, 2638, 3267]))
['seq140371' 'seq140372' 'seq140373' 'seq140374' 'seq140375']
(array(['proto'], dtype='<U5'), array([1]))
Found 20 predictions columns
jores21_combined_test_predictions.zarr
14749
(array(['test'], dtype=object), array([14749]))
No train_val column found
(array(['At', 'Sb', 'Zm'], dtype=object), array([3376, 5105, 6268]))
['seq000000' 'seq000001' 'seq000002' 'seq000003' 'seq000004']
(array(['leaf', 'proto'], dtype='<U5'), array([7154, 7595]))
Found 20 predictions columns
