# Kopp et al. 2021 Evaluation
**Authorship:**
Adam Klie, *08/12/2022*
***
**Description:**
Notebook to perform a brief evaluation of trained models on the Kopp et al. 2021 dataset.
***

In [1]:
# Load the autoreload extension only if this kernel has not loaded it yet,
# so re-running the cell does not load it a second time.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
# Mode 2: reload imported modules before executing each cell.
%autoreload 2

import os
import glob
import logging
import torch
import numpy as np
import pandas as pd
import eugene as eu
import matplotlib.pyplot as plt
import matplotlib

# Emit Type 42 (TrueType) fonts in both PDF and PostScript output.
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

Global seed set to 13
  min_coords = np.vstack(data.min(0) for data in polygons_data).min(0)
  max_coords = np.vstack(data.max(0) for data in polygons_data).max(0)
Matplotlib is building the font cache; this may take a moment.


In [2]:
# Configure EUGENe
# Every directory below is derived from these three constants, so pointing the
# notebook at another location or dataset means editing a single line.
DATASET_NAME = "kopp21"
DATA_ROOT = "/cellar/users/aklie/data/eugene"
PROJECT_ROOT = "/cellar/users/aklie/projects/EUGENe/EUGENe_paper"

eu.settings.dataset_dir = os.path.join(DATA_ROOT, DATASET_NAME)
eu.settings.output_dir = os.path.join(PROJECT_ROOT, "output", DATASET_NAME)
eu.settings.logging_dir = os.path.join(PROJECT_ROOT, "logs", DATASET_NAME)
eu.settings.config_dir = os.path.join(PROJECT_ROOT, "configs", DATASET_NAME)
# Not an EUGENe setting: plain variable holding the figure directory path.
figure_dir = os.path.join(PROJECT_ROOT, "figures", DATASET_NAME)

# Set EUGENe's verbosity to the ERROR logging level.
eu.settings.verbosity = logging.ERROR

# Load in the test `SeqData`(s)

In [3]:
# Read the processed test set from the configured dataset directory.
test_h5sd_path = os.path.join(eu.settings.dataset_dir, "jund_test_processed.h5sd")
sdata_test = eu.dl.read_h5sd(filename=test_h5sd_path)

# Show a summary of the loaded object.
sdata_test

SeqData object with = 64309 seqs
seqs = (64309,)
names = (64309,)
rev_seqs = (64309,)
ohe_seqs = (64309, 500, 4)
ohe_rev_seqs = (64309, 500, 4)
seqs_annot: 'chr', 'end', 'start', 'target', 'train_test'
pos_annot: None
seqsm: None
uns: None

# Get test set predictions for each model

In [4]:
# Predict on the test set with each trained model. Every call adds a
# "<model_name>_trial_<n>_target_predictions" column to `sdata_test`, which is
# written to disk once after all models have run.
model_types = ["FCN", "CNN", "RNN", "Hybrid", "Kopp21CNN"]
model_names = ["dsFCN", "dsCNN", "dsRNN", "dsHybrid", "Kopp21CNN"]
trials = 1

# Architecture label -> class used to restore the checkpoint.
model_classes = {
    "FCN": eu.models.FCN,
    "CNN": eu.models.CNN,
    "RNN": eu.models.RNN,
    "Hybrid": eu.models.Hybrid,
    "Kopp21CNN": eu.models.Kopp21CNN,
}

for model_name, model_type in zip(model_names, model_types):
    # Fail loudly on an unrecognised label instead of silently reusing (or
    # missing) a `model` variable from a previous iteration.
    if model_type not in model_classes:
        raise ValueError(f"Unknown model type: {model_type!r}")

    for trial in range(1, trials + 1):
        print(f"{model_name} trial {trial}")

        # Locate the checkpoint for this model/trial. glob returns matches in
        # arbitrary order, so sort to make the choice deterministic when more
        # than one file is present.
        checkpoint_pattern = os.path.join(
            eu.settings.logging_dir, model_name, f"trial_{trial}", "checkpoints", "*"
        )
        checkpoints = sorted(glob.glob(checkpoint_pattern))
        if not checkpoints:
            raise FileNotFoundError(f"No checkpoint found matching {checkpoint_pattern}")
        model_file = checkpoints[0]

        model = model_classes[model_type].load_from_checkpoint(model_file)

        # Only the RNN is given transpose=False; every other model gets True.
        t_kwargs = {"transpose": model_type != "RNN"}

        eu.predict.predictions(
            model,
            sdata=sdata_test,
            target="target",
            name=model_name,
            transform_kwargs=t_kwargs,
            version=f"trial_{trial}",
            file_label="test",
            prefix=f"{model_name}_trial_{trial}_"
        )
        # Drop the reference so the model is not kept alive across iterations.
        del model

# Make sure the destination exists before saving the annotated test set.
os.makedirs(eu.settings.output_dir, exist_ok=True)
sdata_test.write_h5sd(os.path.join(eu.settings.output_dir, "test_predictions.h5sd"))

dsFCN trial 1




No transforms given, assuming just need to tensorize).


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

SeqData object modified:
    seqs_annot:
        + dsFCN_trial_1_target_predictions
dsCNN trial 1
No transforms given, assuming just need to tensorize).


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Predicting: 0it [00:00, ?it/s]

SeqData object modified:
    seqs_annot:
        + dsCNN_trial_1_target_predictions
dsRNN trial 1
No transforms given, assuming just need to tensorize).


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Predicting: 0it [00:00, ?it/s]

SeqData object modified:
    seqs_annot:
        + dsRNN_trial_1_target_predictions
dsHybrid trial 1
No transforms given, assuming just need to tensorize).


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Predicting: 0it [00:00, ?it/s]

SeqData object modified:
    seqs_annot:
        + dsHybrid_trial_1_target_predictions
Kopp21CNN trial 1


OSError: [Errno 121] Remote I/O error

---

# Scratch