# Testing EUGENE `predict` module

**Authorship:**
Adam Klie, *03/19/2022*
***
**Description:**
Notebook for testing the functionality of the EUGENE `predict` module on single-task models.

In [1]:
import os
import numpy as np
import pandas as pd

# Autoreload extension
# Load the IPython autoreload extension only when it is not already loaded,
# so that re-running this cell does not try to load it a second time.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
# Mode 2 of the autoreload magic (see the IPython autoreload documentation).
%autoreload 2

import eugene as eu
# Global EUGENE settings for this notebook.
# NOTE(review): presumably the batch size and DataLoader worker count used by the predict calls — confirm.
eu.settings.batch_size = 128
eu.settings.dl_num_workers = 4
# NOTE(review): absolute, user-specific paths; the notebook only runs as-is on the author's filesystem.
eu.settings.logging_dir = "/cellar/users/aklie/projects/EUGENE/tests/_logs/"
eu.settings.datasetdir = "/cellar/users/aklie/projects/EUGENE/tests/_data/datasets/"
# The check cells further down read the saved prediction tables from this directory.
eu.settings.output_dir = "/cellar/users/aklie/projects/EUGENE/tests/_out/"

Global seed set to 13


GPU is available: True
Number of GPUs: 1
Current GPU: 0
GPUs: Quadro RTX 5000


# Single task prediction workflow


In [2]:
# Set-up
# Load the random1000 test dataset (the cell output reports 1000 sequences kept).
sdata = eu.datasets.random1000()
# Modifies sdata: per the cell output this adds rev_seqs, ohe_seqs and ohe_rev_seqs,
# plus a TRAIN column in seqs_annot.
eu.pp.prepare_data(sdata)
# DeepBind model with a single output; input_len=66 matches the (1000, 66, 4)
# ohe_seqs shape shown in the SeqData repr later in the notebook.
model = eu.models.DeepBind(input_len=66, output_dim=1)
# Return value is discarded, so this presumably initializes the weights in place — confirm.
eu.models.base.init_weights(model)

Kept 1000 sequences with targets, dropped 0 sequences with no targets


  0%|          | 0/3 [00:00<?, ?it/s]

SeqData object modified:
	rev_seqs: None -> 1000 rev_seqs added
	ohe_seqs: None -> 1000 ohe_seqs added
	ohe_rev_seqs: None -> 1000 ohe_rev_seqs added
    seqs_annot:
        + TRAIN


In [3]:
# Train/Val predictions
# Per the cell output, this call adds a TARGETS_PREDICTIONS column to sdata.seqs_annot.
# NOTE(review): the check cells below read train_predictions.tsv and val_predictions.tsv
# from eu.settings.output_dir — presumably both files are written by this call; confirm.
eu.predict.train_val_predictions(model, sdata=sdata, target_label="TARGETS")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


No transforms given, assuming just need to tensorize).
No transforms given, assuming just need to tensorize).


Predicting: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

SeqData object modified:
    seqs_annot:
        + TARGETS_PREDICTIONS


In [7]:
# Check: the training predictions saved to disk match the column stored on sdata.
train_tsv = f"{eu.settings.output_dir}/train_predictions.tsv"
saved_t = pd.read_csv(train_tsv, sep="\t", index_col=0)
on_disk = saved_t["PREDICTIONS_0"].values
# Select the same sequences (by index) from the in-memory annotations.
in_memory = sdata.seqs_annot.loc[saved_t.index, "TARGETS_PREDICTIONS"].values
np.allclose(on_disk, in_memory)

True

In [6]:
# Check: the validation predictions saved to disk match the column stored on sdata.
saved_v = pd.read_csv(f"{eu.settings.output_dir}/val_predictions.tsv", index_col=0, sep="\t")
# Compare the validation table itself; the previous version re-checked the
# training table (saved_t), so val_predictions.tsv was never actually verified.
np.allclose(saved_v["PREDICTIONS_0"].values, sdata.seqs_annot.loc[saved_v.index]["TARGETS_PREDICTIONS"].values)

True

In [11]:
# Predictions
# Per the cell output, this call adds TARGETS_PREDICTIONS to sdata.seqs_annot.
# NOTE(review): `label` presumably sets the prefix of the saved file
# (random1000_predictions.tsv is read by the test cell below) — confirm.
eu.predict.predictions(model, sdata=sdata, target_label="TARGETS", label="random1000")
# Display both objects as the cell output.
sdata, model

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


/cellar/users/aklie/projects/EUGENE/tests/_out
No transforms given, assuming just need to tensorize).


Predicting: 0it [00:00, ?it/s]

SeqData object modified:
    seqs_annot:
        + TARGETS_PREDICTIONS


(SeqData object with = 1000 seqs
 seqs = (1000,)
 names = (1000,)
 rev_seqs = (1000,)
 ohe_seqs = (1000, 66, 4)
 ohe_rev_seqs = (1000, 66, 4)
 seqs_annot: 'TARGETS', 'TRAIN', 'TARGETS_PREDICTIONS'
 pos_annot: PyRanges object with 1400 features
 seqsm: None
 uns: None,
 DeepBind(
   (hp_metric): R2Score()
   (max_pool): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
   (avg_pool): AvgPool1d(kernel_size=(4,), stride=(4,), padding=(0,))
   (convnet): BasicConv1D(
     (module): Sequential(
       (0): Conv1d(4, 16, kernel_size=(4,), stride=(1,))
       (1): ReLU()
       (2): Dropout(p=0.2, inplace=False)
     )
   )
   (fcn): BasicFullyConnectedModule(
     (module): Sequential(
       (0): Linear(in_features=504, out_features=256, bias=True)
       (1): ReLU()
       (2): Dropout(p=0.2, inplace=False)
       (3): Linear(in_features=256, out_features=64, bias=True)
       (4): ReLU()
       (5): Dropout(p=0.2, inplace=False)
       (6): Linear(in_features=64, 

In [13]:
# Tests
# Check: the predictions saved under the "random1000" label match the column stored on sdata.
# Read from eu.settings.output_dir, the attribute set in the configuration cell and
# used by the other check cells (this cell previously used `out_dir`).
saved_t = pd.read_csv(f"{eu.settings.output_dir}/random1000_predictions.tsv", index_col=0, sep="\t")
np.allclose(saved_t["PREDICTIONS_0"].values, sdata.seqs_annot.loc[saved_t.index]["TARGETS_PREDICTIONS"].values)

True

---

# Scratch