# Testing EUGENE prediction

**Authorship:**
Adam Klie, *03/19/2022*
***
**Description:**
Notebook for testing predictions with EUGENE architectures

<div class="alert alert-block alert-warning">
<b>TODOs</b>:
<ul>
    <b><li>Config for EUGENES</li></b>
    </ul>
</div>

In [1]:
import os
import numpy as np
import pandas as pd
import torch

# Autoreload extension
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

# Predict with dsEUGENE
<div class="alert alert-info" role="alert">
  <b>Predict with dsEUGENE models</b>
</div>

In [2]:
BATCH_SIZE = 32
DATA_TYPE = "tsv"
NUM_WORKERS = 0
NUM_SEQS = 100
SEQ_LEN = 66

## Load data

In [7]:
from torchvision import transforms
from eugene.utils.seq_transforms import ReverseComplement, Augment, OneHotEncode, ToTensor
from eugene.dataloading.SeqDataModule import SeqDataModule

In [8]:
data_transform = transforms.Compose([
    Augment(randomize_linker_p=0.1, enhancer="WT-otx-a"), 
    ReverseComplement(ohe_encoded=False), 
    OneHotEncode(), 
    ToTensor(transpose=True)
])

In [19]:
DATA_DIR = "test_{}seqs_{}/".format(NUM_SEQS, SEQ_LEN)
mod = SeqDataModule(seq_file=DATA_DIR + "test_seqs.tsv",
                    transform=data_transform,
                    num_workers=4,
                    batch_size=BATCH_SIZE,
                    test= True,
                    load_kwargs=dict(seq_col="SEQ", target_col="LABEL"))
#load_kwargs=dict(target_file=DATA_DIR + "test_labels.npy", rev_seq_file=DATA_DIR + "test_rev_ohe_seqs.npy"))

## Instantiate EUGENE architecture: dsEUGENE

In [20]:
from eugene.models.dsEUGENE import dsEUGENE

In [21]:
CKT_PTH  = "/cellar/users/aklie/projects/EUGENE/test/test_logs/batch_size-1024.num_workers-0.data_type-npy.num_seq-500000.seq_len-66/dsEUGENE/version_0/checkpoints/epoch=9-step=4399.ckpt"

In [22]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [23]:
eugene = dsEUGENE.load_from_checkpoint(
    checkpoint_path=CKT_PTH,
    map_location=device,
)

## Testing with PyTorch Lightning

In [24]:
import pytorch_lightning as pl
from eugene.utils.custom_callbacks import PredictionWriter

In [25]:
trainer = pl.Trainer(gpus=1, callbacks=PredictionWriter(DATA_DIR, write_interval="epoch", prefix="test_500000seqs_66_"))

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [26]:
preds = trainer.predict(model=eugene, datamodule=mod)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

  return array(a, dtype, copy=False, order=order, subok=True)


In [27]:
pd.read_csv(os.path.join(DATA_DIR, "test_500000seqs_66_predictions.tsv"), sep="\t")

Unnamed: 0,NAME,PREDICTION
0,-1.0,-0.132297
1,-1.0,-0.012364
2,-1.0,0.147150
3,-1.0,0.155485
4,-1.0,-0.156971
...,...,...
95,-1.0,0.004575
96,-1.0,-0.067324
97,-1.0,-0.032131
98,-1.0,0.079378


# References