# Testing the `models` module

**Authorship:**
Adam Klie, *10/04/2022*
***
**Description:**
Notebook for testing out the `models` module.

In [70]:
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

import os
import torch
import numpy as np
import pandas as pd
import eugene as eu

In [150]:
# For all models, just teest some basics
SEQ_LEN = 100
OUT_DIMS = 2
MODEL = "hybrid"
STRAND = "ss"
TASK = "regression"
LOSS_FXN = "mse"
CNN_KWARGS=dict(channels=[4, 16, 32], 
                conv_kernels=[15, 5], 
                pool_kernels=[1, 1])
RNN_KWARGS=dict(output_dim=32,
                bidirectional=True,
                batch_first=True)
FCN_KWARGS=dict(hidden_dims=[50])

# Make a fixture
sdata = eu.datasets.random1000()
eu.pp.ohe_seqs_sdata(sdata)
eu.pp.reverse_complement_seqs_sdata(sdata)

One-hot encoding sequences:   0%|          | 0/1000 [00:00<?, ?it/s]

SeqData object modified:
	ohe_seqs: None -> 1000 ohe_seqs added
SeqData object modified:
	ohe_rev_seqs: None -> 1000 ohe_rev_seqs added


In [120]:
def check_model(test_model, transpose=False):
    x = torch.randn(10, 4, SEQ_LEN)
    x_rev = torch.randn(10, 4, SEQ_LEN)
    eu.models.init_weights(test_model)
    if transpose:
        x = x.transpose(1, 2)
        x_rev = x_rev.transpose(1, 2)
        transform_kwargs = {"transpose": True}
    else:
        x = x
        x_rev = x_rev
        transform_kwargs = {"transpose": False}
    output = test_model(x, x_rev)
    assert output.shape == (10, 2)
    eu.evaluate.predictions(
        test_model,
        sdata,
        target_keys=["activity_0", "activity_1"],
        transform_kwargs=transform_kwargs,
        store_only=True
    )
    assert "activity_0_predictions" in sdata.seqs_annot.columns

# Base Models

## FCN

In [111]:
def test_FCN():
    model = eu.models.FCN(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr=None,
        loss_fxn=LOSS_FXN,
        fc_kwargs=FCN_KWARGS
    )
    check_model(model)
test_FCN()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Initializing Linear(in_features=400, out_features=50, bias=True) with kaiming_normal
Initializing Linear(in_features=50, out_features=2, bias=True) with kaiming_normal
No transforms given, assuming just need to tensorize.


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

## CNN

In [112]:
def test_CNN():
    model = eu.models.CNN(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr=None,
        loss_fxn=LOSS_FXN,
        fc_kwargs=FCN_KWARGS,
        conv_kwargs=CNN_KWARGS
    )
    check_model(model)
test_CNN()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Initializing Conv1d(4, 16, kernel_size=(15,), stride=(1,), padding=valid) with kaiming_normal
Initializing Conv1d(16, 32, kernel_size=(5,), stride=(1,), padding=valid) with kaiming_normal
Initializing Linear(in_features=2624, out_features=50, bias=True) with kaiming_normal
Initializing Linear(in_features=50, out_features=2, bias=True) with kaiming_normal
No transforms given, assuming just need to tensorize.


Predicting: 0it [00:00, ?it/s]

## RNN

In [114]:
def test_RNN():
    model = eu.models.RNN(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr=None,
        loss_fxn=LOSS_FXN,
        rnn_kwargs=RNN_KWARGS
    )
    check_model(model, transpose=True)
test_RNN()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Initializing Linear(in_features=64, out_features=2, bias=True) with kaiming_normal
No transforms given, assuming just need to tensorize.


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

## Hybrid

In [115]:
def test_Hybrid():
    model = eu.models.Hybrid(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr=None,
        loss_fxn=LOSS_FXN,
        fc_kwargs=FCN_KWARGS,
        conv_kwargs=CNN_KWARGS,
        rnn_kwargs=RNN_KWARGS
    )
    check_model(model) 
test_Hybrid()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Initializing Conv1d(4, 16, kernel_size=(15,), stride=(1,), padding=valid) with kaiming_normal
Initializing Conv1d(16, 32, kernel_size=(5,), stride=(1,), padding=valid) with kaiming_normal
Initializing Linear(in_features=64, out_features=50, bias=True) with kaiming_normal
Initializing Linear(in_features=50, out_features=2, bias=True) with kaiming_normal
No transforms given, assuming just need to tensorize.


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

# SOTA Models

## DeepBind

In [121]:
def test_DeepBind():
    model = eu.models.DeepBind(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr="max",
        loss_fxn=LOSS_FXN,
    )
    check_model(model)
test_DeepBind()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Initializing Conv1d(4, 16, kernel_size=(16,), stride=(1,), padding=valid) with kaiming_normal
Initializing Linear(in_features=32, out_features=32, bias=True) with kaiming_normal
Initializing Linear(in_features=32, out_features=2, bias=True) with kaiming_normal
No transforms given, assuming just need to tensorize.


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

## DeepSEA

In [122]:
def test_DeepSEA():
    model = eu.models.DeepBind(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr=None,
        loss_fxn=LOSS_FXN,
    )
    check_model(model)
test_DeepSEA()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Initializing Conv1d(4, 16, kernel_size=(16,), stride=(1,), padding=valid) with kaiming_normal
Initializing Linear(in_features=32, out_features=32, bias=True) with kaiming_normal
Initializing Linear(in_features=32, out_features=2, bias=True) with kaiming_normal
No transforms given, assuming just need to tensorize.


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

# Custom Models

## Jores21CNN

In [124]:
def test_Jores21CNN():
    model = eu.models.Jores21CNN(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr=None,
        loss_fxn=LOSS_FXN,
    )
    check_model(model)
test_Jores21CNN()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


No transforms given, assuming just need to tensorize.


Predicting: 0it [00:00, ?it/s]

## Kopp21CNN

In [151]:
def test_Kopp21CNN():
    model = eu.models.Kopp21CNN(
        input_len=SEQ_LEN,
        output_dim=OUT_DIMS,
        strand=STRAND,
        task=TASK,
        aggr="max",
        loss_fxn=LOSS_FXN,
    )
    check_model(model, transpose=False)
test_Kopp21CNN()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


No transforms given, assuming just need to tensorize.


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

SeqData object modified:
    seqs_annot:
        + activity_0_predictions, activity_1_predictions


# Load from config

In [155]:
def test_load_config():
    model_config = "../../_configs/ssHybrid.yaml"
    model = eu.models.load_config("Hybrid", model_config)
    check_model(model)
test_load_config()


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


No transforms given, assuming just need to tensorize.


  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Predicting: 0it [00:00, ?it/s]

---