# Testing `_base_models.py` models

**Authorship:**
Adam Klie, *03/19/2022*
***
**Description:**
Notebook for testing out the Base Model EUGENe architectures (`FCN`, `CNN`, `RNN`, `Hybrid`).

# Set-up

In [2]:
import numpy as np
import pandas as pd
import torch

# Autoreload extension
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

In [3]:
import eugene as eu

Global seed set to 13


# Load data

In [4]:
x = torch.randn(10, 4, 66)
x_t = x.transpose(2, 1)
x_rev = torch.randn(10, 4, 66)
x_rev_t = x_rev.transpose(2, 1)

# `FCN` Only

In [5]:
fcn = eu.models.FCN(
    input_len=66, 
    output_dim=1,
    strand="ss",
    task="binary_classification",
    fc_kwargs=dict(hidden_dims=[100]))
fcn



FCN(
  (hp_metric): AUROC()
  (fcn): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=264, out_features=100, bias=True)
      (1): ReLU()
      (2): Linear(in_features=100, out_features=1, bias=True)
    )
  )
)

In [6]:
out = fcn(x, x_rev)
out, out.shape

(tensor([[ 0.1548],
         [ 0.1647],
         [ 0.1098],
         [-0.0852],
         [ 0.3124],
         [ 0.0656],
         [-0.2696],
         [-0.1712],
         [-0.1495],
         [-0.1621]], grad_fn=<AddmmBackward0>),
 torch.Size([10, 1]))

# `CNN` Only

In [7]:
cnn = eu.models.load_config("CNN", "/cellar/users/aklie/projects/EUGENe/EUGENe_paper/configs/junD/dsCNN.yaml")

{'input_len': 500, 'output_dim': 1, 'conv_kwargs': {'channels': [4, 10, 8], 'conv_kernels': [11, 3], 'pool_kernels': [30, 10], 'pool_strides': [1, 1], 'dropout_rates': 0.2, 'batchnorm': True, 'omit_final_pool': False}, 'strand': 'ds', 'task': 'binary_classification', 'aggr': None, 'loss_fxn': 'cross_entropy', 'fc_kwargs': {}, 'optimizer': 'adam', 'lr': 0.001, 'scheduler': 'reduce_lr_on_plateau', 'scheduler_patience': 2, 'hp_metric': None}


In [8]:
cnn

CNN(
  (hp_metric): AUROC()
  (convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 10, kernel_size=(11,), stride=(1,))
      (1): ReLU()
      (2): MaxPool1d(kernel_size=30, stride=1, padding=0, dilation=1, ceil_mode=False)
      (3): Dropout(p=0.2, inplace=False)
      (4): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Conv1d(10, 8, kernel_size=(3,), stride=(1,))
      (6): ReLU()
      (7): MaxPool1d(kernel_size=10, stride=1, padding=0, dilation=1, ceil_mode=False)
      (8): Dropout(p=0.2, inplace=False)
      (9): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (fcnet): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=7200, out_features=1, bias=True)
    )
  )
)

In [11]:
cnn = eu.models.CNN(
    input_len=66, 
    output_dim=1,
    strand="ss",
    task="binary_classification", 
    conv_kwargs=dict(
        channels=[4, 16], 
        conv_kernels=[15], 
        pool_kernels=[1], 
        pool_strides = [1], 
        dropout_rates=0.2,
        omit_final_pool=False
    )
)
cnn

CNN(
  (hp_metric): AUROC()
  (convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 16, kernel_size=(15,), stride=(1,))
      (1): ReLU()
      (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (3): Dropout(p=0.2, inplace=False)
    )
  )
  (fcnet): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=832, out_features=1, bias=True)
    )
  )
)

In [13]:
cnn = eu.models.DeepBind(
    input_len=66,
    output_dim=1,
)

In [14]:
out = cnn(x, x_rev)
out, out.shape

(tensor([[0.2066],
         [0.2066],
         [0.2066],
         [0.2066],
         [0.2066],
         [0.2066],
         [0.2066],
         [0.2066],
         [0.2066],
         [0.2066]], grad_fn=<AddmmBackward0>),
 torch.Size([10, 1]))

In [15]:
t = eu.datasets.random1000()

In [16]:
eu.pp.prepare_data(t)

{'steps': ['reverse_complement', 'one_hot_encode', 'train_test_split'], 'copy': False} (SeqData object with = 1000 seqs
seqs = (1000,)
names = (1000,)
rev_seqs = None
ohe_seqs = None
ohe_rev_seqs = None
seqs_annot: 'target'
pos_annot: PyRanges object with 1400 features
seqsm: None
uns: None,) <class 'eugene.dataloading.dataloaders._SeqData.SeqData'>


  0%|          | 0/3 [00:00<?, ?it/s]

SeqData object modified:
	rev_seqs: None -> 1000 rev_seqs added
	ohe_seqs: None -> 1000 ohe_seqs added
	ohe_rev_seqs: None -> 1000 ohe_rev_seqs added
    seqs_annot:
        + train


In [None]:
sdataset = t.to_dataset(target="target", transform_kwargs={"transpose": True})

No transforms given, assuming just need to tensorize).


In [None]:
sdataloader = sdataset.to_dataloader(batch_size=128)

In [None]:
x = next(iter(sdataloader))[1].size()

In [None]:
x

torch.Size([128, 4, 66])

In [None]:
cnn(x)

In [None]:
model.convnet(x)

TypeError: conv1d() received an invalid combination of arguments - got (torch.Size, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!torch.Size!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!torch.Size!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)


# RNN Only

In [15]:
rnn = eu.models.RNN(input_len=66, 
          strand="ss",
          task="regression",
          rnn_kwargs=dict(output_dim=32, unit_type="lstm", bidirectional=True, batch_first=True), 
          fc_kwargs=dict(output_dim=1))
rnn

RNN(
  (rnn): BasicRecurrent(
    (module): LSTM(4, 32, batch_first=True, bidirectional=True)
  )
  (fcnet): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=64, out_features=1, bias=True)
    )
  )
  (r_squared): R2Score()
)

In [16]:
out = rnn(x_t, x_rev_t)
out, out.shape

(tensor([[ 0.0410],
         [ 0.0373],
         [-0.0069],
         [ 0.0746],
         [ 0.0769],
         [ 0.0542],
         [ 0.0185],
         [ 0.0386],
         [ 0.0927],
         [ 0.1275]], grad_fn=<AddmmBackward0>),
 torch.Size([10, 1]))

# Hybrid CNN-RNN

In [17]:
cnn_rnn = eu.models.Hybrid(input_len=66, 
                 strand="ts",
                 task="binary_classification", 
                 conv_kwargs=dict(channels=[4, 16, 32], conv_kernels=[15, 5], pool_kernels=[1, 1, 1], pool_strides = [1, 1], dropout_rates=0.2), 
                 rnn_kwargs=dict(output_dim=32, unit_type="lstm", bidirectional=True, batch_first=True),  
                 fc_kwargs=dict(output_dim=1))
cnn_rnn



Hybrid(
  (convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 16, kernel_size=(15,), stride=(1,))
      (1): ReLU(inplace=True)
      (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (3): Dropout(p=0.2, inplace=False)
      (4): Conv1d(16, 32, kernel_size=(5,), stride=(1,))
      (5): ReLU(inplace=True)
      (6): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (7): Dropout(p=0.2, inplace=False)
    )
  )
  (reverse_convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 16, kernel_size=(15,), stride=(1,))
      (1): ReLU(inplace=True)
      (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (3): Dropout(p=0.2, inplace=False)
      (4): Conv1d(16, 32, kernel_size=(5,), stride=(1,))
      (5): ReLU(inplace=True)
      (6): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (7): Dropout(p=0.2, inplace=False)
    )
  )
  (r

In [18]:
out = cnn_rnn(x, x_rev)
out, out.shape

(tensor([[-0.0302],
         [-0.0532],
         [-0.0707],
         [-0.0539],
         [-0.0465],
         [-0.0774],
         [-0.0164],
         [-0.0476],
         [-0.0719],
         [-0.0618]], grad_fn=<AddmmBackward0>),
 torch.Size([10, 1]))

# DeepBind

In [20]:
deepbind = eu.models.DeepBind(input_len=66)

# Load from config

In [47]:
model_config = "../_configs/test_fcn.yaml"
eu.models.load_config("FCN", model_config)

FCN(
  (fcn): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=264, out_features=128, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=128, out_features=64, bias=True)
      (3): ReLU(inplace=True)
      (4): Linear(in_features=64, out_features=1, bias=True)
    )
  )
  (r_squared): R2Score()
)

In [40]:
test_dict = {'input_len': 66, 'strand': 'ss', 'fc_kwargs': {'output_dim': 1, 'hidden_dims': [128, 66]}}

In [44]:
out = "../_configs/test_fcn.yaml"

In [42]:
test_dict

{'input_len': 66,
 'strand': 'ss',
 'fc_kwargs': {'output_dim': 1, 'hidden_dims': [128, 66]}}

In [45]:
import yaml
with open(out, 'w') as yaml_file:
    yaml.dump(test_dict, yaml_file, sort_keys=False, default_flow_style=False)

---

# Scratch