# Testing EUGENE `train` module

**Authorship:**
Adam Klie, *03/19/2022*
***
**Description:**
Notebook for testing that EUGENE architectures (here, the `Hybrid` convolutional + recurrent + fully connected model) can be trained with PyTorch Lightning.

<div class="alert alert-block alert-warning">
<b>TODOs</b>:
<ul>
    <li><b></b></li>
    </ul>
</div>

# Set-up

In [1]:
import numpy as np
import pandas as pd

# Autoreload extension: load it only if it is not already registered with the
# IPython extension manager, so re-running this cell does not call %load_ext
# a second time. Then switch autoreload to mode 2.
# NOTE(review): mode 2 is understood to reload all modules before each cell
# executes, so edits to the eugene package are picked up without restarting
# the kernel - confirm against the IPython autoreload docs.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

In [2]:
import eugene as eu

Global seed set to 13


# Benchmark params

In [3]:
# Benchmark configuration. BATCH_SIZE and NUM_WORKERS are handed to the
# dataloader; all four values are encoded in the log directory name.
BATCH_SIZE = 32     # batch_size passed to to_dataloader
NUM_SEQS = 1_000    # number of sequences (random1000 reports 1000 seqs)
SEQ_LEN = 66        # sequence length (encoded seqs have shape [4, 66])
NUM_WORKERS = 0     # num_workers passed to to_dataloader

In [4]:
# Keyword arguments for the three sub-networks of the hybrid model.

# Convolutional stack: 4 -> 16 -> 32 channels, with one conv kernel size and
# one pooling kernel size per layer.
CNN_KWARGS = {
    "channels": [4, 16, 32],
    "conv_kernels": [15, 5],
    "pool_kernels": [1, 1],
}

# Recurrent layer settings.
RNN_KWARGS = {
    "output_dim": 32,
    "bidirectional": True,
    "batch_first": True,
}

# Fully connected head: one hidden layer of 50 units and a single output.
FCN_KWARGS = {
    "hidden_dims": [50],
    "output_dim": 1,
}

In [54]:
# Run identifiers. STRAND and TASK are passed to the model constructor; all
# three are also used to name the TensorBoard run (name=MODEL,
# version="<STRAND>_<TASK>").
MODEL = "hybrid"       # architecture label used as the logger name
STRAND = "ss"          # presumably "single-stranded" - TODO confirm against eugene docs
TASK = "regression"    # task type handed to the model

# Instantiate model

In [55]:
# Build the hybrid model from the benchmark/config constants defined above.
# input_len is taken from SEQ_LEN (rather than repeating the literal 66) so the
# model cannot drift from the sequence length that is encoded in the log
# directory name.
eugene = eu.models.Hybrid(
    input_len=SEQ_LEN,
    strand=STRAND,
    task=TASK,
    conv_kwargs=CNN_KWARGS,
    rnn_kwargs=RNN_KWARGS,
    fc_kwargs=FCN_KWARGS,
)
# Bare expression so the notebook displays the module structure.
eugene

Hybrid(
  (convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 16, kernel_size=(15,), stride=(1,))
      (1): ReLU(inplace=True)
      (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (3): Conv1d(16, 32, kernel_size=(5,), stride=(1,))
      (4): ReLU(inplace=True)
      (5): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (recurrentnet): BasicRecurrent(
    (module): LSTM(32, 32, batch_first=True, bidirectional=True)
  )
  (fcnet): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=64, out_features=50, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=50, out_features=1, bias=True)
    )
  )
  (r_squared): R2Score()
)

In [56]:
# Run EUGENE's weight-initialisation helper on the freshly built model before
# training. The return value is not used.
# NOTE(review): presumably initialises the parameters in place; the scheme is
# defined in eu.models.base and is not visible here - confirm.
eu.models.base.init_weights(eugene)

# Load data

In [57]:
# Load the `random1000` dataset from eu.datasets. The bare `sdata` expression
# displays its summary (a SeqData object with 1000 seqs and a 'TARGETS'
# annotation, per the output below).
sdata = eu.datasets.random1000()
sdata

SeqData object with = 1000 seqs
seqs = (1000,)
names = (1000,)
rev_seqs = None
ohe_seqs = None
ohe_rev_seqs = None
    seqs_annot: 'TARGETS'

In [58]:
# Convert the SeqData object into a dataset labelled by the "TARGETS"
# annotation, one-hot encoding each sequence with the "transpose" option on.
sdataset = sdata.to_dataset(
    label="TARGETS",
    seq_transforms=["one_hot_encode"],
    transform_kwargs={"transpose": True},
)

# Sanity check: the encoded sequence of the first item must match the model's
# input channels and the benchmark sequence length, otherwise the model and
# the log directory name would not describe the data actually being trained on.
first_seq_shape = sdataset[0][1].shape
expected_seq_shape = (CNN_KWARGS["channels"][0], SEQ_LEN)
assert tuple(first_seq_shape) == expected_seq_shape, (
    f"expected encoded sequence shape {expected_seq_shape}, got {tuple(first_seq_shape)}"
)
first_seq_shape

torch.Size([4, 66])

In [59]:
# Wrap the dataset in a dataloader using the benchmark batch size and
# worker count.
sdataloader = sdataset.to_dataloader(
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)

# Train (and time) with PyTorch Lightning

In [60]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

In [63]:
# One log directory per benchmark configuration: the directory name encodes the
# batch size, worker count, number of sequences and sequence length.
LOG_DIR = f"../_logs/batch_size-{BATCH_SIZE}.num_workers-{NUM_WORKERS}.num_seq-{NUM_SEQS}.seq_len-{SEQ_LEN}"

# Inside that directory the run is named after the architecture, with the
# strand/task combination as its version.
logger = TensorBoardLogger(
    LOG_DIR,
    name=MODEL,
    version=f"{STRAND}_{TASK}",
)

# Trainer capped at 10 epochs, reporting to the TensorBoard logger above.
trainer = pl.Trainer(
    max_epochs=10,
    logger=logger,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [64]:
# Fit the model on the dataloader built above. Only this one dataloader is
# passed, so no separate validation loader is supplied to the trainer.
trainer.fit(eugene, sdataloader)


  | Name         | Type                      | Params
-----------------------------------------------------------
0 | convnet      | BasicConv1D               | 3.6 K 
1 | recurrentnet | BasicRecurrent            | 16.9 K
2 | fcnet        | BasicFullyConnectedModule | 3.3 K 
3 | r_squared    | R2Score                   | 0     
-----------------------------------------------------------
23.8 K    Trainable params
0         Non-trainable params
23.8 K    Total params
0.095     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

---

# Scratch