In [323]:
# Extensions
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

# Imports
import torch
import pandas as pd
import numpy as np

# Configuration
ID = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
x = torch.randn(10, 4, 100)
x_rev_comp = torch.randn(10, 4, 100)
y = torch.randint(0, 2, (10, 1))
batch = (ID, x, x_rev_comp, y)

# Layers

In [324]:
from eugene.models.base import _layers as layers

## Activations

### Identity

In [325]:
layer = layers.Identity()

In [326]:
layer

Identity()

In [327]:
layer(x).shape

torch.Size([10, 4, 100])

### Exponential

In [328]:
layer = layers.Exponential()

In [329]:
layer

Exponential()

In [330]:
layer(x).shape

torch.Size([10, 4, 100])

## Convolutional

### Conv1D

In [331]:
layer = torch.nn.Conv1d(4, 8, 10)

In [332]:
layer

Conv1d(4, 8, kernel_size=(10,), stride=(1,))

In [333]:
layer.weight.shape

torch.Size([8, 4, 10])

In [334]:
layer(x).shape

torch.Size([10, 8, 91])

### BiConv1D

In [335]:
layer = layers.BiConv1D(4, 8, 10)

In [336]:
layer

BiConv1D(4, 8, kernel_size=10, stride=1, padding=same, dilation=1, groups=1, bias=True)

In [337]:
layer.weight.shape

torch.Size([8, 4, 10])

In [338]:
layer(x).shape

  x_fwd = F.conv1d(x, self.weight, stride=self.stride, padding=self.padding, dilation=self.dilation, groups=self.groups)


torch.Size([10, 8, 100])

## Pooling

## Recurrent

## Transformer

### MultiHeadAttention

In [345]:
layer = layers.MultiHeadAttention(
    input_dim=4,
    head_dim=10,
    num_heads=2,
)
layer

MultiHeadAttention(
  (qkv): Linear(in_features=4, out_features=60, bias=False)
  (softmax): Softmax(dim=-1)
  (dropout_layer): Dropout(p=0.0, inplace=False)
  (projection_layer): Sequential(
    (0): Linear(in_features=20, out_features=4, bias=True)
    (1): Dropout(p=0.0, inplace=False)
  )
)

In [346]:
layer(x.transpose(1,2), mask=None).shape

torch.Size([10, 100, 4])

## Normalization

## Wrappers

### Residual

In [347]:
conv_layer = torch.nn.Conv1d(4, 4, 10, padding="same")

In [348]:
layer = layers.Residual(conv_layer)

In [349]:
layer(x).shape

torch.Size([10, 4, 100])

## Gluers

### Flatten

In [52]:
layer = layers.Flatten()

In [55]:
layer(x).shape

torch.Size([10, 400])

### Unsqueeze

In [57]:
layer = layers.Unsqueeze(1)

In [59]:
layer(x).shape

torch.Size([10, 1, 4, 100])

### View

In [65]:
layer = layers.View((100, 4))


In [66]:
layer(x).shape

torch.Size([10, 100, 4])

## Miscallaneous

### Clip

In [68]:
layer = layers.Clip(0, 0.1)

In [70]:
layer(x).max()

tensor(0.1000)

# Blocks

In [252]:
from eugene.models.base import _blocks as blocks

## Conv1DBlock

In [387]:
conv1d_block = blocks.Conv1DBlock(
    input_len=100,
    input_channels=4,
    output_channels=4,
    conv_kernel=10,
    conv_type="biconv1d",
    conv_padding="same",
    pool_type="max",
    norm_type="layernorm",
    norm_dim=(4, 100),
    dropout_rate=0.5,
    order="conv-norm-act-dropout"
)
conv1d_block

Conv1DBlock(
  (layers): Sequential(
    (conv): BiConv1D(4, 4, kernel_size=10, stride=1, padding=same, dilation=1, groups=1, bias=True)
    (norm): LayerNorm((4, 100), eps=1e-05, elementwise_affine=True)
    (act): ReLU()
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [388]:
conv1d_block(x).shape

torch.Size([10, 4, 100])

In [389]:
conv1d_block.output_size

[4, 100]

In [390]:
layers.Residual(conv1d_block)(x).shape

torch.Size([10, 4, 100])

## DenseBlock

In [287]:
dense_block = blocks.DenseBlock(
    input_dim=400, 
    output_dim=1, 
    hidden_dims=[200, 100, 50, 10],
    activations="exponential",
    batchnorm=True,
    batchnorm_first=True, 
    dropout_rates=[0.1, None],
    biases=False
)
dense_block

DenseBlock(
  (layers): Sequential(
    (0): Linear(in_features=400, out_features=200, bias=False)
    (1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Exponential()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=200, out_features=100, bias=False)
    (5): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Exponential()
    (7): Linear(in_features=100, out_features=50, bias=False)
    (8): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): Exponential()
    (10): Linear(in_features=50, out_features=10, bias=False)
    (11): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): Exponential()
    (13): Linear(in_features=10, out_features=1, bias=False)
  )
)

In [289]:
dense_block(x.reshape(10, -1))

tensor([[-0.4820],
        [-1.0450],
        [-0.3707],
        [ 0.1420],
        [-1.7296],
        [-1.5558],
        [ 0.0736],
        [-0.3563],
        [-1.6115],
        [-0.5148]], grad_fn=<MmBackward0>)

## ReccurentBlock

In [290]:
recurrent_block = blocks.RecurrentBlock(
    input_dim=4,
    hidden_dim=10,
    num_layers=2,
    unit_type="lstm",
    bidirectional=True,
    dropout_rates=0.1,
    bias=False,
    batch_first=True
)
recurrent_block

RecurrentBlock(
  (layers): LSTM(4, 10, num_layers=2, bias=False, batch_first=True, dropout=0.1, bidirectional=True)
)

In [291]:
recurrent_block(x.transpose(1, 2))[0].shape

torch.Size([10, 100, 20])

# Towers

In [350]:
from eugene.models.base import _towers as towers

### Feed Forward Tower

In [354]:
tower = towers.Tower(
    input_size=400,
    block=torch.nn.Linear,
    repeats=3,
    dynamic_block_args={'in_features': [400, 200, 100], 'out_features': [200, 100, 10]},
)
tower

Tower(
  (blocks): Sequential(
    (linear_0): Linear(in_features=400, out_features=200, bias=True)
    (linear_1): Linear(in_features=200, out_features=100, bias=True)
    (linear_2): Linear(in_features=100, out_features=10, bias=True)
  )
)

In [355]:
tower.input_size, tower.output_size

(400, [10])

In [356]:
tower(x.reshape(10, -1)).shape

torch.Size([10, 10])

### Tower of Conv1D blocks

In [357]:
from eugene.models.base._blocks import Conv1DBlock

In [359]:
tower = towers.Tower(
    input_size=(4, 100),
    block=Conv1DBlock,
    repeats=3,
    static_block_args={'input_len': 100, 'conv_kernel': 3, 'conv_padding': 'same'},
    dynamic_block_args={'input_channels': [4, 10, 20], 'output_channels': [10, 20, 30]}
)
tower

Tower(
  (blocks): Sequential(
    (conv1dblock_0): Conv1DBlock(
      (layers): Sequential(
        (conv): Conv1d(4, 10, kernel_size=(3,), stride=(1,), padding=same)
        (norm): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): ReLU()
        (pool): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (conv1dblock_1): Conv1DBlock(
      (layers): Sequential(
        (conv): Conv1d(10, 20, kernel_size=(3,), stride=(1,), padding=same)
        (norm): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act): ReLU()
        (pool): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (conv1dblock_2): Conv1DBlock(
      (layers): Sequential(
        (conv): Conv1d(20, 30, kernel_size=(3,), stride=(1,), padding=same)
        (norm): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act):

In [360]:
tower.input_size, tower.output_size

((4, 100), [30, 100])

In [361]:
tower(x).shape

torch.Size([10, 30, 100])

### Conv1DTower
Allows for more explicit definition of a tower of convolutions. Will be deprecated, but currently used to work with models.

In [365]:
conv1d_tower = towers.Conv1DTower(
    input_len=100,
    input_channels=4,
    conv_channels=[10, 10],
    conv_kernels=[3, 3],
    conv_strides=[1, 1],
    conv_dilations=[1, 1],
    conv_padding=["valid", "valid"],
    activations=[None, "relu"],
    pool_types="avg",
    pool_kernels=[1, 1],
    pool_strides=[1, 1],
    pool_padding=[0, 0],
    dropout_rates=[None, 0.5],
    batchnorm=True,
    batchnorm_first=True
)
conv1d_tower

Conv1DTower(
  (layers): Sequential(
    (0): Conv1d(4, 10, kernel_size=(3,), stride=(1,), padding=valid)
    (1): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): AvgPool1d(kernel_size=1, stride=1, padding=(0,))
    (3): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
    (4): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Dropout(p=0.5, inplace=False)
    (7): AvgPool1d(kernel_size=1, stride=1, padding=(0,))
  )
)

In [391]:
conv1d_tower(x).shape

torch.Size([10, 10, 96])

### BiConv1DTower

In [370]:
biconv1d_tower = towers.BiConv1DTower(
    filters = 10,
    kernel_size = 3,
    layers = 2
)
biconv1d_tower

BiConv1DTower(
  (kernels): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 10x4x3]
      (1): Parameter containing: [torch.FloatTensor of size 10x10x3]
  )
  (biases): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 10]
      (1): Parameter containing: [torch.FloatTensor of size 10]
  )
)

In [372]:
biconv1d_tower(x).shape

torch.Size([10, 10, 100])

# Base Model

## `SequenceModel`

In [111]:
from eugene.models.base._base_models import SequenceModel

# Models

## `base._sequence_models.py` 

### FCN

In [112]:
from eugene.models.base._sequence_models import FCN 

In [113]:
model = FCN(
    input_len=100,
    output_dim=10,
    task="regression",
    optimizer="adam",
    dense_kwargs={
        "hidden_dims": [50],
        "batchnorm": True,
    }
)
model

FCN(
  (metric): R2Score()
  (dense_block): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=400, out_features=50, bias=True)
      (1): ReLU()
      (2): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Linear(in_features=50, out_features=10, bias=True)
    )
  )
)

In [115]:
model(x).shape

torch.Size([10, 10])

### CNN

In [112]:
from eugene.models.base._sequence_models import CNN
from eugene.models.base._blocks import DenseBlock

In [113]:
model = torch.nn.Sequential(
    DenseBlock(input_dim=100, output_dim=100),
    DenseBlock(input_dim=100, output_dim=100),
    DenseBlock(input_dim=100, output_dim=100)
)

In [114]:
model

Sequential(
  (0): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=100, out_features=100, bias=True)
    )
  )
  (1): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=100, out_features=100, bias=True)
    )
  )
  (2): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=100, out_features=100, bias=True)
    )
  )
)

In [133]:
model = CNN(
    input_len=100,
    output_dim=10,
    strand="ss",
    aggr="max",
    task="regression",
    optimizer="adam",
    conv_kwargs={
        "conv_channels": [10, 10],
        "conv_kernels": [5, 3],
        "activations": [],
        "pool_types": []
    }
)
model

CNN(
  (metric): R2Score()
  (conv1d_block): Conv1DBlock(
    (layers): Sequential(
      (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
      (1): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
    )
  )
  (dense_block): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=940, out_features=10, bias=True)
    )
  )
)

In [134]:
import torch 
import torch.nn as nn
import logging
from typing import Dict, Iterable, Callable

#Define relevant classes and functions for feature extraction
class FeatureExtractor(nn.Module):
    def __init__(self, model: nn.Module, keyWord: str):
        super().__init__()
        self.model = model
        layers = sorted([k for k in dict([*model.named_modules()]) if keyWord in k])
        logging.info("{} model layers identified with key word {}".format(len(layers), keyWord))
        self.features = {layer: torch.empty(0) for layer in layers}
        self.handles = dict() 

        for layerID in layers:
            layer = dict([*self.model.named_modules()])[layerID]
            handle = layer.register_forward_hook(self.SaveOutputHook(layerID))
            self.handles[layerID] = handle
            
    def SaveOutputHook(self, layerID: str) -> Callable:
        def fn(laya, weValueYourInput, output): #laya = layer (e.g. Linear(...); weValueYourInput = input tensor
            self.features[layerID] = output
        return fn

    def forward(self, x, **kwargs) -> Dict[str, torch.Tensor]:
        preds = self.model(x, **kwargs)
        return self.features, self.handles, preds

In [135]:
dict([*model.named_modules()])

{'': CNN(
   (metric): R2Score()
   (conv1d_block): Conv1DBlock(
     (layers): Sequential(
       (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
       (1): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
     )
   )
   (dense_block): DenseBlock(
     (layers): Sequential(
       (0): Linear(in_features=940, out_features=10, bias=True)
     )
   )
 ),
 'metric': R2Score(),
 'conv1d_block': Conv1DBlock(
   (layers): Sequential(
     (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
     (1): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
   )
 ),
 'conv1d_block.layers': Sequential(
   (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
   (1): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
 ),
 'conv1d_block.layers.0': Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid),
 'conv1d_block.layers.1': Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid),
 'dense_block': DenseBlock(
   (lay

In [144]:
model_extract = FeatureExtractor(model=model, keyWord="conv1d_block.layers.")

In [147]:
model_extract(x)[0]["conv1d_block.layers.0"].shape

torch.Size([10, 10, 96])

In [100]:
dict([*model.named_modules()])

{'': CNN(
   (metric): R2Score()
   (conv1d_block): Conv1DBlock(
     (layers): Sequential(
       (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
       (1): ReLU()
       (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
       (3): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
       (4): ReLU()
       (5): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
     )
   )
   (dense_block): DenseBlock(
     (layers): Sequential(
       (0): Linear(in_features=940, out_features=10, bias=True)
     )
   )
 ),
 'metric': R2Score(),
 'conv1d_block': Conv1DBlock(
   (layers): Sequential(
     (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
     (1): ReLU()
     (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
     (3): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
     (4): ReLU()
     (5): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=

In [118]:
model(x, x_rev_comp).shape

torch.Size([10, 10])

### RNN

In [119]:
from eugene.models.base._sequence_models import RNN

In [120]:
model = RNN(
    input_len=100,
    output_dim=10,
    strand="ds",
    aggr="max",
    task="regression",
    optimizer="adam",
    recurrent_kwargs={
        "hidden_dim": 10,
        "num_layers": 2,
        "bidirectional": True
    }
)
model

RNN(
  (metric): R2Score()
  (recurrent_block): RecurrentBlock(
    (layers): LSTM(4, 10, num_layers=2, batch_first=True, bidirectional=True)
  )
  (dense_block): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=20, out_features=10, bias=True)
    )
  )
)

In [121]:
model(x.transpose(1, 2), x_rev_comp.transpose(1, 2)).shape

torch.Size([10, 10])

### Hybrid

In [107]:
from eugene.models.base._sequence_models import Hybrid

In [111]:
model = Hybrid(
    input_len=100,
    output_dim=10,
    strand="ss",
    aggr="max",
    task="regression",
    optimizer="adam",
    conv_kwargs={
        "conv_channels": [10, 10],
        "conv_kernels": [5, 3],
        "activations": "relu",
        "pool_types": "max"
    },
    recurrent_kwargs={
        "hidden_dim": 10,
        "num_layers": 10,
        "bidirectional": True
    }
)
model

Hybrid(
  (metric): R2Score()
  (conv1d_block): Conv1DBlock(
    (layers): Sequential(
      (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
      (1): ReLU()
      (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (3): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
      (4): ReLU()
      (5): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (recurrent_block): RecurrentBlock(
    (layers): LSTM(10, 10, num_layers=10, batch_first=True, bidirectional=True)
  )
  (dense_block): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=20, out_features=10, bias=True)
    )
  )
)

In [110]:
model(x, x_rev_comp).shape

torch.Size([10, 10])

## `_model_zoo.py`

### TutorialCNN

In [127]:
from eugene.models._model_zoo import TutorialCNN

In [128]:
model = TutorialCNN(
    input_len=100,
    output_dim=10
)

TutorialCNN(
  (metric): R2Score()
  (conv1): Conv1d(4, 30, kernel_size=(21,), stride=(1,))
  (dense): Linear(in_features=30, out_features=10, bias=True)
)

In [129]:
model(x).shape

torch.Size([10, 10])

### Jores21CNN

In [130]:
from eugene.models._model_zoo import Jores21CNN

In [11]:
model = Jores21CNN(
    input_len=100,
    output_dim=10
)

NameError: name 'Jores21CNN' is not defined

In [132]:
model(x).shape

torch.Size([10, 10])

### Kopp21CNN

In [133]:
from eugene.models._model_zoo import Kopp21CNN

In [134]:
model = Kopp21CNN(
    input_len=100,
    output_dim=10
)



In [135]:
model(x, x_rev_comp).shape

torch.Size([10, 10])

### FactorizedBasset

In [138]:
from eugene.models._model_zoo import FactorizedBasset

In [10]:
model = FactorizedBasset(input_len=100, output_dim=1)

NameError: name 'FactorizedBasset' is not defined

In [141]:
model(x).shape

torch.Size([10, 1])

### ResidualBind 

In [147]:
from eugene.models.base._blocks import Conv1DBlock
from eugene.models.base._layers import Residual

In [169]:
conv1d_block = Conv1DBlock(
    input_len=100,
    input_channels=4,
    conv_channels=[4],
    conv_kernels=[10],
    conv_padding="same",
    pool_types=None
)

In [170]:
res_block = Residual(conv1d_block)

In [174]:
res_block(x).shape

torch.Size([10, 4, 100])

In [189]:
from eugene.models._model_zoo import ResidualBind

In [9]:
model = ResidualBind(
    input_len=100,
    output_dim=1
)

NameError: name 'ResidualBind' is not defined

In [196]:
model(x).shape

torch.Size([10, 1])

### DeepBind

In [3]:
from eugene.models._model_zoo import DeepBind

Global seed set to 13
2022-12-12 14:06:39.768074: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
model = DeepBind(
    input_len=100,
    output_dim=1
)

In [7]:
model(x).shape

torch.Size([10, 1])

### DeepSEA

In [30]:
from eugene.models._model_zoo import DeepSEA

In [40]:
model = DeepSEA(
    input_len=100,
    output_dim=1,
    conv_kwargs={
        "pool_types": ["avg", "max", None]
    }
)

AvgPool1d 4 1
AvgPool1d 4 1
MaxPool1d 4 1
AvgPool1d 4 1
MaxPool1d 4 1


In [42]:
model(x).shape

torch.Size([10, 1])

### Basset

In [43]:
from eugene.models._model_zoo import Basset

In [47]:
model = Basset(
    input_len=100,
    output_dim=1
)



In [48]:
model(x).shape

torch.Size([10, 1])

### DanQ

In [49]:
from eugene.models._model_zoo import DanQ

In [56]:
model = DanQ(
    input_len=100,
    output_dim=1
)

In [58]:
model(x).shape

  self.padding, self.dilation, self.groups)


torch.Size([10, 1])

### DeepSTARR

In [59]:
from eugene.models._model_zoo import DeepSTARR

In [79]:
model = DeepSTARR(
    input_len=100,
    output_dim=1
)

In [83]:
model(x).shape

torch.Size([10, 120, 96])


torch.Size([10, 1])

# Losses

# Metrics

In [2]:
import torchmetrics
from torchmetrics import Metric

In [3]:
from torchmetrics import Metric

class MyAccuracy(Metric):
    def __init__(self):
        super().__init__()
        self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
        self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        preds, target = self._input_format(preds, target)
        assert preds.shape == target.shape

        self.correct += torch.sum(preds == target)
        self.total += target.numel()

    def compute(self):
        return self.correct.float() / self.total

In [4]:
metric = MyAccuracy()

                not been set for this class (MyAccuracy). The property determines if `update` by
                default needs access to the full metric state. If this is not the case, significant speedups can be
                achieved and we recommend setting this to `False`.
                We provide an checking function
                `from torchmetrics.utilities import check_forward_full_state_property`
                that can be used to check if the `full_state_update=True` (old and potential slower behaviour,
                default for now) or if `full_state_update=False` can be used safely.
                


In [5]:
metric.__class__.__name__.lower()

'myaccuracy'

In [7]:
from eugene.models.base._sequence_models import CNN

In [38]:
model = CNN(
    input_len=100,
    output_dim=1,
    strand="ss",
    aggr="max",
    task="binary_classification",
    optimizer="adam",
    conv_kwargs={
        "conv_channels": [10, 10],
        "conv_kernels": [5, 3],
        "activations": "relu",
        "pool_types": "max"
    },
    scheduler="reduce_lr_on_plateau",
    metric="accuracy",
    seed=13
)
model

[autoreload of eugene.models failed: Traceback (most recent call last):
  File "/Users/adamklie/miniconda3/envs/eugene/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/Users/adamklie/miniconda3/envs/eugene/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/Users/adamklie/miniconda3/envs/eugene/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/Users/adamklie/miniconda3/envs/eugene/lib/python3.7/importlib/__init__.py", line 160, in reload
    name=parent_name) from None
ImportError: parent 'eugene' not in sys.modules
]
Global seed set to 13


TypeError: super(type, obj): obj must be an instance or subtype of type

In [33]:
model.configure_optimizers()

{'optimizer': Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     eps: 1e-08
     lr: 0.001
     maximize: False
     weight_decay: 0
 ),
 'lr_scheduler': <torch.optim.lr_scheduler.ReduceLROnPlateau at 0x7fbe3e911bd0>,
 'monitor': 'val_loss'}

In [36]:
model.lr_schedulers()

AttributeError: 'NoneType' object has no attribute 'lr_schedulers'

In [10]:
from eugene.datasets._datasets import random1000
from eugene.preprocess import ohe_seqs_sdata

In [11]:
sdata = random1000()
sdata

SeqData object with = 1000 seqs
seqs = (1000,)
names = (1000,)
rev_seqs = None
ohe_seqs = None
ohe_rev_seqs = None
seqs_annot: 'label_0', 'label_1', 'label_2', 'label_3', 'label_4', 'label_5', 'label_6', 'label_7', 'label_8', 'label_9', 'activity_0', 'activity_1', 'activity_2', 'activity_3', 'activity_4', 'activity_5', 'activity_6', 'activity_7', 'activity_8', 'activity_9'
pos_annot: None
seqsm: None
uns: None

In [12]:
ohe_seqs_sdata(sdata)

One-hot encoding sequences:   0%|          | 0/1000 [00:00<?, ?it/s]

SeqData object modified:
	ohe_seqs: None -> 1000 ohe_seqs added


In [13]:
sdataset = sdata.to_dataset(target_keys="label_0")

No transforms given, assuming just need to tensorize.


In [14]:
sdataloader = sdataset.to_dataloader(batch_size=128, shuffle=False)
ID, x, x_rev_comp, y = next(iter(sdataloader))

In [15]:
model.metric.reset()
metric_vals = []
for batch_idx, batch in enumerate(sdataloader):
    ID, x, x_rev_comp, y = batch
    outs = model(x, x_rev_comp=None).squeeze(dim=1)
    loss = model.loss_fxn(outs, y)
    print(f"Loss is {loss}", end=", ")
    met = model.metric(outs, y.long())
    print(f"{model.metric_name} is {met} at batch {batch_idx}")
    metric_vals.append(met)
    if batch_idx == 5:
        break

Loss is 0.5474207401275635, accuracy is 0.515625 at batch 0
Loss is 0.5789369344711304, accuracy is 0.4765625 at batch 1
Loss is 0.5303661823272705, accuracy is 0.5234375 at batch 2
Loss is 0.6017324924468994, accuracy is 0.453125 at batch 3
Loss is 0.5952660441398621, accuracy is 0.46875 at batch 4
Loss is 0.5732230544090271, accuracy is 0.484375 at batch 5


In [17]:
model.metric

Accuracy()

In [18]:
model.loss_fxn

<function torch.nn.functional.mse_loss(input: torch.Tensor, target: torch.Tensor, size_average: Union[bool, NoneType] = None, reduce: Union[bool, NoneType] = None, reduction: str = 'mean') -> torch.Tensor>

In [19]:
model.optimizer

torch.optim.adam.Adam

In [21]:
model.configure_optimizers()

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [20]:
model.scheduler

In [268]:
from torchmetrics.functional.classification import binary_accuracy

In [None]:
model.metric.

In [270]:
all_preds = torch.concat(model.metric.pred)
all_targets = torch.concat(model.metric.target)
binary_accuracy(all_preds, all_targets)

AttributeError: 'Accuracy' object has no attribute 'pred'

In [271]:
model.metric.compute()

tensor(0.4870)

In [272]:
torch.mean(torch.Tensor(metric_vals))

tensor(0.4870)

In [206]:
model.calculate_metrics(probs, y)

tensor(0.5029)

tensor(0.4725)

In [28]:
batch[1].shape, batch[3].shape

(torch.Size([10, 4, 100]), torch.Size([10, 1]))

In [32]:
model._common_step(batch=batch, batch_idx=0, optimizer_idx=0, stage="train")

tensor(0.2946, grad_fn=<MseLossBackward0>)


  loss = self.loss_fxn(outs, y)


TypeError: log() missing 2 required positional arguments: 'name' and 'value'

In [34]:
model.log_dict

<bound method LightningModule.log_dict of CNN(
  (metric): AUROC()
  (conv1d_block): Conv1DBlock(
    (layers): Sequential(
      (0): Conv1d(4, 10, kernel_size=(5,), stride=(1,), padding=valid)
      (1): ReLU()
      (2): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
      (3): Conv1d(10, 10, kernel_size=(3,), stride=(1,), padding=valid)
      (4): ReLU()
      (5): MaxPool1d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    )
  )
  (dense_block): DenseBlock(
    (layers): Sequential(
      (0): Linear(in_features=940, out_features=1, bias=True)
    )
  )
)>

In [26]:
model.lr_schedulers()

AttributeError: 'NoneType' object has no attribute 'lr_schedulers'

In [29]:
model.lr_schedulers()

AttributeError: 'NoneType' object has no attribute 'lr_schedulers'