# Testing `ResidualBind` model class

**Authorship:**
Adam Klie, *11/05/2022*
***
**Description:**
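Notebook for testing out the custom `ResidualBind` model class. The code cells below also define and smoke-test several `Basset` implementations (tagged `#evoaug` and `#yuzu` in the cells).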

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

# Autoreload extension: load it only if this kernel has not loaded it yet
# (so re-running the cell does not load it twice), then select mode 2 so that
# edited modules are re-imported before each cell executes.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
%autoreload 2

import eugene as eu

Global seed set to 13


In [3]:
#evoaug
class Basset(nn.Module):
    """Basset model from Kelley et al., 2016; 
        see <https://genome.cshlp.org/content/early/2016/05/03/gr.200535.115.abstract>
        and <https://github.com/davek44/Basset/blob/master/data/models/pretrained_params.txt>

    Three convolutional layers (each followed by batch norm, ReLU and max
    pooling), two fully connected layers of width 1000 (batch norm, ReLU,
    dropout) and a sigmoid output layer.

    The convolutional filters are stored directly on the module as
    ``conv1_filters``, ``conv2_filters`` and ``conv3_filters`` so that they can
    either be freshly initialised, or supplied by the caller and then trained
    further (registered as parameters) or frozen (registered as buffers).

    Parameters
    ----------
    output_dim : int
        Number of units in the output layer.
    d : int, default 300
        Number of first-layer filters created when ``conv1_filters`` is None.
    conv1_filters, conv2_filters, conv3_filters : array-like or None
        Optional initial filter banks, used as ``torch.conv1d`` weights, i.e.
        laid out as (out_channels, in_channels, kernel_width). When None, a
        Kaiming-normal initialised bank of shape (d, 4, 19), (200, ., 11) or
        (200, ., 7) is created, where ``.`` is the number of filters of the
        preceding layer.
    learn_conv1_filters, learn_conv2_filters, learn_conv3_filters : bool
        If True the corresponding filters are trainable parameters; if False
        they are fixed buffers (which requires that filters were supplied).

    Raises
    ------
    AssertionError
        If a ``learn_convN_filters`` flag is False while the matching
        ``convN_filters`` is None.
    """
    def __init__(self, output_dim, d=300, 
                 conv1_filters=None, learn_conv1_filters=True,
                 conv2_filters=None, learn_conv2_filters=True,
                 conv3_filters=None, learn_conv3_filters=True):
        super().__init__()
        
        if d != 300:
            print("NB: number of first-layer convolutional filters in original Basset model is 300; current number of first-layer convolutional filters is not set to 300")
        
        self.activation = nn.ReLU()
        self.dropout3 = nn.Dropout(0.3)
        self.flatten = nn.Flatten()
        
        # Remember what the caller passed so that
        # get_which_conv_layers_transferred() can report it later.
        self.init_conv1_filters = conv1_filters
        self.init_conv2_filters = conv2_filters
        self.init_conv3_filters = conv3_filters
        
        assert (not (conv1_filters is None and not learn_conv1_filters)), "initial conv1_filters cannot be set to None while learn_conv1_filters is set to False"
        assert (not (conv2_filters is None and not learn_conv2_filters)), "initial conv2_filters cannot be set to None while learn_conv2_filters is set to False"
        assert (not (conv3_filters is None and not learn_conv3_filters)), "initial conv3_filters cannot be set to None while learn_conv3_filters is set to False"
        
        # Layer 1 (convolutional), constituent parts.
        # Every channel count below is read back from the registered filter
        # bank instead of being hard-coded, so supplied filters whose number of
        # filters differs from the defaults still yield a consistent network.
        self._register_conv_filters("conv1_filters", conv1_filters, learn_conv1_filters, (d, 4, 19))
        conv1_channels = self.conv1_filters.shape[0]
        self.batchnorm1 = nn.BatchNorm1d(conv1_channels)
        self.activation1 = nn.ReLU() # name the first-layer activation function for hook purposes
        self.maxpool1 = nn.MaxPool1d(3)
        
        # Layer 2 (convolutional), constituent parts
        self._register_conv_filters("conv2_filters", conv2_filters, learn_conv2_filters, (200, conv1_channels, 11))
        conv2_channels = self.conv2_filters.shape[0]
        self.batchnorm2 = nn.BatchNorm1d(conv2_channels)
        self.maxpool2 = nn.MaxPool1d(4)
        
        # Layer 3 (convolutional), constituent parts
        self._register_conv_filters("conv3_filters", conv3_filters, learn_conv3_filters, (200, conv2_channels, 7))
        self.batchnorm3 = nn.BatchNorm1d(self.conv3_filters.shape[0])
        self.maxpool3 = nn.MaxPool1d(4)
        
        # Layer 4 (fully connected), constituent parts.
        # LazyLinear infers its input width on the first forward pass, so the
        # model does not need to know the sequence length up front.
        self.fc4 = nn.LazyLinear(1000, bias=False)
        self.batchnorm4 = nn.BatchNorm1d(1000)
        
        # Layer 5 (fully connected), constituent parts
        self.fc5 = nn.Linear(1000, 1000, bias=False)
        self.batchnorm5 = nn.BatchNorm1d(1000)
        
        # Output layer (fully connected), constituent parts
        self.fc6 = nn.Linear(1000, output_dim)
        self.sigmoid = nn.Sigmoid()
    
    def _register_conv_filters(self, name, filters, learnable, default_shape):
        """Attach one convolutional filter bank to the module under ``name``.

        ``filters=None`` creates a trainable, Kaiming-normal initialised bank of
        ``default_shape``; otherwise the supplied values become a trainable
        parameter (``learnable=True``) or a fixed buffer (``learnable=False``).
        """
        if filters is None:
            weights = nn.Parameter(torch.zeros(*default_shape))
            nn.init.kaiming_normal_(weights)
            self.register_parameter(name, weights)
        elif learnable: # continue modifying existing filters through learning
            self.register_parameter(name, nn.Parameter(torch.Tensor(filters)))
        else:
            self.register_buffer(name, torch.Tensor(filters))
    
    def get_which_conv_layers_transferred(self):
        """Return the 1-based indices of the conv layers whose filters were supplied at construction."""
        supplied = (self.init_conv1_filters, self.init_conv2_filters, self.init_conv3_filters)
        return [index for index, filters in enumerate(supplied, start=1) if filters is not None]
    
    def forward(self, x):
        """Run the network on ``x`` and return sigmoid activations.

        ``x`` is fed to ``torch.conv1d`` with ``conv1_filters`` as weights, so it
        must be (batch, in_channels, length) with ``in_channels`` matching the
        first-layer filters (4 for the default filters). The result has shape
        (batch, output_dim).
        """
        # Layer 1 (padding of half the kernel width keeps the length unchanged
        # for odd kernel widths)
        cnn = torch.conv1d(x, self.conv1_filters, stride=1, padding=(self.conv1_filters.shape[-1]//2))
        cnn = self.batchnorm1(cnn)
        cnn = self.activation1(cnn)
        cnn = self.maxpool1(cnn)
        
        # Layer 2
        cnn = torch.conv1d(cnn, self.conv2_filters, stride=1, padding=(self.conv2_filters.shape[-1]//2))
        cnn = self.batchnorm2(cnn)
        cnn = self.activation(cnn)
        cnn = self.maxpool2(cnn)
        
        # Layer 3
        cnn = torch.conv1d(cnn, self.conv3_filters, stride=1, padding=(self.conv3_filters.shape[-1]//2))
        cnn = self.batchnorm3(cnn)
        cnn = self.activation(cnn)
        cnn = self.maxpool3(cnn)
        
        # Layer 4
        cnn = self.flatten(cnn)
        cnn = self.fc4(cnn)
        cnn = self.batchnorm4(cnn)
        cnn = self.activation(cnn)
        cnn = self.dropout3(cnn)
        
        # Layer 5
        cnn = self.fc5(cnn)
        cnn = self.batchnorm5(cnn)
        cnn = self.activation(cnn)
        cnn = self.dropout3(cnn)
        
        # Output layer
        cnn = self.fc6(cnn) 
        y_pred = self.sigmoid(cnn)
        
        return y_pred


In [4]:
# Instantiate whichever `Basset` class is currently defined in the kernel (the
# notebook defines the name more than once). For the `nn.Module` version whose
# first parameter is `output_dim`, the 2 is the number of output units.
model = Basset(2)



In [None]:
#yuzu
class Basset(torch.nn.Module):
	"""Basset-style network built from standard ``torch.nn`` layers.

	Three Conv1d -> ReLU -> BatchNorm1d -> MaxPool1d stages, then two
	Linear -> ReLU -> BatchNorm1d stages and a final Linear layer with 164
	outputs (hard-coded).

	Note that this class reuses the name ``Basset`` of the class defined in an
	earlier cell of this notebook; running this cell replaces that binding.

	Parameters
	----------
	n_inputs : int
		Number of input channels of the first convolution.
	seq_len : int or None
		Length of the input sequences, used to size the first fully connected
		layer. When None, the layer infers its input width on the first
		forward pass instead.
	random_state : int
		Seed passed to ``torch.manual_seed`` before the layers are created.
		This reseeds the global torch RNG as a side effect.

	NOTE(review): ``self.reshape`` uses ``torch.nn.Flatten`` because no
	``Flatten``/``Unsqueeze`` helper is defined or imported in this notebook.
	If this cell was copied from code whose ``Flatten`` reorders axes before
	flattening, pretrained weights for ``fc1`` would not line up -- confirm
	before loading external weights.
	"""

	def __init__(self, n_inputs, seq_len=None, random_state=0):
		super(Basset, self).__init__()
		torch.manual_seed(random_state)

		# First convolution takes n_inputs channels rather than a fixed 4.
		self.conv1 = torch.nn.Conv1d(n_inputs, 300, kernel_size=19, padding=9)
		self.relu1 = torch.nn.ReLU()
		self.bn1 = torch.nn.BatchNorm1d(300)
		self.maxpool1 = torch.nn.MaxPool1d(3)

		self.conv2 = torch.nn.Conv1d(300, 200, kernel_size=11, padding=5)
		self.relu2 = torch.nn.ReLU()
		self.bn2 = torch.nn.BatchNorm1d(200)
		self.maxpool2 = torch.nn.MaxPool1d(4)

		self.conv3 = torch.nn.Conv1d(200, 200, kernel_size=7, padding=3)
		self.relu3 = torch.nn.ReLU()
		self.bn3 = torch.nn.BatchNorm1d(200)
		self.maxpool3 = torch.nn.MaxPool1d(4)

		self.reshape = torch.nn.Flatten()

		if seq_len is None:
			# No length given: let the layer size itself from the first batch
			# instead of failing on `None // 3`.
			self.fc1 = torch.nn.LazyLinear(1000)
		else:
			# The three poolings shrink the length by 3, 4 and 4 (flooring each
			# time); 200 channels remain after the last convolution.
			self.fc1 = torch.nn.Linear((seq_len // 3 // 4 // 4) * 200, 1000)
		self.relu4 = torch.nn.ReLU()
		self.bn4 = torch.nn.BatchNorm1d(1000)

		self.fc2 = torch.nn.Linear(1000, 1000)
		self.relu5 = torch.nn.ReLU()
		self.bn5 = torch.nn.BatchNorm1d(1000)

		self.fc3 = torch.nn.Linear(1000, 164)
		# (batch, 164) -> (batch, 1, 164), i.e. the same result as unsqueeze(1).
		self.unsqueeze = torch.nn.Unflatten(1, (1, 164))

	def forward(self, X):
		"""Return the network output for ``X`` with shape (batch, 1, 164).

		``X`` is passed to ``Conv1d``, so it must be (batch, n_inputs, length).
		The whole pass runs under ``torch.no_grad()``, so the result carries no
		autograd graph and the model cannot be trained through this method.
		"""
		with torch.no_grad():
			X = self.maxpool1(self.bn1(self.relu1(self.conv1(X))))
			X = self.maxpool2(self.bn2(self.relu2(self.conv2(X))))
			X = self.maxpool3(self.bn3(self.relu3(self.conv3(X))))

			X = self.reshape(X)

			X = self.bn4(self.relu4(self.fc1(X)))
			X = self.bn5(self.relu5(self.fc2(X)))
			X = self.fc3(X)
			X = self.unsqueeze(X)
			return X

In [None]:
class FactorizedBasset(torch.nn.Module):
	"""Basset-style network whose first two convolutional stages are factorized.

	The first stage is a stack of five small Conv1d -> BatchNorm1d -> ReLU
	layers ending in 300 channels, the second a stack of three ending in 200
	channels, and the third a single convolution. Each stage is followed by
	max pooling (3, 4 and 4). Two Linear -> ReLU -> BatchNorm1d stages and a
	final Linear layer with 164 outputs (hard-coded) complete the model.

	Parameters
	----------
	n_inputs : int
		Number of input channels of the first convolution.
	seq_len : int or None
		Length of the input sequences, used to size the first fully connected
		layer. When None, the layer infers its input width on the first
		forward pass instead.
	random_state : int
		Seed passed to ``torch.manual_seed`` before the layers are created.
		This reseeds the global torch RNG as a side effect.

	NOTE(review): ``self.flatten`` uses ``torch.nn.Flatten`` because no
	``Flatten``/``Unsqueeze`` helper is defined or imported in this notebook.
	If this cell was copied from code whose ``Flatten`` reorders axes before
	flattening, pretrained weights for ``fc1`` would not line up -- confirm
	before loading external weights.
	"""

	def __init__(self, n_inputs, seq_len=None, random_state=0):
		super(FactorizedBasset, self).__init__()
		torch.manual_seed(random_state)

		# Stage 1: five stacked convolutions, n_inputs -> 300 channels
		self.conv11 = torch.nn.Conv1d(n_inputs, 48, kernel_size=3, padding=1)
		self.bn11 = torch.nn.BatchNorm1d(48)
		self.relu11 = torch.nn.ReLU()
		
		self.conv12 = torch.nn.Conv1d(48, 64, kernel_size=3, padding=1)
		self.bn12 = torch.nn.BatchNorm1d(64)
		self.relu12 = torch.nn.ReLU()

		self.conv13 = torch.nn.Conv1d(64, 100, kernel_size=3, padding=1)
		self.bn13 = torch.nn.BatchNorm1d(100)
		self.relu13 = torch.nn.ReLU()

		self.conv14 = torch.nn.Conv1d(100, 150, kernel_size=7, padding=3)
		self.bn14 = torch.nn.BatchNorm1d(150)
		self.relu14 = torch.nn.ReLU()

		self.conv15 = torch.nn.Conv1d(150, 300, kernel_size=7, padding=3)
		self.bn15 = torch.nn.BatchNorm1d(300)
		self.relu15 = torch.nn.ReLU()

		self.mp1 = torch.nn.MaxPool1d(3)

		# Stage 2: three stacked convolutions, 300 -> 200 channels
		self.conv21 = torch.nn.Conv1d(300, 200, kernel_size=7, padding=3)
		self.bn21 = torch.nn.BatchNorm1d(200)
		self.relu21 = torch.nn.ReLU()

		self.conv22 = torch.nn.Conv1d(200, 200, kernel_size=3, padding=1)
		self.bn22 = torch.nn.BatchNorm1d(200)
		self.relu22 = torch.nn.ReLU()

		self.conv23 = torch.nn.Conv1d(200, 200, kernel_size=3, padding=1)
		self.bn23 = torch.nn.BatchNorm1d(200)
		self.relu23 = torch.nn.ReLU()

		self.mp2 = torch.nn.MaxPool1d(4)

		# Stage 3: single convolution
		self.conv3 = torch.nn.Conv1d(200, 200, kernel_size=7, padding=3)
		self.bn3 = torch.nn.BatchNorm1d(200)
		self.relu3 = torch.nn.ReLU()

		self.mp3 = torch.nn.MaxPool1d(4)

		self.flatten = torch.nn.Flatten()
		if seq_len is None:
			# No length given: let the layer size itself from the first batch
			# instead of failing on `None // 3`.
			self.fc1 = torch.nn.LazyLinear(1000)
		else:
			# The three poolings shrink the length by 3, 4 and 4 (flooring each
			# time); 200 channels remain after the last convolution.
			self.fc1 = torch.nn.Linear((seq_len // 3 // 4 // 4) * 200, 1000)
		self.relu4 = torch.nn.ReLU()
		self.bn4 = torch.nn.BatchNorm1d(1000)
		self.fc2 = torch.nn.Linear(1000, 1000)
		self.relu5 = torch.nn.ReLU()
		self.bn5 = torch.nn.BatchNorm1d(1000)
		self.fc3 = torch.nn.Linear(1000, 164)
		# (batch, 164) -> (batch, 1, 164), i.e. the same result as unsqueeze(1).
		self.unsqueeze = torch.nn.Unflatten(1, (1, 164))

	def forward(self, x):
		"""Return the network output for ``x`` with shape (batch, 1, 164).

		``x`` is passed to ``Conv1d``, so it must be (batch, n_inputs, length).
		The whole pass runs under ``torch.no_grad()``, so the result carries no
		autograd graph and the model cannot be trained through this method.
		"""
		with torch.no_grad():
			x = self.relu11(self.bn11(self.conv11(x)))
			x = self.relu12(self.bn12(self.conv12(x)))
			x = self.relu13(self.bn13(self.conv13(x)))
			x = self.relu14(self.bn14(self.conv14(x)))
			x = self.relu15(self.bn15(self.conv15(x)))
			x = self.mp1(x)

			x = self.relu21(self.bn21(self.conv21(x)))
			x = self.relu22(self.bn22(self.conv22(x)))
			x = self.relu23(self.bn23(self.conv23(x)))
			x = self.mp2(x)

			x = self.relu3(self.bn3(self.conv3(x)))
			x = self.mp3(x)

			x = self.flatten(x)
			x = self.bn4(self.relu4(self.fc1(x)))
			x = self.bn5(self.relu5(self.fc2(x)))
			x = self.fc3(x)
			x = self.unsqueeze(x)
			return x


In [5]:
# Smoke test: push a random batch of shape (10, 4, 100) through `model`.
# The bare last expression makes the notebook display the returned tensor.
x = torch.randn(10, 4, 100)
model(x)

tensor([[0.5405, 0.4398],
        [0.6291, 0.5908],
        [0.5014, 0.6883],
        [0.3922, 0.7281],
        [0.5423, 0.5404],
        [0.4932, 0.5351],
        [0.5355, 0.4664],
        [0.4262, 0.4420],
        [0.4865, 0.6399],
        [0.4958, 0.5314]], grad_fn=<SigmoidBackward0>)

In [25]:
# Load eugene's `random1000` dataset into a SeqData object.
sdata = eu.datasets.random1000()
# One-hot encode the sequences; per the logged output this fills `ohe_seqs`
# on `sdata` (the object is modified, nothing is returned here).
eu.pp.ohe_seqs_sdata(sdata)
# Split into train/validation; per the logged output this adds a `train_val`
# column to `sdata.seqs_annot`.
eu.pp.train_test_split_sdata(sdata)

One-hot encoding sequences:   0%|          | 0/1000 [00:00<?, ?it/s]

SeqData object modified:
	ohe_seqs: None -> 1000 ohe_seqs added
SeqData object modified:
    seqs_annot:
        + train_val


In [26]:
# Short training run: fit `model` on `sdata` against the "activity_0" target
# for a single epoch with batches of 32.
# NOTE(review): the saved output for this cell lists ResidualBind layers
# (conv / residual_block / average_pool / fc), not the Basset layers defined in
# this notebook, so it appears to come from an earlier run with a different
# `model` -- re-run from a fresh kernel to confirm.
eu.train.fit(model, sdata, target_keys="activity_0", epochs=1, batch_size=32)

Global seed set to 13
Missing logger folder: /workspaces/EUGENe/tests/notebooks/implement/models/eugene_logs/ssResidualBind_regression
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name           | Type                      | Params
-------------------------------------------------------------
0 | hp_metric      | R2Score                   | 0     
1 | conv           | BasicConv1D               | 4.5 K 
2 | residual_block | ResidualModule            | 83.8 K
3 | average_pool   | AvgPool1d                 | 0     
4 | dropout        | Dropout                   | 0     
5 | flatten        | Flatten                   | 0     
6 | fc             | BasicFullyConnectedModule | 2.0 M 
-------------------------------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.320     Total estimated model params size (MB)


Dropping 0 sequences with NaN targets.
No transforms given, assuming just need to tensorize.
No transforms given, assuming just need to tensorize.


Validation sanity check: 0it [00:00, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
Global seed set to 13
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."
  f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]