# Setup of file and variables

In [1]:
import numpy as np
import pandas as pd
import torch

# Autoreload extension: load it only when it is not already among the
# extensions this IPython session has loaded, so re-running the cell
# does not load it a second time.
if 'autoreload' not in get_ipython().extension_manager.loaded:
    %load_ext autoreload
# Mode 2 asks autoreload to reload all modules before executing code,
# so edits to imported source files take effect without a kernel restart.
%autoreload 2

In [2]:
from eugene.models import DeepBind

Global seed set to 13


c:\users\lab\documents\eugene\eugene\utils\..\datasets/auxiliary/parsed_Gata6_3769_contig8mers.txt


In [3]:
# Number of positions in each input strand
x_len = 66

# Random placeholder batches, each of shape (10, 4, x_len);
# replace x and x_rev with their respective real data
x, x_rev = torch.randn(10, 4, x_len), torch.randn(10, 4, x_len)

# Simple usage of DeepBind module

The DeepBind module only requires one parameter to function. It can be further customized with optional parameters.

In [4]:
# Build a model passing only input_len; every other option keeps its default
simple_deepbind_instance = DeepBind(input_len=x_len)

# Bare last expression so the notebook displays the module's layer summary
simple_deepbind_instance

DeepBind(
  (max_pool): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  (avg_pool): AvgPool1d(kernel_size=(4,), stride=(4,), padding=(0,))
  (convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 16, kernel_size=(4,), stride=(1,))
      (1): ReLU(inplace=True)
      (2): Dropout(p=0.2, inplace=False)
    )
  )
  (fcn): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=504, out_features=256, bias=True)
      (1): ReLU(inplace=True)
      (2): Dropout(p=0.2, inplace=False)
      (3): Linear(in_features=256, out_features=64, bias=True)
      (4): ReLU(inplace=True)
      (5): Dropout(p=0.2, inplace=False)
      (6): Linear(in_features=64, out_features=16, bias=True)
      (7): ReLU(inplace=True)
      (8): Dropout(p=0.2, inplace=False)
      (9): Linear(in_features=16, out_features=4, bias=True)
      (10): ReLU(inplace=True)
      (11): Dropout(p=0.2, inplace=False)
      (12): Linear(in_features=4, out_feature

In [5]:
# x_rev does not need to be specified in single strand mode
# (this instance was created without a strand argument, i.e. the default)
out = simple_deepbind_instance(x)

# Display the model output together with its shape
out, out.shape

(tensor([[0.1823],
         [0.1911],
         [0.2143],
         [0.2120],
         [0.1868],
         [0.1964],
         [0.1680],
         [0.1929],
         [0.1392],
         [0.1944]], grad_fn=<AddmmBackward0>),
 torch.Size([10, 1]))

# DeepBind Parameters

- input_len : int - length of input strand
- strand : string - type of strand to process || default : "ss" || res: single ("ss"), double ("ds"), twin ("ts")
- task : string - task for the model to perform || default : "regression" || res: "regression", "binary_classification"

#### mp_kwargs

- kernel_size : int - motif pooling kernel size || default : 4 || res : multiples of 2

#### conv_kwargs

- channels : list of ints - number of channels for each convolutional layer || default : [4, 16]
- conv_kernels : list of int(s) - size of convolutional kernels || default : [4] || res : length must be 1 less than that of channels
- pool_kernels : list of int(s) - size of max pooling kernels || default : [4] || res: length must be 1 less than that of channels, only applies if length of channels is > 2
- omit_final_pool : boolean - bypass final max pooling step of output || default : True
- dropout_rates : float - probability for the dropout of any given node || default : 0.2
- batchnorm : boolean - enable batch normalization between layers || default : False

#### fc_kwargs

- output_dim : int - number of output dimensions for the network || default : 1
- hidden_dims : list of int(s) - width of each hidden dimension || default : [256, 64, 16, 4]
- dropout_rate : float - probability for the dropout of any given node || default : 0.2
- batchnorm : boolean - enable batch normalization between layers || default : False

In [6]:
# Override each option group: twin-strand mode plus custom motif-pooling,
# convolutional and fully connected settings
customized_deepbind_instance = DeepBind(
    input_len=x_len,
    strand="ts",
    mp_kwargs={"kernel_size": 16},
    conv_kwargs={
        "channels": [4, 16, 32],
        "conv_kernels": [4, 4],
        "pool_kernels": [4, 4],
        "omit_final_pool": False,
        "dropout_rates": 0.5,
        "batchnorm": True,
    },
    fc_kwargs={
        "output_dim": 5,
        "hidden_dims": [256, 128, 64, 32, 16],
        "dropout_rate": 0.3,
        "batchnorm": True,
    },
)

# Display the resulting architecture
customized_deepbind_instance

DeepBind(
  (max_pool): MaxPool1d(kernel_size=16, stride=16, padding=0, dilation=1, ceil_mode=False)
  (avg_pool): AvgPool1d(kernel_size=(16,), stride=(16,), padding=(0,))
  (convnet): BasicConv1D(
    (module): Sequential(
      (0): Conv1d(4, 16, kernel_size=(4,), stride=(1,))
      (1): ReLU(inplace=True)
      (2): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
      (3): Dropout(p=0.5, inplace=False)
      (4): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): Conv1d(16, 32, kernel_size=(4,), stride=(1,))
      (6): ReLU(inplace=True)
      (7): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
      (8): Dropout(p=0.5, inplace=False)
      (9): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (fcn): BasicFullyConnectedModule(
    (module): Sequential(
      (0): Linear(in_features=12, out_features=256, bias=True)
      (1): ReLU(inplace=True)
  

In [7]:
# x_rev must be specified in double and twin strand modes
out = customized_deepbind_instance(x, x_rev)

# NOTE(review): the recorded output has shape (10, 1) even though this instance
# was configured with output_dim=5 — confirm this is intended for strand="ts"
out, out.shape

(tensor([[-0.0992],
         [ 0.3343],
         [-0.1015],
         [-0.4366],
         [ 0.0307],
         [-0.1742],
         [-0.1436],
         [ 0.2214],
         [-0.1211],
         [ 0.0179]], grad_fn=<UnsqueezeBackward0>),
 torch.Size([10, 1]))