# Testing Kipoi Basset model for inference
**Authorship:**
Adam Klie (last updated: *07/19/2023*)
***
**Description:**
Test the Kipoi-hosted implementation of Basset for inference on some randomly generated sequences. This is a good place to check if your installation is working properly.
***

# Set-up

In [73]:
# General imports
import os
import sys
import kipoi
import torch
import numpy as np
import pandas as pd

In [2]:
# Put the directory that holds the running Python interpreter on PATH so that
# executables installed next to it can be found (the original note says this is
# needed for git -- presumably the environment's own git; confirm for your setup).
bin_dir = os.path.dirname(sys.executable)
current_path = os.environ.get("PATH", "")  # tolerate an unset PATH instead of raising KeyError
# Append only when missing so that re-running this cell does not keep growing PATH.
if bin_dir and bin_dir not in current_path.split(os.pathsep):
    os.environ["PATH"] = os.pathsep.join(filter(None, [current_path, bin_dir]))

# Load in the model from Kipoi

In [22]:
# Fetch the Kipoi model entry named 'Basset' and keep its `.model` attribute,
# which is the object used for the summary and the forward pass below.
kipoi_basset = kipoi.get_model('Basset')
basset = kipoi_basset.model

Using downloaded and verified file: /cellar/users/aklie/.kipoi/models/Basset/downloaded/model_files/weights/4878981d84499eb575abd0f3b45570d3


In [43]:
import torchinfo

In [46]:
# Summarize the layers for a single input of shape (1, 4, 600, 1)
summary_input_size = (1, 4, 600, 1)
torchinfo.summary(basset, input_size=summary_input_size)

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [1, 164]                  --
├─Conv2d: 1-1                            [1, 300, 582, 1]          23,100
├─BatchNorm2d: 1-2                       [1, 300, 582, 1]          600
├─ReLU: 1-3                              [1, 300, 582, 1]          --
├─MaxPool2d: 1-4                         [1, 300, 194, 1]          --
├─Conv2d: 1-5                            [1, 200, 184, 1]          660,200
├─BatchNorm2d: 1-6                       [1, 200, 184, 1]          400
├─ReLU: 1-7                              [1, 200, 184, 1]          --
├─MaxPool2d: 1-8                         [1, 200, 46, 1]           --
├─Conv2d: 1-9                            [1, 200, 40, 1]           280,200
├─BatchNorm2d: 1-10                      [1, 200, 40, 1]           400
├─ReLU: 1-11                             [1, 200, 40, 1]           --
├─MaxPool2d: 1-12                        [1, 200, 10, 1]           -

# Test inference on batch of sequences

In [4]:
import seqpro as sp

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [66]:
# Generate a batch of random DNA sequences to push through Basset.
# Length 600 matches the input length used for the model summary above.
# A fixed seed keeps the batch reproducible across kernel restarts.
N_SEQS, SEQ_LEN, SEED = 128, 600, 13
seqs = sp.random_seqs((N_SEQS, SEQ_LEN), sp.alphabets.DNA, seed=SEED)

In [69]:
# One-hot encode the sequences and swap the last two axes so the 4-wide
# nucleotide axis comes second, as the first Conv2d layer (4 input channels) expects.
ohe_seqs = np.transpose(sp.ohe(seqs, alphabet=sp.alphabets.DNA), (0, 2, 1))
# Append a trailing singleton axis to get the 4-D layout the Conv2d stack consumes.
ohe_seqs_4d = ohe_seqs[..., np.newaxis]
ohe_seqs_torch = torch.tensor(ohe_seqs_4d, dtype=torch.float32).contiguous()
ohe_seqs_torch.shape

torch.Size([128, 4, 600, 1])

In [70]:
# Report the footprint of the one-hot NumPy batch in megabytes.
# Note this measures `ohe_seqs`, not the float32 tensor that is fed to the model.
ohe_size_mb = ohe_seqs.nbytes / 1e6
print(f"Size of sequence in memory: {ohe_size_mb} MB")

Size of sequence in memory: 0.3072 MB


In [71]:
# Move the model to the CPU, then switch it to evaluation mode so the
# BatchNorm layers use their running statistics during inference.
basset.cpu()
basset.eval()

Sequential(
  (0): Conv2d(4, 300, kernel_size=(19, 1), stride=(1, 1))
  (1): BatchNorm2d(300, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): MaxPool2d(kernel_size=(3, 1), stride=(3, 1), padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(300, 200, kernel_size=(11, 1), stride=(1, 1))
  (5): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): MaxPool2d(kernel_size=(4, 1), stride=(4, 1), padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(200, 200, kernel_size=(7, 1), stride=(1, 1))
  (9): BatchNorm2d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU()
  (11): MaxPool2d(kernel_size=(4, 1), stride=(4, 1), padding=0, dilation=1, ceil_mode=False)
  (12): Lambda()
  (13): Sequential(
    (0): Lambda()
    (1): Linear(in_features=2000, out_features=1000, bias=True)
  )
  (14): BatchNorm1d(1000, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (15): ReLU()

In [72]:
# Predict on the sequences.
# Gradients are not needed for inference, so disable autograd tracking to avoid
# building a graph and holding on to intermediate activations.
with torch.no_grad():
    basset_preds = basset(ohe_seqs_torch)
basset_preds.shape

torch.Size([128, 164])

# DONE!

----