In [1]:
import torch
import numpy as np
from torch import nn, optim
import torch.nn.functional as F
import scipy.io
import h5py

In [2]:
# Directory holding the dataset .mat files. The trailing slash is required
# because file paths are built by plain string concatenation (DATA_PATH + name).
DATA_PATH = 'deepsea_train/'

### Data exploration

In [3]:
## TRAIN DATA ###
# Open explicitly read-only: the file is only read here, and older h5py
# versions default to append mode, which silently creates an empty file when
# the path is mistyped instead of raising.
train_data_raw = h5py.File(DATA_PATH+'train.mat', 'r')

# Indexing the file returns dataset handles, so `train_data_raw` must stay
# open for as long as `x_train` / `y_train` are used.
x_train = train_data_raw['trainxdata']
y_train = train_data_raw['traindata']

In [4]:
## VALIDATION DATA ###
# The validation split is read with scipy's MATLAB loader, which returns a
# dict keyed by variable name.
valid_data_raw = scipy.io.loadmat(DATA_PATH + 'valid.mat')
x_valid, y_valid = valid_data_raw['validxdata'], valid_data_raw['validdata']

Each training sample consists of a 1,000-bp sequence from the human GRCh37 reference genome, centered on a 200-bp bin, and is paired with a label vector for 919 chromatin features.

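The 1,000-bp DNA sequence is represented by a 1,000 × 4 binary matrix, with columns corresponding to A, G, C and T. Note that the array loaded from `train.mat` stacks the samples along the last axis, so `x_train` has shape `(1000, 4, n_samples)` (see the output below).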

In [10]:
# Full shape of the training inputs as stored in the file.
x_train.shape

(1000, 4, 4400000)

In [7]:
# Shape of a single slice along the first axis of the training inputs.
x_train[0].shape

(4, 4400000)

In [9]:
# Shape of a single slice along the first axis of the training labels.
y_train[0].shape

(4400000,)

# Deep Sea

In [None]:
# Number of input channels fed to the first convolution.
nfeats = 4

# NOTE(review): `height` is not referenced by the model definition in this
# notebook — confirm whether it is still needed.
height = 1

# Output channel counts of the three convolution layers, in order.
nkernels = [320, 480, 960]

# Dropout probabilities: the first is used after conv blocks 1 and 2,
# the second after conv block 3.
dropouts = [0.2, 0.5]

In [None]:
class deep_sea_nn(nn.Module):
    """Three-layer 1-D convolutional network producing 919 output logits.

    Layout, as built in ``__init__``:

    * three ``Conv1d`` layers with kernel size 8,
    * a max-pool (width 4, stride 4) after the first two convolutions,
    * dropout after every convolution block,
    * two fully connected layers (925 hidden units, 919 outputs).

    Channel counts and dropout rates are read from the module-level
    ``nfeats``, ``nkernels`` and ``dropouts`` at instantiation time.

    ``forward`` returns raw logits (no sigmoid is applied), which is what
    ``nn.BCEWithLogitsLoss`` expects.
    """

    # Sequence length left after the conv/pool stack for a 1000-long input:
    # 1000 -> 993 (conv1) -> 248 (pool) -> 241 (conv2) -> 60 (pool) -> 53 (conv3)
    CONV_OUT_LEN = 53

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=nfeats,      out_channels=nkernels[0], kernel_size=8)
        self.conv2 = nn.Conv1d(in_channels=nkernels[0], out_channels=nkernels[1], kernel_size=8)
        self.conv3 = nn.Conv1d(in_channels=nkernels[1], out_channels=nkernels[2], kernel_size=8)
        self.maxpool = nn.MaxPool1d(kernel_size=4, stride=4)
        self.drop1 = nn.Dropout(p=dropouts[0])
        self.drop2 = nn.Dropout(p=dropouts[1])
        # Width of the flattened conv3 output; derived from nkernels[2] so the
        # linear layer stays consistent if the channel configuration changes
        # (equals 53*960 with the notebook's default settings).
        self.flat_features = self.CONV_OUT_LEN * nkernels[2]
        self.linear1 = nn.Linear(self.flat_features, 925)
        self.linear2 = nn.Linear(925, 919)

    def forward(self, input):
        """Compute the 919 logits for a batch of sequences.

        Args:
            input: tensor of shape ``(batch, nfeats, 1000)``; the length must
                be 1000 for the flattened size to match ``linear1``.

        Returns:
            Tensor of shape ``(batch, 919)`` holding un-normalised logits.
        """
        ## convolution 1 ##
        ds = self.conv1(input)
        ds = F.relu(ds)
        ds = self.maxpool(ds)
        ds = self.drop1(ds)

        ## convolution 2 ## (consumes the output of block 1, not the raw input)
        ds = self.conv2(ds)
        ds = F.relu(ds)
        ds = self.maxpool(ds)
        ds = self.drop1(ds)

        ## convolution 3 ##
        ds = self.conv3(ds)
        ds = F.relu(ds)
        ds = self.drop2(ds)

        # Flatten per sample. Keeping the batch axis fixed means a wrong input
        # length fails loudly in linear1 instead of silently mixing samples.
        ds = ds.view(ds.size(0), -1)
        ds = self.linear1(ds)
        ds = F.relu(ds)
        ds = self.linear2(ds)

        return ds

    # Backward-compatible alias for the original (misspelled) method name.
    foward = forward
        

In [None]:
## HYPERPARAMETERS
# Initial learning rate passed to the SGD optimizer.
learning_rate = 1e-2

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell also runs on CPU-only machines (an unconditional .cuda() would raise).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

deep_sea = deep_sea_nn()
deep_sea.to(device)  # moves the module's parameters in place
print(deep_sea)

# SGD with momentum over all model parameters.
optimizer = optim.SGD(deep_sea.parameters(), lr=learning_rate, momentum=0.9)
# Reduce the learning rate once the monitored metric has stopped improving
# for `patience` scheduler steps.
# NOTE(review): `verbose` is reported as deprecated by recent PyTorch
# releases — drop it if the installed version warns about or rejects it.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, verbose=True)
# Operates on raw logits (the sigmoid is applied inside the loss).
loss_func = nn.BCEWithLogitsLoss()