In [13]:
import os

# Directory that holds the embedding tables and the Kd train/test splits.
# Set the DEEPLPI_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original location is used, so existing runs
# behave exactly as before.
DATA_DIR = os.environ.get("DEEPLPI_DATA_DIR", "/home/wbm001/deeplpi/DeepLPI/data")

molembed_path = os.path.join(DATA_DIR, "mol_embed.csv")
seqembed_path = os.path.join(DATA_DIR, "seq_embed.csv")
train_path = os.path.join(DATA_DIR, "kd_train.csv")
test_path = os.path.join(DATA_DIR, "kd_test.csv")

# Seed for random number generators.  The historical (misspelled) name is kept
# so existing references keep working; RANDOMSEED is a correctly spelled alias.
RAMDOMSEED = 11
RANDOMSEED = RAMDOMSEED

# Classification threshold on the "pkd" column: pkd >= CLASSIFYBOUND is
# labelled 1, anything else 0 (see the cell that builds the label tensors).
CLASSIFYBOUND = -2

In [3]:
import pandas as pd

# Load the four CSV inputs of the notebook.
# The interaction tables and the molecule embeddings use the first CSV row as
# the header; the sequence-embedding file is read with header=None, so its
# columns get default integer labels.
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)
molembed = pd.read_csv(molembed_path)
seqembed = pd.read_csv(seqembed_path, header=None)

In [4]:
# Index the molecule embeddings by the CSV column named "0" so rows can be
# looked up by the values of the "mol" column.  The guard makes the cell safe
# to re-run: once "0" has become the index, a second set_index("0") would
# raise KeyError.
if "0" in molembed.columns:
    molembed = molembed.set_index("0")


def _keep_embedded_molecules(frame):
    """Return the rows of `frame` whose "mol" value has a molecule embedding.

    Mirrors the previous map/dropna logic:
      * rows whose "mol" is not in `molembed.index` are removed;
      * an "exist" column equal to 1 is added to the surviving rows;
      * rows with a missing value in any column are removed (the original
        `dropna()` call had this effect as well, so it is kept).

    `isin` performs a hashed membership test instead of scanning the whole
    index array once per row.
    """
    has_embedding = frame["mol"].isin(molembed.index)
    return frame.loc[has_embedding].assign(exist=1).dropna()


train = _keep_embedded_molecules(train)
test = _keep_embedded_molecules(test)

In [15]:
import torch
from torch import tensor
from torch.utils.data import DataLoader,TensorDataset,SequentialSampler,RandomSampler
import numpy as np

def _build_split(frame, shuffle=False, batch_size=256):
    """Build feature tensors, label tensor, dataset and loader for one split.

    Parameters
    ----------
    frame : DataFrame with "seq", "mol" and "pkd" columns.
    shuffle : reshuffle the samples every epoch (use for training only).
    batch_size : number of samples per batch.

    Returns
    -------
    (seq_features, mol_features, labels, dataset, loader); each dataset item is
    (molecule embedding, sequence embedding, label), all float32.
    """
    seq_features = tensor(np.array(seqembed.loc[frame["seq"]])).to(torch.float32)
    mol_features = tensor(np.array(molembed.loc[frame["mol"]])).to(torch.float32)
    # Vectorized binarization: 1.0 where pkd >= CLASSIFYBOUND, otherwise 0.0.
    labels = tensor(np.array(frame["pkd"] >= CLASSIFYBOUND)).to(torch.float32)

    dataset = TensorDataset(mol_features, seq_features, labels)
    # A dedicated seeded generator keeps the shuffled order reproducible.
    generator = torch.Generator().manual_seed(RAMDOMSEED) if shuffle else None
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, generator=generator)
    return seq_features, mol_features, labels, dataset, loader


# train: shuffled each epoch so mini-batches do not follow the CSV row order
train_seq, train_mol, train_classify, trainDataset, trainDataLoader = _build_split(train, shuffle=True)

# test: kept in file order so evaluation is deterministic
test_seq, test_mol, test_classify, testDataset, testDataLoader = _build_split(test)

In [24]:
# Sanity check: size of the first tensor (the molecule embeddings) in the
# first batch produced by the test loader.
next(iter(testDataLoader))[0].size()

torch.Size([256, 300])

In [10]:
from torch.nn import Module
from torch import nn
import torch.nn.functional as F
import torch

In [19]:
class resBlock(nn.Module):
    """1-D residual block: two kernel-3 convolutions plus a shortcut connection.

    Main path: Conv1d(k=3, stride=`strides`, padding=1) -> BatchNorm1d -> ReLU
    -> Dropout(`dropout`) -> Conv1d(k=3, padding=1) -> BatchNorm1d.

    Shortcut: the input itself, or, when `use_conv1` is true, a kernel-1
    convolution with stride `strides` that maps `in_channels` to
    `out_channels`.

    The output is ReLU(main path + shortcut).
    """

    def __init__(self, in_channels, out_channels, use_conv1=False, strides=1, dropout=0.3):
        super().__init__()

        main_path = [
            nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=strides, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_channels),
        ]
        self.process = nn.Sequential(*main_path)

        # Optional projection for the shortcut branch; None means identity.
        self.conv1 = (
            nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=strides)
            if use_conv1
            else None
        )

    def forward(self, x):
        """Apply the block to `x` and return the activated sum of both branches."""
        residual = self.process(x)
        if self.conv1 is None:
            shortcut = x
        else:
            shortcut = self.conv1(x)

        return F.relu(residual + shortcut)

In [33]:
# Shape check for resBlock with equal input/output channels and stride 1.
# The block gets its own name: binding it to `test` would overwrite the
# test-split DataFrame and break any later re-run of the data cells.
res_block_check = resBlock(32,32)
res_block_check(torch.randn(256,32,300)).shape

torch.Size([256, 32, 300])

In [38]:
class cnnModule(nn.Module):
    """Convolutional feature extractor: a downsampling head followed by three resBlocks.

    Head: Conv1d(k=7, stride=2, padding=3, no bias) -> BatchNorm1d -> ReLU
    -> Dropout(`dropout`) -> MaxPool1d(2), mapping `in_channel` to
    `hidden_channel`.

    Residual stack: one resBlock with a 1x1 shortcut convolution mapping
    `hidden_channel` to `out_channel`, then two resBlocks keeping
    `out_channel`; all use stride 1.  The `dropout` argument configures the
    head only; the resBlocks are built with their own default dropout.
    """

    def __init__(self, in_channel, out_channel, hidden_channel=32, dropout=0.3):
        super().__init__()

        head_layers = [
            nn.Conv1d(in_channel, hidden_channel, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm1d(hidden_channel),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.MaxPool1d(2),
        ]
        self.head = nn.Sequential(*head_layers)

        # First block changes the channel count, the remaining two preserve it.
        residual_stack = [resBlock(hidden_channel, out_channel, use_conv1=True, strides=1)]
        residual_stack += [resBlock(out_channel, out_channel, strides=1) for _ in range(2)]
        self.cnn = nn.Sequential(*residual_stack)

    def forward(self, x):
        """Run `x` through the head and then the residual stack."""
        return self.cnn(self.head(x))

In [89]:
# Shape check for cnnModule on a single-channel input of length 400.
# The module gets its own name: binding it to `test` would overwrite the
# test-split DataFrame and break any later re-run of the data cells.
cnn_module_check = cnnModule(1,16)
cnn_module_check(torch.randn(256,1,400)).shape

torch.Size([256, 16, 100])

In [100]:
class DeepLPI(nn.Module):
    """Two-branch CNN + bidirectional LSTM + MLP classifier for (molecule, sequence) pairs.

    Each input vector is reshaped to a single-channel signal and passed through
    its own cnnModule (16 output channels).  The two feature maps are
    concatenated along the length axis, average-pooled, fed to a 2-layer
    bidirectional LSTM (the 16 channels act as the LSTM time steps), flattened
    and classified by an MLP that ends in a 2-way softmax.

    Parameters
    ----------
    molshape : length of one molecule embedding vector.
    seqshape : length of one sequence embedding vector.
    dropout : dropout probability of the first MLP layer.
    """

    @staticmethod
    def _cnn_out_len(length):
        """Length of a cnnModule output for an input of `length` positions.

        Follows the cnnModule head: Conv1d(kernel 7, stride 2, padding 3)
        then MaxPool1d(2).  The residual blocks use stride 1 with padding 1
        and kernel 3, so they keep the length unchanged.
        """
        conv_len = (length + 2 * 3 - 7) // 2 + 1
        return conv_len // 2

    @staticmethod
    def _lstm_input_size(molshape, seqshape):
        """Exact feature size seen by the LSTM after concatenation and AvgPool1d(5, stride=3)."""
        joined_len = DeepLPI._cnn_out_len(molshape) + DeepLPI._cnn_out_len(seqshape)
        return (joined_len - 5) // 3 + 1

    def __init__(self, molshape, seqshape, dropout=0.3):
        super().__init__()

        self.molshape = molshape
        self.seqshape = seqshape

        cnn_channels = 16
        lstm_hidden = 64

        self.molcnn = cnnModule(1, cnn_channels)
        self.seqcnn = cnnModule(1, cnn_channels)

        self.pool = nn.AvgPool1d(5, stride = 3)
        # The input size is computed with the layers' integer output-length
        # formulas; the earlier round(...) approximation could be off by one
        # for some input lengths, which made the forward pass fail.
        self.lstm = nn.LSTM(
            self._lstm_input_size(molshape, seqshape),
            lstm_hidden,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )

        self.mlp = nn.Sequential (
            # 16 channels x (2 directions x 64 hidden units) = 2048 features.
            nn.Linear(cnn_channels * 2 * lstm_hidden, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=dropout),

            nn.Linear(512, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(p=0.2),

            nn.Linear(32, 2),
            # Explicit dim: the input is (batch, 2).  This is the dimension the
            # implicit choice resolved to, without the deprecation warning.
            nn.Softmax(dim=1)
        )

    def forward(self, mol, seq):
        """Return, for every sample, the first of the two softmax outputs (shape: (batch,))."""
        mol = self.molcnn(mol.reshape(-1,1,self.molshape))
        seq = self.seqcnn(seq.reshape(-1,1,self.seqshape))

        # put data into lstm
        x = torch.cat((mol,seq),2)
        x = self.pool(x)
        x,_ = self.lstm(x)

        # fully connect layer
        x = self.mlp(x.flatten(1))
        x = x[:,0]

        return x

In [104]:
# Smoke test: build the model for 300-wide molecule and 6165-wide sequence
# embeddings and push one random batch of 128 pairs through it.
model = DeepLPI(molshape=300, seqshape=6165)
model(mol=torch.randn(128, 300), seq=torch.randn(128, 6165)).shape

  input = module(input)


torch.Size([128])