In [24]:
# !pip3 install torch
# !pip3 install pandas
!pip3 install pytorch-ignite

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [25]:
import ignite

In [26]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, Dataset
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss

In [27]:
EMBEDDING_VEC_SIZE = 32
DNA_BASES = 4
BAT_SIZE = 128

In [28]:
class SeqData(Dataset):   
    def __init__(self, sequences, labels):
        self.data = torch.from_numpy(sequences)
        self.labels = torch.tensor(labels, dtype=torch.float)
        self.labels = self.labels.view(-1, 1)
        
    def __len__(self):
        return len(self.data)   
    
    def __getitem__(self, index):
        label = self.labels[index]
        data_val = self.data[index]
        return data_val,label

In [29]:
def get_data_loader(fname):
    df = pd.read_csv(fname, names=["name", "seq", "class"])
    mapping = {'A': 0, 'T': 1, 'C': 2, 'G': 3}
    def mapping_fn(string):
        x = [mapping[x] for x in string]
        return x
    column = df["seq"].apply(lambda x: mapping_fn(x))
    data = np.zeros((len(df), 300), dtype=np.int64)
    for i, d in enumerate(data):
        data[i, :] = d
    print(df['class'].sum())
    dataset = SeqData(data, df['class'].values)
    data_loader = DataLoader(dataset, batch_size=BAT_SIZE, shuffle=True, drop_last=True)
    return data_loader

In [30]:
class Example(nn.Module):
    def __init__(self):
        super(Example, self).__init__()
        self.embed = nn.Embedding(DNA_BASES, EMBEDDING_VEC_SIZE)
        self.fc = nn.Linear(300 * 32, 1)
    
    def forward(self, x):
        x = self.embed(x)
        x = x.view(BAT_SIZE, -1)
        x = self.fc(x)
        return torch.sigmoid(x)

In [31]:
train_loader = get_data_loader("data/fullset_test.csv")

In [32]:
net = Example()
criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [33]:
for epoch in range(1):  # loop over the dataset multiple times
    epoch_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):

        # zero the parameter gradients
        optimizer.zero_grad()

        labels = labels.view(BAT_SIZE, 1)
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        epoch_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, epoch_loss / (i + 1)))

print('Finished Training')

Finished Training


In [40]:
model = Example()
train_loader = get_data_loader("data/fullset_train.csv")
val_loader = get_data_loader("data/fullset_test.csv")
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.8)
criterion = nn.BCELoss()

trainer = create_supervised_trainer(model, optimizer, criterion)

val_metrics = {
    "accuracy": Accuracy(),
    "nll": Loss(criterion)
}
evaluator = create_supervised_evaluator(model, metrics=val_metrics)

@trainer.on(Events.ITERATION_COMPLETED(every=50))
def log_training_loss(trainer):
    print("Epoch[{}] Loss: {:.2f}".format(trainer.state.epoch, trainer.state.output))

@trainer.on(Events.EPOCH_COMPLETED)
def log_training_results(trainer):
    evaluator.run(train_loader)
    metrics = evaluator.state.metrics
    print("Training Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
          .format(trainer.state.epoch, metrics["accuracy"], metrics["nll"]))

@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(trainer):
    evaluator.run(val_loader)
    metrics = evaluator.state.metrics
    print("Validation Results - Epoch: {}  Avg accuracy: {:.2f} Avg loss: {:.2f}"
          .format(trainer.state.epoch, metrics["accuracy"], metrics["nll"]))
trainer.run(train_loader, max_epochs=5)

Epoch[1] Loss: 3.12
Epoch[1] Loss: 1.56
Epoch[1] Loss: 0.78
Epoch[1] Loss: 4.69
Epoch[1] Loss: 1.56
Epoch[1] Loss: 0.00
Epoch[1] Loss: 0.00
Epoch[1] Loss: 0.78
Epoch[1] Loss: 0.78
Epoch[1] Loss: 3.12
Epoch[1] Loss: 1.56
Epoch[1] Loss: 1.56
Epoch[1] Loss: 0.00
Epoch[1] Loss: 1.56
Epoch[1] Loss: 0.78
Epoch[1] Loss: 1.56
Epoch[1] Loss: 0.00
Epoch[1] Loss: 2.34
Epoch[1] Loss: 2.34
Epoch[1] Loss: 3.12
Epoch[1] Loss: 0.78
Epoch[1] Loss: 2.34
Epoch[1] Loss: 0.78
Epoch[1] Loss: 0.78
Epoch[1] Loss: 4.69
Epoch[1] Loss: 3.12
Epoch[1] Loss: 0.78
Engine run is terminating due to exception: .
Epoch[1] Loss: 0.78


KeyboardInterrupt: 