In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch.nn.functional as torch_functional
import torch.nn as nn
import torch
import numpy as np
import logging

from sonosco.common.constants import SONOSCO
from sonosco.models.seq2seq_tds import TDSEncoder, TDSDecoder, TDSSeq2Seq
from sonosco.training import Experiment, ModelTrainer
from sonosco.common.path_utils import parse_yaml
from sonosco.datasets import create_data_loaders
from sonosco.common.utils import setup_logging

In [3]:
config_path = "../sonosco/config/train_seq2seq_tds.yaml"
config = parse_yaml(config_path)["train"]
LOGGER = logging.getLogger(SONOSCO)
setup_logging(LOGGER)

In [4]:
# create model
model = TDSSeq2Seq(config["encoder"], config["decoder"])

2019-08-07 12:05:52,583 - sonosco.models.seq2seq_tds - INFO - ===== Initialize TDSEncoder =====
2019-08-07 12:05:52,609 - sonosco.models.seq2seq_tds - INFO - Initialize bridge.fc.weight with uniform / 0.037
2019-08-07 12:05:52,610 - sonosco.models.seq2seq_tds - INFO - Initialize bridge.fc.bias with constant / 0.000
2019-08-07 12:05:52,612 - sonosco.models.seq2seq_tds - INFO - Initialize layers.subsample0.conv1d.weight with uniform / 1.414
2019-08-07 12:05:52,613 - sonosco.models.seq2seq_tds - INFO - Initialize layers.subsample0.conv1d.bias with constant / 0.000
2019-08-07 12:05:52,615 - sonosco.models.seq2seq_tds - INFO - Initialize layers.subsample0.layer_norm.weight with constant / 0.000
2019-08-07 12:05:52,617 - sonosco.models.seq2seq_tds - INFO - Initialize layers.subsample0.layer_norm.bias with constant / 0.000
2019-08-07 12:05:52,619 - sonosco.models.seq2seq_tds - INFO - Initialize layers.tds10_block0.conv2d.weight with uniform / 0.138
2019-08-07 12:05:52,620 - sonosco.models.seq

In [5]:
# create fake data
batch_dim = 8
time_dim = 80
input_dim = 161
output_time_dim = 20
vocab_dim = 100

xs = np.random.rand(batch_dim, time_dim, input_dim)
xlens = np.random.randint(low=time_dim, high=time_dim + 1, size=batch_dim)
y_labels = np.random.randint(low=1, high=vocab_dim, size=(batch_dim, output_time_dim))
ylens = np.random.randint(low=output_time_dim, high=output_time_dim + 1, size=batch_dim)

In [None]:
# run model
probs = model(torch.from_numpy(xs).float(), torch.from_numpy(xlens), torch.from_numpy(y_labels), torch.from_numpy(ylens))
probs.size()

In [7]:
Experiment.create("CV_STS")

train_loader, val_loader = create_data_loaders(**config)

def cross_entropy_loss(batch, model):
    batch_x, batch_y, input_lengths, target_lengths = batch
    # check out the _collate_fn in loader to understand the next transformations
    batch_x = batch_x.squeeze(1).transpose(1, 2)
    batch_y = torch.split(batch_y, target_lengths.tolist())
    
    max_len = max(batch_y, key=lambda x: x.size()[0]).size()[0]
    #padded_batch_y = []
    #for y in batch_y:
    #    padded_y = torch_functional.pad(y, (0, max_len - y.size()[0]))
    #    padded_batch_y.append(padded_y)        
    #batch_y = torch.stack(padded_batch_y).type(torch.LongTensor)
    batch_y = torch.nn.utils.rnn.pad_sequence(batch_y, batch_first=True).type(torch.LongTensor)
    model_output = model(batch_x, input_lengths, batch_y, target_lengths)
    loss = torch_functional.cross_entropy(model_output.permute(0, 2, 1), batch_y)
    return loss, model_output

trainer = ModelTrainer(model, loss=cross_entropy_loss, epochs=config["max_epochs"],
                       train_data_loader=train_loader, val_data_loader=val_loader,
                       lr=config["learning_rate"], custom_model_eval=True)

In [None]:
trainer.start_training()

In [31]:
class View(nn.Module):
    def __init__(self, shape):
        super(View, self).__init__()
        self.shape = shape
        
    def forward(self, input):
        return input.view(self.shape)

class Reorder(nn.Module):
    def __init__(self, shape):
        super(Reorder, self).__init__()
        self.shape = shape
        
    def forward(self, input):
        return input.permute(self.shape)

In [32]:
class TDSBlock(nn.Module):
    
    def __init__(self, c: int, kw: int, h: int, dropout: float)-> None:
        super().__init__()
        l:int = c * h
        self.conv = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=(21, 1), stride=(2, 1), padding=(20, 5)), #calc padding
            nn.ReLU(),
            nn.Dropout2d(0.2),
            nn.LayerNorm(3),
            View((-1,l,1,0)),
            Reorder((1,0,2,3)),
            nn.Linear(l,l),
            nn.ReLU(),
            nn.Dropout2d(dropout),
            nn.Linear(l,l),
            Reorder((1,0,2,3)),
            View((-1,h,c,0)),
            nn.Dropout2d(dropout),
            nn.LayerNorm(3)
        )
        
    def forward(self, input):
        return self.conv(input)

In [33]:
b = TDSBlock(1,2,3,0.5)


In [34]:
Experiment.create("CV_STS")
config = parse_yaml("./config.yaml")["train"]

In [35]:
train_loader, val_loader = create_data_loaders(**config)


In [46]:
def custom_loss(batch, model):
    batch_x, batch_y, input_lengths, target_lengths = batch
    model_output, output_lengths = model(batch_x)
    loss = torch_functional.ctc_loss(model_output.transpose(0, 1), batch_y, output_lengths, target_lengths)
    return loss, model_output

In [47]:
class ConvSeqNet(nn.Module):
    def __init__(self):
        super(ConvSeqNet, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=(21, 1), stride=(2, 1), padding=(20, 5)), #calc padding
            nn.ReLU(),
            nn.Dropout2d(0.2),
            nn.LayerNorm(3),
            TDSBlock(10,21,82,0.2)
        #     TDSBlock(10,21,82,0.2),
        #     nn.Conv2d(10, 14, kernel_size=(21, 1), stride=(2, 1), padding=(20, 5)), #calc padding
        #     nn.ReLU(),
        #     nn.Dropout2d(0.2),
        #     nn.LayerNorm(3),
        #     TDSBlock(10,21,82,0.2),
        #     TDSBlock(10,21,82,0.2),
        #     TDSBlock(10,21,82,0.2),
        #     nn.Conv2d(14, 18, kernel_size=(21, 1), stride=(2, 1), padding=(20, 5)), #calc padding
        #     nn.ReLU(),
        #     nn.Dropout2d(0.2),
        #     nn.LayerNorm(3),
        #     TDSBlock(10,21,82,0.2),
        #     TDSBlock(10,21,82,0.2),
        #     TDSBlock(10,21,82,0.2),
        #     TDSBlock(10,21,82,0.2),
        #     TDSBlock(10,21,82,0.2),
        #     TDSBlock(10,21,82,0.2),
        #     View((-1,1440,1,0)),
        #     Reorder((1,0,3,2)),
        #     nn.Linear(1440,1024),
        )
    def forward(self, input):
        return self.net(input)

In [48]:
model = ConvSeqNet()

trainer = ModelTrainer(model, loss=custom_loss, epochs=config["max_epochs"],
                           train_data_loader=train_loader, val_data_loader=val_loader,
                           lr=config["learning_rate"], custom_model_eval=True)

In [49]:
trainer.start_training()

RuntimeError: Given normalized_shape=[3], expected input with shape [*, 3], but got input of size[32, 10, 51, 213]