In [4]:
import torch
from torch import Tensor, LongTensor
import torch.nn as nn
from torch.nn import Module
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from data import  OpenSub, pad_batch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from utils import parse, length_to_mask, masked_cross_entropy_loss
from tensorboardX import SummaryWriter
import os
from time import strftime, localtime

In [17]:
args = {
    "epoch":20,
    "batch_size":256,
    "num_workers":4,
    "train_path":"../data/processed/t_given_s_dialogue_length2_3_temp.txt",
    "test_path":"../data/processed/t_given_s_dialogue_length2_3_test.txt",
    "vocab_size":25000,
    "embed_size":1000,
    "hidden_size":1000,
    "num_layers":1,
    "clip_thresh":1,
    "seed":1,
    "lr":0.1,
    "global_max_target_len":20,
    "cuda":torch.cuda.is_available
}
class AttributeDict(dict):
    def __getattr__(self, attr):
        return self[attr]
    def __setattr__(self, attr, value):
        self[attr] = value
args = AttributeDict(args)

In [18]:
class Encoder(Module):

    def __init__(self, args):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(args.vocab_size+4, args.embed_size)
        # Only accept 4 layers bi-directional LSTM right now
        self.rnn = nn.LSTM(input_size=args.embed_size,
                                  hidden_size=args.hidden_size,
                                  num_layers=args.num_layers,
                                  bidirectional=True)
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant(param, 0.0)
            elif 'weight' in name:
                nn.init.uniform(param, -0.08, 0.08)

    def forward(self, source, lens, hidden=None):
        dense = self.embedding(source)
        packed_dense = pack_padded_sequence(dense, lens)
        packed_outputs, hidden = self.rnn(packed_dense, hidden)
        def _cat(hidden):
            return torch.cat((hidden[0:hidden.size(0):2], hidden[1:hidden.size(0):2]), 2)
        hidden = tuple(_cat(h) for h in hidden)
        outputs, output_lens = pad_packed_sequence(packed_outputs)
        return outputs, hidden

    def save_model(self, path):
        torch.save(self, PATH)

In [19]:
class Attention(Module):

    def __init__(self, args):
        super(Attention, self).__init__()
        self.score = nn.Linear(2*args.hidden_size, 2*args.hidden_size)

    def forward(self, decoder_outputs, encoder_outputs, source_lengths):
        """
        Return attention scores.
        args:
        decoder_outputs: TxBx*
        encoder_outputs: TxBx*
        returns:
        attention scores: Bx1xT
        """
        projected_encoder_outputs = self.score(encoder_outputs) \
                                        .permute(1, 2, 0) # batch first
        decoder_outputs = decoder_outputs.transpose(0,1)
        scores = decoder_outputs.bmm(projected_encoder_outputs)
        scores = scores.squeeze(1)
        mask = length_to_mask(source_lengths, source_lengths[0])
        if args.cuda: mask = mask.cuda()
        scores.data.masked_fill_(mask, float('-inf'))
        scores = F.softmax(scores, dim=1)
        return scores.unsqueeze(1)

In [20]:
class Decoder(Module):

    def __init__(self, args):
        super(Decoder, self).__init__()
        self.embed = nn.Embedding(args.vocab_size+4, args.embed_size)
        self.rnn = nn.LSTM(input_size = args.embed_size,
                           hidden_size = 2 * args.hidden_size,
                           num_layers = args.num_layers,
                           bidirectional=False)
        self.output = nn.Linear(4*args.hidden_size, args.hidden_size)
        self.predict = nn.Linear(args.hidden_size, args.vocab_size+4)
        self.attention = Attention(args).cuda() if args.cuda else Attention(args)
        for name, param in self.rnn.named_parameters():
            if 'bias' in name:
                nn.init.constant(param, 0.0)
            elif 'weight' in name:
                nn.init.uniform(param, -0.08, 0.08)

    def forward(self, target, encoder_outputs, source_lengths, hidden=None):
        """
        args:
        target: A LongTensor contains a word of the target sentence. size: 1*B.
        """
        target = target.unsqueeze(0)
        embed_target = self.embed(target)
        decoder_outputs, decoder_hiddens = self.rnn(embed_target, hidden)
        atten_scores = self.attention(decoder_outputs, encoder_outputs, source_lengths)
        context = atten_scores.bmm(encoder_outputs.transpose(0,1))
        concat = torch.cat([context, decoder_outputs.transpose(0,1)], -1)
        atten_outputs = F.tanh(self.output(concat))
        predictions = self.predict(atten_outputs)
        predictions = predictions.squeeze(1)
        return predictions, decoder_hiddens, atten_scores

In [14]:
print("start data loading: train data at {}, test data at {}".format(args.train_path, args.test_path))
train_data = OpenSub(args, args.train_path)
test_data = OpenSub(args, args.test_path)
PAD = train_data.PAD
collate = lambda x:pad_batch(x, PAD)
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=args.batch_size,
                                           shuffle=True, collate_fn=collate,
                                           num_workers=args.num_workers)
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=1000,
                                          shuffle=True, collate_fn=collate,
                                          num_workers=args.num_workers)
print("finish data loading.")


start data loading: train data at ../data/processed/t_given_s_dialogue_length2_3_temp.txt, test data at ../data/processed/t_given_s_dialogue_length2_3_test.txt


FileNotFoundError: [Errno 2] No such file or directory: '../data/processed/t_given_s_dialogue_length2_3_temp.txt'

In [21]:
encoder = Encoder(args).cuda() if args.cuda else Encoder(args)
decoder = Decoder(args).cuda() if args.cuda else Decoder(args)

encoder_optim = optim.SGD(encoder.parameters(), lr=args.lr)
decoder_optim = optim.SGD(decoder.parameters(), lr=args.lr)

AssertionError: Torch not compiled with CUDA enabled