In [1]:
from __future__ import print_function

import copyreg as copy_reg
import random
import os
import time

import pickle as pkl
import torch
from torch import nn, optim
import numpy as np
import nltk

from models.vrnn import VRNN
from data_apis.data_utils import SWDADataLoader
from data_apis.SWDADialogCorpus import SWDADialogCorpus
import params

In [2]:
# Load the corpus API object from disk. Despite its .txt suffix the file is read
# as a binary pickle; encoding='latin1' is the setting pickle uses to decode
# 8-bit strings stored by a Python 2 pickle (presumably how this file was made).
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
api_pickle_path = "data/cambridge_data/api_cambridge.txt"
with open(api_pickle_path, "rb") as pickle_file:
    api = pkl.load(pickle_file, encoding='latin1')

In [3]:
# Ask the loaded corpus API for its dialog corpus; the result is looked up by
# split name with .get(...) in the following cell.
dial_corpus = api.get_dialog_corpus()

In [4]:
# Pull each split out of the dialog corpus by name.
# NOTE(review): labeled_dial is not used by any cell visible in this notebook.
train_dial = dial_corpus.get("train")
labeled_dial = dial_corpus.get("labeled")
test_dial = dial_corpus.get("test")

In [12]:
# Inspect the first training dialog in its id-encoded form. As the output shows,
# it is a list of turns, each turn holding two utterances given as lists of
# integer token ids that start with 4 and end with 5
# (presumably the user utterance followed by the system reply - TODO confirm).
train_dial[0]

[[[4, 12, 80, 57, 68, 55, 2, 21, 3, 32, 65, 14, 7, 2, 18, 3, 2, 19, 3, 6, 5],
  [4,
   26,
   28,
   115,
   15,
   14,
   7,
   2,
   18,
   3,
   2,
   19,
   3,
   32,
   76,
   2,
   21,
   3,
   2,
   20,
   3,
   6,
   53,
   9,
   40,
   23,
   2,
   20,
   3,
   94,
   10,
   5]],
 [[4, 47, 12, 2, 41, 3, 63, 7, 2, 20, 3, 6, 5],
  [4, 2, 17, 3, 8, 23, 2, 11, 3, 15, 48, 14, 7, 2, 18, 3, 2, 19, 3, 6, 5]],
 [[4, 33, 8, 38, 2, 29, 3, 10, 5],
  [4,
   26,
   2,
   29,
   3,
   8,
   2,
   35,
   3,
   13,
   54,
   26,
   49,
   43,
   12,
   37,
   51,
   9,
   58,
   10,
   5]],
 [[4, 33, 8, 7, 2, 20, 3, 75, 76, 10, 5],
  [4, 75, 76, 2, 11, 3, 2, 20, 3, 6, 5]],
 [[4, 13, 32, 65, 33, 12, 226, 50, 27, 6, 96, 23, 299, 46, 5],
  [4, 24, 9, 102, 119, 27, 83, 7, 106, 15, 78, 6, 5]]]

In [11]:
# Inspect the first entry of the raw training corpus: here the utterances are
# lists of word tokens wrapped in '<s>' ... '</s>', with bracketed placeholders
# such as '[', 'value_area', ']' appearing as separate tokens.
api.train_corpus[0]

[[[['<s>',
    'i',
    'need',
    'to',
    'find',
    'an',
    '[',
    'value_pricerange',
    ']',
    'that',
    's',
    'in',
    'the',
    '[',
    'value_area',
    ']',
    '[',
    'slot_area',
    ']',
    '.',
    '</s>'],
   ['<s>',
    'there',
    'are',
    'several',
    'restaurant',
    'in',
    'the',
    '[',
    'value_area',
    ']',
    '[',
    'slot_area',
    ']',
    'that',
    'serve',
    '[',
    'value_pricerange',
    ']',
    '[',
    'slot_food',
    ']',
    '.',
    'do',
    'you',
    'have',
    'a',
    '[',
    'slot_food',
    ']',
    'preference',
    '?',
    '</s>']],
  [['<s>',
    'no',
    'i',
    '[',
    'value_dontcare',
    ']',
    'about',
    'the',
    '[',
    'slot_food',
    ']',
    '.',
    '</s>'],
   ['<s>',
    '[',
    'value_name',
    ']',
    'is',
    'a',
    '[',
    'value_food',
    ']',
    'restaurant',
    'located',
    'in',
    'the',
    '[',
    'value_area',
    ']',
    '[',
    'slot_area',
 

In [6]:
# Wrap the id-encoded dialogs in batch loaders, using the utterance / dialog
# length limits configured in params.
loader_limits = (params.max_utt_len, params.max_dialog_len)
train_loader = SWDADataLoader("Train", train_dial, *loader_limits)
test_loader = SWDADataLoader("Test", test_dial, *loader_limits)
# There is no separate validation loader: valid_loader is the very same object
# as test_loader, so both names share one loader and its iteration state.
valid_loader = test_loader

Max dialog len 8 and min dialog len 2 and avg len 4.113580
Max dialog len 7 and min dialog len 2 and avg len 3.963235


In [9]:
# Seed Python's, NumPy's and PyTorch's random generators with three consecutive
# values (seed, seed + 1, seed + 2) derived from the configured base seed.
seed = params.seed
for seed_offset, seed_fn in enumerate((random.seed, np.random.seed, torch.manual_seed)):
    seed_fn(seed + seed_offset)

# Select the GPU only when the config asks for it and CUDA is actually available;
# otherwise fall back to the CPU.
use_cuda = params.use_cuda and torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

In [7]:
# Show the configured working directory; log_dir is built underneath it.
params.work_dir

'working'

In [8]:
# Path for this run's outputs: <work_dir>/run<unix time in whole seconds>.
# Only the path string is computed here; the directory is not created.
run_stamp = int(time.time())
log_dir = os.path.join(params.work_dir, "run" + str(run_stamp))

In [9]:
# Instantiate the model with its default constructor arguments.
model = VRNN()
# GPU transfer is deliberately disabled below, so the `device` selected earlier
# is not applied to the model (presumably it stays on the CPU - TODO confirm).
#model = model.cuda()

In [10]:
# Adam over every model parameter, starting from the learning rate in params.
optimizer = optim.Adam(model.parameters(), lr=params.init_lr)

In [11]:
# Start a (shuffled) epoch only when the loader has never been initialised
# (num_batch is None) or its batch pointer has reached the end of the epoch.
loader_exhausted = (train_loader.num_batch is None
                    or train_loader.ptr >= train_loader.num_batch)
if loader_exhausted:
    train_loader.epoch_init(params.batch_size, shuffle=True)

Train begins with 25 batches with 5 left over samples


In [12]:
# Fetch one batch by hand for interactive inspection. The training loop unpacks
# a batch into five items: usr_input_sent, sys_input_sent, dialog_length_mask,
# usr_input_mask, sys_input_mask.
# NOTE(review): this presumably advances the loader, so the batch taken here is
# not seen again by a loop that continues the same epoch - confirm.
batch = train_loader.next_batch()

In [34]:
# Shape of the 4th batch item (usr_input_mask, going by the unpacking order in
# the training loop). Output: [16, 10, 40]
# (presumably batch x dialog turns x tokens per utterance - TODO confirm).
batch[3].shape

torch.Size([16, 10, 40])

In [28]:
# Scratch embedding layer for shape experiments only; it is independent of the
# model above. Arguments are (number of embeddings, embedding dimension).
embedding = nn.Embedding(params.max_vocab_cnt, params.embed_size)

In [29]:
# Embed the 1st batch item (usr_input_sent, going by the unpacking order in the
# training loop): the lookup appends an embedding axis, giving [16, 10, 40, 300].
embedding(batch[0]).shape

torch.Size([16, 10, 40, 300])

In [16]:
# Training loop: one full, reshuffled pass over train_loader per epoch.
#
# Fixes relative to the earlier version of this cell:
#   * the inner loop ran `while True` with no exit, so the epoch loop could
#     never advance and an exhausted loader was unpacked blindly;
#   * the loader was never re-initialised, so later epochs had no data;
#   * batches are now placed on the same device as the model, replacing the
#     commented-out per-tensor .cuda() calls;
#   * the loss is reported as a plain number instead of a tensor repr.
#
# NOTE(review): the recorded "MulBackward0 returned an invalid gradient ...
# expected type torch.FloatTensor but got torch.LongTensor" error is raised by
# loss.backward() and originates inside the model's forward computation, not in
# this loop. Presumably an integer (Long) tensor such as one of the masks takes
# part in a multiplication there and needs a .float() cast - confirm in
# models/vrnn.
for epoch in range(1, params.max_epoch + 1):
    model.train()

    # Begin a fresh epoch so every epoch iterates over all training batches.
    train_loader.epoch_init(params.batch_size, shuffle=True)

    # Follow the model's current device so this cell works whether or not the
    # model has been moved to the GPU.
    model_device = next(model.parameters()).device

    # Same exhaustion test the notebook uses before calling epoch_init.
    while train_loader.ptr < train_loader.num_batch:
        batch = train_loader.next_batch()
        if batch is None:
            # Defensive: stop the epoch if the loader signals exhaustion itself.
            break

        (usr_input_sent, sys_input_sent, dialog_length_mask,
         usr_input_mask, sys_input_mask) = (item.to(model_device) for item in batch)

        optimizer.zero_grad()
        loss = model(usr_input_sent, sys_input_sent, dialog_length_mask,
                     usr_input_mask, sys_input_mask)
        loss.backward()
        optimizer.step()

        # .item() extracts the Python float, keeping the log free of grad_fn noise.
        print("epoch %d loss %.4f" % (epoch, loss.item()))

vrnn  tensor([16063.1094, 16063.1094, 16063.1094,  ..., 11428.6426, 11428.6426,
        11428.6426], grad_fn=<CatBackward>)
vrnn  tensor(35494.3047, grad_fn=<DivBackward0>)


RuntimeError: Function MulBackward0 returned an invalid gradient at index 0 - expected type torch.FloatTensor but got torch.LongTensor

In [None]:
# # scratch / debugging code (not executed): print the name and tensor type of each trainable parameter
# for p in model.parameters():
#     if p.requires_grad:
#          print(p.name, p.type())