In [1]:
#import os
#os.environ["CUDA_VISIBLE_DEVICES"] ="-1"
import torch
import numpy as np
import torch.nn as nn
# import torch.nn.functional as F
# import torchvision

#from keras.preprocessing.sequence import pad_sequences
#from keras.utils import to_categorical

from tqdm import tqdm_notebook as tqdm

import json
import nltk
from nltk import word_tokenize

from torch.utils.data.dataloader import default_collate


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
DIC_word_index = json.load(open("vocab.json", "r", encoding='utf-8'))
DIC_index_word = {index:word for word, index in DIC_word_index.items()}
word_vectors = np.load("wv_matrix100d.npy")

cuda


In [3]:
DIC_index_word[97]

'別'

In [4]:
def seq2sent(seq):
    return [DIC_index_word[i] for i in seq]

In [5]:
def to_words(ys):       
    tokens = []
    #print(ys)
    #input("")
    for catagorical_word in ys:
        index = np.argmax(catagorical_word.cpu().detach().numpy())
        #print(index)
        if index in DIC_index_word:
            tokens.append(DIC_index_word[index])
        else:
            tokens.append('<unk>');
    return tokens

In [6]:
def _run_iter(batch, training, model, loss_function):
    with torch.no_grad():
        e_x = batch['encoder_x'].long().to(device)
        d_x = batch['decoder_x'].long().to(device)
    output = model.forward(e_x, d_x, 0.5)
    #print(output.shape)
    #print(batch['decoder_y'].shape)
    #input("")
    #print(output)
    #input("")
    loss = loss_function(output.view(-1, len(word_vectors)), batch['decoder_y'].view(-1).long().to(device))
    return output, loss

In [7]:
def _run_epoch(dataloader, training, model, optimizer, loss_function):
    model.train(training)
    if training:
        iter_in_epoch = min(len(dataloader), 1000000)
        description = 'train'
    else:
        iter_in_epoch = len(dataloader)
        description = 'test'
    grad_accumulate_steps = 1
    trange = tqdm(enumerate(dataloader), total=iter_in_epoch, desc=description)
    loss = 0
    for i, batch in trange:   
        if training and i >= iter_in_epoch:
            break

        if training:
            #print("batch:{}".format(batch))
            #print(batch['context'].dtype)
            optimizer.zero_grad()
            output, batch_loss = _run_iter(batch, training, model, loss_function)            
            
            batch_loss /= grad_accumulate_steps
            
            if i % grad_accumulate_steps == 0:
                optimizer.zero_grad()
            
            batch_loss.backward()
            
            print("loss: {}".format(loss/(i+1)), end='\r')

            if (i + 1) % grad_accumulate_steps == 0:
                optimizer.step()
            if((i+1) % 1500 == 0):
                print([DIC_index_word[i.item()] for i in batch['decoder_y'][0].cpu().detach()])
                print(to_words(output[0]))
                print(batch_loss)
        else:
            with torch.no_grad():
                output, batch_loss = _run_iter(batch, training, model, loss_function)
                if((i+1) % 1500 == 0):
                    print([DIC_index_word[i.item()] for i in batch['decoder_y'][0].cpu().detach()])
                    print(to_words(output[0]))
                
        loss += batch_loss.item()

    loss /= iter_in_epoch
    print('loss=%f\n' % loss)

In [8]:
def pad_post_zero(a, length):
    ret = []
    for _list in a:
        if(len(_list) < length):
            for ct in range(len(_list),length,1):
                _list.append(DIC_word_index["<pad>"])
        if(len(_list) > length):
            _list = _list[:length]
            
        ret.append(_list)
    return ret

In [None]:
def to_categorical(y, num_classes=None, dtype='float32'):
    """Converts a class vector (integers) to binary class matrix.
    E.g. for use with categorical_crossentropy.
    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to num_classes).
        num_classes: total number of classes.
        dtype: The data type expected by the input, as a string
            (`float32`, `float64`, `int32`...)
    # Returns
        A binary matrix representation of the input. The classes axis
        is placed last.
    # Example
    ```python
    # Consider an array of 5 labels out of a set of 3 classes {0, 1, 2}:
    > labels
    array([0, 2, 1, 2, 0])
    # `to_categorical` converts this into a matrix with as many
    # columns as there are classes. The number of rows
    # stays the same.
    > to_categorical(labels)
    array([[ 1.,  0.,  0.],
           [ 0.,  0.,  1.],
           [ 0.,  1.,  0.],
           [ 0.,  0.,  1.],
           [ 1.,  0.,  0.]], dtype=float32)
    ```
    """

    y = np.array(y, dtype='int')
    input_shape = y.shape
    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
        input_shape = tuple(input_shape[:-1])
    y = y.ravel()
    if not num_classes:
        num_classes = np.max(y) + 1
    n = y.shape[0]
    categorical = np.zeros((n, num_classes), dtype=dtype)
    categorical[np.arange(n), y] = 1
    output_shape = input_shape + (num_classes,)
    categorical = np.reshape(categorical, output_shape)
    return categorical


In [None]:
def my_collate(datas):
    batch = {}
    # collate lists
    batch['decoder_x'] = torch.tensor([data['decoder_x'] for data in datas])
    batch['decoder_y'] = torch.tensor([data['decoder_y'] for data in datas])
    batch['encoder_x'] = torch.tensor([data['encoder_x'] for data in datas])
    return batch

In [None]:
sent_len = 10

In [None]:
encode_x = []
with open('sel_conversation/question.txt', 'r') as f:
    for l in f:
        l = l.split()
        tmp = []
        for word in l:
            try:
                index = DIC_word_index[word]
            except KeyError:
                index = DIC_word_index['<unk>']
            tmp.append(index)
        encode_x.append([DIC_word_index['<bos>']] + tmp + [DIC_word_index['<eos>']])        
encode_x = pad_post_zero(encode_x, sent_len)




In [None]:
print(seq2sent(encode_x[73]))

['<bos>', '導致', '四個', '州', '停電', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']


In [None]:
decode_x = []
decode_y = []
with open('sel_conversation/answer.txt', 'r') as f:
    for l in f:
        l = l.split()
        tmp = []
        for word in l:
            try:
                index = DIC_word_index[word]
            except KeyError:
                index = DIC_word_index['<unk>']
            tmp.append(index)
        decode_x.append([DIC_word_index['<bos>']] + tmp)    
        decode_y.append( tmp + [DIC_word_index['<eos>']])
    decode_x = pad_post_zero(decode_x, sent_len)
    decode_y = pad_post_zero(decode_y, sent_len)

In [None]:
print(seq2sent(decode_x[68]))
print(seq2sent(decode_y[68]))

['<bos>', '在', '最新', '民調', '中', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['在', '最新', '民調', '中', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']


In [None]:
datas = []
for a,b,c in zip(decode_x, decode_y , encode_x):
    data = {}

    data['decoder_x'] = a
    data['decoder_y'] = b;
    data['encoder_x'] = c;
    
    datas.append(data)

In [None]:
class SimpleRNN(torch.nn.Module):
    def __init__(self, latent_dim, seq_length, embed_dim, word_vectors):
        super(SimpleRNN, self).__init__()
        
        self.hidden_dim = latent_dim        
        self.seq_length = seq_length
        self.embed_dim = embed_dim
        
#         self.embedding = torch.nn.Embedding(len(word_vectors), embed_dim)
#         self.embedding.load_state_dict({'weight': word_vectors})
#         self.embedding.weight.requires_grad = False
        self.embedding = torch.nn.Embedding.from_pretrained(torch.from_numpy(word_vectors),freeze=True)
#        self.embedding = torch.nn.Embedding(len(word_vectors), embed_dim)
    
        self.GRU1 = torch.nn.GRU(self.embed_dim, latent_dim,  num_layers=1, batch_first = True)
        self.GRU2 = torch.nn.GRU(self.embed_dim, latent_dim,  num_layers=1, batch_first = True)
        
        self.Projection_layer = torch.nn.Sequential(
            torch.nn.Linear(latent_dim, len(word_vectors)),
        )
    def encoder_GRU(self, e_x):
        e_y, hiddens = self.GRU1(e_x)
        
        return e_y, hiddens
    
    def decoder_GRU(self, d_x, hiddens):
        d_y, hiddens2 = self.GRU2(d_x, hiddens)
        return d_y, hiddens2
    
    def forward(self, e_x, d_x):
        e_x = self.embedding(e_x).float()
        
        d_x = self.embedding(d_x).float()
        
        
        e_y, hiddens = self.encoder_GRU(e_x)
        
        first = True
        
#         for word in d_x.transpose(1,0):
#             word = word.view(word.shape[0], 1, -1)
#             d_y, hiddens = self.decoder_GRU(word, hiddens)
            
# #             print(d_y.shape)
#             if(first):
#                 first = False;
#                 last_layer_input = d_y
#             else:
#                 last_layer_input = torch.cat((last_layer_input, d_y), 1)
#         #print(last_layer_input.shape)
#         output = self.Projection_layer(last_layer_input)
        for word in d_x.transpose(1,0):
            word = word.view(word.shape[0], 1, -1)
            d_y, hiddens = self.decoder_GRU(word, hiddens)
            
            if(first):
                first = False;
                last_layer_input = self.Projection_layer(d_y)
            else:
                last_layer_input = torch.cat((last_layer_input, self.Projection_layer(d_y)), 1)

        output = last_layer_input
        #print(output)
        
        return output

    

In [None]:
class AttModel(torch.nn.Module):
    def __init__(self, latent_dim, seq_length, embed_dim, word_vectors):
        super(AttModel, self).__init__()
        
        self.hidden_dim = latent_dim        
        self.seq_length = seq_length
        self.embed_dim = embed_dim
        
#         self.embedding = torch.nn.Embedding(len(word_vectors), embed_dim)
#         self.embedding.load_state_dict({'weight': word_vectors})
#         self.embedding.weight.requires_grad = False
        self.embedding = torch.nn.Embedding.from_pretrained(torch.from_numpy(word_vectors), freeze=True)
#        self.embedding = torch.nn.Embedding(len(word_vectors), embed_dim)
    
        self.soft = torch.nn.Softmax(-2)
        
        self.RNN1 = torch.nn.LSTM(self.embed_dim, latent_dim,  num_layers=2, batch_first = True)
        self.RNN2 = torch.nn.LSTM(self.embed_dim, latent_dim,  num_layers=2, batch_first = True)
        
        self.Projection_layer = torch.nn.Sequential(
            torch.nn.Linear(2 * latent_dim, len(word_vectors)),
        )
        
        self.trainable_W = torch.nn.Sequential(
            torch.nn.Linear(latent_dim ,  latent_dim),
            torch.nn.Sigmoid(),
            torch.nn.Dropout(p=0.2),
        )
    def encoder_RNN(self, e_x):
        e_y, hiddens = self.RNN1(e_x)
        
        return e_y, hiddens
    
    def decoder_RNN(self, d_x, hiddens):
        d_y, hiddens2 = self.RNN2(d_x, hiddens)
        return d_y, hiddens2
    
    def forward(self, e_x, d_x, probs):
        e_x = self.embedding(e_x).float()
        
        d_x = self.embedding(d_x).float()

        #print(sent.shape) torch.Size([2, 15, 6087])

        e_output, hiddens = self.encoder_RNN(e_x)
        
        #attention
        uW = self.trainable_W(e_output)
        
        first = True
        pre = None
        for word in d_x.transpose(1,0):
            #print(word.shape) torch.Size([2, 6087])
            word = torch.unsqueeze(word , 1)
            #torch.Size([128, 1, 100])
            
#             #sample a word here
#             if (not first) and np.random.rand() < probs:
#                 wordprob = self.Projection_layer(pre)
#                 #print(wordprob.shape)torch.Size([128, 1, 46801])
#                 ans_indices = torch.argmax(wordprob, dim=-1, keepdim=False)
#                 word = self.embedding(ans_indices).float()
#                 #torch.Size([128, 1, 100])
            
            #or randomize for each data in batch
            if not first:
                useTeacher = (torch.rand(word.shape[0]) > probs).float().view(-1, 1, 1).to(device)
                useSample = 1.0 - useTeacher
                
                #get previous output
                wordprob = self.Projection_layer(pre)
                ans_indices = torch.argmax(wordprob, dim=-1, keepdim=False)
                preword = self.embedding(ans_indices).float()
                
                #mixture
                word = useTeacher*word + useSample*preword
                
                
                
            

            #print(word.shape)
            #one for each word, therefore d_output = d_state
            d_output, hiddens = self.decoder_RNN(word, hiddens)
            uWv = torch.matmul(uW, d_output.transpose(2,1))
            #print(uWv.shape) torch.Size([2, 80, 1])
            alpha = self.soft(uWv)
            #print(uWv)
            #print(alpha)
            #input("")
            new_context = e_output.transpose(2,1) @ alpha;
            #print(e_output.shape) torch.Size([2, 80, 256])
            #print(alpha.shape) torch.Size([2, 80, 1])
            
            #print(d_output.shape) torch.Size([2, 1, 256])
            #print(new_context.shape) torch.Size([2, 256, 1])          
            pre = torch.cat((d_output, new_context.transpose(2,1)), 2)
            if(first):
                first = 0;
                last_layer_input = pre
            else:
                last_layer_input = torch.cat((last_layer_input, pre), 1)
            #print(last_layer_input.shape) #torch.Size([2, 1, 512])
            
        logits = self.Projection_layer(last_layer_input)

        #logits = torch.stack(logits, 1)
        return logits

In [None]:
#hyperparameters
lr = 5e-3  # learning rate
batch_size = 256
latent_dim = 1024
iter_in_epoch = 500
embed_dim = word_vectors.shape[1]
print(word_vectors.shape)

(46801, 100)


In [None]:
model = AttModel(latent_dim, sent_len, embed_dim, word_vectors)
saved_model = torch.load("models/Model61")
model.load_state_dict(saved_model['model'])

model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=lr)
optimizer.load_state_dict(saved_model['optimizer'])
loss_function = torch.nn.CrossEntropyLoss(ignore_index=DIC_word_index['<pad>'])

epoch = saved_model['epoch']
max_epochs = 100  # how many epochs to train for
while epoch < max_epochs:
    # train and evaluate train score
    print('training %i' % epoch)

    # train epoch
    dataloader = torch.utils.data.DataLoader(datas, batch_size = batch_size, shuffle = True, collate_fn = my_collate)
    log_train = _run_epoch(dataloader, True, model, optimizer, loss_function)

    # test epoch
    """
    print('evaluating %i' % epoch)
    dataloader = torch.utils.data.DataLoader(valid_datas, batch_size = batch_size, collate_fn=my_collate)
    log_valid = _run_epoch(dataloader, False, model, optimizer, loss_function)
    """
    
    epoch += 1
    if epoch % 5 == 0:
        torch.save({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }, "./models/Model" + str(epoch))

training 62


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['你', '是', '對', '的', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['你', '是', '<eos>', '的', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.0865, device='cuda:0', grad_fn=<DivBackward0>)
['說', '的', '夠多', '了', ',', '我們', '要', '走', '了', '<eos>']
['說', '的', '夠多', '了', ',', '我們', '要', '走', '了', '<eos>']
tensor(1.2415, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.1091374323811642
loss=1.109518

training 63


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['幾分鐘', '後', '便', '知道', '結果', '如何', '了', '<eos>', '<pad>', '<pad>']
['幾分鐘', '後', '便', '知道', '結果', '如何', '了', '<eos>', '<eos>', '<eos>']
tensor(1.1674, device='cuda:0', grad_fn=<DivBackward0>)
['我', '認為', '我', '只', '聽到', '喬治', '<eos>', '<pad>', '<pad>', '<pad>']
['我要', '認為', '那', '正', '發生', '。', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.2182, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0989156585966438
loss=1.099264

training 64


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['五天', '的', '誘惑', '永久', '的', '沉睡', '<eos>', '<pad>', '<pad>', '<pad>']
['五天', '的', '誘惑', '永久', '的', '沉睡', '中', '<eos>', '<eos>', '<eos>']
tensor(1.0045, device='cuda:0', grad_fn=<DivBackward0>)
['我', '認識', '很多', '毒販', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['我', '的', '很多', '毒販', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.0294, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0898553813092933
loss=1.090272

training 65


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['我', '也', '想', '知道', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['我', '也', '想', '知道', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.0931, device='cuda:0', grad_fn=<DivBackward0>)
['沒有', ',', '沒', '懷孕', '過', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']
['沒有', ',', '沒', '懷孕', '過', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.3520, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0822724126799161
loss=1.082655

training 66


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['不', '我', '想', '說', '好', '啊', '我', '是', '說', '<eos>']
['不', '我', '好', '說', '說', '好', '好', '啊', '啊', '好']
tensor(1.1568, device='cuda:0', grad_fn=<DivBackward0>)
['好', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['好', '<eos>', '<eos>', '對', '對', '我們', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1510, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0725580528205216
loss=1.072904

training 67


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['女孩', '從', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['你', '<eos>', '布朗', '克斯', '?', '<eos>', '<eos>', '?', '<eos>', '<eos>']
tensor(1.1522, device='cuda:0', grad_fn=<DivBackward0>)
['看看', '我們', '今天', '掙', '了', '多少', '<eos>', '<pad>', '<pad>', '<pad>']
['我', '<eos>', '<eos>', '掙', '了', '多少', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1874, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0664290095137996
loss=1.066793

training 68


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['不', '可能', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['不', '不', '不是', '不是', '她們', '的', '女朋友', '<eos>', '<eos>', '<eos>']
tensor(1.0263, device='cuda:0', grad_fn=<DivBackward0>)
['對', '收音機', '我', '的', '指揮官', '。', '。', '。', '<eos>', '<pad>']
['對', '收音機', '我', '來說', '。', '<eos>', '。', '<eos>', '<eos>', '<eos>']
tensor(1.2444, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0566822513071603
loss=1.057008

training 69


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['他們', '正是', '抒情詩', '。', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['他們', '正是', '抒情詩', '。', '<eos>', '。', '<eos>', '<eos>', '。', '<eos>']
tensor(1.1120, device='cuda:0', grad_fn=<DivBackward0>)
['啊', ',', '可以', '讓', '我', '過去', '一下', '嗎', '?', '<eos>']
['是', '他', '可以', '買', '一個', '的', '房子', '<eos>', '?', '<eos>']
tensor(1.2884, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0494221971132034
loss=1.049807

training 70


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['病毒', '會發', '心臟病', '的', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['病毒', '會發', '心臟病', '的', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1319, device='cuda:0', grad_fn=<DivBackward0>)
['你', '看看', ',', '好', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['你', '看看', ',', '這', '了', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1762, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0427168343476085
loss=1.043048

training 71


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['可惜', '你', '打扮', '得', '再', '美', '<eos>', '<pad>', '<pad>', '<pad>']
['可惜', '你', '打扮', '得', '再', '美', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(0.9580, device='cuda:0', grad_fn=<DivBackward0>)
['爲', '你', '被', '<unk>', '像', '你', '<eos>', '<pad>', '<pad>', '<pad>']
['爲', '你', '被', '<unk>', '爲', '像', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.2190, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0367222321587939
loss=1.037060

training 72


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['首先', '它', '太大', '了', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['不行', '它', '太大', '了', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.2257, device='cuda:0', grad_fn=<DivBackward0>)
['<unk>', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['我們', '知道', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.0393, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0288436462712842
loss=1.029163

training 73


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['大家', '都', '說', '這種', '肥皂', '最好', '用', '<eos>', '<pad>', '<pad>']
['大家', '都', '這種', '這種', '肥皂', '最好', '用', '<eos>', '<eos>', '<eos>']
tensor(1.0969, device='cuda:0', grad_fn=<DivBackward0>)
['怎麼', '打扮', '成', '這樣', '?', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']
['怎麼', '打扮', '的', '這樣', '?', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1484, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0207240452773355
loss=1.021026

training 74


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['然後', '教', '我', '拳擊', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['然後', '教', '我', '拳擊', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.2555, device='cuda:0', grad_fn=<DivBackward0>)
['等', '一下', ',', '是', '你', '偵測', '到', '的', '?', '<eos>']
['等', '一下', ',', '是', '你', '偵測', '到', '的', '?', '<eos>']
tensor(1.2477, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0148682394693065
loss=1.015222

training 75


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['叫', '他', '別', '扯', '我', '進來', '<eos>', '<pad>', '<pad>', '<pad>']
['叫', '他', '別', '拿', '我', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(0.9712, device='cuda:0', grad_fn=<DivBackward0>)
['等', '我', '把', '帽子', '摘下來', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']
['等', '我', '把', '帽子', '摘下來', ',', '<eos>', '吧', '<eos>', '<eos>']
tensor(1.0077, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0076145511033923
loss=1.007940

training 76


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['男孩', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['男孩', '的', '一個', '女孩', '做', '的', '比賽', '<eos>', '<eos>', '<eos>']
tensor(1.0557, device='cuda:0', grad_fn=<DivBackward0>)
['然後', '每週', '都', '要', '吹', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']
['然後', '每週', '都', '吹', '吹', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1812, device='cuda:0', grad_fn=<DivBackward0>)
loss: 1.0032601992404737
loss=1.003584

training 77


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['我們', '下次', '再說', '吧', ',', '謝謝', '<eos>', '<pad>', '<pad>', '<pad>']
['我們', '下次', '再說', '吧', '謝謝', '謝謝', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(0.9020, device='cuda:0', grad_fn=<DivBackward0>)
['嘿', ',', '我們', '去', '哪兒', '?', '<eos>', '<pad>', '<pad>', '<pad>']
['曾', ',', '卻', '被', '祖國', '?', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(0.9545, device='cuda:0', grad_fn=<DivBackward0>)
loss: 0.9958086816723957
loss=0.996087

training 78


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['你', '知道', '自己', '在', '說', '什麼', '?', '<eos>', '<pad>', '<pad>']
['你', '知道', '自己', '在', '說', '什麼', '嗎', '<eos>', '<eos>', '<eos>']
tensor(0.8936, device='cuda:0', grad_fn=<DivBackward0>)
['什麼', '你', '看不到', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['去', ',', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.0849, device='cuda:0', grad_fn=<DivBackward0>)
loss: 0.9903046908766724
loss=0.990569

training 79


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['只要', '往後', '拉', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['我', '了', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(0.8929, device='cuda:0', grad_fn=<DivBackward0>)
['你們', '處理', '這個', '案子', '的', '手法', '<eos>', '<pad>', '<pad>', '<pad>']
['我', '來得', ',', '案子', '造福', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(0.8871, device='cuda:0', grad_fn=<DivBackward0>)
loss: 0.9839016861000727
loss=0.984244

training 80


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['都', '同意', '簽署', '和平', '協議', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']
['都', '同意', '簽署', '和平', '協議', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1170, device='cuda:0', grad_fn=<DivBackward0>)
['哦', ',', '大家', '都', '喜歡', '聽', '寵物', '故事', '<eos>', '<pad>']
['哦', ',', '大家', '都', '喜歡', '聽', '寵物', '故事', '<eos>', '<eos>']
tensor(1.0598, device='cuda:0', grad_fn=<DivBackward0>)
loss: 0.9784244436337505
loss=0.978701

training 81


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['你', '沒有', '感覺', '嗎', '?', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']
['你', '沒有', '感覺', '嗎', '?', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(0.8626, device='cuda:0', grad_fn=<DivBackward0>)
['你', '會', '怎麼', '做', '?', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>']
['你', '會', '怎麼', '說', '?', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.1661, device='cuda:0', grad_fn=<DivBackward0>)
loss: 0.9730849147536034
loss=0.973380

training 82


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

['還好', '麼', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['還好', '麼', '<eos>', '<eos>', '<eos>', '<eos>', '吹', '<eos>', '<eos>', '<eos>']
tensor(0.9935, device='cuda:0', grad_fn=<DivBackward0>)
['但', '不許', '碰', '<eos>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
['你', '我', '不', '你', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>', '<eos>']
tensor(1.0112, device='cuda:0', grad_fn=<DivBackward0>)
loss: 0.9663721536134564
loss=0.966696

training 83


HBox(children=(IntProgress(value=0, description='train', max=3440, style=ProgressStyle(description_width='init…

loss: 0.8369893600863795

In [None]:
torch.save({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }, "./models/Model" + str(epoch))

In [None]:
#model = SimpleRNN(latent_dim, sent_len, embed_dim, word_vectors)
model = AttModel(latent_dim, sent_len, embed_dim, word_vectors)

model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(),lr=lr)
loss_function = torch.nn.CrossEntropyLoss(ignore_index=DIC_word_index['<pad>'])

In [None]:
epoch = 0;
max_epochs = 100  # how many epochs to train for
while epoch < max_epochs:
    # train and evaluate train score
    print('training %i' % epoch)

    # train epoch
    dataloader = torch.utils.data.DataLoader(datas, batch_size = batch_size, shuffle = True, collate_fn = my_collate)
    log_train = _run_epoch(dataloader, True, model, optimizer, loss_function)

    # test epoch
    """
    print('evaluating %i' % epoch)
    dataloader = torch.utils.data.DataLoader(valid_datas, batch_size = batch_size, collate_fn=my_collate)
    log_valid = _run_epoch(dataloader, False, model, optimizer, loss_function)
    """
    
    epoch += 1
    torch.save({
        'epoch': epoch + 1,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }, "./models/Model" + str(epoch))

In [None]:
print(DIC_word_index['<pad>'])