In [1]:
import gensim
import os
import json
import tqdm
import time
import numpy as np

from collections import Counter
from gensim.models.word2vec import Word2Vec
from sys import getsizeof



In [2]:
ts = time.strftime('%Y-%b-%d-%H-%M-%S', time.gmtime())

save_model_path = os.path.join('won', ts)
os.makedirs('.\\'+save_model_path)

In [3]:
with open('dict_targets_Reviews.json', 'r', encoding='UTF-8') as f:
    target_dict=json.load(f)

In [4]:
with open('dict_inputs_Reviews.json', 'r', encoding='UTF-8') as f:
    inputs_dict=json.load(f)

In [5]:
max_length = 100

In [6]:
with open('word_unk_dic.json', 'r', encoding='UTF-8') as f:
    word_unk_dic=json.load(f)

word_list = word_unk_dic['WORD']

unk_list = word_unk_dic['UNK']

In [7]:
_PAD_ = "_PAD_"  # 빈칸 채우는 심볼
_STA_ = "_STA_"  # 디코드 입력 시퀀스의 시작 심볼
_EOS_ = "_EOS_"  # 디코드 입출력 시퀀스의 종료 심볼
_UNK_ = "_UNK_"  # 사전에 없는 단어를 나타내는 심볼

_PAD_ID_ = 0
_STA_ID_ = 1
_EOS_ID_ = 2
_UNK_ID_ = 3
_PRE_DEFINED_ = [_PAD_ID_, _STA_ID_, _EOS_ID_, _UNK_ID_]

_PRE_DEFINED_dict = {_PAD_:_PAD_ID_, _STA_:_STA_ID_, _EOS_:_EOS_ID_, _UNK_:_UNK_ID_}

In [8]:
def padding(seq, max_len, start=None, eos=None):
    if start:
        padded_seq = [_STA_] + seq
    elif eos:
        padded_seq = seq + [_EOS_]
    else:
        padded_seq = seq

    if len(padded_seq) < max_len:
        #print(len(padded_seq))
        padded_seq = padded_seq + ([_PAD_] * (max_len - len(padded_seq)))
        if len(padded_seq) < max_len:
            return padded_seq[:max_len - len(padded_seq)]
        return padded_seq
    else:
        if len(padded_seq) > max_len:
            return padded_seq[:-1]
        else : 
            return padded_seq

In [9]:
def unk_make(data):
    
    for index, word in enumerate(data):

        if word in word_list:
            pass
        else:
            data[index] = _UNK_
            pass
            
    return data

In [10]:
def data_padding(data,max_len = 0, start=None, eos=None):
    pos_padding_dict = {}
    for title in tqdm.tqdm(list(data.keys())):
        if (len(data[title])>max_len):
            data[title] = data[title][:max_len]
        sentence = unk_make(data[title])
        pos_padding_dict[title] = padding(sentence,max_len,start,eos)
        
    return pos_padding_dict

In [11]:
def dic_lists(data):
    dic_list = []
    for key in data.keys():
        dic_list.append(data[key])
    return dic_list

In [12]:
targets_pad = data_padding(target_dict,max_length,None,True)
inputs_pad = data_padding(inputs_dict,max_length,True,None)

100%|████████████████████████████████████████████████████████████████████████| 568427/568427 [01:13<00:00, 7720.14it/s]
100%|█████████████████████████████████████████████████████████████████████████| 568427/568427 [10:57<00:00, 865.02it/s]


In [13]:
inputs = dic_lists(inputs_pad)

In [14]:
targets = dic_lists(targets_pad)

In [15]:
for i in inputs:
    if len(i) != 100:
        print(len(i))

In [16]:
model = gensim.models.Word2Vec.load("./Word2vec_data.model")

In [17]:
import torch
from torch.autograd import Variable
import torch.nn.utils.rnn as rnn_utils
import torch.nn as nn

In [18]:
word2index=model.wv.index2word

word2index = {}
for i,j in enumerate(model.wv.index2word):
    word2index[j]=i
index2word=model.wv.index2word

model_embedding = []
for index in model.wv.index2word:
    model_embedding.append(model.wv[index])
model_embedding = np.array(model_embedding)

In [19]:
def word_2_index(datas):
    data_index = []
    for data in tqdm.tqdm(datas):
        temp = []
        for word in data:
            temp.append(word2index[word])
        data_index.append(temp)
    return data_index

In [20]:
target_index = word_2_index(targets)
input_index = word_2_index(inputs)

100%|███████████████████████████████████████████████████████████████████████| 568427/568427 [00:07<00:00, 72372.56it/s]
100%|███████████████████████████████████████████████████████████████████████| 568427/568427 [00:09<00:00, 57731.48it/s]


In [21]:
del targets_pad
del inputs_pad

In [22]:
import gc
collected = gc.collect()
print (collected)

0


In [23]:
max_sequence_length = max_length
embedding_size = 300
hidden_size = 256
word_dropout = 0.5
latent_size = 128
num_layers = 2
bidirectional = True
batch_size_fit = 56
rnn_type = 'gru'
learning_rate = 0.001
k = 0.002
x0 = 8000
vocab_size = len(index2word)
sos_idx = word2index['_STA_']
eos_idx = word2index['_EOS_']
pad_idx = word2index['_PAD_']

In [24]:
class RVAE(nn.Module):
    def __init__(self,vocab_size, embedding_size, max_sequence_length, hidden_size, word_dropout, latent_size,
                sos_idx, eos_idx, pad_idx, numpy_embedding,rnn_type='rnn' , num_layers=1, bidirectional=True):
        
        super().__init__()
        self.model_embedding = torch.from_numpy(numpy_embedding)
        self.max_sequence_length = max_sequence_length
        self.sos_idx = sos_idx
        self.eos_idx = eos_idx
        self.pad_idx = pad_idx
        
        self.latent_size = latent_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        self.embedding = nn.Embedding(vocab_size,embedding_size)
        self.embedding.weight = nn.Parameter(self.model_embedding)
        #self.outputs2vocab = nn.Linear(hidden_size * (2 if bidirectional else 1), vocab_size)
        self.encoder = Encoder(vocab_size = vocab_size,embedding_size = embedding_size, hidden_size = hidden_size, num_layers = num_layers, bidirectional = bidirectional,latent_size = latent_size,rnn_type = rnn_type).cuda()
        self.decoder = Decoder(vocab_size = vocab_size,embedding_size = embedding_size, hidden_size = hidden_size, num_layers = num_layers, bidirectional = bidirectional,latent_size = latent_size,rnn_type = rnn_type, word_dropout=word_dropout).cuda()
    
    def forward(self,x,length): 
        #print(x.size())
        batch_size = x.size(0)
        sorted_lengths, sorted_idx = torch.sort(length, descending=True)
        input_sequence = x[sorted_idx.cuda()]
        input_embedding = self.embedding(input_sequence)
        
        packed_input = rnn_utils.pack_padded_sequence(input_embedding, sorted_lengths.tolist(), batch_first=True)

        mu,logvar,reparam = self.encoder(packed_input)
        logp,outputs  = self.decoder(input_embedding, reparam, sorted_lengths, sorted_idx)
        
        
        return logp, mu, logvar, reparam, outputs

In [25]:
class Encoder(nn.Module):
    def __init__(self,vocab_size,embedding_size, hidden_size, latent_size, bidirectional=True, num_layers = 1,rnn_type='rnn'):
        super(Encoder,self).__init__()
        
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.latent_size = latent_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        if self.rnn_type == 'rnn':
            rnn = nn.RNN
        elif self.rnn_type == 'gru':
            rnn = nn.GRU
        elif self.rnn_type =='lstm':
            rnn = nn.LSTM
        else:
            raise ValueError()
            
        
        self.encoder = rnn(self.embedding_size, self.hidden_size, num_layers = self.num_layers, bidirectional = self.bidirectional, batch_first = True)
        
        self.hidden_factor = (2 if self.bidirectional else 1) * self.num_layers
        
        self.hidden2mean = nn.Linear(self.hidden_size* self.hidden_factor, self.latent_size)
        self.hidden2logv = nn.Linear(self.hidden_size* self.hidden_factor, self.latent_size)
                        
    
    def reparametrize(self, mu, logvar):
        std = logvar.mul(0.5).exp_()
        
        eps = torch.FloatTensor(std.size()).normal_()
        #print(eps)
        eps = Variable(eps).cuda()
        
        return eps.mul(std).add_(mu)
    
    def forward(self,x):
        
        _,hidden = self.encoder(x)
        
        if self.bidirectional or self.num_layers > 1:
            # flatten hidden state
            hidden = hidden.view(batch_size, self.hidden_size*self.hidden_factor)
        else:
            hidden = hidden.squeeze()

        mu = self.hidden2mean(hidden)
        
        logvar = self.hidden2logv(hidden)
        std = torch.exp(0.5 * logvar)
        #reparam = self.reparametrize(mu,logvar)
        z = Variable(torch.randn([batch_size, self.latent_size])).cuda()
        z = z * std + mu
        
        
        return mu,logvar,z#,reparam

In [26]:
class Decoder(nn.Module):
    def __init__(self,vocab_size,embedding_size, hidden_size, latent_size, bidirectional=True, num_layers = 1,rnn_type='rnn',word_dropout = 0.5):
        super(Decoder,self).__init__()
        
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.latent_size = latent_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        
        if self.rnn_type == 'rnn':
            rnn = nn.RNN
        elif self.rnn_type == 'gru':
            rnn = nn.GRU
        elif self.rnn_type =='lstm':
            rnn = nn.LSTM
        else:
            raise ValueError()
            
        self.hidden_factor = (2 if self.bidirectional else 1) * self.num_layers            
        self.latent2hidden = nn.Linear(latent_size, hidden_size * self.hidden_factor)
        
        self.embedding = nn.Embedding(vocab_size,embedding_size)
        self.word_dropout = nn.Dropout(p=word_dropout)
        
        self.decoder = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, batch_first=True)
        self.decoder2outputs = nn.Linear(hidden_size * (2 if bidirectional else 1), hidden_size * (2 if bidirectional else 1))
        self.outputs2vocab = nn.Linear(hidden_size * (2 if bidirectional else 1), vocab_size)
        
    def forward(self,x,z,sorted_lengths,sorted_idx):

        hidden = self.latent2hidden(z)
        if self.bidirectional or self.num_layers > 1:
            # unflatten hidden state
            hidden = hidden.view(self.hidden_factor, batch_size, self.hidden_size)
        else:
            hidden = hidden.unsqueeze(0)
        input_embedding = self.word_dropout(x)
        packed_input = rnn_utils.pack_padded_sequence(input_embedding, sorted_lengths.tolist(), batch_first=True)
        outputs,_ = self.decoder(packed_input, hidden)
        
        padded_outputs = rnn_utils.pad_packed_sequence(outputs, batch_first=True)[0]
        padded_outputs = padded_outputs.contiguous()
        _,reversed_idx = torch.sort(sorted_idx.cuda())
        padded_outputs = padded_outputs[reversed_idx]
        b,s,_ = padded_outputs.size()
        
        #print(padded_outputs.view(-1, padded_outputs.size(2)).size())
        output_vocab = self.outputs2vocab(self.decoder2outputs(padded_outputs.view(-1, padded_outputs.size(2))))
        logp = nn.functional.log_softmax(output_vocab, dim=-1)
        logp = logp.view(b, s, self.embedding.num_embeddings)
        
        return logp,padded_outputs.view(-1, padded_outputs.size(2))

In [27]:
rvae=RVAE(vocab_size, embedding_size, max_sequence_length, hidden_size, word_dropout, latent_size,sos_idx, eos_idx, pad_idx , numpy_embedding = model_embedding, num_layers=num_layers ,rnn_type='gru',bidirectional= False).cuda()

In [28]:
def kl_anneal_function(anneal_function, step, k, x0):
    if anneal_function == 'logistic':
        return float(1/(1+np.exp(-k*(step-x0))))
    elif anneal_function == 'linear':
        return min(1, step/x0)

In [29]:
NLL = torch.nn.NLLLoss(size_average=False, ignore_index = pad_idx)

In [30]:
def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):

    # cut-off unnecessary padding from target, and flatten
    target = target[:, :torch.max(length)].contiguous().view(-1)
    logp = logp.view(-1, logp.size(2))

    # Negative Log Likelihood
    NLL_loss = NLL(logp, target)

    # KL Divergence
    KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
    KL_weight = kl_anneal_function(anneal_function, step, k, x0)

    return NLL_loss, KL_loss, KL_weight

In [31]:
optimizer = torch.optim.Adam(rvae.parameters(), lr=learning_rate)

In [32]:
rvae

RVAE(
  (embedding): Embedding(60923, 300)
  (encoder): Encoder(
    (encoder): GRU(300, 256, num_layers=2, batch_first=True)
    (hidden2mean): Linear(in_features=512, out_features=128, bias=True)
    (hidden2logv): Linear(in_features=512, out_features=128, bias=True)
  )
  (decoder): Decoder(
    (latent2hidden): Linear(in_features=128, out_features=512, bias=True)
    (embedding): Embedding(60923, 300)
    (word_dropout): Dropout(p=0.5)
    (decoder): GRU(300, 256, num_layers=2, batch_first=True)
    (decoder2outputs): Linear(in_features=256, out_features=256, bias=True)
    (outputs2vocab): Linear(in_features=256, out_features=60923, bias=True)
  )
)

In [33]:
def batch(batch_size,input_var,target_var,length_var):
    start = 0
    end = batch_size
    #if len(input_var)%32 != 0:
    while end < len(input_var):
        batch_input = input_var[start:end]
        batch_target = target_var[start:end]
        batch_length = length_var[start:end]
        temp = end
        end  = end + batch_size
        start = temp
        yield batch_input, batch_target, batch_length
        
    if end >= len(input_var):
        batch_input  = input_var[start:]
        batch_target = target_var[start:]
        batch_length = length_var[start:]
        yield batch_input, batch_target, batch_length

In [34]:
epochs = 10

In [35]:
var_index = int(len(input_index)*0.6)
test_index = int(len(input_index)*0.8)

In [36]:
inputs_len = []
for sentence in input_index:
    inputs_len.append(len(sentence) - sentence.count(0))

In [37]:
step = 0

var_avg_losses = []

train_losses = []
var_losses = []

var_NLL_losses = []
var_KL_losses = []

train_NLL_losses = []
train_KL_losses = []
train_KL_weights = []

iteration = 0
for epoch in tqdm.tqdm(range(epochs)):
    iteration = 0
    for batch_x, batch_y, batch_len in batch(batch_size_fit, input_index[:var_index], target_index[:var_index], \
                                             inputs_len[:var_index]):
        iteration = iteration + 1
        
        x_ = Variable(torch.cuda.LongTensor(batch_x))
        y_ = Variable(torch.cuda.LongTensor(batch_y))
        batch_size = x_.size(0)
        length = torch.cuda.LongTensor(batch_len)

        logp, mean, logv, z, outputs=rvae(x_,length)

        NLL_loss, KL_loss, KL_weight = loss_fn(logp, y_, length, mean, logv, 'logistic', step, k, x0)
        
        loss = (NLL_loss+KL_loss*KL_weight)/batch_size #(NLL_loss/batch_size)


        train_losses.append(float(loss.cpu().data))
        train_NLL_losses.append(NLL_loss.data[0]/batch_size)
        train_KL_losses.append(KL_loss.data[0]/batch_size)
        train_KL_weights.append(KL_weight/batch_size)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1

        if iteration % 1000 == 0 or iteration == (len(input_index[:var_index])-1)//batch_size:
            print("Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
        %( iteration, (len(input_index[:var_index])-1)//batch_size_fit, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight))
            np.savez(L=train_losses,file='train_loss.npz')
            np.savez(L=train_NLL_losses,file='train_NLL_losses.npz')
            np.savez(L=train_KL_losses,file='train_KL_losses.npz') 
            np.savez(L=train_KL_weights,file='train_KL_weights.npz') 
            checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
            torch.save(rvae, checkpoint_path)
            
    iteration = 0
    for batch_x, batch_y, batch_len in batch(batch_size_fit, input_index[var_index:test_index], target_index[var_index:test_index], \
                                             inputs_len[var_index:test_index]):
        iteration = iteration + 1
        
        x_ = Variable(torch.cuda.LongTensor(batch_x))
        y_ = Variable(torch.cuda.LongTensor(batch_y))
        batch_size = x_.size(0)
        length = torch.cuda.LongTensor(batch_len)

        logp, mean, logv, z, outputs=rvae(x_,length)

        NLL_loss, KL_loss, KL_weight = loss_fn(logp, y_, length, mean, logv, 'logistic', step, k, x0)

        loss = (NLL_loss + KL_loss*KL_weight)/batch_size#(NLL_loss/batch_size)


        var_losses.append(float(loss.cpu().data))
        var_NLL_losses.append(NLL_loss.data[0]/batch_size)
        var_KL_losses.append(KL_loss.data[0]/batch_size)



        if iteration % 1000 == 0 or iteration == (len(input_index[var_index:])-1)//batch_size:
            print("Valid Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
        %( iteration, (len(input_index[var_index:test_index])-1)//batch_size_fit, loss.data[0], NLL_loss.data[0]/batch_size, KL_loss.data[0]/batch_size, KL_weight))
            np.savez(L=var_losses,file='var_loss.npz')
            np.savez(L=var_NLL_losses,file='var_NLL_loss.npz')
            np.savez(L=var_KL_losses,file='var_KL_loss.npz') 
            checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
            torch.save(rvae, checkpoint_path)
            
    checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
    torch.save(rvae, checkpoint_path)
    print("Model saved at %s"%checkpoint_path)
    print("Epoch %02d/%i, Valid Mean ELBO %9.4f"%( epoch, epochs, np.mean(np.array(var_losses))))
    var_avg_losses.append(np.mean(np.array(var_losses)))
    np.savez(L=var_avg_losses,file='var_avg_loss.npz')

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Batch 1000/6090, Loss   26.2992, NLL-Loss   26.2991, KL-Loss  135.8435, KL-Weight  0.000


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Batch 2000/6090, Loss   34.9820, NLL-Loss   34.9808, KL-Loss  201.1415, KL-Weight  0.000
Batch 3000/6090, Loss   33.3733, NLL-Loss   33.3623, KL-Loss  243.6513, KL-Weight  0.000
Batch 4000/6090, Loss   28.4656, NLL-Loss   28.4005, KL-Loss  194.5762, KL-Weight  0.000
Batch 5000/6090, Loss   32.3901, NLL-Loss   32.1686, KL-Loss   89.7368, KL-Weight  0.002
Batch 6000/6090, Loss   27.0635, NLL-Loss   26.6757, KL-Loss   21.6028, KL-Weight  0.018
Batch 6090/6090, Loss   28.8794, NLL-Loss   28.4053, KL-Loss   22.1344, KL-Weight  0.021
Valid Batch 1000/2030, Loss   30.8294, NLL-Loss   30.3533, KL-Loss   22.1478, KL-Weight  0.021
Valid Batch 2000/2030, Loss   28.4697, NLL-Loss   27.9569, KL-Loss   23.8510, KL-Weight  0.021
Model saved at won\2018-Jun-22-08-59-57\E0.pytorch
Epoch 00/10, Valid Mean ELBO   30.2034


 10%|███████▉                                                                       | 1/10 [25:35<3:50:23, 1535.91s/it]

Batch 1000/6090, Loss   25.2133, NLL-Loss   24.4313, KL-Loss    5.6083, KL-Weight  0.139
Batch 2000/6090, Loss   35.1825, NLL-Loss   34.5177, KL-Loss    1.2201, KL-Weight  0.545
Batch 3000/6090, Loss   33.9646, NLL-Loss   33.6619, KL-Loss    0.3369, KL-Weight  0.898
Batch 4000/6090, Loss   29.9831, NLL-Loss   29.7806, KL-Loss    0.2056, KL-Weight  0.985
Batch 5000/6090, Loss   34.1459, NLL-Loss   34.0424, KL-Loss    0.1036, KL-Weight  0.998
Batch 6000/6090, Loss   27.7255, NLL-Loss   27.6682, KL-Loss    0.0573, KL-Weight  1.000
Batch 6090/6090, Loss   29.7764, NLL-Loss   29.7113, KL-Loss    0.0651, KL-Weight  1.000
Valid Batch 1000/2030, Loss   31.7401, NLL-Loss   31.4930, KL-Loss    0.2471, KL-Weight  1.000
Valid Batch 2000/2030, Loss   30.5516, NLL-Loss   30.4076, KL-Loss    0.1440, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E1.pytorch
Epoch 01/10, Valid Mean ELBO   30.8818


 20%|███████████████▊                                                               | 2/10 [51:14<3:24:56, 1537.08s/it]

Batch 1000/6090, Loss   26.0634, NLL-Loss   25.9629, KL-Loss    0.1005, KL-Weight  1.000
Batch 2000/6090, Loss   34.9210, NLL-Loss   34.6772, KL-Loss    0.2438, KL-Weight  1.000
Batch 3000/6090, Loss   32.4568, NLL-Loss   32.4262, KL-Loss    0.0306, KL-Weight  1.000
Batch 4000/6090, Loss   29.4672, NLL-Loss   29.3649, KL-Loss    0.1023, KL-Weight  1.000
Batch 5000/6090, Loss   33.6877, NLL-Loss   33.5749, KL-Loss    0.1127, KL-Weight  1.000
Batch 6000/6090, Loss   27.5254, NLL-Loss   27.4743, KL-Loss    0.0511, KL-Weight  1.000
Batch 6090/6090, Loss   29.5697, NLL-Loss   29.5131, KL-Loss    0.0566, KL-Weight  1.000
Valid Batch 1000/2030, Loss   31.6462, NLL-Loss   31.3941, KL-Loss    0.2522, KL-Weight  1.000
Valid Batch 2000/2030, Loss   29.8575, NLL-Loss   29.7213, KL-Loss    0.1362, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E2.pytorch
Epoch 02/10, Valid Mean ELBO   31.0470


 30%|███████████████████████                                                      | 3/10 [1:16:59<2:59:39, 1539.87s/it]

Batch 1000/6090, Loss   25.8831, NLL-Loss   25.8117, KL-Loss    0.0714, KL-Weight  1.000
Batch 2000/6090, Loss   33.2400, NLL-Loss   32.8986, KL-Loss    0.3414, KL-Weight  1.000
Batch 3000/6090, Loss   30.1591, NLL-Loss   30.1129, KL-Loss    0.0462, KL-Weight  1.000
Batch 4000/6090, Loss   29.6131, NLL-Loss   29.5157, KL-Loss    0.0974, KL-Weight  1.000
Batch 5000/6090, Loss   32.6769, NLL-Loss   32.5440, KL-Loss    0.1330, KL-Weight  1.000
Batch 6000/6090, Loss   27.1206, NLL-Loss   27.0327, KL-Loss    0.0880, KL-Weight  1.000
Batch 6090/6090, Loss   29.4946, NLL-Loss   29.4172, KL-Loss    0.0775, KL-Weight  1.000
Valid Batch 1000/2030, Loss   32.1239, NLL-Loss   31.8099, KL-Loss    0.3140, KL-Weight  1.000
Valid Batch 2000/2030, Loss   29.6150, NLL-Loss   29.4274, KL-Loss    0.1876, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E3.pytorch
Epoch 03/10, Valid Mean ELBO   31.1100


 40%|██████████████████████████████▊                                              | 4/10 [1:42:43<2:34:05, 1540.97s/it]

Batch 1000/6090, Loss   25.8131, NLL-Loss   25.7308, KL-Loss    0.0824, KL-Weight  1.000
Batch 2000/6090, Loss   32.4201, NLL-Loss   32.1039, KL-Loss    0.3162, KL-Weight  1.000
Batch 3000/6090, Loss   28.4752, NLL-Loss   28.4266, KL-Loss    0.0487, KL-Weight  1.000
Batch 4000/6090, Loss   29.2516, NLL-Loss   29.1114, KL-Loss    0.1402, KL-Weight  1.000
Batch 5000/6090, Loss   32.2883, NLL-Loss   32.1074, KL-Loss    0.1809, KL-Weight  1.000
Batch 6000/6090, Loss   26.8596, NLL-Loss   26.7579, KL-Loss    0.1017, KL-Weight  1.000
Batch 6090/6090, Loss   29.4204, NLL-Loss   29.3280, KL-Loss    0.0924, KL-Weight  1.000
Valid Batch 1000/2030, Loss   32.2172, NLL-Loss   31.9556, KL-Loss    0.2616, KL-Weight  1.000
Valid Batch 2000/2030, Loss   28.9914, NLL-Loss   28.7954, KL-Loss    0.1960, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E4.pytorch
Epoch 04/10, Valid Mean ELBO   31.1400


 50%|██████████████████████████████████████▌                                      | 5/10 [2:08:30<2:08:30, 1542.10s/it]

Batch 1000/6090, Loss   25.7105, NLL-Loss   25.6395, KL-Loss    0.0710, KL-Weight  1.000
Batch 2000/6090, Loss   31.3473, NLL-Loss   30.9061, KL-Loss    0.4412, KL-Weight  1.000
Batch 3000/6090, Loss   27.7331, NLL-Loss   27.6804, KL-Loss    0.0527, KL-Weight  1.000
Batch 4000/6090, Loss   29.3257, NLL-Loss   29.1634, KL-Loss    0.1623, KL-Weight  1.000
Batch 5000/6090, Loss   31.5043, NLL-Loss   31.3431, KL-Loss    0.1612, KL-Weight  1.000
Batch 6000/6090, Loss   26.7720, NLL-Loss   26.6613, KL-Loss    0.1106, KL-Weight  1.000
Batch 6090/6090, Loss   29.3182, NLL-Loss   29.2131, KL-Loss    0.1051, KL-Weight  1.000
Valid Batch 1000/2030, Loss   32.6471, NLL-Loss   32.3611, KL-Loss    0.2860, KL-Weight  1.000
Valid Batch 2000/2030, Loss   28.8848, NLL-Loss   28.6939, KL-Loss    0.1909, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E5.pytorch
Epoch 05/10, Valid Mean ELBO   31.1695


 60%|██████████████████████████████████████████████▏                              | 6/10 [2:34:14<1:42:49, 1542.44s/it]

Batch 1000/6090, Loss   25.4232, NLL-Loss   25.3315, KL-Loss    0.0917, KL-Weight  1.000
Batch 2000/6090, Loss   31.0069, NLL-Loss   30.6073, KL-Loss    0.3996, KL-Weight  1.000
Batch 3000/6090, Loss   26.6676, NLL-Loss   26.6094, KL-Loss    0.0581, KL-Weight  1.000
Batch 4000/6090, Loss   29.6052, NLL-Loss   29.4148, KL-Loss    0.1904, KL-Weight  1.000
Batch 5000/6090, Loss   31.3052, NLL-Loss   31.0980, KL-Loss    0.2072, KL-Weight  1.000
Batch 6000/6090, Loss   26.4837, NLL-Loss   26.3557, KL-Loss    0.1279, KL-Weight  1.000
Batch 6090/6090, Loss   28.8045, NLL-Loss   28.6808, KL-Loss    0.1237, KL-Weight  1.000
Valid Batch 1000/2030, Loss   32.5082, NLL-Loss   32.1918, KL-Loss    0.3164, KL-Weight  1.000
Valid Batch 2000/2030, Loss   28.2941, NLL-Loss   28.0674, KL-Loss    0.2267, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E6.pytorch
Epoch 06/10, Valid Mean ELBO   31.1795


 70%|█████████████████████████████████████████████████████▉                       | 7/10 [3:00:02<1:17:09, 1543.21s/it]

Batch 1000/6090, Loss   25.5943, NLL-Loss   25.5004, KL-Loss    0.0938, KL-Weight  1.000
Batch 2000/6090, Loss   30.5819, NLL-Loss   30.1929, KL-Loss    0.3890, KL-Weight  1.000
Batch 3000/6090, Loss   26.2378, NLL-Loss   26.1759, KL-Loss    0.0619, KL-Weight  1.000
Batch 4000/6090, Loss   29.4084, NLL-Loss   29.2435, KL-Loss    0.1649, KL-Weight  1.000
Batch 5000/6090, Loss   30.5386, NLL-Loss   30.3519, KL-Loss    0.1867, KL-Weight  1.000
Batch 6000/6090, Loss   26.5869, NLL-Loss   26.4388, KL-Loss    0.1481, KL-Weight  1.000
Batch 6090/6090, Loss   28.9019, NLL-Loss   28.7832, KL-Loss    0.1187, KL-Weight  1.000
Valid Batch 1000/2030, Loss   32.6744, NLL-Loss   32.3857, KL-Loss    0.2886, KL-Weight  1.000
Valid Batch 2000/2030, Loss   28.1699, NLL-Loss   27.9264, KL-Loss    0.2436, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E7.pytorch
Epoch 07/10, Valid Mean ELBO   31.1888


 80%|███████████████████████████████████████████████████████████████▏               | 8/10 [3:26:24<51:36, 1548.09s/it]

Batch 1000/6090, Loss   25.3644, NLL-Loss   25.2616, KL-Loss    0.1028, KL-Weight  1.000
Batch 2000/6090, Loss   29.8520, NLL-Loss   29.3671, KL-Loss    0.4850, KL-Weight  1.000
Batch 3000/6090, Loss   25.9959, NLL-Loss   25.9196, KL-Loss    0.0763, KL-Weight  1.000
Batch 4000/6090, Loss   29.4884, NLL-Loss   29.3292, KL-Loss    0.1591, KL-Weight  1.000
Batch 5000/6090, Loss   30.7202, NLL-Loss   30.4387, KL-Loss    0.2815, KL-Weight  1.000
Batch 6000/6090, Loss   26.4294, NLL-Loss   26.2780, KL-Loss    0.1514, KL-Weight  1.000
Batch 6090/6090, Loss   28.8355, NLL-Loss   28.6927, KL-Loss    0.1428, KL-Weight  1.000
Valid Batch 1000/2030, Loss   32.8058, NLL-Loss   32.5136, KL-Loss    0.2922, KL-Weight  1.000
Valid Batch 2000/2030, Loss   28.4514, NLL-Loss   28.1924, KL-Loss    0.2590, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E8.pytorch
Epoch 08/10, Valid Mean ELBO   31.1996


 90%|███████████████████████████████████████████████████████████████████████        | 9/10 [3:52:51<25:52, 1552.35s/it]

Batch 1000/6090, Loss   25.5517, NLL-Loss   25.4366, KL-Loss    0.1151, KL-Weight  1.000
Batch 2000/6090, Loss   29.8253, NLL-Loss   29.4247, KL-Loss    0.4007, KL-Weight  1.000
Batch 3000/6090, Loss   25.7330, NLL-Loss   25.6541, KL-Loss    0.0789, KL-Weight  1.000
Batch 4000/6090, Loss   29.1125, NLL-Loss   28.9454, KL-Loss    0.1671, KL-Weight  1.000
Batch 5000/6090, Loss   30.1598, NLL-Loss   29.8506, KL-Loss    0.3092, KL-Weight  1.000
Batch 6000/6090, Loss   26.2937, NLL-Loss   26.1478, KL-Loss    0.1459, KL-Weight  1.000
Batch 6090/6090, Loss   28.4183, NLL-Loss   28.2854, KL-Loss    0.1329, KL-Weight  1.000
Valid Batch 1000/2030, Loss   33.2028, NLL-Loss   32.9127, KL-Loss    0.2901, KL-Weight  1.000
Valid Batch 2000/2030, Loss   28.1908, NLL-Loss   27.9192, KL-Loss    0.2715, KL-Weight  1.000
Model saved at won\2018-Jun-22-08-59-57\E9.pytorch
Epoch 09/10, Valid Mean ELBO   31.2087


100%|██████████████████████████████████████████████████████████████████████████████| 10/10 [4:19:05<00:00, 1554.53s/it]


In [None]:
save_model_path

In [None]:
checkpoint_path = 'won\\2018-Jun-20-02-18-18\\E9.pytorch'

In [38]:
try:
    rvae = torch.load(checkpoint_path)
    print("\n--------model restored--------\n")
except:
    print("\n--------model not restored--------\n")
    pass
rvae.cuda()
rvae.eval()


--------model restored--------



RVAE(
  (embedding): Embedding(60923, 300)
  (encoder): Encoder(
    (encoder): GRU(300, 256, num_layers=2, batch_first=True)
    (hidden2mean): Linear(in_features=512, out_features=128, bias=True)
    (hidden2logv): Linear(in_features=512, out_features=128, bias=True)
  )
  (decoder): Decoder(
    (latent2hidden): Linear(in_features=128, out_features=512, bias=True)
    (embedding): Embedding(60923, 300)
    (word_dropout): Dropout(p=0.5)
    (decoder): GRU(300, 256, num_layers=2, batch_first=True)
    (decoder2outputs): Linear(in_features=256, out_features=256, bias=True)
    (outputs2vocab): Linear(in_features=256, out_features=60923, bias=True)
  )
)

In [39]:
def inference(batch, z):
    
    hidden = rvae.decoder.latent2hidden(z)
    hidden = hidden.view(rvae.decoder.hidden_factor, batch_size, rvae.hidden_size)
    
    t = 0
    generations=torch.cuda.FloatTensor(batch_size, max_sequence_length).fill_(pad_idx).long()
    
    while(t<max_sequence_length ):
        if t == 0:
            input_sequence = Variable(torch.Tensor(batch_size).fill_(sos_idx).long()).cuda()
        input_sequence = input_sequence.unsqueeze(1)
        input_embedding = rvae.embedding(input_sequence)
        output, hidden = rvae.decoder.decoder(input_embedding, hidden)
        logits = rvae.decoder.outputs2vocab(output)
        samples = torch.topk(logits,1,dim=-1)[1].squeeze()
        input_sequence = samples.squeeze()
        generations[:,t] = input_sequence.data
        if ((torch.sum(input_sequence)/input_sequence.size(0)) == eos_idx).cpu().data.numpy():
            break
        t+=1
    return generations

def print_inference(generations):


    w2i, i2w = word2index, index2word

    samples = generations.cpu().numpy()

    sent_str = [str()]*len(samples)

    for i, sent in enumerate(samples):
        for word_id in sent:
            if word_id == w2i['_PAD_']: 
                break
            elif word_id == eos_idx:
                break
            sent_str[i] += i2w[word_id] + " "
        sent_str[i] = sent_str[i].strip()
    return sent_str

def print_input(generations):


    w2i, i2w = word2index, index2word

    samples = generations.cpu().data.numpy()

    sent_str = [str()]*len(samples)

    for i, sent in enumerate(samples):
        for word_id in sent:
            if word_id == pad_idx: 
                break
            elif word_id == eos_idx:
                break
            sent_str[i] += i2w[word_id] + " "
        sent_str[i] = sent_str[i].strip()
    return sent_str

In [40]:
inputs_len = []
for sentence in input_index[:15]:
    inputs_len.append(len(sentence) - sentence.count(0))

In [41]:
x_ = Variable(torch.cuda.LongTensor(input_index[:15]))
y_ = Variable(torch.cuda.LongTensor(target_index[0]))
batch_size = x_.size(0)
length = torch.cuda.LongTensor(inputs_len)

logp, mean, logv, z, outputs=rvae(x_,length)

In [42]:
target_print = []
for i in target_index[:15]:
    temp = []
    for j in i :
        if j == 0:
            break
        else :
            temp.append(index2word[j])
    target_print.append(temp)

In [43]:
generations=inference(batch_size, z)
sent_str = print_inference(generations)
input_str = print_input(x_)
for i, j, k in zip(sent_str, input_str, target_print):
    print('input: '+j)
    print("inference: "+i)
    print('target: ' + ' '.join(k))

input: _STA_ i have bought several of the vitality canned dog food products and have found them all to be of good quality . the product looks more like a stew than a processed meat and it smells better . my labrador is finicky and she appreciates this product better than most .
inference: 
target: good quality dog food _EOS_
input: _STA_ product arrived labeled as jumbo salted peanuts ... the peanuts were actually small sized unsalted . not sure if this was an error or if the vendor intended to represent the product as `` jumbo '' .
inference: 
target: not as advertised _EOS_
input: _STA_ this is a confection that has been around a few centuries . it is a light , pillowy citrus gelatin with nuts - in this case filberts . and it is cut into tiny squares and then liberally coated with powdered sugar . and it is a tiny mouthful of heaven . not too chewy , and very flavorful . i highly recommend this yummy treat . if you are familiar with the story of c.s . lewis ' `` the lion , the witch 

In [None]:
batch_size = 10

z = Variable(torch.randn([batch_size, latent_size])).cuda()

generations=inference(batch_size, z)

sent_str = print_inference(generations)

for i in sent_str:
    print(i)

In [None]:
steps = 4

z1 = torch.randn([latent_size]).numpy()
z2 = torch.randn([latent_size]).numpy()

interpolation = np.zeros((z1.shape[0], steps + 2))

for dim, (s,e) in enumerate(zip(z1,z2)):
    interpolation[dim] = np.linspace(s,e,steps+2)
    
z = Variable(torch.from_numpy(interpolation.T).float()).cuda()

In [None]:
batch_size = z.size(0)

generations=inference(batch_size, z)

sent_str = print_inference(generations)

for i in sent_str:
    print(i)

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(rvae.embedding(x_)[8][:10].cpu().data.numpy())
fig.colorbar(cax)
plt.savefig('books_read.png')
plt.show()

In [None]:
torch.cuda.empty_cache()