In [240]:
import os
import json
import time
import torch
import argparse
import numpy as np
from multiprocessing import cpu_count
from torch.utils.data import DataLoader
from collections import OrderedDict, defaultdict
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.utils.rnn as rnn_utils

from ptb import PTB
from utils import to_var, idx2word, expierment_name
from model import SentenceVAE

In [241]:
# UTC timestamp (year-month abbreviation-day-hour-minute-second) that names
# this run's checkpoint directory.
TIMESTAMP_FORMAT = '%Y-%b-%d-%H-%M-%S'
ts = time.strftime(TIMESTAMP_FORMAT, time.gmtime())

In [242]:
# Checkpoints for this run are written under won/<timestamp>.
# Create the directory with exactly the relative path that is later joined
# with the checkpoint file name; the former '.\\' prefix only resolved to that
# directory on Windows.  exist_ok keeps the cell safe to re-run.
save_model_path = os.path.join('won', ts)
os.makedirs(save_model_path, exist_ok=True)

In [243]:
# Maps split name -> dataset object; populated by the per-split loading loop below.
datasets = OrderedDict()

In [244]:
# The test split is switched off; set the flag to True to append it after
# train/valid.
use_test_split = False
splits = ['train', 'valid']
if use_test_split:
    splits = splits + ['test']

In [245]:
# Display the active splits.
splits

['train', 'valid']

In [246]:
# Build one PTB dataset per active split; every split shares the same options.
# NOTE(review): max_sequence_length=60 repeats the value of the
# `max_sequence_length` hyperparameter defined in a later cell.
ptb_options = dict(
    data_dir='data',
    create_data=False,
    max_sequence_length=60,
    min_occ=1,
)
for split in splits:
    datasets[split] = PTB(split=split, **ptb_options)

In [247]:
# Vocabulary size and special-token ids are all read from the training split.
vocab_size = datasets['train'].vocab_size

In [248]:
# Id of the start-of-sentence token (fed as the first decoder input in inference).
sos_idx=datasets['train'].sos_idx

In [249]:
# Id of the end-of-sentence token (stops a sequence during inference).
eos_idx=datasets['train'].eos_idx

In [250]:
# Id of the padding token (ignored by the loss, used to pre-fill generation buffers).
pad_idx=datasets['train'].pad_idx

In [308]:
# Hyperparameters shared by the model constructors, the optimizer and the loss.
max_sequence_length = 60  # passed to the models; caps the number of steps in inference()
embedding_size = 300
hidden_size = 256
word_dropout = 0.5  # dropout probability handed to the model constructors
latent_size = 16
num_layers = 1
# NOTE(review): `bidirectional` and `rnn_type` are not forwarded by the model
# construction cells below (they pass the literal 'gru' and rely on each
# class's `bidirectional` default) — confirm whether that is intended.
bidirectional = False
batch_size_fit = 32  # DataLoader batch size in the training loop
rnn_type = 'gru'
learning_rate = 0.001  # Adam learning rate
k = 0.0005  # slope of the logistic KL-annealing schedule
x0 = 3500  # step at which the logistic KL weight reaches 0.5

In [309]:
def _rnn_class_for(rnn_type):
    """Return the torch.nn recurrent class for 'rnn', 'gru' or 'lstm'.

    Raises:
        ValueError: if `rnn_type` is none of the supported names.
    """
    rnn_classes = {'rnn': nn.RNN, 'gru': nn.GRU, 'lstm': nn.LSTM}
    if rnn_type not in rnn_classes:
        raise ValueError("unknown rnn_type %r; expected one of %s"
                         % (rnn_type, sorted(rnn_classes)))
    return rnn_classes[rnn_type]


def _index_like(index, reference):
    """Return `index` on the same device (CPU or CUDA) as `reference`."""
    return index.cuda() if reference.is_cuda else index.cpu()


class RVAE(nn.Module):
    """Sentence VAE assembled from a separate `Encoder` and `Decoder`.

    Args:
        vocab_size: number of rows in the embedding tables / output classes.
        embedding_size: width of the token embeddings.
        max_sequence_length, sos_idx, eos_idx, pad_idx: stored on the module;
            not used by `forward`.
        hidden_size: hidden width of both recurrent networks.
        word_dropout: dropout probability applied to the decoder's input
            embeddings.
        latent_size: dimensionality of the latent code.
        rnn_type: 'rnn', 'gru' or 'lstm'.
        num_layers, bidirectional: forwarded to both recurrent networks.
    """

    def __init__(self,vocab_size, embedding_size, max_sequence_length, hidden_size, word_dropout, latent_size,
                sos_idx, eos_idx, pad_idx, rnn_type='rnn' , num_layers=1, bidirectional=True):

        super().__init__()

        self.max_sequence_length = max_sequence_length
        self.sos_idx = sos_idx
        self.eos_idx = eos_idx
        self.pad_idx = pad_idx

        self.latent_size = latent_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        shared = dict(vocab_size=vocab_size, embedding_size=embedding_size, hidden_size=hidden_size,
                      num_layers=num_layers, bidirectional=bidirectional, latent_size=latent_size,
                      rnn_type=rnn_type)
        self.encoder = Encoder(**shared)
        self.decoder = Decoder(word_dropout=word_dropout, **shared)
        # As before, the sub-modules live on the GPU after construction, but
        # only when one exists, so the class is also usable on CPU-only hosts.
        if torch.cuda.is_available():
            self.encoder.cuda()
            self.decoder.cuda()

    def forward(self,x,length):
        """Encode `x`, sample a latent code and decode with teacher forcing.

        Args:
            x: (batch, seq) LongTensor of token ids, on the model's device.
            length: (batch,) LongTensor with the unpadded length of each row;
                every length must be at least 1.

        Returns:
            (logp, mu, logvar, z, outputs), all in the row order of `x`.
            `logp` and `outputs` are trimmed to the longest length in the
            batch along the sequence axis.
        """
        x = Variable(x)
        mu, logvar, z = self.encoder(x, length)
        # The lengths are passed explicitly; the decoder no longer depends on
        # a global named `length` happening to exist.
        logp, outputs = self.decoder(x, z, length)

        return logp, mu, logvar, z, outputs


class Encoder(nn.Module):
    """Recurrent encoder mapping padded token sequences to a Gaussian posterior."""

    def __init__(self,vocab_size,embedding_size, hidden_size, latent_size, bidirectional=True, num_layers = 1,rnn_type='rnn'):
        super(Encoder,self).__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.latent_size = latent_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        rnn = _rnn_class_for(self.rnn_type)

        self.embedding = nn.Embedding(vocab_size,embedding_size)
        self.encoder = rnn(self.embedding_size, self.hidden_size, num_layers = self.num_layers, bidirectional = self.bidirectional, batch_first = True)

        # Number of (layer, direction) final states returned by the RNN.
        self.hidden_factor = (2 if self.bidirectional else 1) * self.num_layers

        self.hidden2mean = nn.Linear(self.hidden_size* self.hidden_factor, self.latent_size)
        self.hidden2logv = nn.Linear(self.hidden_size* self.hidden_factor, self.latent_size)

    def reparametrize(self, mu, logvar):
        """Sample z = mu + eps * exp(0.5 * logvar) with eps ~ N(0, I).

        The noise is allocated from `std` itself so it lands on the same
        device and has the same dtype as the posterior parameters.
        """
        std = torch.exp(0.5 * logvar)
        eps = Variable(std.data.new(std.size()).normal_())
        return eps * std + mu

    def forward(self,x,length):
        """Return (mu, logvar, z) for each row of `x`, in the order of `x`.

        Args:
            x: (batch, seq) LongTensor of token ids.
            length: (batch,) LongTensor of unpadded lengths (each >= 1).
        """
        batch_size = x.size(0)
        sorted_lengths, sorted_idx = torch.sort(length, descending=True)
        sorted_idx = _index_like(sorted_idx, x)
        _, reversed_idx = torch.sort(sorted_idx)

        # Pack so the final hidden state is taken at each row's true last
        # token instead of after the padding.
        packed = rnn_utils.pack_padded_sequence(
            self.embedding(x[sorted_idx]), sorted_lengths.tolist(), batch_first=True)
        _, hidden = self.encoder(packed)
        if isinstance(hidden, tuple):
            # LSTM returns (h_n, c_n); only h_n feeds the posterior.
            hidden = hidden[0]

        # (hidden_factor, batch, hidden) -> (batch, hidden_factor * hidden).
        # The transpose keeps every row's states together; a plain view would
        # interleave different batch rows when hidden_factor > 1, and
        # squeeze() would drop the batch axis for a batch of one.
        hidden = hidden.transpose(0, 1).contiguous().view(batch_size, self.hidden_size * self.hidden_factor)
        # Undo the length sort so outputs line up with the caller's rows.
        hidden = hidden[reversed_idx]

        mu = self.hidden2mean(hidden)
        logvar = self.hidden2logv(hidden)
        z = self.reparametrize(mu, logvar)

        return mu,logvar,z


class Decoder(nn.Module):
    """Recurrent decoder initialised from a latent code and run with teacher forcing."""

    def __init__(self,vocab_size,embedding_size, hidden_size, latent_size, bidirectional=True, num_layers = 1,rnn_type='rnn',word_dropout = 0.5):
        super(Decoder,self).__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.latent_size = latent_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        rnn = _rnn_class_for(self.rnn_type)

        self.hidden_factor = (2 if self.bidirectional else 1) * self.num_layers
        self.latent2hidden = nn.Linear(latent_size, hidden_size * self.hidden_factor)

        self.embedding = nn.Embedding(vocab_size,embedding_size)
        self.word_dropout = nn.Dropout(p=word_dropout)

        self.decoder = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, batch_first=True)
        self.outputs2vocab = nn.Linear(hidden_size * (2 if bidirectional else 1), vocab_size)

    def forward(self,x,z,length=None):
        """Decode `x` conditioned on `z`.

        Args:
            x: (batch, seq) LongTensor of input token ids.
            z: (batch, latent_size) latent codes, row-aligned with `x`.
            length: optional (batch,) LongTensor of unpadded lengths (each
                >= 1).  When given, the input is packed and the outputs are
                trimmed to the longest length; when omitted, the full padded
                sequence is decoded.

        Returns:
            (logp, outputs): log-probabilities over the vocabulary with shape
            (batch, seq', vocab_size) and the raw RNN outputs, both in the
            row order of `x`.
        """
        batch_size = x.size(0)

        # (batch, hidden_factor * hidden) -> (hidden_factor, batch, hidden),
        # keeping each row's states together.
        hidden = self.latent2hidden(z)
        hidden = hidden.view(batch_size, self.hidden_factor, self.hidden_size).transpose(0, 1).contiguous()

        # The dropout module was previously constructed but never applied.
        rnn_input = self.word_dropout(self.embedding(x))

        if length is not None:
            sorted_lengths, sorted_idx = torch.sort(length, descending=True)
            sorted_idx = _index_like(sorted_idx, x)
            _, reversed_idx = torch.sort(sorted_idx)
            hidden = hidden[:, sorted_idx].contiguous()
            rnn_input = rnn_utils.pack_padded_sequence(
                rnn_input[sorted_idx], sorted_lengths.tolist(), batch_first=True)

        if self.rnn_type == 'lstm':
            # LSTM needs an (h_0, c_0) pair; the cell state starts at zero.
            state = (hidden, Variable(hidden.data.new(hidden.size()).zero_()))
        else:
            state = hidden

        outputs,_ = self.decoder(rnn_input, state)

        if length is not None:
            outputs = rnn_utils.pad_packed_sequence(outputs, batch_first=True)[0]
            outputs = outputs[reversed_idx]

        # Normalise over the vocabulary axis explicitly; the implicit default
        # for a 3-D input is not the last axis.
        logp = nn.functional.log_softmax(self.outputs2vocab(outputs), dim=-1)

        return logp,outputs
        


In [312]:
class RVAE_ETOE(nn.Module):
    """End-to-end sentence VAE: one module holding encoder, latent layers and decoder.

    Args:
        vocab_size: number of embedding rows / output classes.
        embedding_size: width of the token embeddings (shared by encoder and
            decoder).
        max_sequence_length: maximum number of tokens produced by `inference`.
        hidden_size: hidden width of both recurrent networks.
        word_dropout: dropout probability applied to the decoder's input
            embeddings during `forward`.
        latent_size: dimensionality of the latent code.
        sos_idx, eos_idx, pad_idx: special token ids used by `inference`.
        rnn_type: 'rnn' or 'gru'.
        num_layers, bidirectional: forwarded to both recurrent networks.

    Raises:
        ValueError: if `rnn_type` is not 'rnn' or 'gru'.
    """

    def __init__(self,vocab_size, embedding_size, max_sequence_length, hidden_size, word_dropout, latent_size, sos_idx, eos_idx, pad_idx, rnn_type='rnn' , num_layers=1, bidirectional=False):
        super().__init__()
        self.max_sequence_length = max_sequence_length
        self.sos_idx = sos_idx
        self.eos_idx = eos_idx
        self.pad_idx = pad_idx

        self.latent_size = latent_size

        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.word_dropout = nn.Dropout(p=word_dropout)

        if rnn_type == 'rnn':
            rnn = nn.RNN
        elif rnn_type == 'gru':
            rnn = nn.GRU
        # elif rnn_type == 'lstm':
        #     rnn = nn.LSTM
        else:
            raise ValueError("unsupported rnn_type %r; expected 'rnn' or 'gru'" % (rnn_type,))

        self.encoder_rnn = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, batch_first=True)
        self.decoder_rnn = rnn(embedding_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, batch_first=True)

        # Number of (layer, direction) hidden states per sequence.
        self.hidden_factor = (2 if bidirectional else 1) * num_layers

        self.hidden2mean = nn.Linear(hidden_size * self.hidden_factor, latent_size)
        self.hidden2logv = nn.Linear(hidden_size * self.hidden_factor, latent_size)
        self.latent2hidden = nn.Linear(latent_size, hidden_size * self.hidden_factor)
        self.outputs2vocab = nn.Linear(hidden_size * (2 if bidirectional else 1), vocab_size)

    def _to_model_device(self, tensor):
        """Move `tensor` to wherever this module's parameters live (CPU or CUDA)."""
        if self.latent2hidden.weight.is_cuda:
            return tensor.cuda()
        return tensor.cpu()

    def _latent_to_hidden(self, z, batch_size):
        """Project latent codes to an RNN initial state of shape (hidden_factor, batch, hidden)."""
        hidden = self.latent2hidden(z)
        # Split per row first, then move the state axis to the front; a direct
        # view to (hidden_factor, batch, hidden) would interleave batch rows.
        return hidden.view(batch_size, self.hidden_factor, self.hidden_size).transpose(0, 1).contiguous()

    def forward(self, input_sequence, length):
        """Run the VAE with teacher forcing.

        Args:
            input_sequence: (batch, seq) LongTensor of token ids.
            length: (batch,) LongTensor of unpadded lengths (each >= 1).

        Returns:
            (logp, mu, logvar, z, flat_outputs).  `logp` has shape
            (batch, max(length), vocab); `flat_outputs` is the decoder output
            flattened to (batch * max(length), features).  Every per-row
            result is in the row order of `input_sequence`.
        """
        input_sequence = self._to_model_device(Variable(input_sequence))
        batch_size = input_sequence.size(0)
        sorted_lengths, sorted_idx = torch.sort(length, descending=True)
        sorted_idx = self._to_model_device(sorted_idx)
        _, reversed_idx = torch.sort(sorted_idx)
        sorted_lengths = sorted_lengths.tolist()

        input_embedding = self.embedding(input_sequence[sorted_idx])

        # ---- encoder -------------------------------------------------------
        packed_input = rnn_utils.pack_padded_sequence(input_embedding, sorted_lengths, batch_first=True)
        _,hidden = self.encoder_rnn(packed_input)
        # (hidden_factor, batch, hidden) -> (batch, hidden_factor * hidden);
        # transpose first so each row keeps its own states, and avoid
        # squeeze(), which would drop the batch axis for a batch of one.
        hidden = hidden.transpose(0, 1).contiguous().view(batch_size, self.hidden_size*self.hidden_factor)

        # ---- reparameterisation -------------------------------------------
        mu = self.hidden2mean(hidden)
        logvar = self.hidden2logv(hidden)
        std = torch.exp(0.5 * logvar)
        # Noise is allocated from `std` so it shares its device.
        eps = Variable(std.data.new(std.size()).normal_())
        z = eps * std + mu

        # ---- decoder -------------------------------------------------------
        hidden = self._latent_to_hidden(z, batch_size)
        input_embedding = self.word_dropout(input_embedding)
        packed_input = rnn_utils.pack_padded_sequence(input_embedding, sorted_lengths, batch_first=True)
        outputs,_ = self.decoder_rnn(packed_input,hidden)

        padded_outputs = rnn_utils.pad_packed_sequence(outputs, batch_first=True)[0]
        padded_outputs = padded_outputs.contiguous()
        # Undo the length sort.
        padded_outputs = padded_outputs[reversed_idx]
        b,s,_ = padded_outputs.size()
        flat_outputs = padded_outputs.view(-1, padded_outputs.size(2))

        logp = nn.functional.log_softmax(self.outputs2vocab(flat_outputs), dim=-1)
        logp = logp.view(b, s, self.embedding.num_embeddings)

        # mu/logvar/z were computed in length-sorted order; return them in the
        # caller's order like logp.
        return logp, mu[reversed_idx], logvar[reversed_idx], z[reversed_idx], flat_outputs

    def inference(self, n=4, z=None):
        """Greedily decode sentences from latent codes.

        Args:
            n: number of codes to sample from N(0, I) when `z` is None.
            z: optional (batch, latent_size) latent codes to decode instead.

        Returns:
            (generations, z): a (batch, max_sequence_length) LongTensor of
            token ids, pre-filled with `pad_idx` and written up to and
            including each row's first `eos_idx`, and the latent codes used.
        """
        if z is None:
            batch_size = n
            weight = self.latent2hidden.weight.data
            z = Variable(weight.new(batch_size, self.latent_size).normal_())
        else:
            batch_size = z.size(0)
            z = self._to_model_device(z)

        hidden = self._latent_to_hidden(z, batch_size)

        # required for dynamic stopping of sentence generation
        sequence_idx = self._to_model_device(torch.arange(0, batch_size).long()) # all idx of batch
        sequence_running = sequence_idx.clone() # all idx of batch which are still generating
        # Built from a comparison so the mask has whatever dtype this PyTorch
        # version uses for comparison results.
        sequence_mask = sequence_idx >= 0

        running_seqs = sequence_idx.clone() # idx of still generating sequences with respect to current loop

        generations = self._to_model_device(
            torch.zeros(batch_size, self.max_sequence_length).fill_(self.pad_idx).long())

        input_sequence = Variable(self._to_model_device(
            torch.zeros(batch_size).fill_(self.sos_idx).long()))

        t=0
        while(t<self.max_sequence_length and len(running_seqs)>0):

            input_embedding = self.embedding(input_sequence.unsqueeze(1))

            output, hidden = self.decoder_rnn(input_embedding, hidden)

            logits = self.outputs2vocab(output)

            input_sequence = self._sample(logits)

            # save next input
            generations = self._save_sample(generations, input_sequence, sequence_running, t)

            still_running = (input_sequence != self.eos_idx).data

            # update global running sequence
            sequence_mask[sequence_running] = still_running
            sequence_running = sequence_idx.masked_select(sequence_mask)

            # update local running sequences
            running_seqs = running_seqs.masked_select(still_running)

            # prune input and hidden state according to local update
            if len(running_seqs) > 0:
                input_sequence = input_sequence[running_seqs]
                hidden = hidden[:, running_seqs].contiguous()

                running_seqs = self._to_model_device(torch.arange(0, len(running_seqs)).long())

            t += 1
        print(generations)
        return generations, z

    def _sample(self, dist, mode='greedy'):
        """Pick the next token id per running sequence.

        Args:
            dist: scores whose last axis ranges over the vocabulary.
            mode: only 'greedy' (argmax) is implemented.

        Raises:
            ValueError: for any other `mode`.
        """
        if mode != 'greedy':
            raise ValueError("unsupported sampling mode %r; only 'greedy' is implemented" % (mode,))
        _, sample = torch.topk(dist, 1, dim=-1)
        # Drop the top-1 axis and, if present, the single time-step axis, but
        # never the batch axis (a bare squeeze() would for one sequence).
        sample = sample.squeeze(-1)
        if sample.dim() > 1:
            sample = sample.squeeze(-1)

        return sample

    def _save_sample(self, save_to, sample, running_seqs, t):
        """Write `sample` into column `t` of the rows `running_seqs` of `save_to`; return `save_to`."""
        # select only still running
        running_latest = save_to[running_seqs]
        # update token at position t
        running_latest[:,t] = sample.data
        # save back
        save_to[running_seqs] = running_latest

        return save_to
        

In [313]:
# NOTE(review): this RVAE instance is replaced by the RVAE_ETOE instance created
# in the next cell before anything uses it — confirm whether this cell is still needed.
rvae=RVAE(vocab_size, embedding_size, max_sequence_length, hidden_size, word_dropout, latent_size,sos_idx, eos_idx, pad_idx , num_layers=num_layers ,rnn_type='gru')

In [314]:
# End-to-end model that the optimizer and training loop below operate on;
# `bidirectional` is left at the class default.
rvae=RVAE_ETOE(vocab_size, embedding_size, max_sequence_length, hidden_size, word_dropout, latent_size,sos_idx, eos_idx, pad_idx , num_layers=num_layers ,rnn_type='gru').cuda()

In [315]:
def kl_anneal_function(anneal_function, step, k, x0):
    """Return the KL-term weight for training step `step`.

    Args:
        anneal_function: 'logistic' or 'linear'.
        step: number of optimisation steps taken so far.
        k: slope of the logistic schedule (ignored by 'linear').
        x0: midpoint step of the logistic schedule, or the step at which the
            linear schedule reaches 1.

    Returns:
        A weight in [0, 1].

    Raises:
        ValueError: for an unknown `anneal_function` (previously this fell
            through and returned None), or for 'linear' with x0 <= 0.
    """
    if anneal_function == 'logistic':
        return float(1/(1+np.exp(-k*(step-x0))))
    if anneal_function == 'linear':
        if x0 <= 0:
            raise ValueError("linear annealing needs x0 > 0, got %r" % (x0,))
        return min(1, step/x0)
    raise ValueError("unknown anneal_function %r; expected 'logistic' or 'linear'" % (anneal_function,))



In [316]:
# Summed (not averaged) negative log-likelihood over log-probabilities;
# padding targets are ignored.
NLL = torch.nn.NLLLoss(size_average=False, ignore_index=datasets['train'].pad_idx)

In [317]:
# Summed cross-entropy with the same padding rule.
# NOTE(review): only the first of the two `loss_fn` definitions below uses this,
# and that definition is overridden by the second — confirm it is still needed.
cross_entropy = torch.nn.CrossEntropyLoss(size_average=False, ignore_index=datasets['train'].pad_idx)

In [318]:
def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
    """Compute the summed reconstruction loss, the KL divergence and the KL weight.

    NOTE(review): a later cell defines `loss_fn` again (using `NLL` instead of
    `cross_entropy`); when both cells run, that definition replaces this one.

    Args:
        logp: (batch, seq, vocab) scores returned by the model.
        target: (batch, max_seq) LongTensor of target token ids.
        length: (batch,) unpadded lengths; the target is trimmed to their
            maximum so it matches `logp` along the sequence axis.
        mean, logv: posterior mean and log-variance.
        anneal_function, step, k, x0: forwarded to `kl_anneal_function`.

    Returns:
        (NLL_loss, KL_loss, KL_weight); both losses are sums over the batch.
    """
    # cut-off unnecessary padding from target, and flatten.  The limit comes
    # from the `length` argument rather than a global `batch`.
    target = target[:, :int(length.max())].contiguous().view(-1)
    logp = logp.view(-1, logp.size(2))

    # Negative Log Likelihood
    NLL_loss = cross_entropy(logp, target)

    # KL Divergence
    KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
    KL_weight = kl_anneal_function(anneal_function, step, k, x0)

    return NLL_loss, KL_loss, KL_weight

In [319]:
def loss_fn(logp, target, length, mean, logv, anneal_function, step, k, x0):
    """Compute the summed reconstruction loss, the KL divergence and the KL weight.

    Args:
        logp: (batch, seq, vocab) log-probabilities returned by the model.
        target: (batch, max_seq) LongTensor of target token ids.
        length: (batch,) unpadded lengths; the target is trimmed to their
            maximum so it matches `logp` along the sequence axis.
        mean, logv: posterior mean and log-variance.
        anneal_function, step, k, x0: forwarded to `kl_anneal_function`.

    Returns:
        (NLL_loss, KL_loss, KL_weight); both losses are sums over the batch.
    """
    # cut-off unnecessary padding from target, and flatten.  The limit comes
    # from the `length` argument rather than a global `batch`.
    target = target[:, :int(length.max())].contiguous().view(-1)
    logp = logp.view(-1, logp.size(2))

    # Negative Log Likelihood
    NLL_loss = NLL(logp, target)

    # KL Divergence
    KL_loss = -0.5 * torch.sum(1 + logv - mean.pow(2) - logv.exp())
    KL_weight = kl_anneal_function(anneal_function, step, k, x0)

    return NLL_loss, KL_loss, KL_weight

In [320]:
# Adam over every parameter of the current `rvae`.
optimizer = torch.optim.Adam(rvae.parameters(), lr=learning_rate)

In [321]:
# Display the module structure.
rvae

RVAE_ETOE(
  (embedding): Embedding(9877, 300)
  (word_dropout): Dropout(p=0.5)
  (encoder_rnn): GRU(300, 256, batch_first=True)
  (decoder_rnn): GRU(300, 256, batch_first=True)
  (hidden2mean): Linear(in_features=256, out_features=16, bias=True)
  (hidden2logv): Linear(in_features=256, out_features=16, bias=True)
  (latent2hidden): Linear(in_features=16, out_features=256, bias=True)
  (outputs2vocab): Linear(in_features=256, out_features=9877, bias=True)
)

In [322]:
# Number of passes over every split in the training loop.
epochs = 10

In [None]:
# Training / validation loop.
# `losses`, `NLL_losses`, `KL_losses` and `KL_weights` keep the full per-batch
# history (all epochs, all splits) and are periodically dumped to .npz files;
# `avg_losses` holds one mean per (epoch, split).
step = 0
avg_losses = []
losses = []
NLL_losses = []
KL_losses = []
KL_weights = []

# Use CUDA tensors when a GPU is present, CPU tensors otherwise.
long_tensor_type = torch.cuda.LongTensor if torch.cuda.is_available() else torch.LongTensor

for epoch in range(epochs):
    for split in splits:
        data_loader = DataLoader(
                    dataset=datasets[split],
                    batch_size=batch_size_fit,
                    shuffle=split=='train',
                    num_workers=cpu_count(),
                    pin_memory=torch.cuda.is_available()
                )

        if split == 'train':
            rvae.train()
        else:
            print('********************valid**************')
            rvae.eval()

        # Losses of this split in this epoch only, so the reported mean is not
        # diluted by earlier epochs or by the other split.
        split_losses = []

        for iteration, batch in enumerate(data_loader):
            batch_size = batch['input'].size(0)

            x = batch['input'].type(long_tensor_type)
            length = batch['length']
            target = Variable(batch['target']).type(long_tensor_type)

            logp, mean, logv, z, outputs=rvae(x,length)

            NLL_loss, KL_loss, KL_weight = loss_fn(logp, target, length, mean, logv, 'logistic', step, k, x0)

            loss = (NLL_loss + KL_loss*KL_weight)/batch_size

            # float(...) works for both one-element and zero-dimensional loss
            # tensors, unlike `.data[0]`.
            loss_value = float(loss.cpu().data)
            nll_value = float(NLL_loss.cpu().data)/batch_size
            kl_value = float(KL_loss.cpu().data)/batch_size

            losses.append(loss_value)
            split_losses.append(loss_value)
            NLL_losses.append(nll_value)
            KL_losses.append(kl_value)
            KL_weights.append(KL_weight)

            if split == 'train':
                if iteration % 50 == 0:
                    print('**************backpropagation**********')

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # `step` drives the KL annealing schedule and only advances on
                # training batches.
                step += 1

            if iteration % 100 == 0 or iteration+1 == len(data_loader):
                print("%s Batch %04d/%i, Loss %9.4f, NLL-Loss %9.4f, KL-Loss %9.4f, KL-Weight %6.3f"
                      %(split.upper(), iteration, len(data_loader)-1, loss_value, nll_value, kl_value, KL_weight))
                np.savez(L=losses,file='loss.npz')
                np.savez(L=NLL_losses,file='NLL_losses.npz')
                np.savez(L=KL_losses,file='KL_losses.npz')
                np.savez(L=KL_weights,file='KL_weights.npz')

        # save checkpoint
        if split == 'train':
            checkpoint_path = os.path.join(save_model_path, "E%i.pytorch"%(epoch))
            torch.save(rvae, checkpoint_path)
            print("Model saved at %s"%checkpoint_path)
        split_mean = np.mean(np.array(split_losses))
        print("%s Epoch %02d/%i, Mean ELBO %9.4f"%(split.upper(), epoch, epochs, split_mean))
        avg_losses.append(split_mean)
        np.savez(L=avg_losses,file='avg_losses.npz')

**************backpropagation**********
TRAIN Batch 0000/1314, Loss  203.7806, NLL-Loss  203.7206, KL-Loss    0.4053, KL-Weight  0.148
**************backpropagation**********
**************backpropagation**********
TRAIN Batch 0100/1314, Loss  160.0858, NLL-Loss  159.9843, KL-Loss    0.6571, KL-Weight  0.154
**************backpropagation**********


In [305]:
# Decode 10 sentences from latent codes sampled inside inference(); returns the
# generated token ids and the latent codes that were used.
samples, z = rvae.inference(n=10)



Columns 0 to 12 
   87  1140    10   168    77    57   451    43   326    13  1188    21   394
    5     5     5     3     0     0     0     0     0     0     0     0     0
   10     5     5     5     5     5     5     5     5    43     5     5     3
    5     5    21    10   149   544    21    10   149   544    21     5  3355
   10     5     5     5     5     5     5     5     5     5     3     0     0
   18    17  1729    77    10   168    33    34   291    21    10   168    98
   10     5    21    10     5     5     5    19    13     5    43     5    10
   10     5     5     5     5     5     5     5     5     5   209  5975    43
  514   796   210    17  5933  1600    21   394     5   448    21   486     3
    5     5     5     5     5     5     5     5     5     5     5     5     5

Columns 13 to 25 
    5   448   447     5   907    13   446     3     0     0     0     0     0
    0     0     0     0     0     0     0     0     0     0     0     0     0
    0     0     0     0   

In [306]:
from utils import to_var, idx2word, interpolate

# Load the vocabulary mappings saved next to the dataset.
with open('data'+'/ptb.vocab.json', 'r') as file:
    vocab = json.load(file)

w2i, i2w = vocab['w2i'], vocab['i2w']

# Convert to plain Python ints first: i2w is keyed by the decimal string of the
# id, and str() of a tensor element is not that string on every PyTorch version.
sample_rows = samples.cpu().tolist() if hasattr(samples, 'cpu') else np.asarray(samples).tolist()

# Turn every row of token ids into a sentence, stopping at the first pad id.
sent_str = []
for sent in sample_rows:
    words = []
    for word_id in sent:
        if word_id == w2i['<pad>']:
            break
        words.append(i2w[str(word_id)])
    # Joining avoids the trailing space the old no-op `sent_str[i] = sent_str[i]` left behind.
    sent_str.append(" ".join(words).strip())

In [307]:
# Print one decoded sentence per line.
for sentence in sent_str:
    print(sentence)

in addition the company said it expects to report a loss of $ n million or n cents a share <eos> 
n n n <eos> 
the n n n n n n n n to n n <eos> 
n n of the national association of the national association of n calif . a . t . maker of san francisco <eos> 
the n n n n n n n n n <eos> 
mr . smith said the company was named president of the company ' s decision to the company ' s decision <eos> 
the n of the n n n is a n to n the n of the n of the n of the n of the n of the n of the n of the n of the n of the n of the n of the n of the n of the n of the n <eos> 
the n n n n n n n n n & drew to the san francisco bay area <eos> 
general electric co . shelf offering of $ n million of debt <eos> 
n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n n 


In [154]:
# Draw two random end points in latent space and build an 8-step path between
# them with the project's `interpolate` helper.
z1, z2 = (torch.randn([latent_size]).numpy() for _ in range(2))
latent_path = interpolate(start=z1, end=z2, steps=8)
z = to_var(torch.from_numpy(latent_path).float())

In [155]:
# Decode one sentence per latent code in `z`.
samples, _ = rvae.inference(z=z)

In [156]:
from utils import to_var, idx2word, interpolate

# Load the vocabulary mappings saved next to the dataset.
with open('data'+'/ptb.vocab.json', 'r') as file:
    vocab = json.load(file)

w2i, i2w = vocab['w2i'], vocab['i2w']

# Convert to plain Python ints first: i2w is keyed by the decimal string of the
# id, and str() of a tensor element is not that string on every PyTorch version.
sample_rows = samples.cpu().tolist() if hasattr(samples, 'cpu') else np.asarray(samples).tolist()

# Turn every row of token ids into a sentence, stopping at the first pad id.
sent_str = []
for sent in sample_rows:
    words = []
    for word_id in sent:
        if word_id == w2i['<pad>']:
            break
        words.append(i2w[str(word_id)])
    sent_str.append(" ".join(words).strip())

In [157]:
# Show the decoded sentences as a single list.
print(sent_str)

['the n n n n n n n n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to', 'the n n n n n n n n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to', 'the n n n n n n n n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to', 'the n n n n n n n n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to', 'the n n n n n n n n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to', 'the n n n n n n n n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to', 'the n n n n n n n n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to n n n to', 'the n n n n n n n n n n to n n n

In [186]:
# Best-effort restore of the most recently written checkpoint.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source.
try:
    rvae = torch.load(checkpoint_path)
    print("\n--------model restored--------\n")
except Exception as err:
    # Still non-fatal, but report why (missing file, `checkpoint_path` not
    # defined in this kernel, ...) and let KeyboardInterrupt propagate.
    print("\n--------model not restored--------\n")
    print("reason: %s: %s" % (type(err).__name__, err))


--------model restored--------



In [None]:
# Move the model to the GPU and switch it to evaluation mode.
rvae.cuda()
rvae.eval()

In [None]:
def _save_sample( save_to, sample, running_seqs, t):
    # select only still running
    running_latest = save_to[running_seqs]
    #print(running_latest)
    # update token at position t
    running_latest[:,t] = sample.data
    #print(running_latest[:,t])
    # save back
    save_to[running_seqs] = running_latest

    return save_to

In [None]:
# Manual step-by-step decoding of a single sample.
# Note: this rebinds the `batch_size` name that the training loop also assigns.
batch_size = 1

In [None]:
# Output buffer: one row per sample, pre-filled with the pad id.
generations=torch.cuda.FloatTensor(batch_size, max_sequence_length).fill_(pad_idx).long()

In [None]:
# Latent code drawn from a standard normal, moved to the GPU.
z = Variable(torch.randn([batch_size, latent_size])).cuda()

In [None]:
# NOTE(review): `z` is already a CUDA Variable after the previous cell, so this
# re-wrap looks redundant — confirm and remove.
z = Variable(torch.cuda.FloatTensor(z)).cuda()

In [None]:
z

In [None]:
# Project the latent code to the decoder's initial hidden state.
hidden = rvae.latent2hidden(z)

In [None]:
# Reshape to (hidden_factor, batch, hidden) before feeding the decoder RNN.
hidden = hidden.view(rvae.hidden_factor, batch_size, rvae.hidden_size)

In [None]:
# Greedy decoding for exactly `max_sequence_length` steps, starting from the
# <sos> token and the latent-derived `hidden` state.  Column t of
# `generations` receives the token chosen at step t.
input_sequence = Variable(torch.Tensor(batch_size).fill_(sos_idx).long()).cuda()

for t in range(max_sequence_length):
    input_embedding = rvae.embedding(input_sequence.unsqueeze(1))
    output, hidden = rvae.decoder_rnn(input_embedding, hidden)
    logits = rvae.outputs2vocab(output)
    # Keep the batch axis: a bare squeeze() collapses a batch of one to a
    # scalar, which breaks unsqueeze(1) on the next step.
    input_sequence = torch.topk(logits,1,dim=-1)[1].contiguous().view(batch_size)
    generations[:,t] = input_sequence.data
    # Early stopping at <eos> is deliberately left disabled, as before:
    #if (input_sequence == eos_idx).cpu().data.numpy():
    #    break

In [None]:
# Display the generated token ids.
generations

In [None]:
# NOTE(review): the same import and vocabulary load already appear in two
# earlier cells — candidates for a single shared cell.
from utils import to_var, idx2word, interpolate

with open('data'+'/ptb.vocab.json', 'r') as file:
    vocab = json.load(file)

# w2i: word -> id; i2w: id (as a string key) -> word.
w2i, i2w = vocab['w2i'], vocab['i2w']

In [40]:
samples = generations.cpu().numpy()

# Build one sentence per row of ids: append "<word> " until the first pad id,
# then trim the surrounding whitespace.
sent_str = []
for sent in samples:
    sentence = str()
    for word_id in sent:
        if word_id == w2i['<pad>']:
            break
        sentence = sentence + i2w[str(word_id)] + " "
    sent_str.append(sentence.strip())

NameError: name 'generations' is not defined

In [None]:
# Show the decoded sentences as a single list.
print(sent_str)

In [None]:
# NOTE(review): the remaining cells are scratch arithmetic that reads no model
# or data variable from this notebook; the last two rebind `x`, the name the
# training loop uses for its input batch.  Candidates for removal.
(1189*841)/(17.5*8.9)

In [None]:
8000000/1.4

In [None]:
1189*841

In [None]:
999949/0.1152

In [None]:
0.2499936249187166772620562253231*0.2499936249187166772620562253231

In [None]:
0.2499936249187166772620562253231*2

In [None]:
0.2499936249187166772620562253231*0.49998724983743337

In [None]:
x = 0.2499936

In [None]:
(1189*841)/(x*2*x)