In [None]:
import json
import gensim
import os
import json
import tqdm
import time
import numpy as np
import nltk
import random

%matplotlib  inline
import matplotlib.pyplot as plt

from rouge import Rouge 
rouge = Rouge()

In [None]:
% time
with open('./data_pointer_example.txt', 'r', encoding='UTF-8') as f:
    stories = json.load(f)

In [None]:
# Pre-trained embedding matrix stored under key 'E' of the .npz archive.
# np.load returns a lazily-read archive object that holds the file open, so it
# is used as a context manager; the array itself is fully read on access.
with np.load('Word2vec_pointer.npz') as embedding_archive:
    model_embedding = embedding_archive['E']

In [None]:
# UTC timestamp (second resolution) that names this run's checkpoint directory.
ts = time.strftime('%Y-%b-%d-%H-%M-%S', time.gmtime())
save_model_path = os.path.join('won', ts)

# Creates ./won/<ts>; os.makedirs raises if that directory already exists.
os.makedirs('./' + save_model_path)

In [None]:
class Vocab(object):
  """Two-way mapping between words and integer ids, read from a vocabulary file.

  Ids 0-3 are reserved, in this order, for UNKNOWN_TOKEN, PAD_TOKEN,
  START_DECODING and STOP_DECODING; words from the file follow from id 4.
  """

  def __init__(self, vocab_file, max_size):
    """Build the vocabulary.

    Args:
      vocab_file: path to a UTF-8 text file with one "<word> <second field>" pair
        per line; only the first field is used. Lines that do not split into
        exactly two pieces are reported and skipped.
      max_size: stop reading once the total number of entries (the four special
        tokens included) reaches this value; 0 means no limit.

    Raises:
      Exception: if the file contains a special token or a duplicated word.
    """
    self._word_to_id = {}
    self._id_to_word = {}
    self._count = 0 # keeps track of total number of words in the Vocab

    # [UNK], [PAD], [START] and [STOP] get the ids 0,1,2,3.
    for w in [UNKNOWN_TOKEN, PAD_TOKEN, START_DECODING, STOP_DECODING]:
      self._word_to_id[w] = self._count
      self._id_to_word[self._count] = w
      self._count += 1

    # Read the vocab file and add words up to max_size
    with open(vocab_file, 'r', encoding='utf-8') as vocab_f:
      for line in vocab_f:
        pieces = line.split()
        if len(pieces) != 2:
          print ('Warning: incorrectly formatted line in vocabulary file: %s\n' % line)
          continue
        w = pieces[0]
        if w in [SENTENCE_START, SENTENCE_END, UNKNOWN_TOKEN, PAD_TOKEN, START_DECODING, STOP_DECODING]:
          raise Exception('<s>, </s>, [UNK], [PAD], [START] and [STOP] shouldn\'t be in the vocab file, but %s is' % w)
        if w in self._word_to_id:
          raise Exception('Duplicated word in vocabulary file: %s' % w)
        self._word_to_id[w] = self._count
        self._id_to_word[self._count] = w
        self._count += 1
        if max_size != 0 and self._count >= max_size:
          print ("max_size of vocab was specified as %i; we now have %i words. Stopping reading." % (max_size, self._count))
          break

    print ("Finished constructing vocabulary of %i total words. Last word added: %s" % (self._count, self._id_to_word[self._count-1]))

  def word2id(self, word):
    """Return the id of `word`, or the UNKNOWN_TOKEN id if the word is not in the vocabulary."""
    if word not in self._word_to_id:
      return self._word_to_id[UNKNOWN_TOKEN]
    return self._word_to_id[word]

  def id2word(self, word_id):
    """Return the word stored under `word_id`.

    Raises:
      ValueError: if `word_id` is not a known id.
    """
    if word_id not in self._id_to_word:
      raise ValueError('Id not found in vocab: %d' % word_id)
    return self._id_to_word[word_id]

  def size(self):
    """Return the total number of entries, special tokens included."""
    return self._count

  def write_metadata(self, fpath):
    """Write one word per line to `fpath`, in id order (tab-delimited, single 'word' column).

    Args:
      fpath: destination path; an existing file is overwritten.
    """
    # Imported here because the notebook's import cell does not bring csv into scope.
    import csv

    print ("Writing word embedding metadata file to %s..." % (fpath))
    # newline='' is what the csv module expects so it controls line endings itself;
    # utf-8 mirrors the encoding the vocabulary file was read with.
    with open(fpath, "w", newline='', encoding='utf-8') as f:
      fieldnames = ['word']
      writer = csv.DictWriter(f, delimiter="\t", fieldnames=fieldnames)
      for i in range(self.size()):
        writer.writerow({"word": self._id_to_word[i]})

In [None]:
# Special tokens that own a vocabulary id (Vocab reserves ids 0-3 for them).
UNKNOWN_TOKEN = '[UNK]'     # stands in for out-of-vocabulary words
PAD_TOKEN = '[PAD]'         # pads the encoder input, decoder input and target sequence
START_DECODING = '[START]'  # first token of every decoder input sequence
STOP_DECODING = '[STOP]'    # last token of untruncated target sequences

# Sentence delimiters used inside the data files to segment abstracts.
# They never receive a vocabulary id.
SENTENCE_START = '<s>'
SENTENCE_END = '</s>'

# None of the six strings above may appear as a word in the vocab file.

In [None]:
# Build the vocabulary from ./vocab, capped at 50000 entries (special tokens included).
vocab = Vocab(vocab_file='./vocab', max_size=50000)

In [None]:
class Example(object):
  """One article/abstract pair converted into id sequences for the encoder and decoder."""

  def __init__(self, article, abstract_sentences, vocab):
    """Tokenise on whitespace, truncate and map words to ids.

    Args:
      article: source text as a single string; only its first 200 tokens are kept.
      abstract_sentences: list of summary sentence strings, joined with spaces.
      vocab: object exposing word2id() and size().
    """
    sos_id = vocab.word2id(START_DECODING)
    eos_id = vocab.word2id(STOP_DECODING)

    # Encoder side: slicing is a no-op for articles of 200 tokens or fewer.
    article_words = article.split()[:200]
    self.enc_len = len(article_words)  # length after truncation, before padding
    self.enc_input = [vocab.word2id(w) for w in article_words]  # OOVs become the UNK id

    # Decoder side.
    abstract = ' '.join(abstract_sentences)
    abstract_words = abstract.split()
    abs_ids = [vocab.word2id(w) for w in abstract_words]  # OOVs become the UNK id

    # Decoder input/target limited to 20 steps.
    self.dec_input, self.target = self.get_dec_inp_targ_seqs(abs_ids, 20, sos_id, eos_id)
    self.dec_len = len(self.dec_input)

    # Extended-vocabulary view of the article: in-article OOVs get temporary ids,
    # and the OOV words themselves are kept alongside.
    self.enc_input_extend_vocab, self.article_oovs = article2ids(article_words, vocab)

    # The target is recomputed from abstract2ids() and replaces the one above.
    abs_ids_extend_vocab = abstract2ids(abstract_words, vocab, self.article_oovs)
    _, self.target = self.get_dec_inp_targ_seqs(abs_ids_extend_vocab, 20, sos_id, eos_id)

    # Keep the raw strings for later inspection.
    self.original_article = article
    self.original_abstract = abstract
    self.original_abstract_sents = abstract_sentences


  def get_dec_inp_targ_seqs(self, sequence, max_len, start_id, stop_id):
    """Derive the decoder input and target from a list of word ids.

    The input is `sequence` prefixed with `start_id`. When that fits in `max_len`
    the target is `sequence` followed by `stop_id`; otherwise both are cut to
    `max_len` and the target carries no stop id.

    Returns:
      (inp, target): two lists of equal length.
    """
    dec_inp = [start_id] + sequence[:]
    dec_tgt = sequence[:]
    if len(dec_inp) <= max_len:
      dec_tgt.append(stop_id)  # untruncated: close the target with the stop id
    else:
      dec_inp = dec_inp[:max_len]
      dec_tgt = dec_tgt[:max_len]  # truncated: no stop id
    assert len(dec_inp) == len(dec_tgt)
    return dec_inp, dec_tgt


  def pad_decoder_inp_targ(self, max_len, pad_id):
    """Right-pad dec_input and target in place with `pad_id` up to `max_len`."""
    # A negative repeat count yields an empty list, so longer sequences are left untouched.
    self.dec_input.extend([pad_id] * (max_len - len(self.dec_input)))
    self.target.extend([pad_id] * (max_len - len(self.target)))


  def pad_encoder_input(self, max_len, pad_id):
    """Right-pad enc_input and enc_input_extend_vocab in place with `pad_id` up to `max_len`."""
    self.enc_input.extend([pad_id] * (max_len - len(self.enc_input)))
    self.enc_input_extend_vocab.extend([pad_id] * (max_len - len(self.enc_input_extend_vocab)))
        
def article2ids(article_words, vocab):
  """Map article words to ids, giving each distinct OOV word a temporary id.

  Args:
    article_words: list of word strings.
    vocab: object exposing word2id() and size().

  Returns:
    (ids, oovs): `ids` has one entry per word; an in-vocabulary word keeps its
    vocab id, while the k-th distinct OOV word (k from 0) gets vocab.size() + k.
    `oovs` lists the OOV words in order of first appearance.
  """
  ids = []
  oovs = []
  oov_position = {}  # OOV word -> its index in `oovs`; avoids rescanning the list per word
  unk_id = vocab.word2id(UNKNOWN_TOKEN)
  vocab_size = vocab.size()
  for w in article_words:
    i = vocab.word2id(w)
    if i == unk_id: # If w is OOV
      if w not in oov_position: # First time this OOV is seen
        oov_position[w] = len(oovs)
        oovs.append(w)
      ids.append(vocab_size + oov_position[w]) # e.g. 50000 for the first article OOV, 50001 for the second...
    else:
      ids.append(i)
  return ids, oovs


def abstract2ids(abstract_words, vocab, article_oovs):
  """Map abstract words to vocabulary ids.

  Every word is passed through vocab.word2id(), so OOV words come back as the
  UNK id. `article_oovs` is accepted but not consulted: in-article OOVs are
  not mapped to their temporary extended-vocabulary ids here.

  Returns:
    list of ids, one per word in `abstract_words`.
  """
  return [vocab.word2id(w) for w in abstract_words]

In [None]:
# Special-token ids; only pad_id is needed by the loop below.
start_decoding = vocab.word2id(START_DECODING)
stop_decoding = vocab.word2id(STOP_DECODING)
pad_id = vocab.word2id(PAD_TOKEN)

# Padding targets; Example truncates to the same 200 / 20 internally.
dec_max_len = 20
enc_max_len = 200

# Per-group accumulators, re-created after every file group.
input_index, dec_inputs_index, target_index = [], [], []
inputs_len, dec_inputs_len = [], []

# file_group -> (input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index)
index_data = {}

# Number of examples skipped because an encoder/decoder length was <= 0.
zero_len = 0

files_group = list(stories.keys())

for file_group in tqdm.tqdm(files_group):
    for file in tqdm.tqdm(list(stories[file_group].keys())):
        for data in list(stories[file_group][file].keys()):
            record = stories[file_group][file][data]
            if record['abstract_sentences'] == []:
                print('*********************************************')
                abstract_sentences = []
            else:
                # Only the first abstract sentence is used as the target.
                abstract_sentences = [record['abstract_sentences'][0]]
            abstract = record['abstract']
            article = record['article']

            example = Example(article=article, abstract_sentences=abstract_sentences, vocab=vocab)
            example.pad_decoder_inp_targ(dec_max_len, pad_id)
            example.pad_encoder_input(enc_max_len, pad_id)

            if example.enc_len > 0 and example.dec_len > 0:
                input_index.append(example.enc_input)
                dec_inputs_index.append(example.dec_input)
                target_index.append(example.target)
                inputs_len.append(example.enc_len)
                dec_inputs_len.append(example.dec_len)
            else:
                print(file_group, file, data)
                zero_len += 1
    print('********************************************************************************')
    print(len(input_index))
    print('********************************************************************************')
    index_data[file_group] = (input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index)
    # Start fresh lists so the next group does not share storage with this one.
    input_index, dec_inputs_index, target_index = [], [], []
    inputs_len, dec_inputs_len = [], []

# Leave the first group's data bound to the working names.
(input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index) = index_data[files_group[0]]

In [None]:
# Keep only the tail of every group: everything from the 90% mark to the end.
# NOTE(review): this overwrites index_data in place, so re-running the cell
# shrinks each group again — re-run the indexing cell first if needed.
for file_group in files_group:
    print(file_group)
    (input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index) = index_data[file_group]
    print(len(input_index))
    start = int(len(input_index) * 0.9)
    # None gives an open-ended slice; an end of -1 would silently drop the last example.
    end = None
    index_data[file_group] = tuple(column[start:end] for column in index_data[file_group])
    (input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index) = index_data[file_group]
    print(len(input_index))

In [None]:
import torch
from torch.autograd import Variable
import torch.nn.utils.rnn as rnn_utils
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Model and optimisation hyper-parameters.
embedding_size = 300   # presumably the width of the pre-trained embedding matrix — TODO confirm
hidden_size = 150      # encoder hidden size per direction
word_dropout = 0.5     # dropout probability applied to decoder input embeddings
num_layers = 1
bidirectional = True
batch_size_fit = 1024-256-128   # = 640 examples per batch
rnn_type = 'gru'
learning_rate = 0.001

vocab_size = vocab.size()

# The special-token strings (PAD_TOKEN, START_DECODING, ...) are defined once, in
# the cell that precedes the Vocab construction; they are not repeated here so
# the two copies cannot drift apart.
sos_idx = vocab.word2id(START_DECODING)
eos_idx = vocab.word2id(STOP_DECODING)
pad_idx = vocab.word2id(PAD_TOKEN)

In [None]:
class Embedding_layer(nn.Module):
    """Token embedding lookup, optionally initialised from a pre-trained matrix."""

    def __init__(self, vocab_size, embedding_size, numpy_embedding = None):
        """
        :param vocab_size: number of rows of the embedding table
        :param embedding_size: width of each embedding vector
        :param numpy_embedding: optional numpy array used as the initial weight
            (converted to float32); when None the default random initialisation
            of nn.Embedding is kept
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,embedding_size)
        # `is None` rather than `== None`: the latter is an element-wise comparison on arrays.
        if numpy_embedding is None:
            self.model_embedding = None
        else:
            self.model_embedding = torch.from_numpy(numpy_embedding).float()
            # NOTE(review): the matrix shape is not checked against
            # (vocab_size, embedding_size) — confirm they agree.
            self.embedding.weight = nn.Parameter(self.model_embedding)

    def forward(self,inputs):
        """Return the embedding vectors for a LongTensor of token ids."""
        return self.embedding(inputs)

In [None]:
class Seq2Seq(nn.Module):
    """Encoder half of the model: embeds a padded batch and runs it through Encoder.

    Despite the name, no decoder is built here; decoding is done by a separate module.
    """

    def __init__(self,vocab_size, embedding_size, hidden_size, word_dropout,
                 sos_idx, eos_idx, pad_idx, embedding,rnn_type='rnn' , num_layers=1, bidirectional=True):
        """
        :param vocab_size, embedding_size, hidden_size, num_layers, bidirectional, rnn_type:
            forwarded to Encoder
        :param word_dropout: accepted for signature compatibility; not used by this module
        :param sos_idx, eos_idx, pad_idx: special-token ids, stored as attributes
        :param embedding: module mapping token ids to embedding vectors
        """
        super().__init__()
        self.sos_idx = sos_idx
        self.eos_idx = eos_idx
        self.pad_idx = pad_idx

        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        self.embedding = embedding

        # No device is forced here: calling .cuda()/.to(device) on this module
        # moves the encoder together with the embedding.
        self.encoder = Encoder(vocab_size = vocab_size,embedding_size = embedding_size, hidden_size = hidden_size, num_layers = num_layers, bidirectional = bidirectional,rnn_type = rnn_type)

    @staticmethod
    def _restore_batch_order(state, reversed_idx, batch_size):
        """Undo the length sort on a per-example state tensor of shape (B, ...)."""
        if state.dim() >= 2 and state.size(0) == batch_size:
            return state[reversed_idx]
        # A 1-D state can only come from a batch of one, which needs no reordering.
        return state

    def forward(self,x,length,y_):
        """Encode a padded batch.

        :param x: LongTensor of token ids, indexed as (batch, time)
        :param length: LongTensor with the unpadded length of every row of x
        :param y_: unused
        :return: (encoder_outputs, encoder_hidden_state, encoder_hidden_state_attn),
            all in the same batch order as x
        """
        batch_size = x.size(0)

        # pack_padded_sequence is fed rows sorted by decreasing length.
        sorted_lengths, sorted_idx = torch.sort(length, descending=True)
        sorted_idx = sorted_idx.to(x.device)
        _, reversed_idx = torch.sort(sorted_idx)

        input_sequence = x[sorted_idx]
        input_embedding = self.embedding(input_sequence).float()
        packed_input = rnn_utils.pack_padded_sequence(input_embedding, sorted_lengths.tolist(), batch_first=True)

        packed_outputs, hidden_state, hidden_state_attn = self.encoder(packed_input,batch_size)

        padded_outputs = rnn_utils.pad_packed_sequence(packed_outputs, batch_first=True)[0]
        padded_outputs = padded_outputs.contiguous()

        # Everything the encoder returns is in sorted order; put the hidden states
        # back in the caller's order as well, otherwise they are paired with the
        # wrong rows of encoder_outputs (and the wrong targets).
        self.encoder_outputs = padded_outputs[reversed_idx]
        self.encoder_hidden_state = self._restore_batch_order(hidden_state, reversed_idx, batch_size)
        self.encoder_hidden_state_attn = self._restore_batch_order(hidden_state_attn, reversed_idx, batch_size)

        return self.encoder_outputs, self.encoder_hidden_state, self.encoder_hidden_state_attn

In [None]:
class Encoder(nn.Module):
    """Recurrent encoder (RNN / GRU / LSTM cell type) over already-embedded inputs."""

    def __init__(self,vocab_size,embedding_size, hidden_size, bidirectional=True, num_layers = 1,rnn_type='rnn'):
        """
        :param vocab_size: stored as an attribute only
        :param embedding_size: feature size of the input vectors
        :param hidden_size: hidden size per direction
        :param bidirectional: run the recurrence in both directions
        :param num_layers: number of stacked recurrent layers
        :param rnn_type: 'rnn', 'gru' or 'lstm'
        :raises ValueError: for any other rnn_type
        """
        super(Encoder,self).__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size

        rnn_classes = {'rnn': nn.RNN, 'gru': nn.GRU, 'lstm': nn.LSTM}
        if self.rnn_type not in rnn_classes:
            raise ValueError("rnn_type must be one of 'rnn', 'gru', 'lstm'; got %r" % (self.rnn_type,))
        rnn = rnn_classes[self.rnn_type]

        self.encoder = rnn(self.embedding_size, self.hidden_size, num_layers = self.num_layers, bidirectional = self.bidirectional, batch_first = True)
        # In-place initialisers; only the forward-direction weights of layer 0 are re-initialised.
        nn.init.xavier_uniform_(self.encoder.weight_hh_l0)
        nn.init.xavier_uniform_(self.encoder.weight_ih_l0)

        self.hidden_factor = (2 if self.bidirectional else 1) * self.num_layers


    def forward(self,x,batch_size):
        """
        :param x: embedded input (padded batch-first tensor or PackedSequence)
        :param batch_size: number of sequences in x
        :return: (outputs, flat_hidden, last_hidden) where flat_hidden has shape
            (batch_size, hidden_size * hidden_factor) and last_hidden is hidden[-1]
        """
        # NOTE(review): nn.LSTM returns a (h, c) tuple here, which the code below does not unpack.
        outputs, self.hidden = self.encoder(x)

        if self.bidirectional or self.num_layers > 1:
            # hidden is (layers*directions, B, H). The batch axis has to be moved to
            # the front before flattening; a bare view() would mix values that belong
            # to different examples into the same row.
            self.hidden_ = self.hidden.transpose(0, 1).contiguous().view(batch_size, self.hidden_size*self.hidden_factor)
        else:
            # squeeze(0) drops only the layer axis, so a batch of one stays (1, H).
            self.hidden_ = self.hidden.squeeze(0)


        return outputs, self.hidden_, self.hidden[-1]

In [None]:
import math
class Attn(nn.Module):
    """Additive ("concat") attention: score = v . tanh(W [hidden; encoder_output])."""

    def __init__(self, method, hidden_size):
        """
        :param method: stored as an attribute only; the concat scoring is always used
        :param hidden_size: size of both the decoder hidden state and the encoder outputs
        """
        super(Attn, self).__init__()
        self.method = method
        self.hidden_size = hidden_size
        self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))
        # Overwrite v with N(0, 1/sqrt(hidden_size)) values.
        stdv = 1. / math.sqrt(self.v.size(0))
        self.v.data.normal_(mean=0, std=stdv)

    def forward(self, hidden, encoder_outputs):
        '''
        :param hidden:
            previous hidden state of the decoder, in shape (1,B,H)
        :param encoder_outputs:
            encoder outputs, batch first, in shape (B,T,H)
        :return
            attention weights in shape (B,1,T); each row sums to 1 over T
        '''
        max_len = encoder_outputs.size(1)
        # Tile the hidden state over the time axis: (1,B,H) -> (T,B,H) -> (B,T,H)
        self.H = hidden.repeat(max_len,1,1).transpose(0,1)

        attn_energies = self.score(self.H,encoder_outputs) # (B,T)
        return F.softmax(attn_energies, dim=1).unsqueeze(1) # normalise over the time axis

    def score(self, hidden, encoder_outputs):
        '''Return unnormalised attention energies in shape (B,T).'''
        energy = torch.tanh(self.attn(torch.cat([hidden, encoder_outputs], 2))) # [B*T*2H]->[B*T*H]
        energy = energy.transpose(2,1) # [B*H*T]
        v = self.v.repeat(encoder_outputs.size(0),1).unsqueeze(1) #[B*1*H]
        energy = torch.bmm(v,energy) # [B*1*T]
        return energy.squeeze(1) #[B*T]

In [None]:
class Decoder(nn.Module):
    """Single-step attention decoder: one call consumes one token per sequence."""

    def __init__(self,vocab_size,embedding_size , hidden_size, encoder_bi, embedding, bidirectional=True, num_layers = 1,rnn_type='rnn',word_dropout = 0.5):
        """
        :param vocab_size: size of the output distribution
        :param embedding_size: width of the token embeddings
        :param hidden_size: decoder hidden size; also the feature size expected of
            the encoder outputs (the attention context is concatenated to the input)
        :param encoder_bi: whether the encoder was bidirectional (used for hidden_factor)
        :param embedding: module mapping token ids to embedding vectors
        :param bidirectional: passed to the recurrent layer
        :param num_layers: number of stacked recurrent layers
        :param rnn_type: 'rnn', 'gru' or 'lstm'
        :param word_dropout: dropout probability applied to the input embedding
        :raises ValueError: for an unknown rnn_type
        """
        super(Decoder,self).__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.rnn_type = rnn_type
        self.bidirectional = bidirectional
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.attn = Attn('concat', hidden_size)
        self.encoder_bi = encoder_bi

        rnn_classes = {'rnn': nn.RNN, 'gru': nn.GRU, 'lstm': nn.LSTM}
        if self.rnn_type not in rnn_classes:
            raise ValueError("rnn_type must be one of 'rnn', 'gru', 'lstm'; got %r" % (self.rnn_type,))
        rnn = rnn_classes[self.rnn_type]

        self.hidden_factor = (2 if self.encoder_bi else 1) * self.num_layers

        self.embedding = embedding
        self.word_dropout = nn.Dropout(p=word_dropout)

        self.decoder = rnn(embedding_size+hidden_size, hidden_size, num_layers=num_layers, bidirectional=self.bidirectional, batch_first=True)
        # In-place initialisers; only the forward-direction weights of layer 0 are re-initialised.
        nn.init.xavier_uniform_(self.decoder.weight_hh_l0)
        nn.init.xavier_uniform_(self.decoder.weight_ih_l0)

        # Three stacked linear maps from the recurrent output to vocabulary scores.
        self.hidden2hidden = nn.Linear(hidden_size * (2 if bidirectional else 1), hidden_size//2)
        self.hidden2outputs = nn.Linear(hidden_size//2, hidden_size//2)
        self.outputs2vocab = nn.Linear(hidden_size//2, vocab_size)


    def forward(self,batch_size,hidden,encoder_outputs):
        """Run one decoding step.

        :param batch_size: despite the name, the LongTensor of input token ids for
            this step, one per sequence (the name is kept for existing callers)
        :param hidden: previous decoder state without the leading layer axis, (B,H)
        :param encoder_outputs: encoder outputs in shape (B,T,H)
        :return: (log-probabilities (B,vocab), context (B,1,H), new hidden, attention weights (B,1,T))
        """
        self.batch_size = batch_size.size(0)
        if  self.num_layers > 1:
            # unflatten hidden state
            # NOTE(review): this assumes a layer-major flattening of the incoming state — confirm
            # against the encoder before using num_layers > 1.
            self.hidden = hidden.view(self.hidden_factor, self.batch_size, self.hidden_size)
        else:
            self.hidden = hidden.unsqueeze(0)

        # (B,) or (B,1) ids -> (B,1,E)
        self.input_embedding = self.embedding(batch_size).view(encoder_outputs.size(0),1, -1)
        self.input_embedding = self.word_dropout(self.input_embedding)

        self.attn_weights = self.attn(self.hidden, encoder_outputs)
        self.context = self.attn_weights.bmm(encoder_outputs)

        self.inputs = torch.cat((self.input_embedding, self.context), 2)

        self.dec_outputs,self.hidden = self.decoder(self.inputs, self.hidden)

        # squeeze(1) removes only the length-1 time axis, so a batch of one keeps its
        # batch axis; the softmax axis is named explicitly.
        logits = self.outputs2vocab(self.hidden2outputs(self.hidden2hidden(self.dec_outputs.squeeze(1))))
        self.outputs = nn.functional.log_softmax(logits, dim=-1)

        return self.outputs, self.context, self.hidden, self.attn_weights

In [None]:
# Embedding table initialised from the pre-trained matrix; the same object is
# handed to both the encoder wrapper and the decoder below.
embedding = Embedding_layer(
    vocab_size,
    embedding_size,
    numpy_embedding=model_embedding,
)

In [None]:
# Encoder wrapper, moved to the GPU.
S2S = Seq2Seq(
    vocab_size,
    embedding_size,
    hidden_size,
    word_dropout,
    sos_idx,
    eos_idx,
    pad_idx,
    num_layers=num_layers,
    rnn_type='gru',
    bidirectional=bidirectional,
    embedding=embedding,
).cuda()

In [None]:
# Decoder, moved to the GPU. Its hidden size is twice the encoder's per-direction
# size; the recurrent layer itself is unidirectional.
decoder = Decoder(
    vocab_size=vocab_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size*2,
    num_layers=num_layers,
    encoder_bi=bidirectional,
    bidirectional=False,
    rnn_type=rnn_type,
    word_dropout=word_dropout,
    embedding=embedding,
).cuda()

In [None]:
# Display the repr of both modules to inspect their layer layout.
S2S,decoder

In [None]:
# Negative log-likelihood over the decoder's log-probabilities; padded target
# positions do not contribute.
NLL = torch.nn.NLLLoss(ignore_index = pad_idx)

optimizer_encoder = torch.optim.Adam(S2S.parameters(), lr=learning_rate)

# S2S and decoder hold the same embedding module, so its parameters show up in
# both parameter lists. They are left to optimizer_encoder only; otherwise every
# training step would update the embedding twice.
_encoder_param_ids = {id(param) for param in S2S.parameters()}
optimizer_decoder = torch.optim.Adam(
    [param for param in decoder.parameters() if id(param) not in _encoder_param_ids],
    lr=learning_rate,
)

In [None]:
import random

def batch(batch_size,input_var,dec_input_var,target_var,length_var, dec_length_var):
    """Yield shuffled mini-batches from five parallel sequences.

    The five inputs are zipped example-wise, shuffled once with random.shuffle,
    and cut into consecutive chunks of `batch_size`; the last chunk may be smaller.

    :param batch_size: maximum number of examples per batch (must be > 0)
    :param input_var, dec_input_var, target_var, length_var, dec_length_var:
        parallel sequences holding one entry per example
    :return: generator of (batch_input, batch_input_dec, batch_target,
        batch_length, batch_length_dec), each a list. Nothing is yielded for
        empty input.
    """
    shuffle_list = list(zip(input_var,dec_input_var,target_var,length_var,dec_length_var))
    random.shuffle(shuffle_list)

    for start in range(0, len(shuffle_list), batch_size):
        chunk = shuffle_list[start:start + batch_size]
        # Transpose the chunk of 5-tuples back into five parallel lists.
        batch_input, batch_input_dec, batch_target, batch_length, batch_length_dec = (
            list(column) for column in zip(*chunk)
        )
        yield batch_input, batch_input_dec, batch_target, batch_length, batch_length_dec

In [None]:
# Training-loop settings (the next cell assigns the same names again).
epochs = max_target_len = 20   # epoch bound / number of decoding steps per example
clip = 2.0                     # gradient-clipping threshold
teacher_forcing_ratio = 0

In [None]:
# Training-loop settings; these values replace the ones from the previous cell.
# Only teacher_forcing_ratio differs (100 here).
teacher_forcing_ratio = 100
clip = 2.0                     # gradient-clipping threshold
epochs = max_target_len = 20   # epoch bound / number of decoding steps per example

In [None]:
# Keys of index_data to iterate over, in order: the first entry is treated as
# the training split, the second as the validation split.
paths = [
    './chunked/train_*.bin',
    './chunked/val_*.bin',
]

In [None]:
# Sanity check: print, for each entry of `paths`, whether it is the training key.
for path in paths:
    is_train_key = (path == './chunked/train_*.bin')
    print(is_train_key)

In [None]:
step = 0
var_losses = []      # every validation batch loss, across all epochs
train_losses = []    # every training batch loss, across all epochs
avg_losses = []      # one mean validation loss per epoch
topis_list = []      # top-1 predictions of every batch; NOTE(review): grows without bound
iteration = 0

# Build the batches on whatever device the model already lives on.
device = next(S2S.parameters()).device

# NOTE(review): range(epochs+1) runs epochs+1 passes (0..epochs) — confirm this is intended.
for epoch in tqdm.tqdm(range(epochs+1)):
    epoch_var_losses = []

    for path in paths :
        print(path)
        is_train = (path == paths[0])
        input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index = index_data[path]
        # Index used both in the progress message and to detect the final logging point.
        last_batch = (len(input_index)-1)//batch_size_fit

        for batch_x, batch_y_x, batch_y, batch_len, batch_len_y in batch(batch_size_fit, input_index,
                                                                         dec_inputs_index, target_index,
                                                                         inputs_len, dec_inputs_len):
            optimizer_encoder.zero_grad()
            optimizer_decoder.zero_grad()

            iteration = iteration + 1

            # train() on the first split, eval() on the other.
            S2S.train(is_train)
            decoder.train(is_train)

            x_ = torch.tensor(batch_x, dtype=torch.long, device=device)
            y_ = torch.tensor(batch_y, dtype=torch.long, device=device).transpose(1,0)  # (T, B)
            batch_size = x_.size(0)

            length = torch.tensor(batch_len, dtype=torch.long, device=device)

            decoder_input = torch.full((batch_size,), sos_idx, dtype=torch.long, device=device)

            # Autograd is only needed on the training split.
            with torch.set_grad_enabled(is_train):
                encoder_outputs, encoder_hidden_state, encoder_hidden_state_attn = S2S(x_, length, y_)
                decoder_hidden = encoder_hidden_state

                loss = 0
                tamp = []

                for i in range(max_target_len):
                    decoder_output, decoder_context, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
                    topv, topi = decoder_output.data.topk(1)
                    tamp.append(topi.cpu().numpy())
                    loss += NLL(decoder_output, y_[i])
                    # The decoder returns (1, B, H) and expects (B, H) on the next call.
                    decoder_hidden = decoder_hidden.squeeze(0)
                    # The ground-truth token is always fed as the next input.
                    decoder_input = y_[i]
            topis_list.append(tamp)

            if is_train:

                loss.backward()
                # Gradient clipping is currently disabled:
                #torch.nn.utils.clip_grad_norm(S2S.parameters(), clip)
                #torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
                optimizer_encoder.step()
                optimizer_decoder.step()

                # .item() extracts the Python float from the 0-dim loss tensor.
                loss = loss.item()/max_target_len

                train_losses.append(loss)

                step += 1

                if iteration % 10 == 0 or iteration == last_batch:
                    print("Batch %04d/%i, Loss %9.4f"%( iteration, last_batch, loss))
                    np.savez('./train_loss.npz', L=train_losses)
                    checkpoint_path_encoder = os.path.join(save_model_path, "enc_E%i.pytorch"%(epoch))
                    checkpoint_path_decoder = os.path.join(save_model_path, "dec_E%i.pytorch"%(epoch))
                    torch.save(S2S, checkpoint_path_encoder)
                    torch.save(decoder, checkpoint_path_decoder)

            else:

                loss = loss.item()/max_target_len

                var_losses.append(loss)
                epoch_var_losses.append(loss)

                step += 1

                if iteration % 10 == 0 or iteration == last_batch:
                    print("Valid Batch %04d/%i, Loss %9.4f"%( iteration, last_batch, loss))
                    np.savez('./var_loss.npz', L=var_losses)

            del loss
            del encoder_outputs
            del encoder_hidden_state
        iteration = 0

    # Mean over this epoch's validation batches only, so epochs can be compared.
    epoch_mean_loss = np.mean(np.array(epoch_var_losses))
    print("Model saved at %s"%save_model_path)
    print("Epoch %02d/%i, Mean valid loss %9.4f"%( epoch, epochs, epoch_mean_loss))
    avg_losses.append(epoch_mean_loss)
    np.savez('./avg_losses.npz', L=avg_losses)
        

In [None]:
# Display the checkpoint directory and the last value of the training loop's epoch counter.
save_model_path,epoch

In [None]:
# Point at the checkpoint directory of one specific earlier run (hard-coded
# timestamp); this replaces the directory created for the current session.
checkpoint_run = '2018-Jul-20-01-38-51'
save_model_path = os.path.join('won', checkpoint_run)

In [None]:
# Checkpoint files written for epoch 9 of the selected run.
encoder_file, decoder_file = 'enc_E9.pytorch', 'dec_E9.pytorch'
checkpoint_path_encoder = os.path.join(save_model_path, encoder_file)
checkpoint_path_decoder = os.path.join(save_model_path, decoder_file)

In [None]:
# Restore the two modules that the training loop saved as whole pickled objects.
# SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints you produced yourself.
# NOTE(review): each file carries its own copy of the embedding, so after
# loading S2S and decoder presumably no longer share one embedding object.
S2S, decoder = (
    torch.load(checkpoint_path)
    for checkpoint_path in (checkpoint_path_encoder, checkpoint_path_decoder)
)

In [None]:
# Put both modules in evaluation mode (this turns off the decoder's word dropout).
S2S.eval()
decoder.eval()

In [None]:
# Evaluate on the third file group found in the stories file.
eval_group = files_group[2]
input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index = index_data[eval_group]

In [None]:
# Slice bounds applied to every evaluation list as [start:end].
start = 0
# None keeps everything up to and including the last example; an end of -1
# would silently leave the last example out.
end = None

In [None]:
# Unpadded lengths = total length minus the number of padding ids.
# The padding id has to come from the vocabulary: id 0 belongs to [UNK], so
# counting zeros would subtract unknown words instead of padding.
pad_token_id = vocab.word2id(PAD_TOKEN)

inputs_len = [len(sentence) - sentence.count(pad_token_id)
              for sentence in input_index[start:end]]
dec_inputs_len = [len(sentence) - sentence.count(pad_token_id)
                  for sentence in dec_inputs_index[start:end]]

In [None]:
# Build the evaluation batch on the device the loaded model lives on.
device = next(S2S.parameters()).device
x_ = torch.tensor(input_index[start:end], dtype=torch.long, device=device)
Y_X = torch.tensor(dec_inputs_index[start:end], dtype=torch.long, device=device)

batch_size = x_.size(0)
length = torch.tensor(inputs_len, dtype=torch.long, device=device)
length_y = torch.tensor(dec_inputs_len, dtype=torch.long, device=device)

# pack_padded_sequence is fed rows sorted by decreasing length;
# reversed_idx maps the sorted rows back to the original order.
sorted_lengths, sorted_idx = torch.sort(length, descending=True)
_, reversed_idx = torch.sort(sorted_idx)
input_sequence = x_[sorted_idx]

# Inference only: no autograd graph is needed.
with torch.no_grad():
    input_embedding = S2S.embedding(input_sequence)
    packed_input = rnn_utils.pack_padded_sequence(input_embedding, sorted_lengths.tolist(), batch_first=True)
    # encoder_outputs is still a PackedSequence here; it is unpacked in the next cell.
    encoder_outputs, encoder_hidden_state, encoder_hidden_state_attn = S2S.encoder(packed_input, batch_size)

# The hidden states come back in sorted order; restore the original order so
# they line up with the unsorted encoder outputs and with the targets.
if encoder_hidden_state.dim() > 1:
    encoder_hidden_state = encoder_hidden_state[reversed_idx]
encoder_hidden_state_attn = encoder_hidden_state_attn[reversed_idx]

In [None]:
# Unpack the encoder outputs to a padded batch-first tensor and undo the length sort.
# Note: this rebinds encoder_outputs from the packed form to a tensor, so the
# cell can only be run once after the encoding cell.
unpacked = rnn_utils.pad_packed_sequence(encoder_outputs, batch_first=True)
padded_outputs = unpacked[0].contiguous()
_, reversed_idx = torch.sort(sorted_idx.cuda())
encoder_outputs = padded_outputs[reversed_idx]

In [None]:
# First decoder input: one start-of-sequence id per example.
n_sequences = encoder_outputs.size(0)
decoder_input = Variable(torch.cuda.LongTensor(n_sequences)).fill_(sos_idx)

In [None]:
# Target ids, transposed so the first axis is the decoding step.
eval_targets = target_index[start:end]
Y = Variable(torch.cuda.LongTensor(eval_targets)).transpose(1, 0)

In [None]:
# The decoder expects a (B, H) state; give a 1-D state a leading batch axis.
decoder_hidden = (
    encoder_hidden_state.unsqueeze(0)
    if encoder_hidden_state.dim() == 1
    else encoder_hidden_state
)

In [None]:

# Greedy decoding: at every step the most probable word is emitted and fed back
# as the next input. decoded_words[i] holds step i's word for every example.
decoded_words = []
# Attention weights per example, decoding step and encoder position.
decoder_attentions = torch.zeros(encoder_outputs.size(0), dec_max_len, enc_max_len)

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for i in range(dec_max_len):
        decoder_output, decoder_context, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden.float(), encoder_outputs.float())

        # The weights come back as (B, 1, T); drop only the singleton axis so the
        # (B, T) result matches the slice it is accumulated into for any B.
        step_attention = decoder_attention.squeeze(1).cpu()
        decoder_attentions[:step_attention.size(0), i, :step_attention.size(1)] += step_attention

        topv, topi = decoder_output.data.topk(1)
        # Normalise to (B, 1) so a batch of one is handled like any other batch.
        topi = topi.view(-1, 1)

        decoded_words.append([vocab.id2word(int(top[0])) for top in topi.cpu().numpy()])

        decoder_input = topi
        # The decoder returns (1, B, H) and expects (B, H) on the next call.
        decoder_hidden = decoder_hidden.squeeze(0)

In [None]:
# Predicted words per example, cut just before the first stop token.
# decoded_words is step-major, so it is transposed to example-major first.
inferences = []
for decoded_sent in np.array(decoded_words).transpose():
    kept_words = list(decoded_sent)
    if STOP_DECODING in kept_words:
        kept_words = kept_words[:kept_words.index(STOP_DECODING)]
    inferences.append(kept_words)

# Reference words per example, cut just before the first end-of-sequence id.
targets_result = []
for inputs in target_index[start:end]:
    cut = inputs.index(eos_idx) if eos_idx in inputs else len(inputs)
    targets_result.append([vocab.id2word(word) for word in inputs[:cut]])

# Source words per example (padding ids are converted like any other id).
inputs_result = [
    [vocab.id2word(word) for word in inputs]
    for inputs in input_index[start:end]
]

In [None]:
# Print source / target / inference with their ROUGE scores and plot the
# attention matrix of every example.
for source,target,inference, decoder_attention in zip(inputs_result, targets_result, inferences, decoder_attentions):
    print('*********************************************************************')
    print('source : '+' '.join(source))

    print('*********************************************************************')
    print('target : '+' '.join(target))

    print('*********************************************************************')
    print('inference : '+' '.join(inference))

    print('*********************************************************************')
    try:
        # get_scores takes the hypothesis first and the reference second; with the
        # arguments the other way round, recall and precision are swapped.
        scores = rouge.get_scores(' '.join(inference), ' '.join(target))
    except ValueError:
        # Raised for an empty hypothesis or reference; keep going with the next example.
        print('ROUGE could not be computed for this example')
    else:
        for score in scores[0]:
            print(score +' : '+str(scores[0][score]))

    plt.matshow(decoder_attention.numpy())

In [None]:
# Per-example ROUGE recall and F1, collected for averaging in the next cell.
r1_recall = []
r2_recall = []
rl_recall = []

r1_f1 = []
r2_f1 = []
rl_f1 = []

# Number of examples for which ROUGE could not be computed.
error = 0

for source,target,inference, decoder_attention in tqdm.tqdm(zip(inputs_result, targets_result, inferences, decoder_attentions)):
    try:
        # Hypothesis first, reference second; the other order swaps recall and precision.
        scores = rouge.get_scores(' '.join(inference), ' '.join(target))
    except ValueError:
        error +=1
        print('******************************************')
        print('target'+' '.join(target))
        print('inference'+' '.join(inference))
        # Skip this example: falling through would record the previous example's
        # scores a second time (or fail if this is the first example).
        # NOTE(review): skipped examples are left out of the averages — confirm
        # that is preferred over counting them as zero.
        continue
    r1_recall.append(scores[0]['rouge-1']['r'])
    r2_recall.append(scores[0]['rouge-2']['r'])
    rl_recall.append(scores[0]['rouge-l']['r'])

    r1_f1.append(scores[0]['rouge-1']['f'])
    r2_f1.append(scores[0]['rouge-2']['f'])
    rl_f1.append(scores[0]['rouge-l']['f'])
print('error sentence : ' + str(error))

In [None]:
# Mean recall and F1 per ROUGE variant, as percentages.
rouge_summary = (
    ('ROUGE-1', r1_recall, r1_f1),
    ('ROUGE-2', r2_recall, r2_f1),
    ('ROUGE-l', rl_recall, rl_f1),
)
for position, (label, recalls, f1_scores) in enumerate(rouge_summary):
    if position > 0:
        # Separator between consecutive variants.
        print('********************************************************************')
    print(label + ' recall : ' + str(sum(recalls)*100 / len(recalls)))
    print(label + ' F1 : ' + str(sum(f1_scores)*100 / len(f1_scores)))