In [1]:
import json
import gensim
import os
import json
import tqdm
import time
import numpy as np
import nltk
import random

%matplotlib  inline
import matplotlib.pyplot as plt

from rouge import Rouge 
rouge = Rouge()

In [2]:
% time
with open('./data_pointer_example.txt', 'r', encoding='UTF-8') as f:
    stories = json.load(f)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 7.15 µs


In [3]:
model_embedding = np.load('Word2vec_pointer.npz')['E']

In [4]:
ts = time.strftime('%Y-%b-%d-%H-%M-%S', time.gmtime())

save_model_path = os.path.join('won', ts)
os.makedirs('./'+save_model_path)

In [5]:
class Vocab(object):

  def __init__(self, vocab_file, max_size):
    self._word_to_id = {}
    self._id_to_word = {}
    self._count = 0 # keeps track of total number of words in the Vocab

    # [UNK], [PAD], [START] and [STOP] get the ids 0,1,2,3.
    for w in [UNKNOWN_TOKEN, PAD_TOKEN, START_DECODING, STOP_DECODING]:
      self._word_to_id[w] = self._count
      self._id_to_word[self._count] = w
      self._count += 1

    # Read the vocab file and add words up to max_size
    with open(vocab_file, 'r', encoding='utf-8') as vocab_f:
      for line in vocab_f:
        pieces = line.split()
        if len(pieces) != 2:
          print ('Warning: incorrectly formatted line in vocabulary file: %s\n' % line)
          continue
        w = pieces[0]
        if w in [SENTENCE_START, SENTENCE_END, UNKNOWN_TOKEN, PAD_TOKEN, START_DECODING, STOP_DECODING]:
          raise Exception('<s>, </s>, [UNK], [PAD], [START] and [STOP] shouldn\'t be in the vocab file, but %s is' % w)
        if w in self._word_to_id:
          raise Exception('Duplicated word in vocabulary file: %s' % w)
        self._word_to_id[w] = self._count
        self._id_to_word[self._count] = w
        self._count += 1
        if max_size != 0 and self._count >= max_size:
          print ("max_size of vocab was specified as %i; we now have %i words. Stopping reading." % (max_size, self._count))
          break

    print ("Finished constructing vocabulary of %i total words. Last word added: %s" % (self._count, self._id_to_word[self._count-1]))

  def word2id(self, word):
    if word not in self._word_to_id:
      return self._word_to_id[UNKNOWN_TOKEN]
    return self._word_to_id[word]

  def id2word(self, word_id):
    if word_id not in self._id_to_word:
      raise ValueError('Id not found in vocab: %d' % word_id)
    return self._id_to_word[word_id]

  def size(self):
    return self._count

  def write_metadata(self, fpath):
    print ("Writing word embedding metadata file to %s..." % (fpath))
    with open(fpath, "w") as f:
      fieldnames = ['word']
      writer = csv.DictWriter(f, delimiter="\t", fieldnames=fieldnames)
      for i in xrange(self.size()):
        writer.writerow({"word": self._id_to_word[i]})

In [6]:
# <s> and </s> are used in the data files to segment the abstracts into sentences. They don't receive vocab ids.
SENTENCE_START = '<s>'
SENTENCE_END = '</s>'

PAD_TOKEN = '[PAD]' # This has a vocab id, which is used to pad the encoder input, decoder input and target sequence
UNKNOWN_TOKEN = '[UNK]' # This has a vocab id, which is used to represent out-of-vocabulary words
START_DECODING = '[START]' # This has a vocab id, which is used at the start of every decoder input sequence
STOP_DECODING = '[STOP]' # This has a vocab id, which is used at the end of untruncated target sequences

# Note: none of <s>, </s>, [PAD], [UNK], [START], [STOP] should appear in the vocab file.

In [7]:
vocab = Vocab('./vocab', 50000)









max_size of vocab was specified as 50000; we now have 50000 words. Stopping reading.
Finished constructing vocabulary of 50000 total words. Last word added: long-delayed


In [8]:
class Example(object):

  def __init__(self, article, abstract_sentences, vocab):
    # Get ids of special tokens
    start_decoding = vocab.word2id(START_DECODING)
    stop_decoding = vocab.word2id(STOP_DECODING)

    # Process the article
    article_words = article.split()
    if len(article_words) > 100:
      article_words = article_words[:100]
    self.enc_len = len(article_words) # store the length after truncation but before padding
    self.enc_input = [vocab.word2id(w) for w in article_words] # list of word ids; OOVs are represented by the id for UNK token

    # Process the abstract
    abstract = ' '.join(abstract_sentences) # string
    abstract_words = abstract.split() # list of strings
    abs_ids = [vocab.word2id(w) for w in abstract_words] # list of word ids; OOVs are represented by the id for UNK token

    # Get the decoder input sequence and target sequence
    self.dec_input, self.target = self.get_dec_inp_targ_seqs(abs_ids, 20, start_decoding, stop_decoding)
    self.dec_len = len(self.dec_input)

    # If using pointer-generator mode, we need to store some extra info
    if True:
      # Store a version of the enc_input where in-article OOVs are represented by their temporary OOV id; also store the in-article OOVs words themselves
      self.enc_input_extend_vocab, self.article_oovs = article2ids(article_words, vocab)

      # Get a verison of the reference summary where in-article OOVs are represented by their temporary article OOV id
      abs_ids_extend_vocab = abstract2ids(abstract_words, vocab, self.article_oovs)

      # Overwrite decoder target sequence so it uses the temp article OOV ids
      _, self.target = self.get_dec_inp_targ_seqs(abs_ids_extend_vocab, 20, start_decoding, stop_decoding)

    # Store the original strings
    self.original_article = article
    self.original_abstract = abstract
    self.original_abstract_sents = abstract_sentences


  def get_dec_inp_targ_seqs(self, sequence, max_len, start_id, stop_id):
    inp = [start_id] + sequence[:]
    target = sequence[:]
    if len(inp) > max_len: # truncate
      inp = inp[:max_len]
      target = target[:max_len] # no end_token
    else: # no truncation
      target.append(stop_id) # end token
    assert len(inp) == len(target)
    return inp, target


  def pad_decoder_inp_targ(self, max_len, pad_id):
    while len(self.dec_input) < max_len:
      self.dec_input.append(pad_id)
    while len(self.target) < max_len:
      self.target.append(pad_id)


  def pad_encoder_input(self, max_len, pad_id):
    while len(self.enc_input) < max_len:
      self.enc_input.append(pad_id)
    if True:
      while len(self.enc_input_extend_vocab) < max_len:
        self.enc_input_extend_vocab.append(pad_id)
        
def article2ids(article_words, vocab):
  ids = []
  oovs = []
  unk_id = vocab.word2id(UNKNOWN_TOKEN)
  for w in article_words:
    i = vocab.word2id(w)
    if i == unk_id: # If w is OOV
      if w not in oovs: # Add to list of OOVs
        oovs.append(w)
      oov_num = oovs.index(w) # This is 0 for the first article OOV, 1 for the second article OOV...
      ids.append(vocab.size() + oov_num) # This is e.g. 50000 for the first article OOV, 50001 for the second...
    else:
      ids.append(i)
  return ids, oovs


def abstract2ids(abstract_words, vocab, article_oovs):
  ids = []
  unk_id = vocab.word2id(UNKNOWN_TOKEN)
  for w in abstract_words:
    i = vocab.word2id(w)
    if i == unk_id: # If w is an OOV word
        #print(abstract_words)
        ids.append(i) 
#       if w in article_oovs: # If w is an in-article OOV
#         vocab_idx = vocab.size() + article_oovs.index(w) # Map to its temporary article OOV number
#         ids.append(vocab_idx)
#       else: # If w is an out-of-article OOV
#         ids.append(unk_id) # Map to the UNK token id
    else:
        ids.append(i)
  return ids

In [9]:
start_decoding = vocab.word2id(START_DECODING)
stop_decoding = vocab.word2id(STOP_DECODING)

input_index=[]
dec_inputs_index=[]
target_index=[]
inputs_len=[]
dec_inputs_len=[]

index_data = {}

zero_len = 0

dec_max_len = 20
enc_max_len = 100

pad_id = vocab.word2id(PAD_TOKEN)
files_group = list(stories.keys())

for file_group in tqdm.tqdm(files_group):
    for file in tqdm.tqdm(list(stories[file_group].keys())):       
        for data in list(stories[file_group][file].keys()):
            if stories[file_group][file][data]['abstract_sentences'] == []:
                print('*********************************************')
                abstract = stories[file_group][file][data]['abstract']
                abstract_sentences = []
                article = stories[file_group][file][data]['article']
                example=Example(article=article,abstract_sentences=abstract_sentences,vocab=vocab)
                example.pad_decoder_inp_targ(dec_max_len,pad_id)
                example.pad_encoder_input(enc_max_len,pad_id)
            else:
                abstract = stories[file_group][file][data]['abstract']
                abstract_sentences = [stories[file_group][file][data]['abstract_sentences'][0]]
                article = stories[file_group][file][data]['article']
                example=Example(article=article,abstract_sentences=abstract_sentences,vocab=vocab)
                example.pad_decoder_inp_targ(dec_max_len,pad_id)
                example.pad_encoder_input(enc_max_len,pad_id)
            if example.enc_len <= 0 or example.dec_len <= 0:
                print(file_group, file, data)
                zero_len +=1
                pass
            else:
                input_index.append(example.enc_input)
                dec_inputs_index.append(example.dec_input)
                target_index.append(example.target)
                inputs_len.append(example.enc_len)
                dec_inputs_len.append(example.dec_len)
    print('********************************************************************************')
    print(len(input_index))
    print('********************************************************************************')

    index_data[file_group] = (input_index,inputs_len,dec_inputs_index,dec_inputs_len,target_index)
    input_index=[]
    dec_inputs_index=[]
    target_index=[]
    inputs_len=[]
    dec_inputs_len=[]

  0%|          | 0/3 [00:00<?, ?it/s]
  0%|          | 0/288 [00:00<?, ?it/s][A
  0%|          | 1/288 [00:00<00:45,  6.25it/s][A

./chunked/train_*.bin ./chunked\train_000.bin 598
./chunked/train_*.bin ./chunked\train_000.bin 611
./chunked/train_*.bin ./chunked\train_000.bin 676
./chunked/train_*.bin ./chunked\train_000.bin 687
./chunked/train_*.bin ./chunked\train_000.bin 986
./chunked/train_*.bin ./chunked\train_001.bin 276
./chunked/train_*.bin ./chunked\train_001.bin 339
./chunked/train_*.bin ./chunked\train_001.bin 561
./chunked/train_*.bin ./chunked\train_001.bin 640
./chunked/train_*.bin ./chunked\train_001.bin 825
./chunked/train_*.bin ./chunked\train_001.bin 903



  1%|          | 2/288 [00:00<00:45,  6.33it/s][A
  1%|          | 3/288 [00:00<00:46,  6.14it/s][A

./chunked/train_*.bin ./chunked\train_002.bin 44
./chunked/train_*.bin ./chunked\train_002.bin 198
./chunked/train_*.bin ./chunked\train_002.bin 727
./chunked/train_*.bin ./chunked\train_002.bin 941
./chunked/train_*.bin ./chunked\train_002.bin 956
./chunked/train_*.bin ./chunked\train_003.bin 56
./chunked/train_*.bin ./chunked\train_003.bin 152



  1%|▏         | 4/288 [00:00<00:47,  6.02it/s][A
  2%|▏         | 5/288 [00:00<00:46,  6.09it/s][A

./chunked/train_*.bin ./chunked\train_003.bin 978
./chunked/train_*.bin ./chunked\train_004.bin 192
./chunked/train_*.bin ./chunked\train_004.bin 345
./chunked/train_*.bin ./chunked\train_004.bin 358
./chunked/train_*.bin ./chunked\train_004.bin 409
./chunked/train_*.bin ./chunked\train_004.bin 866
./chunked/train_*.bin ./chunked\train_004.bin 930



  2%|▏         | 6/288 [00:01<00:47,  5.88it/s][A

./chunked/train_*.bin ./chunked\train_005.bin 110
./chunked/train_*.bin ./chunked\train_005.bin 228
./chunked/train_*.bin ./chunked\train_005.bin 402



  2%|▏         | 7/288 [00:01<00:48,  5.80it/s][A

./chunked/train_*.bin ./chunked\train_006.bin 429
./chunked/train_*.bin ./chunked\train_006.bin 556
./chunked/train_*.bin ./chunked\train_006.bin 722
./chunked/train_*.bin ./chunked\train_006.bin 962
./chunked/train_*.bin ./chunked\train_007.bin 235



  3%|▎         | 8/288 [00:01<00:49,  5.65it/s][A
  3%|▎         | 9/288 [00:02<01:03,  4.39it/s][A

./chunked/train_*.bin ./chunked\train_008.bin 753
./chunked/train_*.bin ./chunked\train_008.bin 770
./chunked/train_*.bin ./chunked\train_009.bin 347



  3%|▎         | 10/288 [00:02<01:01,  4.49it/s][A
  4%|▍         | 11/288 [00:02<01:00,  4.55it/s][A
  4%|▍         | 12/288 [00:02<00:59,  4.67it/s][A
  5%|▍         | 13/288 [00:02<00:57,  4.75it/s][A
  5%|▍         | 14/288 [00:02<00:56,  4.83it/s][A

./chunked/train_*.bin ./chunked\train_013.bin 161
./chunked/train_*.bin ./chunked\train_013.bin 562
./chunked/train_*.bin ./chunked\train_013.bin 563
./chunked/train_*.bin ./chunked\train_013.bin 564
./chunked/train_*.bin ./chunked\train_013.bin 565
./chunked/train_*.bin ./chunked\train_013.bin 566
./chunked/train_*.bin ./chunked\train_013.bin 567
./chunked/train_*.bin ./chunked\train_013.bin 568
./chunked/train_*.bin ./chunked\train_013.bin 569
./chunked/train_*.bin ./chunked\train_013.bin 570
./chunked/train_*.bin ./chunked\train_013.bin 571
./chunked/train_*.bin ./chunked\train_013.bin 572
./chunked/train_*.bin ./chunked\train_013.bin 573
./chunked/train_*.bin ./chunked\train_013.bin 574
./chunked/train_*.bin ./chunked\train_013.bin 936
./chunked/train_*.bin ./chunked\train_013.bin 937
./chunked/train_*.bin ./chunked\train_013.bin 938
./chunked/train_*.bin ./chunked\train_013.bin 998
./chunked/train_*.bin ./chunked\train_014.bin 332



  5%|▌         | 15/288 [00:03<00:55,  4.89it/s][A
  6%|▌         | 16/288 [00:03<00:55,  4.94it/s][A

./chunked/train_*.bin ./chunked\train_015.bin 313
./chunked/train_*.bin ./chunked\train_015.bin 314
./chunked/train_*.bin ./chunked\train_015.bin 419
./chunked/train_*.bin ./chunked\train_015.bin 420
./chunked/train_*.bin ./chunked\train_015.bin 428
./chunked/train_*.bin ./chunked\train_015.bin 501
./chunked/train_*.bin ./chunked\train_015.bin 526
./chunked/train_*.bin ./chunked\train_015.bin 898
./chunked/train_*.bin ./chunked\train_015.bin 903
./chunked/train_*.bin ./chunked\train_016.bin 108
./chunked/train_*.bin ./chunked\train_016.bin 111
./chunked/train_*.bin ./chunked\train_016.bin 113
./chunked/train_*.bin ./chunked\train_016.bin 114
./chunked/train_*.bin ./chunked\train_016.bin 115



  6%|▌         | 17/288 [00:03<00:54,  4.97it/s][A
  6%|▋         | 18/288 [00:03<00:53,  5.02it/s][A
  7%|▋         | 19/288 [00:03<00:53,  5.05it/s][A
  7%|▋         | 20/288 [00:03<00:52,  5.10it/s][A

./chunked/train_*.bin ./chunked\train_019.bin 413
./chunked/train_*.bin ./chunked\train_020.bin 28
./chunked/train_*.bin ./chunked\train_020.bin 29
./chunked/train_*.bin ./chunked\train_020.bin 104



  7%|▋         | 21/288 [00:04<00:52,  5.13it/s][A
  8%|▊         | 22/288 [00:04<00:51,  5.14it/s][A

./chunked/train_*.bin ./chunked\train_021.bin 622
./chunked/train_*.bin ./chunked\train_021.bin 958
./chunked/train_*.bin ./chunked\train_022.bin 200



  8%|▊         | 23/288 [00:04<00:51,  5.17it/s][A
  8%|▊         | 24/288 [00:04<00:50,  5.20it/s][A

./chunked/train_*.bin ./chunked\train_023.bin 376
./chunked/train_*.bin ./chunked\train_024.bin 14
./chunked/train_*.bin ./chunked\train_024.bin 314
./chunked/train_*.bin ./chunked\train_024.bin 350
./chunked/train_*.bin ./chunked\train_024.bin 363
./chunked/train_*.bin ./chunked\train_024.bin 395



  9%|▊         | 25/288 [00:04<00:50,  5.21it/s][A
  9%|▉         | 26/288 [00:04<00:49,  5.25it/s][A
  9%|▉         | 27/288 [00:05<00:49,  5.25it/s][A

./chunked/train_*.bin ./chunked\train_026.bin 300
./chunked/train_*.bin ./chunked\train_027.bin 149



 10%|▉         | 28/288 [00:05<00:49,  5.27it/s][A
 10%|█         | 29/288 [00:05<00:48,  5.30it/s][A
 10%|█         | 30/288 [00:05<00:48,  5.31it/s][A

./chunked/train_*.bin ./chunked\train_029.bin 611
./chunked/train_*.bin ./chunked\train_030.bin 677



 11%|█         | 31/288 [00:05<00:48,  5.31it/s][A
 11%|█         | 32/288 [00:05<00:47,  5.33it/s][A

./chunked/train_*.bin ./chunked\train_030.bin 773
./chunked/train_*.bin ./chunked\train_030.bin 838
./chunked/train_*.bin ./chunked\train_031.bin 400



 11%|█▏        | 33/288 [00:06<00:47,  5.34it/s][A

./chunked/train_*.bin ./chunked\train_032.bin 753
./chunked/train_*.bin ./chunked\train_033.bin 415



 12%|█▏        | 34/288 [00:06<00:47,  5.34it/s][A
 12%|█▏        | 35/288 [00:06<00:47,  5.36it/s][A

./chunked/train_*.bin ./chunked\train_034.bin 726



 12%|█▎        | 36/288 [00:06<00:46,  5.37it/s][A
 13%|█▎        | 37/288 [00:06<00:46,  5.37it/s][A
 13%|█▎        | 38/288 [00:07<00:46,  5.39it/s][A
 14%|█▎        | 39/288 [00:07<00:46,  5.39it/s][A
 14%|█▍        | 40/288 [00:07<00:45,  5.41it/s][A
 14%|█▍        | 41/288 [00:07<00:45,  5.43it/s][A

./chunked/train_*.bin ./chunked\train_040.bin 501
./chunked/train_*.bin ./chunked\train_041.bin 440



 15%|█▍        | 42/288 [00:07<00:45,  5.44it/s][A
 15%|█▍        | 43/288 [00:07<00:44,  5.45it/s][A
 15%|█▌        | 44/288 [00:08<00:44,  5.46it/s][A

./chunked/train_*.bin ./chunked\train_043.bin 685



 16%|█▌        | 45/288 [00:08<00:44,  5.46it/s][A
 16%|█▌        | 46/288 [00:08<00:44,  5.47it/s][A

./chunked/train_*.bin ./chunked\train_045.bin 40
./chunked/train_*.bin ./chunked\train_045.bin 369



 16%|█▋        | 47/288 [00:08<00:44,  5.47it/s][A
 17%|█▋        | 48/288 [00:08<00:43,  5.47it/s][A
 17%|█▋        | 49/288 [00:08<00:43,  5.49it/s][A
 17%|█▋        | 50/288 [00:09<00:43,  5.49it/s][A
 18%|█▊        | 51/288 [00:09<00:43,  5.49it/s][A

./chunked/train_*.bin ./chunked\train_049.bin 937



 18%|█▊        | 52/288 [00:09<00:42,  5.50it/s][A
 18%|█▊        | 53/288 [00:09<00:42,  5.50it/s][A
 19%|█▉        | 54/288 [00:09<00:42,  5.50it/s][A
 19%|█▉        | 55/288 [00:09<00:42,  5.51it/s][A

./chunked/train_*.bin ./chunked\train_053.bin 760
./chunked/train_*.bin ./chunked\train_054.bin 642



 19%|█▉        | 56/288 [00:10<00:42,  5.50it/s][A
 20%|█▉        | 57/288 [00:10<00:41,  5.50it/s][A

./chunked/train_*.bin ./chunked\train_055.bin 890
./chunked/train_*.bin ./chunked\train_056.bin 591
./chunked/train_*.bin ./chunked\train_056.bin 676



 20%|██        | 58/288 [00:10<00:41,  5.51it/s][A
 20%|██        | 59/288 [00:10<00:41,  5.51it/s][A

./chunked/train_*.bin ./chunked\train_058.bin 341
./chunked/train_*.bin ./chunked\train_058.bin 447



 21%|██        | 60/288 [00:10<00:41,  5.51it/s][A
 21%|██        | 61/288 [00:11<00:41,  5.51it/s][A
 22%|██▏       | 62/288 [00:11<00:40,  5.52it/s][A
 22%|██▏       | 63/288 [00:11<00:40,  5.52it/s][A
 22%|██▏       | 64/288 [00:11<00:40,  5.52it/s][A
 23%|██▎       | 65/288 [00:11<00:40,  5.52it/s][A
 23%|██▎       | 66/288 [00:11<00:40,  5.52it/s][A

./chunked/train_*.bin ./chunked\train_065.bin 483



 23%|██▎       | 67/288 [00:12<00:40,  5.52it/s][A
 24%|██▎       | 68/288 [00:12<00:39,  5.52it/s][A

./chunked/train_*.bin ./chunked\train_067.bin 114
./chunked/train_*.bin ./chunked\train_067.bin 189



 24%|██▍       | 69/288 [00:12<00:39,  5.52it/s][A
 24%|██▍       | 70/288 [00:12<00:39,  5.52it/s][A
 25%|██▍       | 71/288 [00:12<00:39,  5.53it/s][A

./chunked/train_*.bin ./chunked\train_069.bin 903



 25%|██▌       | 72/288 [00:13<00:39,  5.52it/s][A
 25%|██▌       | 73/288 [00:13<00:41,  5.23it/s][A
 26%|██▌       | 74/288 [00:14<00:40,  5.24it/s][A

./chunked/train_*.bin ./chunked\train_072.bin 920
./chunked/train_*.bin ./chunked\train_072.bin 942



 26%|██▌       | 75/288 [00:14<00:40,  5.24it/s][A
 26%|██▋       | 76/288 [00:14<00:40,  5.24it/s][A
 27%|██▋       | 77/288 [00:14<00:40,  5.24it/s][A
 27%|██▋       | 78/288 [00:14<00:39,  5.25it/s][A
 27%|██▋       | 79/288 [00:15<00:39,  5.25it/s][A
 28%|██▊       | 80/288 [00:15<00:39,  5.26it/s][A
 28%|██▊       | 81/288 [00:15<00:39,  5.27it/s][A
 28%|██▊       | 82/288 [00:15<00:39,  5.26it/s][A

./chunked/train_*.bin ./chunked\train_081.bin 686



 29%|██▉       | 83/288 [00:15<00:38,  5.27it/s][A
 29%|██▉       | 84/288 [00:15<00:38,  5.27it/s][A

./chunked/train_*.bin ./chunked\train_083.bin 574
./chunked/train_*.bin ./chunked\train_083.bin 924



 30%|██▉       | 85/288 [00:16<00:38,  5.27it/s][A
 30%|██▉       | 86/288 [00:16<00:38,  5.28it/s][A
 30%|███       | 87/288 [00:16<00:38,  5.28it/s][A
 31%|███       | 88/288 [00:16<00:37,  5.28it/s][A
 31%|███       | 89/288 [00:16<00:37,  5.28it/s][A
 31%|███▏      | 90/288 [00:17<00:37,  5.29it/s][A
 32%|███▏      | 91/288 [00:17<00:37,  5.29it/s][A
 32%|███▏      | 92/288 [00:17<00:37,  5.29it/s][A
 32%|███▏      | 93/288 [00:17<00:36,  5.29it/s][A
 33%|███▎      | 94/288 [00:17<00:36,  5.29it/s][A
 33%|███▎      | 95/288 [00:17<00:36,  5.30it/s][A
 33%|███▎      | 96/288 [00:18<00:36,  5.29it/s][A
 34%|███▎      | 97/288 [00:18<00:36,  5.29it/s][A
 34%|███▍      | 98/288 [00:18<00:35,  5.30it/s][A
 34%|███▍      | 99/288 [00:18<00:35,  5.30it/s][A
 35%|███▍      | 100/288 [00:18<00:35,  5.30it/s][A
 35%|███▌      | 101/288 [00:19<00:35,  5.31it/s][A
 35%|███▌      | 102/288 [00:19<00:35,  5.31it/s][A
 36%|███▌      | 103/288 [00:19<00:34,  5.31it/s][A
 36%|██

*********************************************
./chunked/train_*.bin ./chunked\train_191.bin 183



 67%|██████▋   | 193/288 [00:36<00:17,  5.35it/s][A
 67%|██████▋   | 194/288 [00:36<00:17,  5.35it/s][A
 68%|██████▊   | 195/288 [00:36<00:17,  5.35it/s][A
 68%|██████▊   | 196/288 [00:36<00:17,  5.35it/s][A
 68%|██████▊   | 197/288 [00:36<00:16,  5.35it/s][A
 69%|██████▉   | 198/288 [00:36<00:16,  5.36it/s][A
 69%|██████▉   | 199/288 [00:37<00:16,  5.36it/s][A
 69%|██████▉   | 200/288 [00:37<00:16,  5.36it/s][A
 70%|██████▉   | 201/288 [00:37<00:16,  5.37it/s][A
 70%|███████   | 202/288 [00:37<00:16,  5.37it/s][A
 70%|███████   | 203/288 [00:37<00:15,  5.37it/s][A
 71%|███████   | 204/288 [00:38<00:15,  5.37it/s][A
 71%|███████   | 205/288 [00:38<00:15,  5.37it/s][A
 72%|███████▏  | 206/288 [00:38<00:15,  5.37it/s][A
 72%|███████▏  | 207/288 [00:38<00:15,  5.38it/s][A
 72%|███████▏  | 208/288 [00:38<00:14,  5.38it/s][A
 73%|███████▎  | 209/288 [00:38<00:14,  5.38it/s][A
 73%|███████▎  | 210/288 [00:39<00:14,  5.38it/s][A
 73%|███████▎  | 211/288 [00:39<00:14,  5.39i

********************************************************************************
287112
********************************************************************************



 14%|█▍        | 2/14 [00:00<00:02,  5.89it/s][A
 21%|██▏       | 3/14 [00:00<00:02,  5.45it/s][A
 29%|██▊       | 4/14 [00:00<00:01,  5.54it/s][A
 36%|███▌      | 5/14 [00:00<00:01,  5.61it/s][A
 43%|████▎     | 6/14 [00:01<00:01,  5.65it/s][A
 50%|█████     | 7/14 [00:01<00:01,  5.56it/s][A
 57%|█████▋    | 8/14 [00:01<00:01,  5.39it/s][A
 64%|██████▍   | 9/14 [00:01<00:00,  5.43it/s][A
 71%|███████▏  | 10/14 [00:01<00:00,  5.43it/s][A
 79%|███████▊  | 11/14 [00:02<00:00,  5.42it/s][A
 86%|████████▌ | 12/14 [00:02<00:00,  5.41it/s][A
 93%|█████████▎| 13/14 [00:02<00:00,  5.44it/s][A
 67%|██████▋   | 2/3 [00:56<00:28, 28.38s/it]s][A
  0%|          | 0/12 [00:00<?, ?it/s][A
  8%|▊         | 1/12 [00:00<00:01,  6.27it/s][A

********************************************************************************
13368
********************************************************************************



 17%|█▋        | 2/12 [00:00<00:01,  5.98it/s][A
 25%|██▌       | 3/12 [00:00<00:01,  6.13it/s][A
 33%|███▎      | 4/12 [00:00<00:01,  6.21it/s][A
 42%|████▏     | 5/12 [00:00<00:01,  6.14it/s][A
 50%|█████     | 6/12 [00:00<00:00,  6.11it/s][A
 58%|█████▊    | 7/12 [00:01<00:00,  6.07it/s][A
 67%|██████▋   | 8/12 [00:01<00:00,  6.02it/s][A
 75%|███████▌  | 9/12 [00:01<00:00,  6.01it/s][A
 83%|████████▎ | 10/12 [00:01<00:00,  5.99it/s][A
 92%|█████████▏| 11/12 [00:01<00:00,  5.95it/s][A
100%|██████████| 3/3 [00:58<00:00, 19.56s/it]s][A

********************************************************************************
11490
********************************************************************************





In [14]:
for file_group in files_group:
    print(file_group)
    (input_index,inputs_len,dec_inputs_index,dec_inputs_len,target_index) = index_data[file_group]
    print(len(input_index))
    start = int(len(input_index)*0.7)
    end = -1
    index_data[file_group] = (input_index[start:end],inputs_len[start:end],dec_inputs_index[start:end],dec_inputs_len[start:end],target_index[start:end])
    (input_index,inputs_len,dec_inputs_index,dec_inputs_len,target_index) = index_data[file_group]
    print(len(input_index))

./chunked/train_*.bin
287112
86133
./chunked/val_*.bin
13368
4010
./chunked/test_*.bin
11490
3447


In [15]:
import torch
from torch.autograd import Variable
import torch.nn.utils.rnn as rnn_utils
import torch.nn as nn
import torch.nn.functional as F

In [16]:
embedding_size = 128
hidden_size = 256
bidirectional = True
batch_size_fit = 1024-256

learning_rate = 0.001

vocab_size = len(vocab._word_to_id)

# <s> and </s> are used in the data files to segment the abstracts into sentences. They don't receive vocab ids.
SENTENCE_START = '<s>'
SENTENCE_END = '</s>'

PAD_TOKEN = '[PAD]' # This has a vocab id, which is used to pad the encoder input, decoder input and target sequence
UNKNOWN_TOKEN = '[UNK]' # This has a vocab id, which is used to represent out-of-vocabulary words
START_DECODING = '[START]' # This has a vocab id, which is used at the start of every decoder input sequence
STOP_DECODING = '[STOP]' # This has a vocab id, which is used at the end of untruncated target sequences

# Note: none of <s>, </s>, [PAD], [UNK], [START], [STOP] should appear in the vocab file.

sos_idx = vocab.word2id(START_DECODING)
eos_idx = vocab.word2id(STOP_DECODING)
pad_idx = vocab.word2id(PAD_TOKEN)
unk_idx = vocab.word2id(UNKNOWN_TOKEN)

In [17]:
class Encoder(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size,numpy_embedding):
        super(Encoder, self).__init__()
        
        self.model_embedding = torch.from_numpy(numpy_embedding).float()
        
        self.embed = nn.Embedding(vocab_size, embed_size)
        #self.embed.weight = nn.Parameter(self.model_embedding)

        self.lstm = nn.LSTM(input_size=embed_size,  #num_layers = 2,
            hidden_size=hidden_size, batch_first=True, 
            bidirectional=True,)

    def forward(self, x):
        #print(x)
        # input: [b x seq]
        embedded = self.embed(x)
        #print(embedded)
        out, h = self.lstm(embedded) # out: [b x seq x hid*2] (biRNN)
        return out, h


In [18]:
class Hypothesis(object):
    def __init__(self, token_id, hidden_state, cell_state, log_prob):
        self._h = hidden_state
        self._c = cell_state
        self.log_prob = log_prob
        self.full_prediction = token_id # list
        self.survivability = self.log_prob/ float(len(self.full_prediction))

    def extend(self, token_id, hidden_state, cell_state, log_prob):
        return Hypothesis(token_id= self.full_prediction + [token_id],
                          hidden_state=hidden_state,
                          cell_state=cell_state,
                          log_prob= self.log_prob + log_prob)

In [19]:
class Pointer_Decoder(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size,numpy_embedding,lamda = 1):
        super(Pointer_Decoder, self).__init__()
        self.lamda = lamda
        self.model_embedding = torch.from_numpy(numpy_embedding).float()
        
        self.embed = nn.Embedding(vocab_size, embed_size)
        #self.embed.weight = nn.Parameter(self.model_embedding)
        
        self.lstm = nn.LSTM(input_size=embed_size,
            hidden_size=hidden_size*2, batch_first=True)
        
        self.V2V = nn.Linear(hidden_size * 4, hidden_size * 4)
        self.V = nn.Linear(hidden_size * 4, vocab_size)
        
        self.Wh = nn.Linear(2 * hidden_size, 2* hidden_size)
        self.Ws = nn.Linear(hidden_size*2, 2*hidden_size)
        self.w_c = nn.Linear(1, 2*hidden_size)
        self.v = nn.Linear(2*hidden_size, 1)

        self.w_h = nn.Linear(hidden_size*2, 1)
        self.w_s = nn.Linear(hidden_size*2, 1)
        self.w_x = nn.Linear(embedding_size, 1)
        
    def forward(self, x, encoder_outputs, encoder_hidden, encoder_cell_state,encoder_inputs, coverage):
        #print(x)
        batch_size = encoder_outputs.size(0)
        enc_max_len = encoder_outputs.size(1)

        embedded = self.embed(x).view(encoder_outputs.size(0),1, -1)
        self.state = (encoder_hidden.view(-1,batch_size,hidden_size*2), encoder_cell_state.view(-1,batch_size,hidden_size*2))

        enc_proj=self.Wh(encoder_outputs.contiguous().view(batch_size,enc_max_len,-1)) # batch_size x enc_max_len x hidden x 2
        
        decoder_outputs, (decoder_hidden, decoder_cell_state) = self.lstm(embedded, self.state) # out: [b x seq x hid*2] (biRNN)
        
        dec_proj = self.Ws(decoder_outputs).expand_as(enc_proj) # batch_size x enc_max_len x hidden x 2

        cov_proj = self.w_c(coverage.view(-1,1)).view(batch_size,enc_max_len,-1) # batch_size x enc_max_len x hidden x 2

        attn_scores = self.v(F.tanh(enc_proj + dec_proj + cov_proj)).view(batch_size, enc_max_len)# batch_size x enc_max_len
        del enc_proj
        del dec_proj
        del cov_proj
        #enc_mask = encoder_inputs.eq_(pad_idx).clone().type(torch.cuda.ByteTensor) # pad 버림

        #attn_scores = attn_scores.masked_fill_(enc_mask, -float(10000)) # pad 버림
        
        attn_scores = F.softmax(attn_scores)

        context = attn_scores.unsqueeze(1).bmm(encoder_outputs) #batch x 1 x hidden x 2

        p_vocab = F.softmax(self.V(self.V2V(torch.cat((decoder_outputs, context), 2)))) #batch x 1 x vocab
        #p_vocab = F.softmax(self.V(torch.cat((decoder_outputs, context), 2))) #batch x 1 x vocab
        p_gen = F.sigmoid(self.w_h(context).squeeze(2) + self.w_s(decoder_outputs).squeeze(2) + self.w_x(embedded).squeeze(2)) #batch x 1

        weighted_Pvocab = p_vocab.squeeze(1) * p_gen #batch x vocab
        del p_vocab
        weighted_attn  = (1-p_gen)*attn_scores #batch x vocab
        
#         구현예정
#         if self.max_article_oov > 0:
#             ext_vocab = Variable(torch.zeros(batch_size, self.max_article_oov).cuda())				#create OOV (but in-article) zero vectors
#             combined_vocab = torch.cat((weighted_Pvocab, ext_vocab), 1)			
#             del ext_vocab
#         else:
#             combined_vocab = weighted_Pvocab

        combined_vocab  = weighted_Pvocab #batch x vocab
    
        enc_mask = encoder_inputs.clone()
        
        combined_vocab =combined_vocab.scatter_add(1, enc_mask, weighted_attn) #batch x vocab
        del enc_mask
        
        return combined_vocab, decoder_hidden, decoder_cell_state, attn_scores, p_gen, attn_scores
        
        
    def decode_step(self, x, encoder_outputs, encoder_hidden, encoder_cell_state, encoder_inputs):
        #print(x)
        batch_size = encoder_outputs.size(0)
        enc_max_len = encoder_outputs.size(1)
        coverage =  Variable(torch.zeros(batch_size, enc_max_len)).cuda()
        
        embedded = self.embed(x).view(encoder_outputs.size(0),1, -1)
        state = (encoder_hidden.view(-1,batch_size,hidden_size*2), encoder_cell_state.view(-1,batch_size,hidden_size*2))

        enc_proj=self.Wh(encoder_outputs.contiguous().view(batch_size,enc_max_len,-1)) # batch_size x enc_max_len x hidden x 2

        decoder_outputs, (decoder_hidden, decoder_cell_state) = self.lstm(embedded, state) # out: [b x seq x hid*2] (biRNN)

        dec_proj = self.Ws(decoder_outputs).expand_as(enc_proj) # batch_size x enc_max_len x hidden x 2

        cov_proj = self.w_c(coverage.view(-1,1)).view(batch_size,enc_max_len,-1) # batch_size x enc_max_len x hidden x 2

        attn_scores = self.v(F.tanh(enc_proj + dec_proj + cov_proj)).view(batch_size, enc_max_len)# batch_size x enc_max_len
        #print(enc_proj.size(),dec_proj.size(),cov_proj.size())
        enc_mask = encoder_inputs.eq_(pad_idx).detach().type(torch.cuda.ByteTensor) # pad 버림

        attn_scores = attn_scores.masked_fill_(enc_mask, -float(10000)) # pad 버림

        attn_scores = F.softmax(attn_scores)

        context = attn_scores.unsqueeze(1).bmm(encoder_outputs) #batch x 1 x hidden x 2

        p_vocab = F.softmax(self.V(torch.cat((decoder_outputs, context), 2))) #batch x 1 x vocab

        p_gen = F.sigmoid(self.w_h(context).squeeze(2) + self.w_s(decoder_outputs).squeeze(2) + self.w_x(embedded).squeeze(2)) #batch x 1

        weighted_Pvocab = p_vocab.squeeze(1) * p_gen #batch x vocab

        weighted_attn  = (1-p_gen)*attn_scores #batch x vocab

        combined_vocab  = weighted_Pvocab #batch x vocab

        combined_vocab =combined_vocab.scatter_add(1, encoder_inputs, weighted_attn) #batch x vocab


        return combined_vocab, decoder_hidden, decoder_cell_state

    def getOverallTopk(self, vocab_probs, _h, _c, all_hyps, results):
        # return top-k values i.e. top-k over all beams i.e. next step input ids
        # return hidden, cell states corresponding to topk
        probs, inds = vocab_probs.topk(k=2, dim=1)
        probs = probs.log().data
        inds = inds.data
        #inds.add_(1)
        candidates = []

        assert len(all_hyps) == probs.size(0), '# Hypothesis and log-prob size dont match'
        # cycle through all hypothesis in full beam
        for i, hypo in enumerate(probs.tolist()):
            for j, _ in enumerate(hypo):
                new_cand = all_hyps[i].extend(token_id=inds[i,j],
                                              hidden_state=_h[i].unsqueeze(0),
                                              cell_state=_c[i].unsqueeze(0),
                                              log_prob= probs[i,j])
                candidates.append(new_cand)
        # sort in descending order
        candidates = sorted(candidates, key=lambda x:x.survivability, reverse=True)
        new_beam, next_inp = [], []
        next_h, next_c = [], []
        #prune hypotheses and generate new beam
        for h in candidates:
            if h.full_prediction[-1] == eos_idx:
                # weed out small sentences that likely have no meaning
                if len(h.full_prediction)>=5:
                    results.append(h.full_prediction)
            else:
                new_beam.append(h)
                next_inp.append(h.full_prediction[-1])
                next_h.append(h._h.data)
                next_c.append(h._c.data)
            if len(new_beam) >= 2:
                break
        assert len(new_beam) >= 1, 'Non-existent beam'

        return new_beam, torch.LongTensor([next_inp]), results, torch.cat(next_h, 0), torch.cat(next_c, 0)

    def decode(self, encoder_outputs, decoder_hidden, decoder_cell_state, encoder_inputs):
        _input = Variable(torch.LongTensor([[sos_idx]]).cuda(), volatile=True)
        decoded_outputs = []
        all_hyps = [Hypothesis([sos_idx], None, None, 0)]
        for _step in range(dec_max_len):
            print(_input)
            curr_beam_size = _input.size(0)
            enc_states = encoder_outputs.clone()
            beam_enc_states = enc_states.expand(curr_beam_size, enc_states.size(1), enc_states.size(2)).contiguous().detach()
            beam_article_inds  = encoder_inputs.clone().expand(curr_beam_size, x_.size(1)).detach()

            vocab_probs, next_h, next_c = self.decode_step( _input, beam_enc_states, decoder_hidden, decoder_cell_state, beam_article_inds)

            all_hyps, decode_inds, decoded_outputs, init_h, init_c = self.getOverallTopk(vocab_probs, next_h.view(curr_beam_size,1,-1), \
                                                                                    next_c.view(curr_beam_size,1,-1), all_hyps, decoded_outputs)
            #decode_inds.masked_fill_((decode_inds > vocab_size), unk_idx)
            
            decode_inds = decode_inds.t()
            _input = Variable(decode_inds.cuda(), volatile=True)
            decoder_hidden = Variable(init_h,volatile=True)
            decoder_cell_state = Variable(init_c,volatile=True)

        non_terminal_output = [item.full_prediction for item in all_hyps]
        all_outputs = decoded_outputs + non_terminal_output
        return all_outputs

In [20]:
encoder = Encoder(vocab_size, embedding_size, hidden_size, model_embedding).cuda()

In [21]:
decoder = Pointer_Decoder(vocab_size, embedding_size, hidden_size,model_embedding,lamda = 1).cuda()

In [22]:
encoder,decoder

(Encoder(
   (embed): Embedding(50000, 128)
   (lstm): LSTM(128, 256, batch_first=True, bidirectional=True)
 ), Pointer_Decoder(
   (embed): Embedding(50000, 128)
   (lstm): LSTM(128, 512, batch_first=True)
   (V2V): Linear(in_features=1024, out_features=1024)
   (V): Linear(in_features=1024, out_features=50000)
   (Wh): Linear(in_features=512, out_features=512)
   (Ws): Linear(in_features=512, out_features=512)
   (w_c): Linear(in_features=1, out_features=512)
   (v): Linear(in_features=512, out_features=1)
   (w_h): Linear(in_features=512, out_features=1)
   (w_s): Linear(in_features=512, out_features=1)
   (w_x): Linear(in_features=128, out_features=1)
 ))

In [23]:
NLL = torch.nn.NLLLoss(ignore_index = pad_idx)

optimizer_encoder = torch.optim.Adam(encoder.parameters(), lr=learning_rate)
optimizer_decoder = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

In [24]:
import random

def batch(batch_size,input_var,dec_input_var,target_var,length_var, dec_length_var):

    shuffle_list = list(zip(input_var,dec_input_var,target_var,length_var,dec_length_var))
    random.shuffle(shuffle_list)
    
    start = 0
    end = batch_size
    #if len(input_var)%32 != 0:
    while end < len(input_var):
        batch_input = []
        batch_input_dec = []
        batch_target = []
        batch_length = []
        batch_length_dec = []
        
        batch_shuffle = shuffle_list[start:end]
        
        for i,j,k,n,m in batch_shuffle:
            batch_input.append(i)
            batch_input_dec.append(j)
            batch_target.append(k)
            batch_length.append(n)
            batch_length_dec.append(m)
            
        temp = end
        end  = end + batch_size
        start = temp
        yield batch_input, batch_input_dec, batch_target, batch_length, batch_length_dec
        
    if end >= len(input_var):
        batch_input = []
        batch_input_dec = []
        batch_target = []
        batch_length = []
        batch_length_dec = []
        batch_shuffle = shuffle_list[start:]
        
        for i,j,k,n,m in batch_shuffle:
            batch_input.append(i)
            batch_input_dec.append(j)
            batch_target.append(k)
            batch_length.append(n)
            batch_length_dec.append(m)
        yield batch_input, batch_input_dec, batch_target, batch_length, batch_length_dec

In [25]:
epochs = 200
max_target_len = 20
clip = 2.0
teacher_forcing_ratio = 0.0

In [None]:
paths = ['./chunked/train_*.bin', './chunked/val_*.bin']

In [None]:
step = 0
var_losses = []
train_losses = []
avg_losses = []
coverege_losses = []
iteration = 0
lamda = 1
for epoch in tqdm.tqdm(range(epochs+1)):
    optimizer_encoder.zero_grad()
    optimizer_decoder.zero_grad()
    for path in paths :
        print(path)
        input_index, inputs_len, dec_inputs_index, dec_inputs_len, target_index = index_data[path]
        
        for batch_x, batch_y_x, batch_y, batch_len, batch_len_y in batch(batch_size_fit, input_index, \
                                                                         dec_inputs_index,target_index, \
                                                                         inputs_len, dec_inputs_len):
            optimizer_encoder.zero_grad()
            optimizer_decoder.zero_grad()

            iteration = iteration + 1

            if path == paths[0]:
                encoder.train()
                decoder.train()
            else:
                encoder.eval()
                decoder.eval()

            x_ = Variable(torch.cuda.LongTensor(batch_x))
            #x_index = Variable(torch.cuda.LongTensor(batch_x))
            y_ = Variable(torch.cuda.LongTensor(batch_y)).transpose(1,0)
            batch_size = x_.size(0)


            encoder_outputs, (encoder_hidden, encoder_cell_state) = encoder(x_)
            
            decoder_input=Variable(torch.cuda.LongTensor(batch_size)).fill_(sos_idx)
            decoder_hidden = encoder_hidden#[-2:]
            decoder_cell_state = encoder_cell_state#[-2:]
            
            coverage =  Variable(torch.zeros(batch_size, enc_max_len)).cuda()
            
            loss = 0
            coverege_loss_ = 0
            use_teacher_forcing = random.random() < teacher_forcing_ratio
            if use_teacher_forcing:# or path == paths[1]:
                print('use_teacher_forcing, '+path)
                for i in range(max_target_len):
                    P_vocab, decoder_hidden, decoder_cell_state, attn_scores, p_gen, attn_scores = decoder(\
                                                        decoder_input,encoder_outputs, decoder_hidden, decoder_cell_state,\
                                                                                                        x_,coverage)

                    
                    
                    _c_loss , index=torch.stack((coverage,attn_scores),2).min(2)
                    #print(index[0])
                    coverege_loss = _c_loss.type(torch.cuda.DoubleTensor).sum(1)
                    coverege_loss = coverege_loss.sum().div(batch_size)
                    coverage =coverage+ attn_scores
                    topv, topi = P_vocab.data.topk(1)

                    nllloss = NLL(torch.log(P_vocab),y_[i])
                    
                    total_loss = nllloss + lamda*coverege_loss.type(torch.cuda.FloatTensor)
                    loss += total_loss
                    
                    coverege_loss_ += coverege_loss
                    #decoder_hidden = decoder_hidden.squeeze(0)
                    decoder_input = Variable(topi).squeeze(0)
            else:
                for i in range(max_target_len):
                    #print(coverage)
                    P_vocab, decoder_hidden, decoder_cell_state, attn_scores, p_gen, attn_scores = decoder(\
                                                                decoder_input,encoder_outputs, decoder_hidden, decoder_cell_state,\
                                                                                                           x_,coverage)
                    #print('attn_scores')
                    #print(attn_scores)
                    
                    _c_loss , index=torch.stack((coverage,attn_scores),2).min(2)
                    #print(index[0].sum())
                    coverege_loss = _c_loss.type(torch.cuda.DoubleTensor).sum(1)
                    coverege_loss = coverege_loss.sum().div(batch_size)
                    coverage =coverage+ attn_scores
                    topv, topi = P_vocab.data.topk(1)

                    nllloss = NLL(torch.log(P_vocab),y_[i])
                    total_loss = nllloss + lamda*coverege_loss.type(torch.cuda.FloatTensor)
                    loss += total_loss
                    
                    coverege_loss_ += coverege_loss
                    #decoder_hidden = decoder_hidden.squeeze(0)
                    decoder_input = y_[i]
            
            if path == paths[0]:

                loss.backward()
                #print('a')
                torch.nn.utils.clip_grad_norm(encoder.parameters(), clip)
                torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
                optimizer_encoder.step()
                optimizer_decoder.step()
                
                loss = loss.data[0]/max_target_len
                c_loss = coverege_loss_.data[0]/max_target_len
                coverege_losses.append(c_loss)
                train_losses.append(loss)
                

                
                step += 1

                if iteration % 10 == 0 or iteration == (len(input_index)-1)//batch_size:
                    print("Batch %04d/%i, Loss %9.4f, coverage Loss %9.4f"%( iteration, (len(input_index)-1)//batch_size_fit, loss, c_loss))
                    np.savez(L=train_losses,file='./train_loss.npz')
                    np.savez(L=coverege_losses,file='./coverege_loss.npz')
                    checkpoint_path_encoder = os.path.join(save_model_path, "enc_E%i.pytorch"%(epoch))
                    checkpoint_path_decoder = os.path.join(save_model_path, "dec_E%i.pytorch"%(epoch))
                    torch.save(encoder, checkpoint_path_encoder)
                    torch.save(decoder, checkpoint_path_decoder)
                    


                del nllloss
                del loss
                del x_
                del y_
                del total_loss
                
            else:

                loss = loss.data[0]/max_target_len

                var_losses.append(loss)

                step += 1

                if iteration % 10 == 0 or iteration == (len(input_index)-1)//batch_size:
                    print("Valid Batch %04d/%i, Loss %9.4f"%( iteration, (len(input_index)-1)//batch_size_fit, loss))
                    np.savez(L=var_losses,file='./var_loss.npz')
                    


                del nllloss
                del loss
                del x_
                del y_
                del total_loss
                
            optimizer_encoder.zero_grad()
            optimizer_decoder.zero_grad()
        iteration = 0
        
    print("Model saved at %s"%save_model_path)
    print("Epoch %02d/%i, Mean ELBO %9.4f"%( epoch, epochs, np.mean(np.array(var_losses))))
    avg_losses.append(np.mean(np.array(var_losses)))
    np.savez(L=avg_losses,file='./avg_losses.npz')
        

  0%|          | 0/201 [00:00<?, ?it/s]

./chunked/train_*.bin




Batch 0010/112, Loss    5.9769, coverage Loss    0.9496


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


Batch 0020/112, Loss    5.8146, coverage Loss    0.9477
Batch 0030/112, Loss    5.8074, coverage Loss    0.9470
Batch 0040/112, Loss    5.7000, coverage Loss    0.9463
Batch 0050/112, Loss    5.5244, coverage Loss    0.9419
Batch 0060/112, Loss    5.3730, coverage Loss    0.9142
Batch 0070/112, Loss    5.1640, coverage Loss    0.8416
Batch 0080/112, Loss    5.0445, coverage Loss    0.7822
Batch 0090/112, Loss    4.8428, coverage Loss    0.6988
Batch 0100/112, Loss    4.6473, coverage Loss    0.5747
Batch 0110/112, Loss    4.4479, coverage Loss    0.4380
Batch 0112/112, Loss    4.4427, coverage Loss    0.4292
./chunked/val_*.bin
Valid Batch 0005/5, Loss    4.2339


  0%|          | 1/201 [03:46<12:36:37, 226.99s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 00/200, Mean ELBO    4.1789
./chunked/train_*.bin
Batch 0010/112, Loss    4.2786, coverage Loss    0.3585
Batch 0020/112, Loss    4.2408, coverage Loss    0.3336
Batch 0030/112, Loss    4.0624, coverage Loss    0.3143
Batch 0040/112, Loss    4.1015, coverage Loss    0.3072
Batch 0050/112, Loss    4.0548, coverage Loss    0.2916
Batch 0060/112, Loss    3.9083, coverage Loss    0.2696
Batch 0070/112, Loss    3.9090, coverage Loss    0.2580
Batch 0080/112, Loss    3.8889, coverage Loss    0.2447
Batch 0090/112, Loss    3.8718, coverage Loss    0.2530
Batch 0100/112, Loss    3.9203, coverage Loss    0.2609
Batch 0110/112, Loss    3.8408, coverage Loss    0.2475
Batch 0112/112, Loss    3.7961, coverage Loss    0.2337
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.7666


  1%|          | 2/201 [07:32<12:30:50, 226.39s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 01/200, Mean ELBO    3.9182
./chunked/train_*.bin
Batch 0010/112, Loss    3.5523, coverage Loss    0.2243
Batch 0020/112, Loss    3.5503, coverage Loss    0.2205
Batch 0030/112, Loss    3.5781, coverage Loss    0.2235
Batch 0040/112, Loss    3.4818, coverage Loss    0.2107
Batch 0050/112, Loss    3.4982, coverage Loss    0.1977
Batch 0060/112, Loss    3.5365, coverage Loss    0.1979
Batch 0070/112, Loss    3.5388, coverage Loss    0.2054
Batch 0080/112, Loss    3.5664, coverage Loss    0.1980
Batch 0090/112, Loss    3.4976, coverage Loss    0.2107
Batch 0100/112, Loss    3.4903, coverage Loss    0.1962
Batch 0110/112, Loss    3.5428, coverage Loss    0.2003
Batch 0112/112, Loss    3.4665, coverage Loss    0.1962
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.6257


  1%|▏         | 3/201 [11:19<12:27:23, 226.48s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 02/200, Mean ELBO    3.7750
./chunked/train_*.bin
Batch 0010/112, Loss    3.2500, coverage Loss    0.1907
Batch 0020/112, Loss    3.2510, coverage Loss    0.1853
Batch 0030/112, Loss    3.2462, coverage Loss    0.1833
Batch 0040/112, Loss    3.2370, coverage Loss    0.1807
Batch 0050/112, Loss    3.1510, coverage Loss    0.1712
Batch 0060/112, Loss    3.2314, coverage Loss    0.1751
Batch 0070/112, Loss    3.2741, coverage Loss    0.1687
Batch 0080/112, Loss    3.2948, coverage Loss    0.1736
Batch 0090/112, Loss    3.1932, coverage Loss    0.1808
Batch 0100/112, Loss    3.3089, coverage Loss    0.1760
Batch 0110/112, Loss    3.1907, coverage Loss    0.1827
Batch 0112/112, Loss    3.2156, coverage Loss    0.1761
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.5215


  2%|▏         | 4/201 [15:08<12:26:04, 227.23s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 03/200, Mean ELBO    3.6814
./chunked/train_*.bin
Batch 0010/112, Loss    3.0264, coverage Loss    0.1588
Batch 0020/112, Loss    3.0542, coverage Loss    0.1612
Batch 0030/112, Loss    2.9294, coverage Loss    0.1584
Batch 0040/112, Loss    2.9467, coverage Loss    0.1535
Batch 0050/112, Loss    3.0060, coverage Loss    0.1558
Batch 0060/112, Loss    2.9683, coverage Loss    0.1555
Batch 0070/112, Loss    3.0201, coverage Loss    0.1571
Batch 0080/112, Loss    2.9756, coverage Loss    0.1492
Batch 0090/112, Loss    2.9903, coverage Loss    0.1508
Batch 0100/112, Loss    3.0355, coverage Loss    0.1568
Batch 0110/112, Loss    3.0442, coverage Loss    0.1566
Batch 0112/112, Loss    2.9948, coverage Loss    0.1566
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.4716


  2%|▏         | 5/201 [18:57<12:23:11, 227.51s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 04/200, Mean ELBO    3.6177
./chunked/train_*.bin
Batch 0010/112, Loss    2.8272, coverage Loss    0.1530
Batch 0020/112, Loss    2.7950, coverage Loss    0.1459
Batch 0030/112, Loss    2.7172, coverage Loss    0.1423
Batch 0040/112, Loss    2.7827, coverage Loss    0.1358
Batch 0050/112, Loss    2.8053, coverage Loss    0.1374
Batch 0060/112, Loss    2.8267, coverage Loss    0.1436
Batch 0070/112, Loss    2.8284, coverage Loss    0.1382
Batch 0080/112, Loss    2.8500, coverage Loss    0.1418
Batch 0090/112, Loss    2.8646, coverage Loss    0.1403
Batch 0100/112, Loss    2.8428, coverage Loss    0.1375
Batch 0110/112, Loss    2.7710, coverage Loss    0.1370
Batch 0112/112, Loss    2.8138, coverage Loss    0.1348
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.4268


  3%|▎         | 6/201 [22:45<12:19:23, 227.51s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 05/200, Mean ELBO    3.5712
./chunked/train_*.bin
Batch 0010/112, Loss    2.5669, coverage Loss    0.1289
Batch 0020/112, Loss    2.5166, coverage Loss    0.1256
Batch 0030/112, Loss    2.5214, coverage Loss    0.1260
Batch 0040/112, Loss    2.5885, coverage Loss    0.1309
Batch 0050/112, Loss    2.6357, coverage Loss    0.1279
Batch 0060/112, Loss    2.6249, coverage Loss    0.1277
Batch 0070/112, Loss    2.5938, coverage Loss    0.1246
Batch 0080/112, Loss    2.6676, coverage Loss    0.1290
Batch 0090/112, Loss    2.6971, coverage Loss    0.1306
Batch 0100/112, Loss    2.5993, coverage Loss    0.1208
Batch 0110/112, Loss    2.6293, coverage Loss    0.1182
Batch 0112/112, Loss    2.6929, coverage Loss    0.1271
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.4167


  3%|▎         | 7/201 [26:31<12:15:17, 227.41s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 06/200, Mean ELBO    3.5402
./chunked/train_*.bin
Batch 0010/112, Loss    2.4383, coverage Loss    0.1149
Batch 0020/112, Loss    2.4080, coverage Loss    0.1119
Batch 0030/112, Loss    2.4884, coverage Loss    0.1102
Batch 0040/112, Loss    2.5177, coverage Loss    0.1120
Batch 0050/112, Loss    2.4045, coverage Loss    0.1123
Batch 0060/112, Loss    2.4662, coverage Loss    0.1124
Batch 0070/112, Loss    2.5537, coverage Loss    0.1110
Batch 0080/112, Loss    2.4999, coverage Loss    0.1113
Batch 0090/112, Loss    2.4847, coverage Loss    0.1112
Batch 0100/112, Loss    2.5186, coverage Loss    0.1133
Batch 0110/112, Loss    2.5265, coverage Loss    0.1141
Batch 0112/112, Loss    2.5195, coverage Loss    0.1132
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.5587


  4%|▍         | 8/201 [30:19<12:11:46, 227.50s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 07/200, Mean ELBO    3.5238
./chunked/train_*.bin
Batch 0010/112, Loss    2.2624, coverage Loss    0.1064
Batch 0020/112, Loss    2.3452, coverage Loss    0.1016
Batch 0030/112, Loss    2.2612, coverage Loss    0.1019
Batch 0040/112, Loss    2.3212, coverage Loss    0.0978
Batch 0050/112, Loss    2.3420, coverage Loss    0.0979
Batch 0060/112, Loss    2.2926, coverage Loss    0.0970
Batch 0070/112, Loss    2.3038, coverage Loss    0.1005
Batch 0080/112, Loss    2.4067, coverage Loss    0.1032
Batch 0090/112, Loss    2.3581, coverage Loss    0.1055
Batch 0100/112, Loss    2.3660, coverage Loss    0.1036
Batch 0110/112, Loss    2.3558, coverage Loss    0.0996
Batch 0112/112, Loss    2.4054, coverage Loss    0.0988
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.4622


  4%|▍         | 9/201 [34:08<12:08:27, 227.64s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 08/200, Mean ELBO    3.5114
./chunked/train_*.bin
Batch 0010/112, Loss    2.2020, coverage Loss    0.0970
Batch 0020/112, Loss    2.1455, coverage Loss    0.0919
Batch 0030/112, Loss    2.1556, coverage Loss    0.0903
Batch 0040/112, Loss    2.1738, coverage Loss    0.0891
Batch 0050/112, Loss    2.2096, coverage Loss    0.0931
Batch 0060/112, Loss    2.1502, coverage Loss    0.0926
Batch 0070/112, Loss    2.2073, coverage Loss    0.0915
Batch 0080/112, Loss    2.2238, coverage Loss    0.0909
Batch 0090/112, Loss    2.2973, coverage Loss    0.0922
Batch 0100/112, Loss    2.2098, coverage Loss    0.0924
Batch 0110/112, Loss    2.2690, coverage Loss    0.0899
Batch 0112/112, Loss    2.2787, coverage Loss    0.0894
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.5326


  5%|▍         | 10/201 [37:56<12:04:33, 227.61s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 09/200, Mean ELBO    3.5093
./chunked/train_*.bin
Batch 0010/112, Loss    2.0423, coverage Loss    0.0863
Batch 0020/112, Loss    2.0620, coverage Loss    0.0860
Batch 0030/112, Loss    2.0845, coverage Loss    0.0878
Batch 0040/112, Loss    2.1189, coverage Loss    0.0790
Batch 0050/112, Loss    2.1080, coverage Loss    0.0828
Batch 0060/112, Loss    2.0512, coverage Loss    0.0834
Batch 0070/112, Loss    2.1264, coverage Loss    0.0890
Batch 0080/112, Loss    2.1459, coverage Loss    0.0869
Batch 0090/112, Loss    2.0990, coverage Loss    0.0835
Batch 0100/112, Loss    2.1418, coverage Loss    0.0800
Batch 0110/112, Loss    2.1816, coverage Loss    0.0858
Batch 0112/112, Loss    2.1859, coverage Loss    0.0825
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.5926


  5%|▌         | 11/201 [41:43<12:00:46, 227.61s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 10/200, Mean ELBO    3.5089
./chunked/train_*.bin
Batch 0010/112, Loss    1.9135, coverage Loss    0.0765
Batch 0020/112, Loss    1.9369, coverage Loss    0.0749
Batch 0030/112, Loss    1.9587, coverage Loss    0.0760
Batch 0040/112, Loss    2.0085, coverage Loss    0.0754
Batch 0050/112, Loss    1.9731, coverage Loss    0.0728
Batch 0060/112, Loss    1.9713, coverage Loss    0.0743
Batch 0070/112, Loss    1.9745, coverage Loss    0.0733
Batch 0080/112, Loss    1.9983, coverage Loss    0.0753
Batch 0090/112, Loss    2.0030, coverage Loss    0.0765
Batch 0100/112, Loss    2.0371, coverage Loss    0.0787
Batch 0110/112, Loss    2.0001, coverage Loss    0.0741
Batch 0112/112, Loss    2.0213, coverage Loss    0.0759
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.7255


  6%|▌         | 12/201 [45:30<11:56:52, 227.58s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 11/200, Mean ELBO    3.5163
./chunked/train_*.bin
Batch 0010/112, Loss    1.8601, coverage Loss    0.0733
Batch 0020/112, Loss    1.7991, coverage Loss    0.0683
Batch 0030/112, Loss    1.8892, coverage Loss    0.0690
Batch 0040/112, Loss    1.8506, coverage Loss    0.0678
Batch 0050/112, Loss    1.9108, coverage Loss    0.0692
Batch 0060/112, Loss    1.9104, coverage Loss    0.0676
Batch 0070/112, Loss    1.9083, coverage Loss    0.0660
Batch 0080/112, Loss    1.9064, coverage Loss    0.0698
Batch 0090/112, Loss    1.8824, coverage Loss    0.0673
Batch 0100/112, Loss    1.9595, coverage Loss    0.0654
Batch 0110/112, Loss    1.9375, coverage Loss    0.0660
Batch 0112/112, Loss    1.9665, coverage Loss    0.0677
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.7388


  6%|▋         | 13/201 [49:18<11:53:11, 227.61s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 12/200, Mean ELBO    3.5239
./chunked/train_*.bin
Batch 0010/112, Loss    1.7231, coverage Loss    0.0621
Batch 0020/112, Loss    1.7144, coverage Loss    0.0611
Batch 0030/112, Loss    1.7637, coverage Loss    0.0609
Batch 0040/112, Loss    1.8077, coverage Loss    0.0636
Batch 0050/112, Loss    1.8114, coverage Loss    0.0617
Batch 0060/112, Loss    1.7683, coverage Loss    0.0590
Batch 0070/112, Loss    1.8153, coverage Loss    0.0612
Batch 0080/112, Loss    1.8322, coverage Loss    0.0612
Batch 0090/112, Loss    1.8663, coverage Loss    0.0617
Batch 0100/112, Loss    1.8455, coverage Loss    0.0631
Batch 0110/112, Loss    1.8858, coverage Loss    0.0655
Batch 0112/112, Loss    1.8850, coverage Loss    0.0615
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.8539


  7%|▋         | 14/201 [53:07<11:49:40, 227.70s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 13/200, Mean ELBO    3.5374
./chunked/train_*.bin
Batch 0010/112, Loss    1.7029, coverage Loss    0.0590
Batch 0020/112, Loss    1.7124, coverage Loss    0.0571
Batch 0030/112, Loss    1.6960, coverage Loss    0.0573
Batch 0040/112, Loss    1.7211, coverage Loss    0.0560
Batch 0050/112, Loss    1.6589, coverage Loss    0.0542
Batch 0060/112, Loss    1.6795, coverage Loss    0.0548
Batch 0070/112, Loss    1.7096, coverage Loss    0.0576
Batch 0080/112, Loss    1.7381, coverage Loss    0.0534
Batch 0090/112, Loss    1.7635, coverage Loss    0.0572
Batch 0100/112, Loss    1.7362, coverage Loss    0.0537
Batch 0110/112, Loss    1.7850, coverage Loss    0.0558
Batch 0112/112, Loss    1.7869, coverage Loss    0.0551
./chunked/val_*.bin
Valid Batch 0005/5, Loss    3.8050


  7%|▋         | 15/201 [56:55<11:45:53, 227.70s/it]

Model saved at won/2018-Aug-01-01-05-16
Epoch 14/200, Mean ELBO    3.5519
./chunked/train_*.bin
Batch 0010/112, Loss    1.5877, coverage Loss    0.0513
Batch 0020/112, Loss    1.5674, coverage Loss    0.0512
Batch 0030/112, Loss    1.6097, coverage Loss    0.0522
Batch 0040/112, Loss    1.6238, coverage Loss    0.0478
Batch 0050/112, Loss    1.6845, coverage Loss    0.0523
Batch 0060/112, Loss    1.7038, coverage Loss    0.0540
Batch 0070/112, Loss    1.6523, coverage Loss    0.0522
Batch 0080/112, Loss    1.6892, coverage Loss    0.0513
Batch 0090/112, Loss    1.6367, coverage Loss    0.0516
Batch 0100/112, Loss    1.6660, coverage Loss    0.0542


In [None]:
save_model_path,epoch

In [None]:
save_model_path = os.path.join('won', '2018-Jul-31-09-36-35')

In [None]:
checkpoint_path_encoder = os.path.join(save_model_path, 'enc_E22.pytorch')
checkpoint_path_decoder = os.path.join(save_model_path, 'dec_E22.pytorch')

In [None]:
encoder = torch.load(checkpoint_path_encoder)
decoder = torch.load(checkpoint_path_decoder)

In [None]:
encoder.eval()
decoder.eval()
print(encoder,decoder)

# greedy generator

In [None]:
(input_index,inputs_len,dec_inputs_index,dec_inputs_len,target_index) = index_data[files_group[0]]

In [None]:
start = 0
end = 1000

In [None]:
x_ = Variable(torch.cuda.LongTensor(input_index[start:end]))

batch_size = x_.size(0)

encoder_outputs, (encoder_hidden, encoder_cell_state) = encoder(x_)

In [None]:
decoder_input=Variable(torch.cuda.LongTensor(batch_size)).fill_(sos_idx)
decoder_hidden = encoder_hidden#[-2:]
decoder_cell_state = encoder_cell_state#[-2:]
coverage =  Variable(torch.zeros(batch_size, enc_max_len)).cuda()

a=decoder.decode( encoder_outputs, decoder_hidden, decoder_cell_state, x_)

b=list(map(vocab.id2word,a[0]))
c=list(map(vocab.id2word,a[1]))
d=list(map(vocab.id2word,target_index[start:end][0]))

rouge.get_scores(' '.join(d), ' '.join(c))

In [None]:

decoded_words = []
decoder_attentions = torch.zeros(encoder_outputs.size(0),dec_max_len+20, enc_max_len)
for i in range(dec_max_len):
    #print(decoder_input,decoder_hidden)
    P_vocab, decoder_hidden, decoder_cell_state, attn_scores, p_gen, decoder_attention = decoder(\
                                                        decoder_input,encoder_outputs, decoder_hidden, decoder_cell_state,\
                                                                                                        x_,coverage)
    coverage = coverage+attn_scores
    decoder_attentions[:attn_scores.size(0),i,:attn_scores.size(1)] += attn_scores.cpu().data
    topv, topi = P_vocab.data.topk(1)
    #ni = topi[0]
    '''
    if ni == eos_idx:
        decoded_words.append(_EOS_)
        break
    else:
        decoded_words.append(index2word[ni])
    '''
    temp = []
    if len(topi.size()) == 1:
        temp.append(vocab.id2word(topi.cpu().numpy()[0]))
    else:
        for top in topi.cpu().numpy():
            temp.append(vocab.id2word(top[0]))
    decoded_words.append(temp)
    decoder_input = Variable(topi).cuda()
    decoder_hidden = decoder_hidden.squeeze()
    if len(decoder_hidden.size()) == 1:
        decoder_hidden = decoder_hidden.unsqueeze(0)
    else: 
        decoder_hidden = decoder_hidden
del coverage


In [None]:
inferences = []
for decoded_sent in np.array(decoded_words).transpose():
    for i,word in enumerate(decoded_sent):
        if word == STOP_DECODING:
            decoded_sent = decoded_sent[:i+1]

    inferences.append(list(decoded_sent))


targets_result = []
for inputs in target_index[start:end]:
    result = []
    for word in inputs:
        if word == eos_idx:
            break
        else:
            result.append(vocab.id2word(word))
    targets_result.append(result)    

inputs_result = []
for inputs in input_index[start:end]:
    result = []
    for word in inputs:
        result.append(vocab.id2word(word))
    inputs_result.append(result)

In [None]:
for source,target,inference, decoder_attention in zip(inputs_result[:10], targets_result[:10], inferences[:10], decoder_attentions[:10]):
    print('*********************************************************************')
    print('source : '+' '.join(source))
    
    print('*********************************************************************')
    print('target : '+' '.join(target))
    
    print('*********************************************************************')
    print('inference : '+' '.join(inference))
    
    print('*********************************************************************')
    scores = rouge.get_scores(' '.join(target), ' '.join(inference))
    for score in scores[0]:
        print(score +' : '+str(scores[0][score]))

    plt.matshow(decoder_attention.numpy())

In [None]:
r1_recall = []
r2_recall = []
rl_recall = []

r1_f1 = []
r2_f1 = []
rl_f1 = []

error = 0

for source,target,inference in tqdm.tqdm(zip(inputs_result, targets_result, inferences)):#, decoder_attentions)):
    try:
        scores = rouge.get_scores(' '.join(target), ' '.join(inference))
    except ValueError:
        error +=1
        print('******************************************')
        print('target'+' '.join(target))
        print('inference'+' '.join(inference))
        pass
    r1_recall.append(scores[0]['rouge-1']['r'])
    r2_recall.append(scores[0]['rouge-2']['r'])
    rl_recall.append(scores[0]['rouge-l']['r'])
    
    r1_f1.append(scores[0]['rouge-1']['f'])
    r2_f1.append(scores[0]['rouge-2']['f'])
    rl_f1.append(scores[0]['rouge-l']['f'])
print('error sentence : ' + str(error))

In [None]:
print('ROUGE-1 recall : ' + str(sum(r1_recall)*100 / len(r1_recall)))
print('ROUGE-1 F1 : ' + str(sum(r1_f1)*100 / len(r1_f1)))
print('********************************************************************')
print('ROUGE-2 recall : ' + str(sum(r2_recall)*100 / len(r2_recall)))
print('ROUGE-2 F1 : ' + str(sum(r2_f1)*100 / len(r2_f1)))
print('********************************************************************')
print('ROUGE-l recall : ' + str(sum(rl_recall)*100 / len(rl_recall)))
print('ROUGE-l F1 : ' + str(sum(rl_f1)*100 / len(rl_f1)))

# 'won/2018-Jul-30-11-30-40' epoch 9
ROUGE-1 recall : 25.147463257859798
ROUGE-1 F1 : 26.152656594511935
********************************************************************
ROUGE-2 recall : 11.111168986470044
ROUGE-2 F1 : 11.87687849599647
********************************************************************
ROUGE-l recall : 23.374466815236698
ROUGE-l F1 : 23.332530990714563

# 'won/2018-Jul-31-01-09-08' epoch18
ROUGE-1 recall : 27.181338680580858
ROUGE-1 F1 : 23.24354440201661
********************************************************************
ROUGE-2 recall : 10.285292567935505
ROUGE-2 F1 : 10.084292863650939
********************************************************************
ROUGE-l recall : 25.50797278681479
ROUGE-l F1 : 20.400153250444962

# 'won', '2018-Jul-31-06-07-54' epoch 14
ROUGE-1 recall : 24.045028589331288
ROUGE-1 F1 : 26.872889411505124
********************************************************************
ROUGE-2 recall : 10.375095347295948
ROUGE-2 F1 : 11.756716997867436
********************************************************************
ROUGE-l recall : 22.083905535277424
ROUGE-l F1 : 23.46146722706788