In [1]:
import warnings
warnings.filterwarnings("ignore")
import sys
import os
# Restrict this process to the CUDA device with index 1. This is set before
# `torch` is imported further down in this cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is normally a debugging aid (synchronous
# kernel launches) and slows training -- confirm it is still wanted here.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import time
import re


import torch as T



import torch.nn as nn
import torch.nn.functional as F


from model import Model


from data_util import config, data
from data_util.batcher import Batcher
from data_util.data import Vocab
from write_result import *

from train_util import *
from torch.distributions import Categorical
from rouge import Rouge
from numpy import random
import argparse
import torchsnooper
import logging

# -------- Test Packages -------
from beam_search import *
import shutil
from tensorboardX import SummaryWriter
from nltk.translate.bleu_score import corpus_bleu


# _6  -- NOTE(review): looks like an experiment/run tag; confirm its meaning.
# Per-run overrides written onto the shared `config` module imported above.
# They are applied before the next cell dumps the configuration into `info_str`.
config.lr = 0.0001
config.eps = 1e-12
config.min_dec_steps = 4
config.vocab_size = 60000

In [2]:
# Build a "## name : value" listing of the scalar settings found on `config`.
# Only plain str/int/float/bool attributes are reported, and any attribute whose
# name contains 'path', '__' or 'info' is left out.
_REPORTED_TYPES = (str, int, float, bool)
_HIDDEN_NAME_PARTS = ('path', '__', 'info')

info_str = ''.join(
    '## %s : %s\n' % (name, getattr(config, name))
    for name in dir(config)
    if type(getattr(config, name)) in _REPORTED_TYPES
    and not any(part in str(name) for part in _HIDDEN_NAME_PARTS)
)
print(info_str)

## batch_size : 8
## beam_size : 16
## ber_layer : 11
## data_type : Cameras_new8
## emb_dim : 300
## emb_grad : False
## eps : 1e-12
## gound_truth_prob : 0.1
## hidden_dim : 512
## intra_decoder : True
## intra_encoder : True
## key_attention : False
## keywords : POS_FOP_keywords
## loggerName : Text-Summary
## lr : 0.0001
## max_dec_steps : 50
## max_enc_steps : 1000
## max_epochs : 100
## max_iterations : 500000
## max_key_num : 8
## min_dec_steps : 4
## rand_unif_init_mag : 0.02
## trunc_norm_init_std : 0.0001
## vocab_size : 60000
## word_emb_type : word2Vec



# Logger

In [3]:
from datetime import datetime as dt

def getLogger(loggerName, loggerPath):
    """Return a DEBUG-level logger that writes to both the console and a file.

    Calling this again for the same ``loggerName`` (for example when a notebook
    cell is re-run) first closes and detaches the handlers attached earlier, so
    each message is emitted once per destination instead of once per call.

    Args:
        loggerName: Name handed to ``logging.getLogger``.
        loggerPath: Path of the log file. Missing parent directories are
            created; a bare file name (no directory part) is accepted.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(loggerName)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s: - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S')

    # Detach handlers from a previous call; otherwise every re-run of the cell
    # adds another pair and log lines are duplicated.
    for stale_handler in logger.handlers[:]:
        stale_handler.close()
        logger.removeHandler(stale_handler)

    # File output. `dirname` is '' for a bare file name, and os.makedirs('')
    # raises, so only create the directory when there is one to create.
    directory = os.path.dirname(loggerPath)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # The start/stop messages are non-ASCII, so pin the file encoding rather
    # than relying on the platform default.
    fh = logging.FileHandler(loggerPath, encoding='utf-8')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)

    # Console output.
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)

    # Attach both handlers (console first, then file).
    logger.addHandler(ch)
    logger.addHandler(fh)

    logger.info(u'logger已啟動')
    return logger

def removeLogger(logger):
    """Log a shutdown message, then close and detach every handler on ``logger``.

    Args:
        logger: The ``logging.Logger`` to shut down.
    """
    logger.info(u'logger已關閉')
    # Iterate over a snapshot because removeHandler mutates logger.handlers.
    for attached in tuple(logger.handlers):
        attached.close()
        logger.removeHandler(attached)

# View batch data

In [4]:
def test_batch(num_batches=1):
    """Print the reference summary and keywords of the first training batch(es).

    Builds a fresh ``Vocab`` and training ``Batcher`` from ``config`` and prints,
    for every example in each fetched batch, its ``original_summary`` and
    ``key_words`` followed by a separator line.

    Args:
        num_batches: How many batches to preview. The default of 1 matches the
            original behaviour of showing only the first batch.
    """
    vocab = Vocab(config.vocab_path, config.vocab_size)
    batcher = Batcher(config.train_data_path, vocab, mode='train',
                      batch_size=config.batch_size, single_pass=False)

    for _ in range(num_batches):
        # Fetch lazily: only pull a batch that is actually going to be printed.
        batch = batcher.next_batch()
        if batch is None:
            break
        for ex in batch.example_list:
            print("original_summary_sents : ", str(ex.original_summary))
            print("key_words : ", str(ex.key_words))
            print('------------------------------------------------------------\n')
# Run the preview so the cell output shows sample summaries and keywords.
test_batch()


original_summary_sents :  <s> great camera read review if your try to decide on this or some other similarly price camera </s>
key_words :  ['camera', 'more', 'electronic']
------------------------------------------------------------

original_summary_sents :  <s> upgrade from my canon rebel xti and am so glad did </s>
key_words :  ['mode', 'not', 'excellent']
------------------------------------------------------------

original_summary_sents :  <s> good little camera for the price </s>
key_words :  ['image', 'most', 'small']
------------------------------------------------------------

original_summary_sents :  <s> nice canon good feature </s>
key_words :  ['feature', 'external']
------------------------------------------------------------

original_summary_sents :  <s> zoom is amazing low light night day sunshine or not its great </s>
key_words :  ['camera', 'direct']
------------------------------------------------------------

original_summary_sents :  <s> spend the extra and get 

# Get Bin Information

In [None]:
# Read the dataset split sizes from the bin-info file.
# The file is expected to hold one "<split> : <count>" entry per line, in the
# order train, test, valid.
with open(config.bin_info,'r',encoding='utf-8') as f:
    lines = f.readlines()

# Echo the raw file content (each line keeps its own trailing newline).
for line in lines:
    print(line)

# The count is the text after the first ':' on each of the first three lines.
train_num, test_num, val_num = (
    int(lines[position].split(":")[1]) for position in range(3)
)


train : 33574

test : 4196

valid : 4196



# Summary Encoder

In [None]:
from torchsummaryX import summary
from model import Encoder,Model

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = T.device("cuda" if T.cuda.is_available() else "cpu")
encoder = Encoder().to(device)

# Pull one training batch to serve as the sample input for the summary.
vocab = Vocab(config.vocab_path, config.vocab_size)
batcher = Batcher(
    config.train_data_path,
    vocab,
    mode='train',
    batch_size=config.batch_size,
    single_pass=False,
)
batch = batcher.next_batch()
(
    enc_batch, enc_lens, enc_padding_mask,
    enc_key_batch, enc_key_lens, enc_key_padding_mask,
    enc_batch_extend_vocab, extra_zeros, context,
) = get_enc_data(batch)

# The encoder consumes embeddings rather than token ids, so embed the batch
# with a throw-away Model instance first.
enc_batch = Model(False,'glove',vocab).embeds(enc_batch)

# Layer-by-layer summary of the encoder; left as the last expression so the
# notebook displays the returned table.
summary(encoder, enc_batch, enc_lens)

           Kernel Shape  Output Shape   Params  Mult-Adds
Layer                                                    
0_lstm                -  [1631, 1024]  3334144    3325952
1_reduce_h  [1024, 512]      [8, 512]   524800     524288
2_reduce_c  [1024, 512]      [8, 512]   524800     524288
---------------------------------------------------------
                       Totals
Total params          4383744
Trainable params      4383744
Non-trainable params        0
Mult-Adds             4374528


Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_lstm,-,"[1631, 1024]",3334144,3325952
1_reduce_h,"[1024, 512]","[8, 512]",524800,524288
2_reduce_c,"[1024, 512]","[8, 512]",524800,524288


# Summary Decoder

In [None]:
from torchsummaryX import summary
from model import Decoder,Model
from train_util import *

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = T.device("cuda" if T.cuda.is_available() else "cpu")

# Build the vocabulary BEFORE the model: Model(...) takes it as an argument, and
# constructing the model first only worked when `vocab` was left over from an
# earlier cell.
vocab = Vocab(config.vocab_path, config.vocab_size)
model = Model(False,'glove',vocab)

batcher = Batcher(config.train_data_path, vocab, mode='train',
                  batch_size=config.batch_size, single_pass=False)
batch = batcher.next_batch()
(
    enc_batch, enc_lens, enc_padding_mask,
    enc_key_batch, enc_key_lens, enc_key_padding_mask,
    enc_batch_extend_vocab, extra_zeros, context,
) = get_enc_data(batch)
enc_batch = model.embeds(enc_batch)                        # embeddings for the encoder input
enc_out, enc_hidden = model.encoder(enc_batch, enc_lens)

# Embed the keyword ids once, outside the decode loop, so a later iteration can
# never feed already-embedded values back into the embedding layer.
enc_key_batch = model.embeds(enc_key_batch)

# Decoder inputs/targets for this batch (mirrors the start of an MLE training step).
dec_batch, max_dec_len, dec_lens, target_batch = get_dec_data(batch)
s_t = (enc_hidden[0], enc_hidden[1])                       # initial decoder hidden state
# x_t is the decoder's batch input for the current time step.
# NOTE(review): 2 is presumably the start-of-sequence token id -- confirm against Vocab.
x_t = get_cuda(T.LongTensor(len(enc_out)).fill_(2))
prev_s = None            # used for intra-decoder attention (section 2.2 of https://arxiv.org/pdf/1705.04304.pdf)
sum_temporal_srcs = None

for t in range(min(max_dec_len, config.max_dec_steps)):
    # 1 where the ground-truth token is fed to the decoder, 0 where the previous
    # step's token is kept (rand > 0.25, i.e. ground truth about 75% of the time).
    use_gound_truth = get_cuda((T.rand(len(enc_out)) > 0.25)).long()
    x_t = use_gound_truth * dec_batch[:, t] + (1 - use_gound_truth) * x_t
    x_t = model.embeds(x_t)

    # Run one real decoder step first so the recurrent/attention state passed to
    # `summary` below is the state produced by the decoder itself.
    final_dist, s_t, ct_e, sum_temporal_srcs, prev_s = model.decoder(
        x_t, s_t, enc_out, enc_padding_mask, context,
        extra_zeros, enc_batch_extend_vocab, sum_temporal_srcs, prev_s,
        enc_key_batch, enc_key_lens)

    # Layer-by-layer summary of the decoder for this single step.
    decoder_summary = summary(
        model.decoder, x_t, s_t, enc_out, enc_padding_mask, context,
        extra_zeros, enc_batch_extend_vocab, sum_temporal_srcs, prev_s,
        enc_key_batch, enc_key_lens)
    break  # one step is enough for a summary
decoder_summary

                               Kernel Shape    Output Shape    Params  \
Layer                                                                   
0_x_context                     [1324, 300]        [8, 300]    397500   
1_lstm                                    -        [8, 512]   1667072   
2_enc_attention.Linear_W_h     [1024, 1024]  [8, 330, 1024]   1048576   
3_enc_attention.Linear_W_s     [1024, 1024]       [8, 1024]   1049600   
4_enc_attention.Linear_v          [1024, 1]     [8, 330, 1]      1024   
5_dec_attention.Linear_W_prev    [512, 512]     [8, 1, 512]    262144   
6_dec_attention.Linear_W_s       [512, 512]        [8, 512]    262656   
7_dec_attention.Linear_v           [512, 1]       [8, 1, 1]       512   
8_p_gen_linear                    [2860, 1]          [8, 1]      2861   
9_V                             [2048, 512]        [8, 512]   1049088   
10_V1                          [512, 60000]      [8, 60000]  30780000   

                               Mult-Adds  
Layer  

Unnamed: 0_level_0,Kernel Shape,Output Shape,Params,Mult-Adds
Layer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_x_context,"[1324, 300]","[8, 300]",397500,397200
1_lstm,-,"[8, 512]",1667072,1662976
2_enc_attention.Linear_W_h,"[1024, 1024]","[8, 330, 1024]",1048576,1048576
3_enc_attention.Linear_W_s,"[1024, 1024]","[8, 1024]",1049600,1048576
4_enc_attention.Linear_v,"[1024, 1]","[8, 330, 1]",1024,1024
5_dec_attention.Linear_W_prev,"[512, 512]","[8, 1, 512]",262144,262144
6_dec_attention.Linear_W_s,"[512, 512]","[8, 512]",262656,262144
7_dec_attention.Linear_v,"[512, 1]","[8, 1, 1]",512,512
8_p_gen_linear,"[2860, 1]","[8, 1]",2861,2860
9_V,"[2048, 512]","[8, 512]",1049088,1048576


# Train object

In [None]:
from torch.autograd import Variable
import torch
def write_enc_graph(log_dir='runs/Pointer-Generator/glove/Encoder'):
    """Trace the encoder on one training batch and write its graph for TensorBoard.

    Args:
        log_dir: Directory the ``SummaryWriter`` writes to. Defaults to the
            location that was previously hard-coded.
    """
    # Imported here so the function does not depend on another cell having
    # already brought `Encoder` into the notebook namespace.
    from model import Encoder

    device = T.device("cuda" if T.cuda.is_available() else "cpu")
    encoder = Encoder().to(device)

    # One real batch is used as the example input for tracing.
    vocab = Vocab(config.vocab_path, config.vocab_size)
    batcher = Batcher(config.train_data_path, vocab, mode='train',
                      batch_size=config.batch_size, single_pass=False)
    batch = batcher.next_batch()
    # Only the first two of the values returned by get_enc_data are needed here.
    enc_batch, enc_lens, *_ = get_enc_data(batch)
    enc_batch = Model(False,'glove',vocab).embeds(enc_batch)   # embeddings for the encoder input

    # enc_lens is converted with from_numpy so it can be passed to add_graph as
    # a tensor input.
    enc_lens = T.from_numpy(enc_lens).to(device)

    encoder_writer = SummaryWriter(log_dir)
    try:
        encoder_writer.add_graph(encoder, (enc_batch, enc_lens), verbose=True)
    finally:
        # Always release the event file, even if tracing fails.
        encoder_writer.close()

def write_dec_graph(log_dir='runs/Pointer-Generator/glove/Decoder'):
    """Trace one decoder step on a training batch and write its graph for TensorBoard.

    The decoder is first run for a single real step so that the recurrent and
    attention state handed to ``add_graph`` is the state produced by the decoder
    itself. Nothing is written when the batch yields zero decode steps.

    Args:
        log_dir: Directory the ``SummaryWriter`` writes to. Defaults to the
            location that was previously hard-coded.
    """
    device = T.device("cuda" if T.cuda.is_available() else "cpu")

    vocab = Vocab(config.vocab_path, config.vocab_size)
    model = Model(False,'glove',vocab)

    batcher = Batcher(config.train_data_path, vocab, mode='train',
                      batch_size=config.batch_size, single_pass=False)
    batch = batcher.next_batch()
    (
        enc_batch, enc_lens, enc_padding_mask,
        enc_key_batch, enc_key_lens, enc_key_padding_mask,
        enc_batch_extend_vocab, extra_zeros, context,
    ) = get_enc_data(batch)
    enc_batch = model.embeds(enc_batch)                    # embeddings for the encoder input
    enc_out, enc_hidden = model.encoder(enc_batch, enc_lens)
    # Embed the keyword ids as well, matching how the decoder is called in the
    # decoder-summary cell of this notebook.
    # NOTE(review): the original passed raw ids here -- confirm the decoder
    # expects embedded keywords.
    enc_key_batch = model.embeds(enc_key_batch)

    # Decoder inputs/targets for this batch (mirrors the start of an MLE training step).
    dec_batch, max_dec_len, dec_lens, target_batch = get_dec_data(batch)
    if min(max_dec_len, config.max_dec_steps) <= 0:
        return  # no decode step available to trace

    s_t = (enc_hidden[0], enc_hidden[1])                   # initial decoder hidden state
    # x_t is the decoder's batch input for the current time step.
    # NOTE(review): 2 is presumably the start-of-sequence token id -- confirm against Vocab.
    x_t = get_cuda(T.LongTensor(len(enc_out)).fill_(2))
    prev_s = None            # used for intra-decoder attention (section 2.2 of https://arxiv.org/pdf/1705.04304.pdf)
    sum_temporal_srcs = None

    # 1 where the ground-truth token is fed to the decoder, 0 where the previous
    # token is kept (rand > 0.25, i.e. ground truth about 75% of the time).
    use_gound_truth = get_cuda((T.rand(len(enc_out)) > 0.25)).long()
    x_t = use_gound_truth * dec_batch[:, 0] + (1 - use_gound_truth) * x_t
    x_t = model.embeds(x_t)

    final_dist, s_t, ct_e, sum_temporal_srcs, prev_s = model.decoder(
        x_t, s_t, enc_out, enc_padding_mask, context,
        extra_zeros, enc_batch_extend_vocab, sum_temporal_srcs, prev_s,
        enc_key_batch, enc_key_lens)

    # enc_key_lens is converted with from_numpy so it can be passed to add_graph
    # as a tensor input.
    enc_key_lens = T.from_numpy(enc_key_lens).to(device)

    decoder_writer = SummaryWriter(log_dir)
    try:
        decoder_writer.add_graph(
            model.decoder,
            (x_t, s_t, enc_out, enc_padding_mask, context, extra_zeros,
             enc_batch_extend_vocab, sum_temporal_srcs, prev_s,
             enc_key_batch, enc_key_lens),
            verbose=True)
    finally:
        # Always release the event file, even if tracing fails.
        decoder_writer.close()

In [None]:
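# https://blog.csdn.net/u012869752/article/details/72513141
# Inside a Jupyter notebook sys.argv is not empty (it holds the kernel's own
# arguments), so the parser below is invoked as parse_args(args=[]) to ignore it.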
from run import *
from glob import glob
# nvidia-smi -pm 1
if __name__ == "__main__":
    def _str2bool(value):
        """Parse a command-line boolean; plain ``type=bool`` treats any non-empty string as True."""
        if isinstance(value, bool):
            return value
        lowered = str(value).strip().lower()
        if lowered in ('1', 'true', 't', 'yes', 'y'):
            return True
        if lowered in ('0', 'false', 'f', 'no', 'n'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))

    # Defined up front so the `finally` clause can tell whether they were ever
    # created; otherwise an early failure is masked by a NameError during cleanup.
    logger = None
    writer = None
    try:
        # -------------------------- Training ----------------------------------
        parser = argparse.ArgumentParser()
        parser.add_argument('--train_mle', type=_str2bool, default=True)
        parser.add_argument('--train_rl', type=_str2bool, default=False)
        parser.add_argument('--mle_weight', type=float, default=1.0)
        # Set --load_model to a checkpoint name (e.g. '/0045000_1.05_0.00.tar') to resume.
        parser.add_argument('--load_model', type=str, default=None)
        parser.add_argument('--new_lr', type=float, default=None)
        parser.add_argument('--multi_device', type=_str2bool, default=True)
        parser.add_argument('--view', type=_str2bool, default=True)
        parser.add_argument('--pre_train_emb', type=_str2bool, default=True)
        parser.add_argument('--word_emb_type', type=str, default='FastText')
        parser.add_argument('--train_action', type=_str2bool, default=True)
        # args=[] makes argparse use the defaults above instead of reading sys.argv.
        opt = parser.parse_args(args=[])

        # One log file per run, named after the embedding type and start time.
        today = dt.now()
        loggerPath = "LOG/%s-(%s_%s_%s)-(%s:%s:%s)"%(opt.word_emb_type,
                  today.year,today.month,today.day,
                  today.hour,today.minute,today.second)

        logger = getLogger(config.loggerName,loggerPath)

        if opt.load_model is None:
            # Fresh run: clear previous TensorBoard data. Removing the parent
            # directory also removes every sub-directory under it (exp-4,
            # Decoder, ...), so a single call is sufficient.
            shutil.rmtree('runs/Pointer-Generator/FastText', ignore_errors=True)

        # View with: tensorboard --logdir ./runs
        writer = SummaryWriter('runs/Pointer-Generator/FastText/exp-4')
        writer.add_text('Train_Para/',info_str,0)
        if opt.train_action: train_action(opt, logger, writer, train_num)

    except KeyError as e:
        # Only KeyError is reported here; any other exception propagates after cleanup.
        tb = sys.exc_info()[2]
        print(sys.exc_info())
        print(tb.tb_lineno)
        print(e)
    finally:
        if logger is not None:
            removeLogger(logger)
        if writer is not None:
            writer.close()
        

2020-03-19 11:10:45 - Text-Summary - INFO: - logger已啟動
2020-03-19 11:10:45 - Text-Summary - INFO: - ------Training Setting--------
2020-03-19 11:10:45 - Text-Summary - INFO: - Traing Type :Cameras_new8
2020-03-19 11:10:45 - Text-Summary - INFO: - Training mle: True, mle weight: 1.00
2020-03-19 11:10:45 - Text-Summary - INFO: - use pre_train_FastText vocab_size 60000 

2020-03-19 11:10:45 - Text-Summary - INFO: - intra_encoder: True intra_decoder: True 

2020-03-19 11:11:02 - Text-Summary - INFO: - Model(
  (encoder): Encoder(
    (lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
    (reduce_h): Linear(in_features=1024, out_features=512, bias=True)
    (reduce_c): Linear(in_features=1024, out_features=512, bias=True)
  )
  (decoder): Decoder(
    (enc_attention): encoder_attention(
      (W_h): Linear(in_features=1024, out_features=1024, bias=False)
      (W_s): Linear(in_features=1024, out_features=1024, bias=True)
      (W_t): Linear(in_features=300, out_features=1024, bias