In [None]:
import os
import args
import time
import torch
import random
import logging
import warnings
import argparse
import numpy as np
import torch.nn as nn
from torch import cuda
from torch.autograd import Variable

# Silence every Python warning for the rest of the session (keeps cell output
# compact, but also hides deprecation notices).
warnings.filterwarnings('ignore')


In [None]:
import os
import args
import time
import torch
import random
import logging
import warnings
import argparse
import numpy as np
import torch.nn as nn
from torch import cuda
from torch.autograd import Variable

# Silence every Python warning. NOTE(review): this cell repeats the imports and
# filter of the first cell verbatim.
warnings.filterwarnings('ignore')


# 0 | Hyper Parameters

In [None]:
# Build the option namespace from the project's argument definitions. Parsing an
# empty argv yields the declared defaults, which are then overridden below.
parser = argparse.ArgumentParser()
args.add_data_options(parser)
args.add_model_options(parser)
args.add_train_options(parser)
opt = parser.parse_args([])

# --- Data and output locations ---
opt.save_path = 'dataset/Douban/model/'
opt.online_process_data = True
opt.train_src = 'dataset/Douban/train_10m.src'
opt.train_tgt = 'dataset/Douban/train_10m.tgt'

# --- Model size ---
opt.layers = 1
opt.enc_size = 1024
opt.word_vec_size = 512
opt.dropout = 0.1
opt.enc_heads = 16
opt.dec_heads = 16

# --- Optimisation / schedule ---
opt.batch_size = 512
opt.beam_size = 3
opt.epochs = 20
# opt.gpus = [0]
opt.learning_rate = 0.003
opt.curriculum = 0
opt.extra_shuffle = True
opt.start_eval_batch = 15000
opt.eval_per_batch = 1200
opt.log_interval = 1

# --- Reproducibility ---
opt.seed = 1234
opt.cuda_seed = 1234

# Pick the device from the same switch that decides model/data placement
# (opt.gpus). Using CUDA availability alone would hand "cuda" to the layers
# while the model itself is kept on the CPU whenever opt.gpus is empty.
opt.device = torch.device("cuda" if (opt.gpus and torch.cuda.is_available()) else "cpu")

In [None]:
# A GPU is present but the run is configured for CPU (opt.gpus is empty).
if torch.cuda.is_available() and (not opt.gpus):
    print('CUDA ERROR!')

# Seed the CPU-side generators (torch, Python's random, NumPy).
if opt.seed > 0:
    torch.manual_seed(opt.seed)
    random.seed(opt.seed)
    np.random.seed(opt.seed)

if opt.gpus:
    # Select the GPU first so that everything below targets the device that
    # will actually be used.
    cuda.set_device(opt.gpus[0])
    if opt.cuda_seed > 0:
        # Kept from the original: the torch generator is re-seeded with the
        # CUDA seed, overriding opt.seed when the two differ.
        torch.manual_seed(opt.cuda_seed)
        # Seed every visible GPU once instead of seeding the current one twice.
        torch.cuda.manual_seed_all(opt.cuda_seed)
        torch.backends.cudnn.deterministic = True

# NOTE(review): this follows CUDA availability only, not opt.gpus.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 1 | Data Preparation

In [None]:
import layer

import onlinePreprocess as onlinePreprocess
# Configure the preprocessor's module-level settings before any data is built.
onlinePreprocess.seq_length = opt.max_sent_length
onlinePreprocess.shuffle = bool(opt.process_shuffle)

from onlinePreprocess import prepare_data_online
from layer.Dict import save_dict, load_dict

# Vocabularies, loaded from pickles stored under opt.save_path:
#   dicts = {'src': <source dict>, 'tgt': <target dict>}
dicts = {
    'src': load_dict(opt.save_path + 'src.pkl'),
    'tgt': load_dict(opt.save_path + 'tgt.pkl'),
}

# prepare_data_online returns a nested mapping; the training split is read as
#   dataset['train'] = {'src': ..., 'ins': ..., 'del': ..., 'tgt': ...}
# (the original note describes each entry as a list — TODO confirm).
# The two None arguments are passed through unchanged from the original call.
dataset = prepare_data_online(opt.train_src,
                              None,
                              opt.train_tgt,
                              None)

train_split = dataset['train']
trainData = layer.IDDataSet(train_split['src'],
                            train_split['ins'],
                            train_split['del'],
                            train_split['tgt'],
                            opt.batch_size, opt.gpus)


# 2 | Model Structure

In [None]:
# Display the configured torch device as this cell's output.
opt.device

In [None]:
from layer.TransModel import Encoder, Decoder, Transformer

# Vocabulary sizes drive the embedding tables and the output projection.
src_vocab_size = dicts['src'].size()
tgt_vocab_size = dicts['tgt'].size()

# Trailing constructor arguments shared by both encoders and the decoder.
shared_tail = (opt.enc_size, opt.dropout, opt.device)

# Construction order is kept (source encoder, target encoder, decoder,
# generator) so parameter initialisation consumes the RNG in the same sequence.
enc_src = Encoder(src_vocab_size, opt.word_vec_size, opt.layers, opt.enc_heads, *shared_tail)
enc_tgt = Encoder(tgt_vocab_size, opt.word_vec_size, opt.layers, opt.enc_heads, *shared_tail)
dec = Decoder(tgt_vocab_size, opt.word_vec_size, opt.layers, opt.dec_heads, *shared_tail)

# Output head: linear projection to the target vocabulary followed by
# log-softmax, i.e. it emits log-probabilities.
# NOTE(review): the input width is opt.dec_size, which is not set in this
# notebook — confirm it matches the decoder's output width.
output_projection = nn.Linear(opt.dec_size, tgt_vocab_size)
generator = nn.Sequential(output_projection, nn.LogSoftmax(dim=-1))

pad_id = layer.Constants.PAD
model = Transformer(enc_src, enc_tgt, dec, pad_id, pad_id, opt)
model.generator = generator

# To resume from a saved checkpoint instead of training from scratch:
# model.load_state_dict(torch.load('TransModel.pt',map_location=torch.device('cpu')))
def initialize_weights(m):
    """Xavier-uniform initialise the ``weight`` of module ``m`` in place.

    Intended for ``model.apply(initialize_weights)``. Only tensors with more
    than one dimension are touched, so 1-D weights (e.g. normalisation scales)
    are left alone. Modules with no ``weight`` attribute, or whose ``weight``
    is ``None`` (e.g. ``LayerNorm(elementwise_affine=False)``), are skipped
    rather than raising.

    Args:
        m: any ``nn.Module``.
    """
    weight = getattr(m, 'weight', None)
    # hasattr() alone is not enough: a registered-but-None weight would pass it.
    if isinstance(weight, torch.Tensor) and weight.dim() > 1:
        nn.init.xavier_uniform_(weight.data)
# Run the initialiser over the model and every submodule.
model.apply(initialize_weights)

# Place the model according to opt.gpus. The truthiness test matches the other
# cells' checks on opt.gpus and, unlike len(), also tolerates None.
if opt.gpus:
    model.cuda()
    generator.cuda()
else:
    model.cpu()
    generator.cpu()

# 3 | Train Step

In [None]:
LR = opt.learning_rate
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# model.generator ends in LogSoftmax, so its output is already log-probabilities.
# NLLLoss is the matching criterion; CrossEntropyLoss would apply log-softmax a
# second time (same value, redundant work).
loss_func = nn.NLLLoss(ignore_index=layer.Constants.PAD)


for epoch in range(opt.start_epoch, opt.epochs+1):
    # Train Step
    model.train()
    if opt.extra_shuffle and epoch > opt.curriculum:
        trainData.shuffle()
    # Visit the batches in a fresh random order each epoch.
    batch_order = torch.randperm(len(trainData))

    # Running statistics; reset after every log line.
    start_time = time.time()
    num_correct, num_words = 0, 0
    total_loss = 0
    for i in range(len(trainData)):
        optimizer.zero_grad()

        batch_idx = batch_order[i]
        batch = trainData[batch_idx]

        # Each field's first element is transposed before entering the model
        # (presumably to batch-first layout — TODO confirm against IDDataSet).
        keys = batch[0][0].permute(1,0)
        guide1 = batch[1][0].permute(1,0)
        guide2 = batch[2][0].permute(1,0)
        tgt = batch[3][0].permute(1,0)

        g_output = model(keys, guide1, guide2, tgt)
        # Targets are tgt without its first column, flattened to one axis.
        g_target = tgt[:,1:].contiguous().view(-1)
        no_pad_idx = g_target.ne(layer.Constants.PAD)

        # Score every position, then keep only the non-padding ones.
        flat_output = g_output.contiguous().view(-1, g_output.shape[-1])
        preds = model.generator(flat_output)[no_pad_idx]
        targets = g_target[no_pad_idx]

        loss = loss_func(preds, targets)
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

        # Token-level accuracy bookkeeping; needs no gradient tracking.
        with torch.no_grad():
            preds = torch.argmax(preds, dim=-1)
            num_correct += preds.eq(targets).sum().item()
        num_words += no_pad_idx.sum().item()

        if (i+1) % opt.log_interval == 0:
            batch_time = time.time() - start_time
            mins, secs = int(batch_time/60), int(batch_time%60)

            # Guard the ratio in case a window held no non-padding tokens.
            train_acc = 100*num_correct/max(num_words, 1)
            print('| Epoch: {0} | Batch: {1}/{2} | Train_loss: {3:.3f} | Train_acc: {4:.2f}% |'.format(
                    epoch, i+1, len(trainData), total_loss/opt.log_interval, train_acc
            ))
            print('\t | Train_time: {0}m {1}s |'.format(mins, secs))
            num_correct, num_words = 0, 0
            total_loss = 0
            start_time = time.time()


In [None]:
# Persist only the model's parameters (its state_dict) to 'TransModel.pt',
# a path relative to the current working directory.
torch.save(model.state_dict(), 'TransModel.pt')