In [None]:
import argparse
import torch
import time
import json
import numpy as np
import math
import random
import codecs
import os
from config import flags
from sklearn.model_selection import train_test_split

In [None]:
# One seed shared by every random number generator this notebook touches.
seed = random_state = 34

# Record the seed on the shared `flags` configuration object as well.
flags.seed = seed

# Seed Python's `random`, NumPy, and PyTorch (CPU generator and CUDA).
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [None]:
def batch_generator(X, y, batch_size=128, return_idx=False, crf=False, device=None):
    """Yield mini-batches of ``X``/``y`` as tensors, longest sequence first.

    Batches are consecutive slices of at most ``batch_size`` rows. Within a
    batch, rows are reordered by decreasing count of non-zero entries in
    ``X`` (zero is treated as padding), which is the ordering
    ``pack_padded_sequence`` requires by default.

    Args:
        X: NumPy integer array, expected to be 2-D (one row per sequence)
            with 0 used as padding.
        y: NumPy array of labels whose first axis lines up with ``X``.
        batch_size: maximum number of rows per batch.
        return_idx: if true, each yielded tuple also carries the permutation
            that was applied inside the batch (indices relative to the
            batch slice, not to ``X``).
        crf: if true, 2-D labels are yielded as a padded tensor; otherwise
            2-D labels are wrapped in a ``PackedSequence``.
        device: where to place the tensors. ``None`` (default) selects CUDA
            when it is available and falls back to the CPU otherwise, so the
            generator no longer crashes on machines without a GPU.

    Yields:
        ``(batch_X, batch_y, batch_X_len, batch_X_mask)`` and, when
        ``return_idx`` is set, ``batch_idx`` as a fifth element.
        ``batch_X`` and ``batch_X_mask`` are int64 tensors, ``batch_X_len``
        and ``batch_idx`` are NumPy arrays.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for offset in range(0, X.shape[0], batch_size):
        # Slice once and reuse instead of re-slicing for every derived array.
        chunk_X = X[offset:offset + batch_size]
        chunk_y = y[offset:offset + batch_size]
        non_pad = chunk_X != 0

        # Sequence length = number of non-padding positions in each row.
        batch_X_len = np.sum(non_pad, axis=1)
        # Descending-length order (ascending argsort, reversed).
        batch_idx = batch_X_len.argsort()[::-1]
        batch_X_len = batch_X_len[batch_idx]

        batch_X_mask = non_pad[batch_idx].astype(np.uint8)
        batch_X = torch.from_numpy(chunk_X[batch_idx]).long().to(device)
        batch_X_mask = torch.from_numpy(batch_X_mask).long().to(device)
        batch_y = torch.from_numpy(chunk_y[batch_idx]).long().to(device)

        # Per-token labels are packed unless the caller asked for the padded
        # form (crf=True); 1-D labels are always passed through unchanged.
        if batch_y.dim() == 2 and not crf:
            batch_y = torch.nn.utils.rnn.pack_padded_sequence(batch_y, batch_X_len, batch_first=True)

        if return_idx:
            yield (batch_X, batch_y, batch_X_len, batch_X_mask, batch_idx)
        else:
            yield (batch_X, batch_y, batch_X_len, batch_X_mask)

In [None]:
def valid_loss(model, valid_X, valid_y, batch_size=16, crf=False):
    """Return the mean per-batch loss of ``model`` over ``valid_X``/``valid_y``.

    The model is switched to eval mode for the pass and back to train mode
    afterwards (also when an exception interrupts the pass). The forward
    passes run without gradient tracking, since nothing is back-propagated
    here.

    Args:
        model: module called as ``model(X, lengths, y)`` and returning a
            scalar loss tensor.
        valid_X: inputs, forwarded to ``batch_generator``.
        valid_y: labels, forwarded to ``batch_generator``.
        batch_size: rows per evaluation batch.
        crf: forwarded to ``batch_generator``.

    Returns:
        The unweighted average of the batch losses as a Python float. A
        smaller final batch counts as much as a full one.

    Raises:
        ZeroDivisionError: if the data yields no batches.
    """
    model.eval()
    losses = []
    try:
        # Without no_grad every forward pass would build an autograd graph
        # that is never used, costing memory and time.
        with torch.no_grad():
            for batch in batch_generator(valid_X, valid_y, batch_size=batch_size, crf=crf):
                batch_valid_X, batch_valid_y, batch_valid_X_len, _ = batch
                loss = model(batch_valid_X, batch_valid_X_len, batch_valid_y)
                losses.append(loss.item())
    finally:
        # Always leave the model in training mode, as callers expect.
        model.train()
    return sum(losses) / len(losses)

In [None]:
from model import Encoder, Decoder, Seq2Seq

In [None]:
def train(train_X, train_y, valid_X, valid_y, model, model_fn, optimizer, parameters, epochs, batch_size, run_epoch, crf):
    """Train ``model`` for ``epochs`` epochs, checkpointing the best epoch.

    After every epoch the loss is measured on the full training set and on
    the validation set. Whenever the validation loss improves, the whole
    model is pickled to ``model_fn``. The training data is reshuffled after
    each epoch, so the first epoch sees the data in the order given. When
    training ends, the weights of the best epoch are loaded back into
    ``model`` in place, so the caller's object holds the best weights rather
    than those of the last epoch.

    Args:
        train_X, train_y: training inputs and labels (NumPy arrays).
        valid_X, valid_y: validation inputs and labels (NumPy arrays).
        model: module called as ``model(X, lengths, y)`` and returning a
            scalar loss tensor.
        model_fn: path of the checkpoint file.
        optimizer: optimizer that updates ``parameters``.
        parameters: the parameters whose gradient norm is clipped to 1.0.
        epochs: number of passes over the training data.
        batch_size: rows per batch, for training and for evaluation.
        run_epoch: unused; kept so existing callers keep working.
        crf: forwarded to ``batch_generator`` and ``valid_loss``.

    Returns:
        ``(train_history, valid_history)``: two lists with one loss value per
        epoch.
    """
    import copy

    best_loss = float("inf")
    best_state = None
    valid_history = []
    train_history = []

    # torch.save does not create missing directories.
    checkpoint_dir = os.path.dirname(model_fn)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    for epoch in range(epochs):
        for batch in batch_generator(train_X, train_y, batch_size, crf=crf):
            batch_train_X, batch_train_y, batch_train_X_len, _ = batch
            loss = model(batch_train_X, batch_train_X_len, batch_train_y)
            optimizer.zero_grad()
            loss.backward()
            # clip_grad_norm_ is the current name of the deprecated clip_grad_norm.
            torch.nn.utils.clip_grad_norm_(parameters, 1.)
            optimizer.step()

        loss = valid_loss(model, train_X, train_y, batch_size, crf=crf)
        train_history.append(loss)
        loss = valid_loss(model, valid_X, valid_y, batch_size, crf=crf)
        valid_history.append(loss)

        if loss < best_loss:
            best_loss = loss
            torch.save(model, model_fn)
            # Snapshot the weights so they can be restored without unpickling
            # the checkpoint again.
            best_state = copy.deepcopy(model.state_dict())

        # Reshuffle for the next epoch; X and y use the same permutation.
        shuffle_idx = np.random.permutation(len(train_X))
        train_X = train_X[shuffle_idx]
        train_y = train_y[shuffle_idx]
        print(str(epoch) + '/' + str(epochs))

    # Rebinding the local name `model` would not affect the caller, so the
    # best weights are copied into the existing object instead.
    if best_state is not None:
        model.load_state_dict(best_state)
    return train_history, valid_history

In [None]:
def run(domain, data_dir, model_dir, valid_split, epochs, lr, dropout, batch_size, runs, crf):
    """Train ``runs`` independent Seq2Seq models on one domain's data.

    Loads ``gen.vec.npy`` (embedding matrix) and ``<domain>.npz`` (arrays
    ``train_X`` and ``train_y``) from ``data_dir``, holds out the last
    ``valid_split`` rows for validation, and trains a fresh model per run.
    Run ``r`` is checkpointed to ``model_dir + domain + str(r)``.

    Args:
        domain: dataset name; also the stem of the ``.npz`` file.
        data_dir: directory prefix of the data files. Paths are built by
            string concatenation, so it must end with a path separator.
        model_dir: directory prefix of the checkpoints, same convention.
        valid_split: number of trailing rows used for validation. Must be
            greater than 0 and smaller than the dataset size.
        epochs: epochs per run.
        lr: Adam learning rate.
        dropout: stored on ``flags`` for the model constructors.
        batch_size: rows per batch.
        runs: number of models to train.
        crf: forwarded to ``train``.

    Returns:
        A list with one ``(train_history, valid_history)`` tuple per run.

    Raises:
        ValueError: if ``valid_split`` would leave the training or the
            validation set empty.
    """
    gen_emb = np.load(data_dir + "gen.vec.npy")
    # np.load returns a lazy archive that keeps the file open; read the two
    # arrays that are needed and close it straight away.
    with np.load(data_dir + domain + ".npz") as ae_data:
        train_data = ae_data['train_X']
        train_label = ae_data['train_y']

    total = train_data.shape[0]
    # A value of 0 would make `[:-valid_split]` an empty training set, and a
    # value >= total would do the same; fail early with a clear message.
    if not 0 < valid_split < total:
        raise ValueError(
            "valid_split must be between 1 and %d (dataset size minus one), got %r"
            % (total - 1, valid_split)
        )

    # NOTE(review): this shuffled index is never applied, so the split below
    # is positional. A random split (sklearn's train_test_split) was present
    # here but disabled. The shuffle call is kept only so the NumPy random
    # stream, and therefore the later epoch shuffles, stays unchanged.
    # Confirm whether a shuffled split was intended.
    idx = np.arange(total)
    np.random.shuffle(idx)

    split_at = total - valid_split
    train_X = train_data[:split_at]
    train_y = train_label[:split_at]
    valid_X = train_data[split_at:]
    valid_y = train_label[split_at:]

    print("数据集总大小：", total)
    print("训练集大小：", len(train_X))
    print("验证集大小：", len(valid_X))

    # Use the GPU when there is one; otherwise stay on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    histories = []
    for r in range(runs):
        print('正在训练第 ' + str(r + 1) + '轮')
        # Publish the hyper-parameters on the shared flags object that the
        # model constructors receive.
        flags.model_dir = model_dir
        flags.batch_size = batch_size
        flags.epochs = epochs
        flags.data_dir = data_dir
        flags.lr = lr
        flags.dropout = dropout
        encoder = Encoder(gen_emb, flags)
        decoder = Decoder(flags)
        model = Seq2Seq(encoder, decoder, flags)
        model.to(device)
        print(model)

        # Only trainable parameters are optimized and gradient-clipped.
        parameters = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.Adam(parameters, lr=lr)
        train_history, valid_history = train(train_X, train_y, valid_X, valid_y, model, model_dir+domain+str(r),
                                             optimizer, parameters, epochs, batch_size, r, crf)
        histories.append((train_history, valid_history))
    return histories

In [None]:
def parse_bool_flag(value):
    """Convert a command-line string into a bool.

    ``type=bool`` cannot be used with argparse for this: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    Accepts true/t/yes/y/1 and false/f/no/n/0 in any letter case; the empty
    string counts as false. A bool is returned unchanged.

    Raises:
        argparse.ArgumentTypeError: for any other value.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


def build_arg_parser():
    """Build the command-line parser holding the training options and defaults."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str, default="model/Seq2Seq/")
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--runs', type=int, default=5)
    parser.add_argument('--domain', type=str, default="restaurant15")
    parser.add_argument('--data_dir', type=str, default="data/prep_data_15/")
    parser.add_argument('--valid', type=int, default=150)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--dropout', type=float, default=0.5)
    # `--crf`, `--crf true` and `--crf false` are all accepted; a bare
    # `--crf` means true.
    parser.add_argument('--crf', type=parse_bool_flag, nargs='?', const=True, default=False)
    return parser


if __name__ == "__main__":
    parser = build_arg_parser()
    # parse_known_args ignores arguments it does not recognise, such as the
    # ones a Jupyter kernel is launched with.
    args = parser.parse_known_args()[0]

    run(args.domain, args.data_dir, args.model_dir, args.valid, args.epochs, args.lr, args.dropout, args.batch_size, args.runs, args.crf)