这是用来debug的notebook，目前已经能够在训练集上进行测试。

代码目前和train.py中代码保持一致，在修改train.py前先在此处测试

In [None]:
import argparse
import os
import pickle
import time

import torch
from torch.optim import Adam

from model.model import ATT_model
from utils.dataloader import myDataloader
from utils.dict import Dictionary

In [None]:
def loss_func(true_answers, pred_answers, probs):
    '''Compute the summed negative log-likelihood and the number of hits.

    Args:
        true_answers: tensor holding the true answer of every sample in the
            batch; any shape with exactly one element per sample is accepted.
        pred_answers: (tensor(batch_size)) predicted answers of the batch.
        probs: (tensor(batch_size)) probability the model assigned to the
            true answer of every sample.
    Returns:
        loss: scalar tensor -sum(log(probs)), on the same device as `probs`.
        correct_num: (int) number of samples with true_answer == pred_answer.
    '''
    # The loss stays on whatever device `probs` lives on, so the function
    # works with and without a GPU.
    loss = -torch.sum(torch.log(probs))

    # Flatten both sides and compare on a single device: the callers in this
    # file pass the un-moved answers while the predictions come from the model.
    predicted = pred_answers.reshape(-1)
    expected = true_answers.reshape(-1).to(predicted.device)
    correct_num = int((expected == predicted).sum().item())

    return loss, correct_num

In [None]:
class myParameters:
    '''Plain container for the file paths and settings used in this notebook.

    Every value is a class attribute and is read through the module-level
    `params` instance created right after the class.
    '''
    # locations of the pickled inputs
    dict = './temp/dictionary.pickle'
    traindata = './temp/train_vec.pickle'
    validdata = './temp/valid_vec.pickle'

    # model settings (passed to ATT_model as embed_dim / hidden_dim / dropout_rate)
    embedding_size = 384
    gru_size = 256
    dropout = 0.1

    # training settings
    batch_size = 32
    epoch = 1
    lr = 1e-3
    l2 = 1e-4

    # device index handed to torch.cuda.set_device
    gpu = 0


params = myParameters()

In [None]:
# Load the pickled dictionary.
with open(params.dict, 'rb') as dict_file:
    dictionary = pickle.load(dict_file)

# Load the pickled training and validation data (both files are opened
# before either one is read).
with open(params.traindata, 'rb') as train_file:
    with open(params.validdata, 'rb') as valid_file:
        train_vec = pickle.load(train_file)
        valid_vec = pickle.load(valid_file)

# Wrap each split in a batch loader that shares the dictionary and batch size.
batch_size = params.batch_size
batched_train_data = myDataloader(dictionary, train_vec, batch_size)
batched_valid_data = myDataloader(dictionary, valid_vec, batch_size)

In [None]:
# Instantiate the model.
model = ATT_model(
    vocab_size=dictionary.len,
    embed_dim=params.embedding_size,
    hidden_dim=params.gru_size,
    dropout_rate=params.dropout,
    PAD=0,
)
# Instantiate the optimizer. weight_decay is Adam's L2 penalty, so it takes
# params.l2 (0.0001); the dropout rate (0.1) is a model setting and must not
# be used as the penalty.
optimizer = Adam(model.parameters(), lr=params.lr, weight_decay=params.l2)

In [None]:
def eval(model, data):
    '''Evaluate `model` on every batch of `data` without updating it.

    NOTE: the name shadows the builtin `eval`; it is kept because callers in
    this file use it.

    Args:
        model: the network; called as model(docs, doc_lengths, querys,
            query_lengths, answers) with CUDA tensors and expected to return
            (probs, pred_answers).
        data: batched dataset exposing `batch_num` and integer indexing that
            yields ((docs, doc_lengths), (querys, query_lengths), answers).
    Returns:
        (avg_loss, accuracy): the loss summed over all batches divided by the
        number of samples (a Python float), and the fraction of samples
        answered correctly.
    '''
    total_correct = 0
    total_loss = 0.0
    total_sample_num = 0

    model.eval()
    try:
        # No gradients are needed for evaluation; disabling them avoids
        # building (and holding on to) an autograd graph per batch.
        with torch.no_grad():
            for i in range(data.batch_num):
                (docs, doc_lengths), (querys, query_lengths), answers = data[i]
                probs, pred_answers = model(docs.cuda(), doc_lengths.cuda(), querys.cuda(),
                                            query_lengths.cuda(), answers.cuda())
                loss, pred_correct = loss_func(answers, pred_answers, probs)

                # .item() converts to a plain float so the running total is
                # not a tensor that keeps references to every batch.
                total_loss += loss.item()
                total_correct += pred_correct
                total_sample_num += answers.shape[0]

                del loss, pred_answers, probs
    finally:
        # Always hand the model back in training mode, even if a batch fails.
        model.train()

    return total_loss / total_sample_num, total_correct / total_sample_num


In [None]:
def trainModel(model, train_data, valid_data, optimizer):
    '''Train `model` for params.epoch epochs, validating and checkpointing
    after each epoch.

    Progress lines are appended to ./log.txt and one checkpoint per epoch is
    written under models/.

    Args:
        model: network to train; inputs are moved with .cuda(), so the model
            is expected to already be on the GPU.
        train_data: batched training set exposing shuffle(), `batch_num` and
            integer indexing that yields
            ((docs, doc_lengths), (querys, query_lengths), answers).
        valid_data: batched validation set, passed on to eval().
        optimizer: optimizer built over model.parameters().
    '''
    log_path = './log.txt'
    checkpoint_dir = 'models'
    start_time = time.time()

    def trainEpoch(epoch):
        '''Run one pass over the shuffled training set.

        Returns:
            (avg_loss, accuracy) over all samples seen in this epoch.
        '''
        train_data.shuffle()
        batch_num = train_data.batch_num

        total_correct = 0
        total_loss = 0.0
        total_sample_num = 0

        for i in range(batch_num):
            (docs, doc_lengths), (querys, query_lengths), answers = train_data[i]

            optimizer.zero_grad()

            probs, pred_answers = model(docs.cuda(), doc_lengths.cuda(), querys.cuda(),
                                        query_lengths.cuda(), answers.cuda())

            loss, pred_correct = loss_func(answers, pred_answers, probs)

            loss.backward()

            # Clip every gradient element to [-5, 5]. A parameter that took
            # no part in this batch has grad None and is skipped.
            for parameter in model.parameters():
                if parameter.grad is not None:
                    parameter.grad.data.clamp_(-5.0, 5.0)

            # update parameters
            optimizer.step()

            # .item() yields a plain float (indexing a 0-dim tensor with [0]
            # is rejected by current PyTorch) and keeps the running total
            # free of autograd references.
            total_loss += loss.item()
            total_correct += pred_correct
            total_sample_num += answers.shape[0]

            end_time = time.time()

            with open(log_path, 'a') as f:
                print(
                    "Epoch %d, %d th batch, avg loss: %.2f, acc: %6.2f; %6.0f s elapsed"
                    % (epoch, i, total_loss / total_sample_num,
                       total_correct / total_sample_num * 100,
                       end_time - start_time),
                    file=f)

            del loss, pred_answers, probs

        return total_loss / total_sample_num, total_correct / total_sample_num

    for epoch in range(params.epoch):

        # 1. train
        train_loss, train_acc = trainEpoch(epoch)

        with open(log_path, 'a') as f:
            print('Epoch %d:\t average loss: %.2f\t train accuracy: %g' % (epoch, train_loss, train_acc * 100), file=f)

        # 2. evaluate on valid dataset
        valid_loss, valid_acc = eval(model, valid_data)
        with open(log_path, 'a') as f:
            # both separator lines belong in the log, not on stdout
            print('=' * 20, file=f)
            print('Evaluating on validation set:', file=f)
            print('Validation loss: %.2f' % valid_loss, file=f)
            print('Validation accuracy: %g' % (valid_acc*100), file=f)
            print('=' * 20, file=f)

        # 3. save model
        checkpoint = {
            'model': model.state_dict(),
            'epoch': epoch,
            'optimizer': optimizer.state_dict(),
            'opt': params,
        }
        # torch.save does not create missing directories; make sure a whole
        # epoch of training is not lost to a missing folder.
        os.makedirs(checkpoint_dir, exist_ok=True)
        torch.save(checkpoint,
                   'models/model_epoch%d_acc_%.2f.pt' % (epoch, 100*valid_acc))


In [None]:
# Select the GPU named in the settings, then move the model onto it.
gpu_index = params.gpu
torch.cuda.set_device(gpu_index)
model.cuda()

In [None]:
# Start training on the batched training set, validating on the batched
# validation set.
trainModel(
    model=model,
    train_data=batched_train_data,
    valid_data=batched_valid_data,
    optimizer=optimizer,
)