这是用来debug的notebook，目前已经能够在训练集上进行测试。

代码目前与 train.py 中的代码保持一致；修改 train.py 之前，请先在此处测试。

In [1]:
import torch
import argparse
import pickle
from utils.dict import Dictionary
from utils.dataloader import myDataloader
from model.model import ATT_model
from torch.optim import Adam

In [2]:
def loss_func(true_answers, pred_answers, probs):
    '''Compute the negative log-likelihood loss and count correct predictions.

    The loss follows loss = -sum(log(p(x))), x in answers.

    Args:
        true_answers: the answers of a batch; flattened before comparison, so
            extra singleton dimensions are tolerated
        pred_answers: (tensor(batch_size)) predicted answers of a batch
        probs: (tensor(batch_size)) probability of true answer in predict vector s
    Returns:
        loss: scalar tensor -sum(log(probs)), kept on the device of ``probs``
        correct_num: (int) number of positions where true_answer == pred_answer
    '''
    # The result stays on the device of `probs`; forcing .cuda() here would
    # fail on hosts without a GPU.
    loss = -torch.sum(torch.log(probs))

    # Flatten both sides (a plain squeeze() turns a batch of one into a 0-d
    # tensor) and compare on the predictions' device.
    targets = true_answers.reshape(-1).to(pred_answers.device)
    matches = targets == pred_answers.reshape(-1)
    correct_num = int(matches.sum().item())
    return loss, correct_num

In [3]:
class myParameters:
    '''Hard-coded settings for this debugging run (file paths and hyper-parameters).'''

    # --- pickled input files ---
    dict = './temp/dictionary.pickle'
    traindata = './temp/train_vec.pickle'
    validdata = './temp/valid_vec.pickle'

    # --- model sizes ---
    embedding_size = 384
    gru_size = 256
    dropout = 0.1

    # --- optimisation ---
    batch_size = 32
    epoch = 1
    lr = 1e-3
    l2 = 1e-4

    # --- hardware: CUDA device index ---
    gpu = 0

In [4]:
# Instantiate the settings container; later cells read its values as params.<name>.
params = myParameters()

In [5]:
# Load the dictionary
with open(params.dict, 'rb') as dict_file:
    dictionary = pickle.load(dict_file)

# Load the training and validation data (both files are opened before either is read)
with open(params.traindata, 'rb') as train_file:
    with open(params.validdata, 'rb') as valid_file:
        train_vec = pickle.load(train_file)
        valid_vec = pickle.load(valid_file)

# Wrap each split in a batching loader
batched_train_data = myDataloader(dictionary, train_vec, params.batch_size)
batched_valid_data = myDataloader(dictionary, valid_vec, params.batch_size)

In [6]:
# Instantiate the model
model = ATT_model(
    vocab_size=dictionary.len,
    embed_dim=params.embedding_size,
    hidden_dim=params.gru_size,
    dropout_rate=params.dropout,
    PAD=0,
)
# Instantiate the optimizer.
# weight_decay is Adam's L2 penalty coefficient, so it must come from
# params.l2 rather than from the dropout rate.
optimizer = Adam(model.parameters(),
                 lr=params.lr,
                 weight_decay=params.l2)

In [7]:
def trainModel(model, train_data, valid_data, optimizer):
    '''Train ``model`` on ``train_data`` and print per-batch and per-epoch statistics.

    The number of epochs is read from the module-level ``params.epoch``.

    Args:
        model: network called as model(docs, doc_lengths, querys, query_lengths,
            answers); its result is unpacked as (probs, pred_answers)
        train_data: batched data providing shuffle(), batch_num and indexing
            by batch number
        valid_data: accepted for interface compatibility; not used here
        optimizer: optimizer that updates the parameters of ``model``
    Returns:
        None
    '''
    def trainEpoch(epoch):
        '''Run one pass over train_data; return (average loss, accuracy) as floats.'''
        # Put the batches wherever the model's weights live instead of
        # hard-coding CUDA transfers.
        device = next(model.parameters()).device

        model.train()
        train_data.shuffle()
        batch_num = train_data.batch_num

        total_correct = 0
        total_loss = 0.0
        total = 0
        for i in range(batch_num):
            (docs, doc_lengths), (querys, query_lengths), answers = train_data[i]
            batch_size = answers.shape[0]

            optimizer.zero_grad()

            probs, pred_answers = model(docs.to(device), doc_lengths.to(device),
                                        querys.to(device), query_lengths.to(device),
                                        answers.to(device))

            loss, pred_correct = loss_func(answers, pred_answers, probs)

            loss.backward()

            # set gradient clipping threshold to 5; the utility skips
            # parameters that received no gradient
            torch.nn.utils.clip_grad_value_(model.parameters(), 5.0)

            # update parameters
            optimizer.step()

            # Accumulate a Python float: adding the loss tensor itself would
            # keep every batch's autograd graph referenced until the epoch ends.
            batch_loss = loss.item()
            total_loss += batch_loss
            total_correct += pred_correct
            total += batch_size

            print(
                'Epoch %d, %d th batch, avg loss: %.2f, avg correct_num: %.2f'
                % (epoch, i, batch_loss / batch_size, pred_correct / batch_size))

            # Drop references to this batch's tensors before the next forward pass
            del loss, pred_answers, probs

        if total == 0:
            # No samples were seen; avoid dividing by zero
            return 0.0, 0.0
        return total_loss / total, total_correct / total

    for epoch in range(params.epoch):
        train_loss, train_acc = trainEpoch(epoch)
        print('Epoch %d:\t average loss: %.2f\t train accuracy: %g' %
              (epoch, train_loss, train_acc * 100))

In [8]:
# Make the GPU with index params.gpu the current CUDA device.
torch.cuda.set_device(params.gpu)

In [9]:
# Move the model to the current CUDA device; the cell output is the module's repr.
model.cuda()

ATT_model(
  (embedding): Embedding(111016, 384, padding_idx=0)
  (dropout): Dropout(p=0.1, inplace=False)
  (BiGRU): GRU(384, 256, batch_first=True, bidirectional=True)
)

In [10]:
# Start training; the validation loader is passed along with the training loader.
trainModel(model, batched_train_data, batched_valid_data, optimizer)

Epoch 0, 0 th batch, avg loss: 4.81, avg correct_num: 0.00


RuntimeError: CUDA out of memory. Tried to allocate 232.00 MiB (GPU 0; 2.00 GiB total capacity; 1.01 GiB already allocated; 162.43 MiB free; 1.08 GiB reserved in total by PyTorch)

In [None]:
# Query the index of the currently selected CUDA device.
torch.cuda.current_device()

In [11]:
# Scratch 3x3 integer tensor used by the experiments in the following cells.
a = torch.tensor([[1,1,1],[2,2,2],[3,3,3]])

In [14]:
# Number of dimensions of `a` (the length of its shape).
len(a.size())

2

In [15]:
# NOTE: torch.stack expects a sequence (tuple/list) of tensors, so passing the
# single tensor `a` raises the TypeError recorded in this cell's output.
# Working forms would be e.g. torch.stack(list(a), 0) or torch.stack([a], 0).
torch.stack(a, 0)

TypeError: stack(): argument 'tensors' (position 1) must be tuple of Tensors, not Tensor