# 搭建训练函数并训练

- 先定义一个epoch下的函数
- 再定义多个循环的函数


In [2]:
# Timing helper
def epoch_time(start_time, end_time):
    """Split the span between two timestamps into whole minutes and seconds.

    Args:
        start_time: Start timestamp, in seconds.
        end_time: End timestamp, in seconds.

    Returns:
        A ``(minutes, seconds)`` tuple of ints, each truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    # Whatever is left after removing the whole minutes, truncated to an int
    leftover_seconds = int(duration - whole_minutes * 60)
    return whole_minutes, leftover_seconds

# 1.定义一个epoch下训练数据的流动

In [3]:
def train_step(model, data_loader, optimizer, criterion, clip=1, print_every=None):
    """Train ``model`` for one pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Network called as ``model(dec_inputs)``; it returns a pair whose
            first item is passed to ``criterion``.
        data_loader: Iterable of ``(dec_inputs, dec_outputs)`` batches that
            also supports ``len()``.
        optimizer: Optimizer whose gradients are zeroed and stepped once per batch.
        criterion: Loss callable invoked as ``criterion(outputs, flat_targets)``.
        clip: Maximum gradient norm handed to ``clip_grad_norm_``.
        print_every: Print the running average loss every this many batches.
            ``None`` disables printing; ``0`` is treated as ``1``.

    Returns:
        The sum of per-batch losses divided by ``len(data_loader)``.
    """
    # Switch to training mode first
    model.train()

    # An interval of 0 means "report on every batch"
    if print_every == 0:
        print_every = 1

    window_loss = 0  # reset after every report
    running_total = 0

    for step, batch in enumerate(tqdm(data_loader), start=1):
        # Per the original notes both tensors are [batch_size, tgt_len],
        # where tgt_len is the sentence length of each sample
        inputs, targets = batch

        optimizer.zero_grad()
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Per the original notes logits is [batch_size * tgt_len, tgt_vocab_size]
        logits, _self_attns = model(inputs)

        # Targets are flattened to one dimension before computing the loss
        loss = criterion(logits, targets.view(-1))

        batch_loss = loss.item()
        window_loss += batch_loss
        running_total += batch_loss

        # Back-propagate
        loss.backward()

        # Gradient clipping: limit the overall gradient norm to ``clip``
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        # Update the parameters
        optimizer.step()

        # Skip reporting when disabled or when this step is not on the interval
        if not print_every or step % print_every:
            continue
        print('\tCurrent Loss: %.4f' % (window_loss / print_every))
        window_loss = 0

    return running_total / len(data_loader)

# 2.定义训练方法

In [4]:
def train(model, data_loader):
    """Train ``model`` for ``epochs`` epochs, saving a checkpoint after each one.

    Reads the module-level names ``epochs``, ``CLIP`` and ``device``.

    Args:
        model: Network to optimise; its ``state_dict`` is written to
            ``GPT2.pt`` at the end of every epoch.
        data_loader: Batches forwarded unchanged to ``train_step``.
    """
    # Loss function and optimizer. Targets equal to 0 are excluded from the
    # loss (presumably the padding id -- confirm against the vocabulary).
    loss_fn = nn.CrossEntropyLoss(ignore_index=0).to(device)
    adam = optim.Adam(model.parameters(), lr=1e-4)

    for epoch in range(epochs):
        began_at = time.time()
        train_loss = train_step(model, data_loader, adam, loss_fn, CLIP, print_every=10)
        # Measure the duration before saving so checkpoint I/O is not counted
        epoch_mins, epoch_secs = epoch_time(began_at, time.time())

        # Save the model (overwrites the previous checkpoint)
        torch.save(model.state_dict(), 'GPT2.pt')

        # Report how long the epoch took and its mean loss
        print(f'Epoch: {epoch +1:02} | Time:{epoch_mins}m{epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f}')

In [5]:
def print_num_paramters(model):
    """Print the total and the trainable parameter counts of ``model``.

    Args:
        model: Object exposing ``parameters()``, which yields items that have
            a ``numel()`` method and a ``requires_grad`` attribute.
    """
    # Total number of elements across all parameters
    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters')
    # Only the parameters that take part in gradient updates
    total_trainable_params = sum(
        p.numel() for p in model.parameters() if p.requires_grad
    )
    print(f'{total_trainable_params:,} training paramters')

# 3.开始训练

In [None]:
# Read the raw corpus as a list of lines
with open('dataset.txt', 'r', encoding='utf-8') as f:
    datas = f.readlines()

train_data = make_data(datas)

# Convert every element of every line to its integer id via word2id
train_num_data = [[word2id[word] for word in line] for line in train_data]

batch_size = 8
epochs = 30  # read as a global by train()
dataset = MyDataSet(train_num_data)
data_loader = Data.DataLoader(dataset, batch_size=batch_size, collate_fn=dataset.padding_batch)

model = GPT().to(device)

# Optionally load previously trained weights
# model.load_state_dict(torch.load('GPT2.pt'))

# Fixed: the original called the undefined name `trin`
train(model, data_loader)