In [1]:
import time
import os
import torch
import pytorch_mask_rcnn as pmr
    
    
# ===================== user-tunable settings =====================
# Edit the values in this section to configure a training run.

use_cuda = True                         # run on the GPU when one is available
epochs = 1                              # number of epochs to run in this session
train_num_samples = 100                 # size of the training subset iterated each epoch
lr = 0.001                              # base learning rate handed to SGD
dataset = 'coco'                        # dataset name: 'coco' or 'voc'
data_dir = 'E:/PyTorch/data/coco2017'   # root directory of the dataset
num_classes = 91                        # 91 for coco, 21 for voc
ckpt_path = '../checkpoint_coco.pth'    # file the checkpoint is read from / written to

# ================== end of user-tunable settings =================

# Select CUDA only when it is both requested and actually present;
# otherwise fall back to the CPU.
if use_cuda and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('cuda: {}\nuse_cuda: {}\n{} GPU(s) available'.format(
    torch.cuda.is_available(),
    use_cuda,
    torch.cuda.device_count(),
))
print('\ndevice: {}'.format(device))

# Build the training dataset, then keep only the first `train_num_samples`
# entries of a random permutation of its indices.
# NOTE: the permutation is drawn before torch.manual_seed(3) is called below,
# so the choice of subset is not governed by that seed.
trainset = pmr.datasets(dataset, data_dir, 'train', train=True, device=device)
indices = torch.randperm(len(trainset)).tolist()
trainset = torch.utils.data.Subset(trainset, indices[:train_num_samples])

# Fix the global torch seed before the model is constructed.
torch.manual_seed(3)

# First positional argument is passed as True -- presumably a "pretrained"
# switch; confirm against the pytorch_mask_rcnn API.
model = pmr.maskrcnn_resnet50(True, num_classes)
model = model.to(device)

# Only parameters that require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=lr, weight_decay=0.0005, momentum=0.9)

cuda: True
use_cuda: True
1 GPU(s) available

device: cuda


In [2]:
# Resume from an existing checkpoint when there is one; otherwise start the
# bookkeeping counters (epochs / batches trained so far) from zero.
if os.path.exists(ckpt_path):
    # map_location=device remaps the stored tensors onto the device selected
    # for this run. Without it, torch.load tries to restore tensors to the
    # device they were saved from, which fails when a checkpoint written on
    # a GPU is loaded in a CPU-only run.
    checkpoint = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    # The loaded state dicts are no longer needed once applied; drop them
    # and release cached GPU memory. The remaining keys hold the counters.
    del checkpoint['model_state_dict']
    del checkpoint['optimizer_state_dict']
    torch.cuda.empty_cache()
else:
    checkpoint = dict(epochs=0, num_batches=0)

# `epoch` counts epochs cumulatively across all sessions recorded in the
# checkpoint, not just the ones run in this cell.
epoch = checkpoint['epochs']
print('already trained: {} epochs, {} batches'.format(epoch, checkpoint['num_batches']))

since = time.time()

# ------------------train---------------------

model.train()
for _ in range(epochs):
    print()
    # Each element of `trainset` is unpacked as the positional arguments of
    # the model; one element is processed per optimizer step.
    for i, data in enumerate(trainset):
        optimizer.zero_grad()
        losses = model(*data)
        # The model returns a mapping of loss terms in training mode; the
        # total loss is their sum.
        loss = sum(losses.values())
        loss.backward()
        optimizer.step()

        # Log the individual loss terms every 100 iterations.
        if i % 100 == 0:
            print(i, ' '.join(str(round(l.item(), 3)) for l in losses.values()))

    epoch += 1
    # Every 7th cumulative epoch, reset the learning rate of all parameter
    # groups to the base lr decayed by 0.9 per completed 7-epoch period.
    if epoch % 7 == 0:
        for pg in optimizer.param_groups:
            pg['lr'] = lr * 0.9 ** (epoch // 7)

# ------------------train---------------------

print('total time of this train: {:.2f} s'.format(time.time() - since))

# Persist model/optimizer state together with the updated counters so that
# a later run of this cell can resume from here.
checkpoint['model_state_dict'] = model.state_dict()
checkpoint['optimizer_state_dict'] = optimizer.state_dict()
checkpoint['epochs'] = epoch
checkpoint['num_batches'] += epochs * len(trainset)
torch.save(checkpoint, ckpt_path)

# Keep the counter for the final report, then drop the checkpoint dict and
# release cached GPU memory.
num_batches = checkpoint['num_batches']
del checkpoint
torch.cuda.empty_cache()

print('already trained: {} epochs, {} batches'.format(epoch, num_batches))

already trained: 0 epochs, 0 batches

0 0.695 0.039 0.005 0.0 0.153
total time of this train: 54.30 s
already trained: 1 epochs, 100 batches
