In [None]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
from train_scripts.dataset import PennFudanDataset, get_transform, get_datasets, get_data_loader
from train_scripts.model import get_model, evaluate, warmup_lr_scheduler


In [3]:
import utils
import math
import sys

def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Run one training pass of ``model`` over every batch in ``data_loader``.

    Args:
        model: Network called as ``model(images, targets)``; the call is
            expected to return a dict of loss values.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(images, targets)`` pairs, where
            ``targets`` is a sequence of dicts whose values support ``.to()``.
        device: Device that images and target values are moved to.
        epoch: Epoch index; used in the log header, and a value of 0 enables
            the per-iteration learning-rate warm-up.
        print_freq: Logging interval forwarded to ``MetricLogger.log_every``.

    Exits the process via ``sys.exit(1)`` when the summed loss is not finite.
    """
    model.train()

    logger = utils.MetricLogger(delimiter="  ")
    logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # Only the very first epoch gets a warm-up schedule; its length is capped
    # at 1000 iterations or one less than the number of batches.
    warmup = None
    if epoch == 0:
        warmup = warmup_lr_scheduler(
            optimizer,
            min(1000, len(data_loader) - 1),
            1. / 1000,
        )

    for images, targets in logger.log_every(data_loader, print_freq, header):
        images = [img.to(device) for img in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        reduced_losses = utils.reduce_dict(loss_dict)
        total_reduced = sum(reduced_losses.values())
        scalar_loss = total_reduced.item()

        # Abort on NaN/inf before any gradient step is taken for this batch.
        if not math.isfinite(scalar_loss):
            print("Loss is {}, stopping training".format(scalar_loss))
            print(reduced_losses)
            sys.exit(1)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if warmup is not None:
            warmup.step()

        logger.update(loss=total_reduced, **reduced_losses)
        logger.update(lr=optimizer.param_groups[0]["lr"])


In [4]:
def train(epochs, folder, batch, resume=False):
    """Train a two-class detection model on the dataset named ``folder``.

    After every epoch the model is evaluated on the validation loader and its
    ``state_dict`` is written to ``models/{folder}_{epoch}.pth``.

    Args:
        epochs: Exclusive upper bound of the epoch index; training covers
            ``range(start, epochs)`` where ``start`` is 0, or 2 when resuming.
        folder: Dataset identifier handed to ``get_datasets``; also used as
            the checkpoint filename prefix.
        batch: Second argument to ``get_data_loader`` for the training set
            (presumably the batch size -- confirm against that helper).
        resume: When true, load weights from ``models/{folder}_1.pth`` and
            continue with epoch index 2.

    Returns:
        The trained model (on ``device``).
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # our dataset has two classes only - background and person
    num_classes = 2

    # get the model using our helper function
    model = get_model(num_classes)

    start = 0
    if resume:
        # Resume continues after the checkpoint written at the end of epoch 1.
        last_finished_epoch = 1
        checkpoint_path = f"models/{folder}_{last_finished_epoch}.pth"
        # map_location lets a checkpoint saved on GPU load on a CPU-only host.
        model.load_state_dict(
            torch.load(checkpoint_path, map_location=device, weights_only=True)
        )
        start = last_finished_epoch + 1
    # move model to the right device
    model.to(device)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)
    # When resuming, advance the schedule past the epochs that already ran so
    # the 10x decay still lands on the same epoch indices as an uninterrupted
    # run. (PyTorch may warn that the scheduler was stepped before the
    # optimizer; that is expected here.)
    for _ in range(start):
        lr_scheduler.step()

    data_train, data_valid = get_datasets(folder)
    data_loader_train = get_data_loader(data_train, batch, True)
    data_loader_valid = get_data_loader(data_valid, 1, False)

    # torch.save does not create missing directories.
    os.makedirs("models", exist_ok=True)

    for epoch in range(start, epochs):
        # train for one epoch, printing every 20 iterations
        train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=20)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_valid, device=device)
        torch.save(model.state_dict(), f"models/{folder}_{epoch}.pth")

    # Callers bind the result (`model = train(...)`), so hand the model back.
    return model


In [5]:
# Train on the "outdoors" data for epoch indices 0-9 with batch=2;
# per-epoch weights are saved as models/outdoors_<epoch>.pth.
folder = "outdoors"
model = train(epochs=10, folder=folder, batch=2)





Epoch: [0]  [ 0/53]  eta: 0:01:15  lr: 0.000101  loss: 1.4305 (1.4305)  loss_classifier: 0.6506 (0.6506)  loss_box_reg: 0.0427 (0.0427)  loss_objectness: 0.6953 (0.6953)  loss_rpn_box_reg: 0.0419 (0.0419)  time: 1.4300  data: 0.0320  max mem: 1434
Epoch: [0]  [20/53]  eta: 0:00:19  lr: 0.002022  loss: 0.9638 (1.0414)  loss_classifier: 0.2482 (0.3357)  loss_box_reg: 0.0388 (0.0407)  loss_objectness: 0.6748 (0.6391)  loss_rpn_box_reg: 0.0162 (0.0259)  time: 0.5482  data: 0.0128  max mem: 1594
Epoch: [0]  [40/53]  eta: 0:00:07  lr: 0.003943  loss: 0.2273 (0.6547)  loss_classifier: 0.0699 (0.2117)  loss_box_reg: 0.0692 (0.0570)  loss_objectness: 0.0286 (0.3565)  loss_rpn_box_reg: 0.0239 (0.0295)  time: 0.5508  data: 0.0115  max mem: 1594
Epoch: [0]  [52/53]  eta: 0:00:00  lr: 0.005000  loss: 0.1530 (0.5422)  loss_classifier: 0.0580 (0.1766)  loss_box_reg: 0.0653 (0.0583)  loss_objectness: 0.0233 (0.2808)  loss_rpn_box_reg: 0.0147 (0.0265)  time: 0.5489  data: 0.0110  max mem: 1594
Epoch: [

In [36]:
# Train on the "poisson" data for epoch indices 0-9 with batch=4;
# per-epoch weights are saved as models/poisson_<epoch>.pth.
folder = "poisson"
model = train(epochs=10, folder=folder, batch=4)



Epoch: [0]  [  0/133]  eta: 0:03:08  lr: 0.000043  loss: 1.3959 (1.3959)  loss_classifier: 0.6566 (0.6566)  loss_box_reg: 0.0085 (0.0085)  loss_objectness: 0.6868 (0.6868)  loss_rpn_box_reg: 0.0440 (0.0440)  time: 1.4151  data: 0.0809  max mem: 2946
Epoch: [0]  [ 20/133]  eta: 0:02:11  lr: 0.000800  loss: 1.1144 (1.1562)  loss_classifier: 0.3961 (0.4426)  loss_box_reg: 0.0121 (0.0123)  loss_objectness: 0.6762 (0.6705)  loss_rpn_box_reg: 0.0279 (0.0307)  time: 1.1529  data: 0.0850  max mem: 2946
Epoch: [0]  [ 40/133]  eta: 0:01:49  lr: 0.001556  loss: 0.3599 (0.7913)  loss_classifier: 0.0772 (0.2701)  loss_box_reg: 0.0668 (0.0382)  loss_objectness: 0.1186 (0.4570)  loss_rpn_box_reg: 0.0185 (0.0259)  time: 1.1869  data: 0.0899  max mem: 2946
Epoch: [0]  [ 60/133]  eta: 0:01:25  lr: 0.002313  loss: 0.2058 (0.6012)  loss_classifier: 0.0614 (0.2045)  loss_box_reg: 0.0968 (0.0597)  loss_objectness: 0.0166 (0.3130)  loss_rpn_box_reg: 0.0184 (0.0241)  time: 1.1671  data: 0.0801  max mem: 2946


In [40]:
# Train on the "laplacian" data for epoch indices 0-9 with batch=4;
# per-epoch weights are saved as models/laplacian_<epoch>.pth.
folder = "laplacian"
model = train(epochs=10, folder=folder, batch=4)

Epoch: [0]  [  0/133]  eta: 0:02:37  lr: 0.000043  loss: 1.3304 (1.3304)  loss_classifier: 0.5868 (0.5868)  loss_box_reg: 0.0155 (0.0155)  loss_objectness: 0.6867 (0.6867)  loss_rpn_box_reg: 0.0414 (0.0414)  time: 1.1820  data: 0.0129  max mem: 3106
Epoch: [0]  [ 20/133]  eta: 0:02:03  lr: 0.000800  loss: 1.0600 (1.0968)  loss_classifier: 0.3561 (0.3793)  loss_box_reg: 0.0120 (0.0142)  loss_objectness: 0.6799 (0.6735)  loss_rpn_box_reg: 0.0272 (0.0298)  time: 1.0854  data: 0.0176  max mem: 3263
Epoch: [0]  [ 40/133]  eta: 0:01:41  lr: 0.001556  loss: 0.3159 (0.7760)  loss_classifier: 0.0988 (0.2417)  loss_box_reg: 0.0678 (0.0417)  loss_objectness: 0.1438 (0.4680)  loss_rpn_box_reg: 0.0172 (0.0247)  time: 1.1025  data: 0.0197  max mem: 3263
Epoch: [0]  [ 60/133]  eta: 0:01:20  lr: 0.002313  loss: 0.2058 (0.5898)  loss_classifier: 0.0606 (0.1836)  loss_box_reg: 0.1049 (0.0618)  loss_objectness: 0.0186 (0.3212)  loss_rpn_box_reg: 0.0194 (0.0232)  time: 1.1129  data: 0.0218  max mem: 3263


In [14]:
# Resume "cutpasteblur" training: weights are loaded from
# models/cutpasteblur_1.pth and training continues with epoch indices 2-9.
folder = "cutpasteblur"
model = train(epochs=10, folder=folder, batch=4, resume=True)



Epoch: [2]  [  0/133]  eta: 0:03:04  lr: 0.005000  loss: 0.0729 (0.0729)  loss_classifier: 0.0123 (0.0123)  loss_box_reg: 0.0459 (0.0459)  loss_objectness: 0.0010 (0.0010)  loss_rpn_box_reg: 0.0137 (0.0137)  time: 1.3858  data: 0.0255  max mem: 3108
Epoch: [2]  [ 20/133]  eta: 0:02:04  lr: 0.005000  loss: 0.0624 (0.0668)  loss_classifier: 0.0150 (0.0147)  loss_box_reg: 0.0408 (0.0429)  loss_objectness: 0.0010 (0.0016)  loss_rpn_box_reg: 0.0062 (0.0076)  time: 1.0918  data: 0.0173  max mem: 3266
Epoch: [2]  [ 40/133]  eta: 0:01:42  lr: 0.005000  loss: 0.0632 (0.0671)  loss_classifier: 0.0134 (0.0143)  loss_box_reg: 0.0410 (0.0429)  loss_objectness: 0.0010 (0.0015)  loss_rpn_box_reg: 0.0071 (0.0083)  time: 1.0952  data: 0.0162  max mem: 3266
Epoch: [2]  [ 60/133]  eta: 0:01:20  lr: 0.005000  loss: 0.0662 (0.0676)  loss_classifier: 0.0143 (0.0146)  loss_box_reg: 0.0397 (0.0425)  loss_objectness: 0.0015 (0.0017)  loss_rpn_box_reg: 0.0101 (0.0088)  time: 1.0949  data: 0.0157  max mem: 3266


In [18]:
# Train on the "inpaint_light" data for epoch indices 0-9 with batch=4;
# per-epoch weights are saved as models/inpaint_light_<epoch>.pth.
folder = "inpaint_light"
model = train(epochs=10, folder=folder, batch=4)

Epoch: [0]  [  0/133]  eta: 0:02:56  lr: 0.000043  loss: 1.4469 (1.4469)  loss_classifier: 0.7067 (0.7067)  loss_box_reg: 0.0003 (0.0003)  loss_objectness: 0.6933 (0.6933)  loss_rpn_box_reg: 0.0467 (0.0467)  time: 1.3261  data: 0.0261  max mem: 3268
Epoch: [0]  [ 20/133]  eta: 0:02:08  lr: 0.000800  loss: 1.1863 (1.2188)  loss_classifier: 0.4760 (0.4952)  loss_box_reg: 0.0090 (0.0110)  loss_objectness: 0.6850 (0.6796)  loss_rpn_box_reg: 0.0314 (0.0330)  time: 1.1238  data: 0.0573  max mem: 3269
Epoch: [0]  [ 40/133]  eta: 0:01:45  lr: 0.001556  loss: 0.3942 (0.8474)  loss_classifier: 0.0865 (0.2982)  loss_box_reg: 0.0520 (0.0321)  loss_objectness: 0.2103 (0.4889)  loss_rpn_box_reg: 0.0208 (0.0282)  time: 1.1367  data: 0.0546  max mem: 3270
Epoch: [0]  [ 60/133]  eta: 0:01:22  lr: 0.002313  loss: 0.1855 (0.6294)  loss_classifier: 0.0545 (0.2199)  loss_box_reg: 0.0786 (0.0487)  loss_objectness: 0.0198 (0.3352)  loss_rpn_box_reg: 0.0196 (0.0255)  time: 1.1317  data: 0.0446  max mem: 3270


In [19]:
# Train on the "inpaint_dib" data for epoch indices 0-9 with batch=4;
# per-epoch weights are saved as models/inpaint_dib_<epoch>.pth.
folder = "inpaint_dib"
model = train(epochs=10, folder=folder, batch=4)

Epoch: [0]  [  0/133]  eta: 0:02:37  lr: 0.000043  loss: 1.3856 (1.3856)  loss_classifier: 0.6585 (0.6585)  loss_box_reg: 0.0002 (0.0002)  loss_objectness: 0.6863 (0.6863)  loss_rpn_box_reg: 0.0406 (0.0406)  time: 1.1809  data: 0.0897  max mem: 3272
Epoch: [0]  [ 20/133]  eta: 0:02:12  lr: 0.000800  loss: 1.1696 (1.1935)  loss_classifier: 0.4777 (0.4925)  loss_box_reg: 0.0002 (0.0012)  loss_objectness: 0.6759 (0.6717)  loss_rpn_box_reg: 0.0259 (0.0282)  time: 1.1690  data: 0.0734  max mem: 3272
Epoch: [0]  [ 40/133]  eta: 0:01:49  lr: 0.001556  loss: 0.3436 (0.8251)  loss_classifier: 0.0817 (0.2954)  loss_box_reg: 0.0446 (0.0219)  loss_objectness: 0.2107 (0.4838)  loss_rpn_box_reg: 0.0161 (0.0240)  time: 1.1784  data: 0.0723  max mem: 3272
Epoch: [0]  [ 60/133]  eta: 0:01:25  lr: 0.002313  loss: 0.1955 (0.6248)  loss_classifier: 0.0674 (0.2224)  loss_box_reg: 0.0906 (0.0466)  loss_objectness: 0.0211 (0.3330)  loss_rpn_box_reg: 0.0205 (0.0228)  time: 1.1809  data: 0.0703  max mem: 3272
