In [6]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
from train_scripts.dataset import get_datasets, get_data_loader
from train_scripts.model import get_model, evaluate, warmup_lr_scheduler
import train_scripts.utils as utils
import math
import sys


In [7]:

def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Run a single training pass over ``data_loader``.

    Args:
        model: Model called as ``model(images, targets)`` in train mode; it must
            return a dict whose values are loss tensors.
        optimizer: Optimizer that updates the model parameters.
        data_loader: Sized iterable yielding ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        device: Device every image and target tensor is moved to.
        epoch: Epoch index. Epoch 0 additionally applies a per-iteration
            learning-rate warm-up.
        print_freq: Logging interval handed to ``MetricLogger.log_every``.

    Returns:
        The ``utils.MetricLogger`` holding the running loss and learning-rate
        meters for this epoch, so callers can inspect or record them.

    Note:
        If the summed (reduced) loss is not finite, the loss breakdown is
        printed and the process is terminated through ``sys.exit(1)``.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # Only the very first epoch ramps the learning rate up; the ramp is capped
    # at 1000 iterations or one less than the number of batches.
    warmup_scheduler = None
    if epoch == 0:
        warmup_factor = 1. / 1000
        warmup_iters = min(1000, len(data_loader) - 1)
        warmup_scheduler = warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [{key: value.to(device) for key, value in target.items()}
                   for target in targets]

        loss_dict = model(images, targets)
        # Total loss that is actually back-propagated.
        losses = sum(loss_dict.values())

        # Reduce losses over all GPUs for logging purposes only.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        # Abort before the optimizer step so a NaN/inf loss never reaches the weights.
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if warmup_scheduler is not None:
            warmup_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger


In [8]:
def train(epochs, folder, batch, resume=False):
    """Train the detection model on one data folder and return it.

    Args:
        epochs: Exclusive upper bound of the epoch loop (``range(start, epochs)``).
        folder: Name passed to ``get_datasets`` and used as the checkpoint
            prefix in ``models/{folder}_{epoch}.pth``.
        batch: Batch size passed to ``get_data_loader`` for the training split.
        resume: When true, load the weights saved after epoch 1
            (``models/{folder}_1.pth``) and continue from epoch 2.

    Returns:
        The trained model, located on the device used for training.

    Side effects:
        Creates the ``models`` directory if needed and writes one state-dict
        checkpoint per completed epoch.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Two classes: background plus one object class ("person" per the
    # original author's note).
    num_classes = 2

    # get the model using our helper function
    model = get_model(num_classes)

    # Epoch whose checkpoint is reloaded on resume; training then continues
    # with the following epoch.
    resume_epoch = 1
    if resume:
        # map_location lets a checkpoint saved on GPU be restored on a
        # CPU-only machine (and vice versa).
        state_dict = torch.load(f"models/{folder}_{resume_epoch}.pth",
                                map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
    # move model to the right device
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 3 epochs
    # NOTE(review): on resume neither the optimizer state nor the scheduler
    # position is restored, so the decay epochs shift relative to an
    # uninterrupted run -- confirm whether that is intended.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    data_train, data_valid = get_datasets(folder)
    # Third argument is presumably the shuffle flag -- TODO confirm in
    # train_scripts.dataset.get_data_loader.
    data_loader_train = get_data_loader(data_train, batch, True)
    data_loader_valid = get_data_loader(data_valid, 1, False)

    start = resume_epoch + 1 if resume else 0

    # torch.save does not create missing parent directories.
    os.makedirs("models", exist_ok=True)

    for epoch in range(start, epochs):
        # train for one epoch, printing every 20 iterations
        train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=20)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the validation split
        evaluate(model, data_loader_valid, device=device)
        torch.save(model.state_dict(), f"models/{folder}_{epoch}.pth")

    # Call sites bind the result (`model = train(...)`), so hand the model back.
    return model


In [9]:
# Train on the "outdoors" data folder: 10 epochs, batch size 2.
folder = "outdoors"
model = train(epochs=10, folder=folder, batch=2)



Epoch: [0]  [ 0/53]  eta: 0:00:41  lr: 0.000101  loss: 1.4577 (1.4577)  loss_classifier: 0.7040 (0.7040)  loss_box_reg: 0.0125 (0.0125)  loss_objectness: 0.7026 (0.7026)  loss_rpn_box_reg: 0.0387 (0.0387)  time: 0.7781  data: 0.0633  max mem: 1918
Epoch: [0]  [20/53]  eta: 0:00:20  lr: 0.002022  loss: 0.9520 (1.0658)  loss_classifier: 0.2511 (0.3647)  loss_box_reg: 0.0195 (0.0172)  loss_objectness: 0.6830 (0.6585)  loss_rpn_box_reg: 0.0167 (0.0254)  time: 0.6030  data: 0.0547  max mem: 2075
Epoch: [0]  [40/53]  eta: 0:00:07  lr: 0.003943  loss: 0.2563 (0.6791)  loss_classifier: 0.0798 (0.2287)  loss_box_reg: 0.0635 (0.0415)  loss_objectness: 0.0400 (0.3790)  loss_rpn_box_reg: 0.0185 (0.0299)  time: 0.6098  data: 0.0538  max mem: 2076
Epoch: [0]  [52/53]  eta: 0:00:00  lr: 0.005000  loss: 0.1901 (0.5636)  loss_classifier: 0.0709 (0.1913)  loss_box_reg: 0.0634 (0.0450)  loss_objectness: 0.0328 (0.2996)  loss_rpn_box_reg: 0.0180 (0.0277)  time: 0.7181  data: 0.0599  max mem: 2076
Epoch: [

In [10]:
# Train on the "poisson" data folder: 10 epochs, batch size 4.
folder = "poisson"
model = train(epochs=10, folder=folder, batch=4)



Epoch: [0]  [  0/133]  eta: 0:02:31  lr: 0.000043  loss: 1.3910 (1.3910)  loss_classifier: 0.6430 (0.6430)  loss_box_reg: 0.0055 (0.0055)  loss_objectness: 0.7020 (0.7020)  loss_rpn_box_reg: 0.0406 (0.0406)  time: 1.1380  data: 0.0747  max mem: 3104
Epoch: [0]  [ 20/133]  eta: 0:02:09  lr: 0.000800  loss: 1.0584 (1.1183)  loss_classifier: 0.3440 (0.3924)  loss_box_reg: 0.0048 (0.0068)  loss_objectness: 0.6965 (0.6904)  loss_rpn_box_reg: 0.0273 (0.0287)  time: 1.1435  data: 0.0780  max mem: 3262
Epoch: [0]  [ 40/133]  eta: 0:01:48  lr: 0.001556  loss: 0.3532 (0.7883)  loss_classifier: 0.0794 (0.2428)  loss_box_reg: 0.0651 (0.0350)  loss_objectness: 0.1737 (0.4865)  loss_rpn_box_reg: 0.0173 (0.0240)  time: 1.1839  data: 0.0839  max mem: 3263
Epoch: [0]  [ 60/133]  eta: 0:01:25  lr: 0.002313  loss: 0.1843 (0.5976)  loss_classifier: 0.0630 (0.1884)  loss_box_reg: 0.0834 (0.0535)  loss_objectness: 0.0172 (0.3328)  loss_rpn_box_reg: 0.0163 (0.0230)  time: 1.1683  data: 0.0816  max mem: 3264


In [11]:
# Train on the "laplacian" data folder: 10 epochs, batch size 4.
folder = "laplacian"
model = train(epochs=10, folder=folder, batch=4)

Epoch: [0]  [  0/133]  eta: 0:03:05  lr: 0.000043  loss: 1.3946 (1.3946)  loss_classifier: 0.6705 (0.6705)  loss_box_reg: 0.0003 (0.0003)  loss_objectness: 0.6836 (0.6836)  loss_rpn_box_reg: 0.0403 (0.0403)  time: 1.3924  data: 0.0708  max mem: 3264
Epoch: [0]  [ 20/133]  eta: 0:02:10  lr: 0.000800  loss: 1.1352 (1.1754)  loss_classifier: 0.4458 (0.4787)  loss_box_reg: 0.0026 (0.0027)  loss_objectness: 0.6733 (0.6659)  loss_rpn_box_reg: 0.0258 (0.0281)  time: 1.1451  data: 0.0827  max mem: 3264
Epoch: [0]  [ 40/133]  eta: 0:01:47  lr: 0.001556  loss: 0.3018 (0.7905)  loss_classifier: 0.0711 (0.2854)  loss_box_reg: 0.0629 (0.0310)  loss_objectness: 0.1118 (0.4500)  loss_rpn_box_reg: 0.0150 (0.0240)  time: 1.1567  data: 0.0888  max mem: 3264
Epoch: [0]  [ 60/133]  eta: 0:01:24  lr: 0.002313  loss: 0.1746 (0.5946)  loss_classifier: 0.0481 (0.2117)  loss_box_reg: 0.0863 (0.0521)  loss_objectness: 0.0191 (0.3087)  loss_rpn_box_reg: 0.0176 (0.0221)  time: 1.1542  data: 0.0765  max mem: 3264


In [14]:
# Train on the "cutpaste" data folder: 10 epochs, batch size 4.
folder = "cutpaste"
model = train(epochs=10, folder=folder, batch=4)

Epoch: [0]  [  0/133]  eta: 0:03:11  lr: 0.000043  loss: 1.4412 (1.4412)  loss_classifier: 0.7048 (0.7048)  loss_box_reg: 0.0003 (0.0003)  loss_objectness: 0.6927 (0.6927)  loss_rpn_box_reg: 0.0433 (0.0433)  time: 1.4366  data: 0.1046  max mem: 3265
Epoch: [0]  [ 20/133]  eta: 0:02:13  lr: 0.000800  loss: 1.1211 (1.1501)  loss_classifier: 0.4224 (0.4413)  loss_box_reg: 0.0002 (0.0005)  loss_objectness: 0.6810 (0.6777)  loss_rpn_box_reg: 0.0282 (0.0306)  time: 1.1681  data: 0.1280  max mem: 3265
Epoch: [0]  [ 40/133]  eta: 0:01:49  lr: 0.001556  loss: 0.3990 (0.7964)  loss_classifier: 0.0709 (0.2625)  loss_box_reg: 0.0533 (0.0247)  loss_objectness: 0.1973 (0.4845)  loss_rpn_box_reg: 0.0154 (0.0247)  time: 1.1649  data: 0.1108  max mem: 3265
Epoch: [0]  [ 60/133]  eta: 0:01:25  lr: 0.002313  loss: 0.1826 (0.5988)  loss_classifier: 0.0509 (0.1959)  loss_box_reg: 0.0925 (0.0473)  loss_objectness: 0.0205 (0.3321)  loss_rpn_box_reg: 0.0182 (0.0235)  time: 1.1628  data: 0.1096  max mem: 3265


In [15]:
# Train on the "inpaint" data folder: 10 epochs, batch size 4.
folder = "inpaint"
model = train(epochs=10, folder=folder, batch=4)

Epoch: [0]  [  0/133]  eta: 0:02:29  lr: 0.000043  loss: 1.3982 (1.3982)  loss_classifier: 0.6732 (0.6732)  loss_box_reg: 0.0002 (0.0002)  loss_objectness: 0.6838 (0.6838)  loss_rpn_box_reg: 0.0410 (0.0410)  time: 1.1250  data: 0.0822  max mem: 3265
Epoch: [0]  [ 20/133]  eta: 0:02:11  lr: 0.000800  loss: 1.1483 (1.1813)  loss_classifier: 0.4581 (0.4834)  loss_box_reg: 0.0003 (0.0016)  loss_objectness: 0.6734 (0.6677)  loss_rpn_box_reg: 0.0257 (0.0286)  time: 1.1680  data: 0.1056  max mem: 3265
Epoch: [0]  [ 40/133]  eta: 0:01:48  lr: 0.001556  loss: 0.3248 (0.8027)  loss_classifier: 0.0720 (0.2868)  loss_box_reg: 0.0450 (0.0245)  loss_objectness: 0.1605 (0.4676)  loss_rpn_box_reg: 0.0165 (0.0237)  time: 1.1737  data: 0.1044  max mem: 3265
Epoch: [0]  [ 60/133]  eta: 0:01:25  lr: 0.002313  loss: 0.1912 (0.6013)  loss_classifier: 0.0545 (0.2124)  loss_box_reg: 0.0922 (0.0457)  loss_objectness: 0.0177 (0.3207)  loss_rpn_box_reg: 0.0185 (0.0225)  time: 1.1788  data: 0.1054  max mem: 3265


In [16]:
# Train on the "inpaint_dib" data folder: 10 epochs, batch size 4.
folder = "inpaint_dib"
model = train(epochs=10, folder=folder, batch=4)

Epoch: [0]  [  0/133]  eta: 0:02:37  lr: 0.000043  loss: 1.3854 (1.3854)  loss_classifier: 0.6588 (0.6588)  loss_box_reg: 0.0002 (0.0002)  loss_objectness: 0.6862 (0.6862)  loss_rpn_box_reg: 0.0402 (0.0402)  time: 1.1816  data: 0.1211  max mem: 3265
Epoch: [0]  [ 20/133]  eta: 0:02:15  lr: 0.000800  loss: 1.1681 (1.1955)  loss_classifier: 0.4772 (0.4939)  loss_box_reg: 0.0002 (0.0015)  loss_objectness: 0.6754 (0.6719)  loss_rpn_box_reg: 0.0258 (0.0282)  time: 1.1993  data: 0.1367  max mem: 3265
Epoch: [0]  [ 40/133]  eta: 0:01:51  lr: 0.001556  loss: 0.3441 (0.8279)  loss_classifier: 0.0760 (0.2954)  loss_box_reg: 0.0472 (0.0225)  loss_objectness: 0.2145 (0.4857)  loss_rpn_box_reg: 0.0170 (0.0243)  time: 1.2067  data: 0.1349  max mem: 3265
Epoch: [0]  [ 60/133]  eta: 0:01:27  lr: 0.002313  loss: 0.2016 (0.6271)  loss_classifier: 0.0629 (0.2225)  loss_box_reg: 0.0939 (0.0470)  loss_objectness: 0.0223 (0.3342)  loss_rpn_box_reg: 0.0198 (0.0233)  time: 1.2007  data: 0.1267  max mem: 3265
