Training code for EfficientDet models using the last 25% of each video as validation data. Very similar to Edet UAV. Comments added only at points of divergence.

In [None]:
import numpy as np
import pandas as pd

import os
import cv2
import matplotlib.pyplot as plt
from glob import glob

import time
import random
import warnings
import torch
from torch.utils.data import Dataset
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from datetime import datetime

warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split

# Locations on the mounted Google Drive and on the local Colab disk.
DRIVE = "/content/drive/MyDrive/CNN/UAV"
IMAGE_ZIP = "images.zip"
IMAGES = f"{DRIVE}/{IMAGE_ZIP}"
ANNOTATION_DIR = f"{DRIVE}/annotations"
BASE_IMAGE_DIR = "/content/images"

# Annotation CSVs and EfficientDet weight files, both in sorted order.
ANNOTATION_FILES = sorted(f"{ANNOTATION_DIR}/{name}"
                          for name in os.listdir(ANNOTATION_DIR)
                          if ".csv" in name)
EDETS = sorted(name for name in os.listdir(DRIVE) if '.pth' in name)

# Export the paths so that the %%bash cell below can read them.
os.environ.update({
    "DRIVE": DRIVE,
    "IMAGE_ZIP": IMAGE_ZIP,
    "IMAGES": IMAGES,
    "ANNOTATION_DIR": ANNOTATION_DIR,
    "BASE_IMAGE_DIR": BASE_IMAGE_DIR,
})


The size (height, width) each video must be scaled to for validation.

In [None]:
# Validation input size per annotation file, stored as [height, width].
sizes = {
    "0000000067_0000000019.csv": [512, 640],
    "0000000067_0000000046.csv": [512, 640],
    "0000000067_0000000050.csv": [512, 640],
    "0000000067_0000000052.csv": [512, 640],
    "0000000354_0000000000.csv": [512, 640],
    "0000000359_0000000000.csv": [384, 640],
    "0000000363_0000000000.csv": [384, 640],
    "0000000010_0000000000.csv": [512, 640],
    "0000000054_0000000000.csv": [512, 640],
    "0000000056_0000000000.csv": [512, 640],
    "0000000062_0000000000.csv": [512, 640],
    "0000000067_0000000005.csv": [512, 640],
    "0000000067_0000000012.csv": [512, 640],
    "0000000067_0000000014.csv": [512, 640],
    "0000000067_0000000015.csv": [512, 640],
    "0000000067_0000000024.csv": [512, 640],
    "0000000067_0000000025.csv": [512, 640],
    "0000000067_0000000026.csv": [512, 640],
    "0000000067_0000000027.csv": [512, 640],
    "0000000067_0000000028.csv": [512, 640],
    "0000000067_0000000029.csv": [512, 640],
    "0000000067_0000000031.csv": [512, 640],
    "0000000067_0000000032.csv": [512, 640],
    "0000000067_0000000040.csv": [512, 640],
    "0000000067_0000000041.csv": [512, 640],
    "0000000067_0000000045.csv": [512, 640],
    "0000000067_0000000055.csv": [512, 640],
    "0000000067_0000000058.csv": [512, 640],
    "0000000067_0000000059.csv": [512, 640],
    "0000000351_0000000000.csv": [512, 896],
    "0000000364_0000000000.csv": [384, 768],
    "0000000367_0000000000.csv": [384, 768],
}

In [None]:
%%bash

# Copy the image archive to the local disk, unpack it quietly and delete the
# local copy of the archive. The unzip/rm paths are relative, so this assumes
# the cell runs with /content as its working directory.
cp $IMAGES /content
unzip -q $IMAGE_ZIP
rm $IMAGE_ZIP

# Third-party packages used by the training code (versions are not pinned).
pip install -U -q albumentations
pip install -q omegaconf
pip install -q timm
pip install -q effdet

In [None]:
# One image directory per video, sorted; the macOS '.DS_Store' entry is skipped.
IMAGE_DIRS = sorted(f"{BASE_IMAGE_DIR}/{name}"
                    for name in os.listdir(BASE_IMAGE_DIR)
                    if name != '.DS_Store')

In [None]:
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchPredict
from effdet.efficientdet import HeadNet

# Side length of the (square) training images; passed to TrainDataset and get_net.
TRAIN_SIZE = 512
# NOTE(review): VAL_SIZE is not referenced in the visible code; validation sizes
# come from the per-video `sizes` dictionary instead.
VAL_SIZE = 512
# Number of object classes the detection head is reset to.
NUM_CLASSES = 1
# DataLoader settings, forwarded through TrainGlobalConfig.
BATCH_SIZE = 8
NUM_WORKERS = 4
# EfficientDet variant index: selects 'tf_efficientdet_d{D_SIZE}_ap' and EDETS[D_SIZE].
D_SIZE = 0

# Epoch to start from: 0 loads the pretrained weights, any other value makes
# get_net resume from the matching best-checkpoint file.
EPOCH = 0
# Total number of epochs (upper bound of the training loop).
MAX_EPOCH = 100

# Learning rate in scientific notation: lr = COEFF_LR * 10**(-LOG_LR).
LOG_LR = 4
COEFF_LR = 2

In [None]:
class TrainGlobalConfig:
  """Container for the training hyper-parameters and output settings.

  Args:
    d_size: EfficientDet variant index (accepted but not stored here).
    num_workers: number of DataLoader worker processes.
    project_folder: folder name, created under DRIVE, for logs and checkpoints.
    batch_size: DataLoader batch size.
    n_epochs: total number of training epochs.
    log_lr: negative base-10 exponent of the learning rate.
    coeff_lr: coefficient of the learning rate.
  """

  def __init__(self, d_size,
               num_workers,
               project_folder,
               batch_size, n_epochs,
               log_lr, coeff_lr):
    # Where logs and checkpoints are written.
    self.folder = f"{DRIVE}/{project_folder}"

    # Learning rate expressed as coeff_lr * 10^(-log_lr).
    self.lr = coeff_lr * 10**(-log_lr)
    self.n_epochs = n_epochs
    self.batch_size = batch_size
    self.num_workers = num_workers

    # Progress printing: one line every `verbose_step` training steps.
    self.verbose = True
    self.verbose_step = 250

    # The LR is multiplied by a constant factor of 0.94**0.25 at every
    # scheduler step; verbose=True is passed through to the scheduler.
    self.SchedulerClass = torch.optim.lr_scheduler.MultiplicativeLR
    self.scheduler_params = {
        "lr_lambda": lambda epoch: 0.94**0.25,
        "verbose": True,
    }
# Configuration shared by the data loaders and the Fitter. The project folder
# name encodes the model size and the learning-rate settings.
global_config = TrainGlobalConfig(
    d_size=D_SIZE,
    num_workers=NUM_WORKERS,
    project_folder=f"effdet{D_SIZE}-{LOG_LR}-{COEFF_LR}_wd4-5_512x640_time_split_ap",
    batch_size=BATCH_SIZE,
    n_epochs=MAX_EPOCH,
    log_lr=LOG_LR,
    coeff_lr=COEFF_LR,
)

# Load and Transform Data

Splits the videos into training and validation. In order to get the last 25% of bounding boxes in validation it is necessary to take the last 30% of each video. (Bounding boxes are more likely at the beginning of a video.)

In [None]:
def get_train_and_val(rand=6189):
  """Builds the frame and annotation tables and splits them by time.

  Every annotation CSV is paired (by position) with an image directory. For
  each video, frames whose frame_id is below 75% of that video's largest
  frame_id go to training and the remaining frames go to validation.

  Args:
    rand: accepted but not used by this function.

  Returns:
    A tuple (train_df, val_df, train_annotations, val_annotations) of
    DataFrames; the first two hold one row per frame, the last two one row
    per annotated box.
  """
  col_names = ["frame_id", "object_id", "x", "y", "width", "height", "object_class",
               "species", "occluded", "noisy_frame"]
  box_tables, frame_tables = [], []
  for annot, image_dir in zip(ANNOTATION_FILES, IMAGE_DIRS):
    if ".csv" not in annot:
      continue
    # Boxes of this video, tagged with the name of the CSV they came from.
    box_tables.append(
        pd.read_csv(annot, header=None, names=col_names)
        .assign(csv=annot.split("/")[-1]))

    # The first image listed provides the frame size for the whole video.
    images = [f"{image_dir}/{d}" for d in os.listdir(image_dir)]
    height, width = cv2.imread(images[0]).shape[:2]
    # The frame number is the last "_"-separated token of the file name.
    frames = [int(img.split("_")[-1].split(".")[0]) for img in images]
    frame_tables.append(
        pd.DataFrame({"frame_id": frames, "file": images})
        .assign(csv=f"{image_dir.split('/')[-1]}.csv",
                img_height=height,
                img_width=width))

  frame_df = pd.concat(frame_tables).reset_index(drop=True)
  annotations = pd.concat(box_tables).merge(frame_df).reset_index(drop=True)

  # Per-video split point: 75% of the largest frame id.
  boundary = (frame_df.groupby('csv')['frame_id'].max() * 0.75) \
      .to_frame().rename(columns={'frame_id': 'boundary'})
  df = frame_df.copy().merge(boundary, left_on='csv', right_index=True)

  train_df = df[df['frame_id'] < df['boundary']]
  val_df = df[df['frame_id'] >= df['boundary']]
  train_annotations = annotations[annotations['file'].isin(train_df['file'])].reset_index(drop=True)
  val_annotations = annotations[annotations['file'].isin(val_df['file'])].reset_index(drop=True)
  return (train_df.reset_index(drop=True), val_df.reset_index(drop=True),
          train_annotations, val_annotations)
# Build the time-based split once; the datasets below are created from it.
train_df, val_df, train_annotations, val_annotations = get_train_and_val()

**Transformations**

In [None]:
# Bounding-box settings shared by every pipeline: boxes are given as
# (x_min, y_min, x_max, y_max) and their class ids travel in the 'labels' field.
BBOX = A.BboxParams(format='pascal_voc',
                    label_fields=['labels'],
                    min_area=0,
                    min_visibility=0)

def get_train_transforms(img_size):
  """Builds the augmentation pipeline applied to training samples.

  Args:
    img_size: sequence whose first two items are the output height and width.

  Returns:
    An albumentations Compose that crops and resizes, randomly flips, rotates
    by a multiple of 90 degrees, randomly transposes and converts to a tensor.
  """
  height, width = img_size[0], img_size[1]
  steps = [
      A.RandomResizedCrop(height=height, width=width,
                          scale=(0.1, 1.0),
                          ratio=(3/4, 4/3),
                          p=1.0),
      A.HorizontalFlip(p=0.5),
      A.VerticalFlip(p=0.5),
      A.RandomRotate90(p=1.0),
      A.Transpose(p=0.5),
      ToTensorV2(p=1.0),
  ]
  return A.Compose(steps, bbox_params=BBOX, p=1.0)

def get_val_transform(img_size):
  """Builds the deterministic preprocessing pipeline for validation samples.

  Args:
    img_size: sequence whose first two items are the target height and width.

  Returns:
    An albumentations Compose that scales the longest image side to the
    target width, pads up to the target height and width, and converts the
    result to a tensor.
  """
  height, width = img_size[0], img_size[1]
  steps = [
      A.LongestMaxSize(max_size=width, p=1.0),
      # border_mode=0 is the constant-border mode in OpenCV's numbering.
      A.PadIfNeeded(min_height=height,
                    min_width=width,
                    border_mode=0,
                    p=1.0),
      ToTensorV2(p=1.0),
  ]
  return A.Compose(steps, bbox_params=BBOX, p=1.0)
  
def get_default_transform(img_size):
  """Builds the fallback pipeline used when the training transform fails.

  Args:
    img_size: sequence whose first two items are the output height and width.

  Returns:
    An albumentations Compose that resizes to the requested size and converts
    the result to a tensor.
  """
  height, width = img_size[0], img_size[1]
  steps = [
      A.Resize(height=height, width=width, p=1.0),
      ToTensorV2(p=1.0),
  ]
  return A.Compose(steps, bbox_params=BBOX, p=1.0)

**Generic UAV Dataset**

In [None]:
class UAVDataset(Dataset):
  """Base dataset that pairs video frames with their bounding boxes.

  Args:
    meta_data: DataFrame with one row per frame. This class reads the "file",
      "img_width" and "img_height" columns.
    boxes: DataFrame of annotations. This class reads the "x", "y", "width"
      and "height" columns.
    transform: optional factory that is called with the image size and
      returns the transform to apply to each sample.
    image_size: an int (square image) or a (height, width) sequence.
    train: when True, transformed boxes are reordered from
      (x_min, y_min, x_max, y_max) to (y_min, x_min, y_max, x_max).
  """

  def __init__(self, meta_data,
               boxes,
               transform=None,
               image_size=None,
               train=False):
    # Zero-argument super() is bound to this instance. The previous
    # one-argument form only created an unbound super object and never
    # reached the base-class initialiser.
    super().__init__()

    self.meta_data = meta_data
    self.boxes = boxes
    if isinstance(image_size, int):
      self.image_size = (image_size, image_size)
    else:
      self.image_size = image_size
    self.transform = transform(self.image_size) if transform else None
    self.train = train

  def _box_to_tensor(self, sample, target):
    """Stores the boxes of a transformed sample in `target` as a tensor.

    Args:
      sample: transform output; only sample["bboxes"] is read.
      target: dict that receives the "bboxes" tensor (modified in place).

    Returns:
      The same `target` dict.
    """
    if len(sample["bboxes"]) > 0:
      target["bboxes"] = torch.tensor(sample["bboxes"])
    else:
      # Keep a well-formed (0, 4) tensor when no box is present.
      target["bboxes"] = torch.zeros((0, 4))
    if self.train:
      # Convert the boxes from xyxy to yxyx order.
      target["bboxes"] = target["bboxes"][:, [1, 0, 3, 2]]
    return target

  def __len__(self) -> int:
    """Returns the number of images."""
    return self.meta_data.shape[0]

  def load_image_and_boxes(self, image_meta, image_boxes):
    """Loads the image of a meta-data row together with its boxes.

    Boxes are converted from (x, y, width, height) to
    (x_min, y_min, x_max, y_max) and the maxima are clipped to the image
    width and height.

    Args:
      image_meta: meta-data row with "file", "img_width" and "img_height".
      image_boxes: annotation rows that belong to this image.

    Returns:
      A tuple (image, bboxes): the image as float32 scaled to [0, 1] and an
      array with one xyxy row per box.

    Raises:
      FileNotFoundError: if the image file cannot be read.
    """
    # cv2.imread signals a missing or unreadable file by returning None.
    image = cv2.imread(image_meta["file"])
    if image is None:
      raise FileNotFoundError(f"Could not read image: {image_meta['file']}")
    # NOTE(review): cv2.imread yields BGR channel order and no RGB conversion
    # is applied here; confirm this is what the pretrained weights expect.
    image = image.astype(np.float32) / 255.0

    bboxes = image_boxes[["x", "y", "width", "height"]].values
    bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
    bboxes[bboxes[:, 2] > image_meta["img_width"], 2] = image_meta["img_width"]
    bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
    bboxes[bboxes[:, 3] > image_meta["img_height"], 3] = image_meta["img_height"]
    return image, bboxes

**Training Data**

In [None]:
class TrainDataset(UAVDataset):
  """Training dataset that retries the augmentation until a box survives.

  Args:
    meta_data: DataFrame with one row per frame.
    boxes: DataFrame of annotations.
    image_size: an int or a (height, width) sequence.
    transform: optional factory for the training transform.
    default_transform: optional factory for the fallback transform.
    max_iter: maximum number of augmentation attempts per sample.
  """

  def __init__(self, meta_data,
               boxes,
               image_size=None,
               transform=None,
               default_transform=None,
               max_iter=30):
    super().__init__(meta_data, boxes, transform, image_size, train=True)
    self.max_iter = max_iter
    self.default_transform = default_transform(self.image_size) if default_transform else None

  def __getitem__(self, index: int):
    """Retrieves the image and boxes with the specified index."""
    row = self.meta_data.loc[index]
    row_boxes = self.boxes[self.boxes["file"] == row["file"]]
    image, bboxes = self.load_image_and_boxes(row, row_boxes)
    target = {"bboxes": torch.tensor(bboxes),
              "labels": torch.ones((bboxes.shape[0]), dtype=torch.int64)}

    if self.transform:
      if target["bboxes"].shape[0] == 0:
        # Nothing to preserve on a frame without boxes: augment once.
        sample = self.transform(image=image,
                                bboxes=target["bboxes"],
                                labels=target["labels"])
        return sample["image"], self._box_to_tensor(sample, target)

      # Retry until the random crop keeps at least one box.
      for _ in range(self.max_iter):
        sample = self.transform(image=image,
                                bboxes=target["bboxes"],
                                labels=target["labels"])
        if len(sample["bboxes"]) > 0:
          target = self._box_to_tensor(sample, target)
          target["labels"] = torch.stack(sample["labels"])
          return sample["image"], target

    # Reached without a transform, or after every attempt lost all boxes.
    if self.default_transform and image.shape[2] == 3:
      sample = self.default_transform(image=image,
                                      bboxes=target["bboxes"],
                                      labels=target["labels"])
      image, target = sample["image"], self._box_to_tensor(sample, target)
    return image, target

**Validation Data**

In [None]:
class ValDataset(UAVDataset):
  """Validation dataset that applies its transform exactly once per sample.

  Args:
    meta_data: DataFrame with one row per frame.
    boxes: DataFrame of annotations.
    image_size: an int or a (height, width) sequence.
    transform: optional factory for the validation transform.
    train: forwarded to UAVDataset; selects the yxyx box order.
  """

  def __init__(self, meta_data,
               boxes,
               image_size,
               transform=None,
               train=False):
    super().__init__(meta_data, boxes,
                     transform=transform,
                     image_size=image_size,
                     train=train)

  def __getitem__(self, index: int):
    """Retrieves the image and boxes with the specified index."""
    row = self.meta_data.loc[index]
    row_boxes = self.boxes[self.boxes["file"] == row["file"]]
    image, bboxes = self.load_image_and_boxes(row, row_boxes)
    target = {"bboxes": bboxes,
              "labels": torch.ones(bboxes.shape[0], dtype=torch.int64)}

    if not self.transform:
      return image, target

    sample = self.transform(image=image,
                            bboxes=target["bboxes"],
                            labels=target["labels"])
    return sample['image'], self._box_to_tensor(sample, target)

In [None]:
def collate_fn(batch):
    """Regroups a list of (image, target) samples into per-field tuples."""
    fields = zip(*batch)
    return tuple(fields)

# Training samples use the random augmentation pipeline, with the plain
# resize pipeline as fallback.
train_dataset = TrainDataset(meta_data=train_df,
                             boxes=train_annotations,
                             image_size=TRAIN_SIZE,
                             transform=get_train_transforms,
                             default_transform=get_default_transform)
# Batches are drawn in random order; the last incomplete batch is dropped.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=global_config.batch_size,
    num_workers=global_config.num_workers,
    sampler=RandomSampler(train_dataset),
    pin_memory=False,
    drop_last=True,
    collate_fn=collate_fn)


def get_val_dataset_dict(df,
                     annotations,
                     dataset,
                     get_transforms,
                     size_dict):
  """Builds one validation dataset and one DataLoader per video.

  Args:
    df: frame table with at least the "csv" and "frame_id" columns.
    annotations: annotation table with a "csv" column.
    dataset: dataset class to instantiate for each video.
    get_transforms: transform factory forwarded to the dataset.
    size_dict: maps each csv name to the image size used for that video.

  Returns:
    A tuple (dataset_dict, dataset_loaders), both keyed by csv name.

  Raises:
    KeyError: if a csv found in `df` has no entry in `size_dict`.
  """
  dataset_dict = {}
  dataset_loaders = {}
  for csv in df["csv"].unique():
    # Frames are kept in temporal order and re-indexed from 0 so that the
    # dataset can address them by position.
    csv_df = df[df["csv"] == csv].sort_values('frame_id').reset_index(drop=True)
    annot = annotations[annotations["csv"] == csv]
    # train=True selects the yxyx box order, the same as for training data.
    dataset_dict[csv] = dataset(meta_data=csv_df,
                                boxes=annot,
                                image_size=size_dict[csv],
                                transform=get_transforms,
                                train=True)
    dataset_loaders[csv] = torch.utils.data.DataLoader(dataset_dict[csv],
                                                       batch_size=global_config.batch_size,
                                                       num_workers=global_config.num_workers,
                                                       sampler=SequentialSampler(dataset_dict[csv]),
                                                       shuffle=False,
                                                       pin_memory=False,
                                                       collate_fn=collate_fn)
  return dataset_dict, dataset_loaders
# One validation dataset and loader per video, each at that video's own size.
val_datasets, val_loaders = get_val_dataset_dict(val_df,
                                                 val_annotations,
                                                 ValDataset,
                                                 get_val_transform,
                                                 size_dict=sizes)

In [None]:
class AverageMeter:
  """Computes and stores the average and current value"""
  def __init__(self):
    self.reset()

  def reset(self):
    """Clears the current value, the running sum, the count and the average."""
    self.current = 0
    self.avg = 0
    self.sum = 0
    self.count = 0

  def update(self, val, n=1, avg=False):
    """Adds a new measurement.

    Args:
      val: the measured value.
      n: number of items the value covers.
      avg: when True, `val` is a mean over `n` items and is weighted by `n`;
        otherwise it is added to the running sum as is.
    """
    self.current = val
    self.sum += val * n if avg else val
    self.count += n
    self._refresh_avg()

  def concat(self, other_meter):
    """Merges the totals of another meter into this one."""
    self.current += other_meter.current
    self.sum += other_meter.sum
    self.count += other_meter.count
    self._refresh_avg()

  def _refresh_avg(self):
    """Recomputes the average; it stays 0 while nothing has been counted."""
    self.avg = self.sum / self.count if self.count else 0
        
class LossMeter:
  """Tracks the total, classification and box losses with one meter each.

  Args:
    loss: meter for the total loss; a new AverageMeter when None.
    class_loss: meter for the classification loss; a new AverageMeter when None.
    box_loss: meter for the box loss; a new AverageMeter when None.
  """

  # Names shared by the meter attributes and the keys of the model output.
  _FIELDS = ("loss", "class_loss", "box_loss")

  def __init__(self, loss=None, class_loss=None, box_loss=None):
    self.loss = AverageMeter() if loss is None else loss
    self.class_loss = AverageMeter() if class_loss is None else class_loss
    self.box_loss = AverageMeter() if box_loss is None else box_loss

  def update(self, output, n=1, avg=False):
    """Feeds the three losses of a model output dict into their meters."""
    for field in self._FIELDS:
      getattr(self, field).update(output[field].detach().item(), n, avg)

  def concat(self, other_meter):
    """Merges every meter of another LossMeter into this one."""
    for field in self._FIELDS:
      getattr(self, field).concat(getattr(other_meter, field))

In [None]:
def load_net(checkpoint_path, d_size, image_size):
    """Loads a trained single-class EfficientDet checkpoint for prediction.

    Args:
      checkpoint_path: checkpoint file whose "model_state_dict" entry holds
        the weights of the bare EfficientDet network, as written by
        Fitter.save.
      d_size: EfficientDet variant index, used as
        'tf_efficientdet_d{d_size}_ap'.
      image_size: input size stored in the model config.

    Returns:
      A DetBenchPredict wrapper in eval mode, moved to the GPU.
    """
    config = get_efficientdet_config(f'tf_efficientdet_d{d_size}_ap')
    config.num_classes = 1
    config.image_size = image_size
    net = EfficientDet(config, pretrained_backbone=False)
    # Rebuild the classification head for the configured number of classes.
    # The previous code read this from `copy_config`, a name that does not
    # exist in this function.
    net.reset_head(num_classes=config.num_classes)

    # The checkpoint stores the bare network's weights, so they are loaded
    # before the network is wrapped in a bench.
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['model_state_dict'])
    del checkpoint

    net = DetBenchPredict(net)
    net.eval()
    return net.cuda()

In [None]:
class Fitter:
  """Runs the train / validate / checkpoint loop.

  Args:
    model: training bench. Calling it with (images, target) must return a
      dict with "loss", "class_loss" and "box_loss"; the wrapped network is
      reached through its `.model` attribute.
    val_model: dict that maps each validation key to a bench of the same
      kind, used to evaluate the loader stored under the same key.
    device: torch device the batches are moved to.
    config: object providing `folder`, `lr`, `n_epochs`, `verbose`,
      `verbose_step`, `SchedulerClass` and `scheduler_params`.
    d_size: not used by this class; optional so that callers may omit it.
    start_epoch: index of the first epoch to run.
  """

  def __init__(self, model, val_model, device, config, d_size=None, start_epoch=0):
    self.config = config
    self.start_epoch = start_epoch

    self.base_dir = config.folder
    os.makedirs(self.base_dir, exist_ok=True)

    self.log_path = f"{self.base_dir}/log.txt"
    # Lowest averaged validation loss seen so far (None until first measured).
    self.best_summary_loss = None

    self.model = model
    self.val_model = val_model
    self.device = device

    self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=config.lr,
                                       weight_decay=4e-5)
    self.scheduler = config.SchedulerClass(self.optimizer, **config.scheduler_params)
    self.log(f"Fitter prepared. Device is {self.device}")

  def _print_line(self, summary_loss, step, total_steps, stage, t):
    """Prints the running losses of the current epoch; `t` is its start time."""
    print(
        f"{stage} Step {step}/{total_steps}, " + \
        f"summary_loss: {summary_loss.loss.avg:.5f}, " + \
        f"class_loss: {summary_loss.class_loss.avg:.5f}, " + \
        f"box_loss: {summary_loss.box_loss.avg:.5f}, " + \
        f"time: {(time.time() - t):.5f}")

  def _log_line(self, summary_loss, epoch, stage, t):
    """Formats the end-of-stage summary line; `t` is the stage start time."""
    return f"{stage} Epoch: {epoch}, " + \
           f"summary loss: {summary_loss.loss.avg:.5f}, " +\
           f"class loss: {summary_loss.class_loss.avg:.5f}, " + \
           f"box_loss: {summary_loss.box_loss.avg:.5f}, " + \
           f"time: {(time.time() - t):.5f}"

  def _avg_loss(self, loss_list):
    """Averages the per-loader losses, giving every loader the same weight."""
    loss, class_loss, box_loss = AverageMeter(), AverageMeter(), AverageMeter()
    for l in loss_list:
      loss.update(l.loss.avg)
      class_loss.update(l.class_loss.avg)
      box_loss.update(l.box_loss.avg)

    return LossMeter(loss, class_loss, box_loss)

  def _validate_all(self, validation_loaders):
    """Copies the current weights into every validation bench and evaluates.

    Returns:
      A tuple (summary_losses, total): the LossMeter of every loader keyed
      like `validation_loaders`, and their unweighted average.
    """
    weights = self.model.model.state_dict()
    for val_model in self.val_model.values():
      val_model.model.load_state_dict(weights)
    # Each validation video is processed separately.
    summary_losses = {k: self.validation(self.val_model[k], vl)
                      for k, vl in validation_loaders.items()}
    return summary_losses, self._avg_loss(summary_losses.values())

  def fit(self, train_loader, validation_loaders):
    """Trains up to `config.n_epochs`, validating and checkpointing each epoch.

    Args:
      train_loader: DataLoader that yields (images, targets) batches.
      validation_loaders: dict of DataLoaders keyed like `val_model`.
    """
    # When resuming without a stored best loss, measure the baseline first.
    if self.start_epoch > 0 and self.best_summary_loss is None:
      _, baseline = self._validate_all(validation_loaders)
      self.best_summary_loss = baseline.loss.avg

    for epoch in range(self.start_epoch, self.config.n_epochs):
      if self.config.verbose:
        lr = self.optimizer.param_groups[0]["lr"]
        timestamp = datetime.utcnow().isoformat()
        self.log(f"\n{timestamp}\nLR: {lr}")

      t = time.time()
      summary_loss = self.train_one_epoch(train_loader)

      self.log(self._log_line(summary_loss, epoch, "Train", t))
      self.save(f"{self.base_dir}/last-checkpoint.bin", epoch)

      t = time.time()
      summary_losses, total_summary_loss = self._validate_all(validation_loaders)

      for k, sl in summary_losses.items():
        self.log(self._log_line(sl, epoch, f"Val {k}", t))
      self.log(self._log_line(total_summary_loss, epoch, "Val", t))

      if self.best_summary_loss is None or total_summary_loss.loss.avg < self.best_summary_loss:
        self.best_summary_loss = total_summary_loss.loss.avg
        self.save(f"{self.base_dir}/best-checkpoint-{str(epoch).zfill(3)}epoch.bin", epoch)
        # Keep only the three most recent best checkpoints.
        for path in sorted(glob(f"{self.base_dir}/best-checkpoint-*epoch.bin"))[:-3]:
          os.remove(path)

      self.scheduler.step()

  def train_one_epoch(self, train_loader):
    """Runs one optimisation pass over `train_loader`.

    Returns:
      A LossMeter holding the batch-size-weighted losses of the epoch.
    """
    self.model.train()
    summary_loss = LossMeter()
    t = time.time()
    for step, (images, targets) in enumerate(train_loader):
      if self.config.verbose and step % self.config.verbose_step == 0:
        self._print_line(summary_loss, step, len(train_loader), "Train", t)
      images = torch.stack(images)
      images = images.to(self.device).float()
      batch_size = images.shape[0]
      bboxes = [target["bboxes"].to(self.device).float() for target in targets]
      labels = [target["labels"].to(self.device).float() for target in targets]

      self.optimizer.zero_grad()

      output = self.model(images, {"bbox": bboxes, "cls": labels})

      output["loss"].backward()

      summary_loss.update(output, batch_size, avg=True)

      self.optimizer.step()

    return summary_loss

  def validation(self, val_model, val_loader):
    """Evaluates `val_model` on `val_loader` without tracking gradients.

    Returns:
      A LossMeter holding the batch-size-weighted losses of the loader.
    """
    val_model.eval()
    summary_loss = LossMeter()
    with torch.no_grad():
      for images, targets in val_loader:
        images = torch.stack(images)
        batch_size = images.shape[0]
        images = images.to(self.device).float()
        bboxes = [target["bboxes"].to(self.device).float() for target in targets]
        labels = [target["labels"].to(self.device).float() for target in targets]

        output = val_model(images, {"bbox": bboxes, "cls": labels})
        summary_loss.update(output, batch_size, avg=True)
    return summary_loss

  def save(self, path, epoch):
    """Writes model, optimizer and scheduler state plus bookkeeping to `path`.

    The model is switched to eval mode before it is saved.
    """
    self.model.eval()
    torch.save({
        "model_state_dict": self.model.model.state_dict(),
        "optimizer_state_dict": self.optimizer.state_dict(),
        "scheduler_state_dict": self.scheduler.state_dict(),
        "best_summary_loss": self.best_summary_loss,
        "epoch": epoch,
        }, path)

  def load(self, path):
    """Restores a checkpoint written by `save`; training resumes one epoch later."""
    checkpoint = torch.load(path)
    self.model.model.load_state_dict(checkpoint["model_state_dict"])
    self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    self.best_summary_loss = checkpoint["best_summary_loss"]
    self.start_epoch = checkpoint["epoch"] + 1

  def log(self, message):
    """Appends `message` to the log file and prints it when verbose is set."""
    if self.config.verbose:
      print(message)
    with open(self.log_path, "a+") as logger:
      logger.write(f"{message}\n")

In [None]:
class ExtendDetBenchTrain(DetBenchTrain):
  """Training bench whose forward pass returns only the three loss terms."""

  def __init__(self, model, config):
    super().__init__(model, config)

  def forward(self, x, target):
    """Computes the losses of a batch.

    Args:
      x: batch of images passed to the wrapped model.
      target: dict with the box targets under 'bbox' and the class targets
        under 'cls'.

    Returns:
      A dict with the keys "loss", "class_loss" and "box_loss".
    """
    class_out, box_out = self.model(x)
    # Match the ground-truth boxes and classes to the anchors.
    labeled = self.anchor_labeler.batch_label_anchors(target['bbox'], target['cls'])
    cls_targets, box_targets, num_positives = labeled
    loss, class_loss, box_loss = self.loss_fn(
        class_out, box_out, cls_targets, box_targets, num_positives)
    return {"loss": loss, "class_loss": class_loss, "box_loss": box_loss}

In [None]:
def get_net(epoch=EPOCH,
            global_config=global_config,
            num_classes=NUM_CLASSES,
            image_size=TRAIN_SIZE,
            val_sizes=sizes,
            d_size=D_SIZE):
  """Builds the training bench, one validation bench per video and the Fitter.

  Args:
    epoch: 0 starts from the pretrained weights; any other value resumes
      from the best checkpoint saved for that epoch.
    global_config: training configuration handed to the Fitter.
    num_classes: number of classes the detection heads are reset to.
    image_size: side length of the square training images.
    val_sizes: maps each validation csv name to its image size.
    d_size: EfficientDet variant index, used as 'tf_efficientdet_d{d_size}_ap'.

  Returns:
    A Fitter that is ready to train (with restored state when resuming).
  """
  device = torch.device('cuda:0')
  config_name = f'tf_efficientdet_d{d_size}_ap'
  enet_config = get_efficientdet_config(config_name)
  enet_config.image_size = [image_size, image_size]
  copy_config = enet_config.copy()
  copy_config.num_classes = num_classes

  # One validation bench per video, each built for that video's image size.
  # The sizes come from the `val_sizes` argument, not the global dictionary.
  val_net = {}
  for csv, val_size in val_sizes.items():
    val_config = get_efficientdet_config(config_name)
    val_config.image_size = val_size
    val_model = EfficientDet(val_config, pretrained_backbone=False)
    val_model.reset_head(num_classes=copy_config.num_classes)
    # NOTE(review): what the second bench argument means depends on the
    # installed effdet version; confirm before changing it.
    val_net[csv] = ExtendDetBenchTrain(val_model, copy_config)
    val_net[csv].to(device)

  net = EfficientDet(enet_config, pretrained_backbone=False)
  if epoch == 0:
    # The pretrained weights are loaded before the head is replaced.
    # NOTE(review): assumes EDETS[d_size] is the weight file of variant
    # d_size; verify against the files present in DRIVE.
    net.load_state_dict(torch.load(f"{DRIVE}/{EDETS[d_size]}"))
  net.reset_head(num_classes=copy_config.num_classes)
  # The same bench class is used for fresh and resumed runs, so that the
  # training and validation models always match.
  net = ExtendDetBenchTrain(net, copy_config)
  net.to(device)

  fitter = Fitter(model=net,
                  val_model=val_net,
                  device=device, config=global_config, start_epoch=0, d_size=d_size)
  if epoch != 0:
    # Restores the weights, optimizer, scheduler and the epoch to start from.
    fitter.load(f"{global_config.folder}/best-checkpoint-{str(epoch).zfill(3)}epoch.bin")

  return fitter

# Build the models and the Fitter using the notebook-level defaults.
fitter = get_net()

Adjusting learning rate of group 0 to 2.0000e-04.
Fitter prepared. Device is cuda:0


In [None]:
# Start training; each epoch is followed by validation on every video.
fitter.fit(train_loader, val_loaders)


2021-06-01T02:44:17.699503
LR: 0.0002
Train Step 0/3810, summary_loss: 0.00000, class_loss: 0.00000, box_loss: 0.00000, time: 1.01961
Train Step 250/3810, summary_loss: 1.11676, class_loss: 0.78671, box_loss: 0.00660, time: 74.07296
Train Step 500/3810, summary_loss: 1.00734, class_loss: 0.69261, box_loss: 0.00629, time: 147.07250
Train Step 750/3810, summary_loss: 0.93901, class_loss: 0.63560, box_loss: 0.00607, time: 220.22538
Train Step 1000/3810, summary_loss: 0.90589, class_loss: 0.60695, box_loss: 0.00598, time: 293.19583
Train Step 1250/3810, summary_loss: 0.87142, class_loss: 0.57889, box_loss: 0.00585, time: 365.90084
Train Step 1500/3810, summary_loss: 0.83781, class_loss: 0.55185, box_loss: 0.00572, time: 438.90154
Train Step 1750/3810, summary_loss: 0.81765, class_loss: 0.53575, box_loss: 0.00564, time: 511.44036
Train Step 2000/3810, summary_loss: 0.80628, class_loss: 0.52760, box_loss: 0.00557, time: 584.93818
Train Step 2250/3810, summary_loss: 0.79921, class_loss: 0.52