In [1]:
# Move the working directory up one level; the relative `deps/`, `src` and `resources/`
# paths used in later cells are resolved against it (presumably the project root — confirm).
%cd ..

/environment/Projects/dl


In [2]:
! python -m pip install deps/light-torch/

Processing ./deps/light-torch
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hBuilding wheels for collected packages: light-torch
  Building wheel for light-torch (pyproject.toml) ... [?25ldone
[?25h  Created wheel for light-torch: filename=light_torch-0.0.1-py3-none-any.whl size=6012 sha256=4737ed5f6ab9160581810f3a888789da038982d4afe589ef7f87d9b95704decc
  Stored in directory: /home/stefan-cristianhantig/.cache/pip/wheels/76/4b/a5/ce53c36ade717c024783dce5309d843ab5a592d06a292dafc2
Successfully built light-torch
Installing collected packages: light-torch
  Attempting uninstall: light-torch
    Found existing installation: light-torch 0.0.1
    Uninstalling light-torch-0.0.1:
      Successfully uninstalled light-torch-0.0.1
Successfully installed light-torch-0.0.1


In [3]:
from light_torch.module import Module

In [4]:
from src.object_detection.yolov1 import YoloV1Backbone, YoloV1, ObjectDetectionModule

In [5]:
from src.classification.models.resnet import Resnet

In [6]:
from torchsummary import summary

In [7]:
import json

In [8]:
# Locations of the COCO 2014 instance-annotation JSON files (train / val splits),
# given relative to the current working directory.
TRAIN_ANNOTATION_FILE = "resources/object_detection/annotations/instances_train2014.json"
VAL_ANNOTATION_FILE = "resources/object_detection/annotations/instances_val2014.json"

In [9]:
# Parse both annotation files into plain Python objects.
# JSON is UTF-8 by specification, so decode explicitly rather than relying on the
# platform's locale-dependent default encoding.
with open(TRAIN_ANNOTATION_FILE, "r", encoding="utf-8") as json_file:
    train_coco = json.load(json_file)

with open(VAL_ANNOTATION_FILE, "r", encoding="utf-8") as json_file:
    val_coco = json.load(json_file)

In [10]:
import cv2

In [11]:
from pathlib import Path

In [12]:
import matplotlib.pyplot as plt

In [13]:
# Directories holding the train / val images, relative to the current working directory.
TRAIN_IMAGES_PATH = Path("./resources/object_detection/train2014/")
VAL_IMAGES_PATH = Path("./resources/object_detection/val2014/")

In [14]:
import numpy as np

In [15]:
from torch.utils.data.dataset import Dataset

In [16]:
from functools import lru_cache

In [17]:
from torchvision.transforms import Compose

In [18]:
from IPython.core.debugger import set_trace

In [19]:
from src.object_detection.data.coco import CocoDetectionDataset, make_id2category_map, collect_annotations

In [20]:
from src.object_detection.utils.visualization import draw_bbox_xyhw, draw_rect_xywh_, draw_grid_, draw_boxes_tlbr_

In [21]:
import torch

In [22]:
from src.transform import to_rgb, to_torch, resize

In [23]:
def convert_to_torch(x):
    """Convert ``x`` to a ``torch.Tensor``.

    Args:
        x: A ``torch.Tensor``, a ``numpy.ndarray``, or anything ``numpy.array`` accepts
            (for example nested lists of numbers).

    Returns:
        ``x`` itself when it is already a tensor; otherwise the result of
        ``torch.from_numpy`` applied to ``x`` (converted to an ndarray first if needed).
    """
    # Already a tensor: return it untouched. A NumPy round-trip would copy the data and
    # raises for tensors that require grad or do not live on the CPU.
    if isinstance(x, torch.Tensor):
        return x
    if not isinstance(x, np.ndarray):
        x = np.array(x)
    return torch.from_numpy(x)

In [24]:
def target_to_torch(target):
    """Apply ``convert_to_torch`` to a target.

    A ``dict`` target yields a new dict with the same keys and every value converted;
    any other target is converted directly.
    """
    if not isinstance(target, dict):
        return convert_to_torch(target)

    converted = {}
    for key, value in target.items():
        converted[key] = convert_to_torch(value)
    return converted

In [25]:
# Image transform pipeline consisting of the single `to_torch` step.
img_transform = Compose([to_torch])

In [26]:
def select_categories(annotation_dict, categories=None):
    """Tell whether an annotation belongs to one of the wanted categories.

    Args:
        annotation_dict: Mapping with a ``"category_id"`` entry.
        categories: Container of accepted category ids. ``None`` (the default) disables
            filtering, so every annotation is accepted.

    Returns:
        ``True`` if the annotation should be kept, ``False`` otherwise.
    """
    # With the default of None the membership test would raise TypeError, so treat a
    # missing filter as "keep everything".
    if categories is None:
        return True
    return annotation_dict["category_id"] in categories
    

In [27]:
from functools import partial

In [29]:
def transpose(xss):
    """Regroup the items of ``xss`` by position.

    The i-th tuple of the returned list collects the i-th element of every inner
    iterable; the result is as long as the shortest inner iterable.
    """
    return [column for column in zip(*xss)]

In [30]:
from torch.nn import functional as F

In [31]:
from collections import defaultdict

In [32]:
def pad_collate(batch):
    """Collate ``(image, target)`` samples into one zero-padded image batch.

    Every image is padded at the end of its last two dimensions up to the largest
    height and width found in the batch. For each non-empty target, the ``"boxes"``
    entry is multiplied by ``[h / h_max, w / w_max, h / h_max, w / w_max]``.

    Args:
        batch: Sequence of ``(image, target)`` pairs. Images must be 3-D, with the last
            two dimensions treated as height and width; targets are passed through
            ``target_to_torch``.

    Returns:
        ``(images, targets)``: the stacked padded images and a ``defaultdict`` mapping
        each target key to the list of per-sample values.
    """
    images, annotations = transpose(batch)
    # The element-wise maximum over all image shapes gives the common padded size.
    shapes = [np.array(image.shape) for image in images]
    _, h_max, w_max = np.max(shapes, axis=0)

    padded_images = []
    collated_targets = defaultdict(list)
    for image, annotation in zip(images, annotations):
        annotation = target_to_torch(annotation)
        _, h, w = image.shape
        # F.pad expects (left, right, top, bottom) for the last two dimensions.
        padding = (0, w_max - w, 0, h_max - h)
        padded_images.append(F.pad(image, padding, "constant", 0))

        if len(annotation):
            # NOTE(review): presumably boxes are relative to the un-padded image and their
            # coordinate order matches this (h, w, h, w) factor layout — confirm.
            h_ratio, w_ratio = h / h_max, w / w_max
            scale = torch.from_numpy(np.array([h_ratio, w_ratio, h_ratio, w_ratio]))
            annotation["boxes"] = annotation["boxes"] * scale
        for key, value in annotation.items():
            collated_targets[key].append(value)

    return torch.stack(padded_images), collated_targets
    

In [33]:
from torch.utils.data.dataloader import DataLoader
# Build the train and validation datasets with identical settings: `to_torch` and
# `target_to_torch` as transforms, keeping only annotations with category id 78 or 79.
coco_train_ds, val_train_ds = (
    CocoDetectionDataset(
        coco,
        images_path,
        transform=to_torch,
        target_transform=target_to_torch,
        select_annotation=partial(select_categories, categories=[78, 79]),
    )
    for coco, images_path in (
        (train_coco, TRAIN_IMAGES_PATH),
        (val_coco, VAL_IMAGES_PATH),
    )
)

# Shuffled loaders yielding batches of two samples, collated by `pad_collate`.
train_dl = DataLoader(
    dataset=coco_train_ds,
    batch_size=2,
    shuffle=True,
    collate_fn=pad_collate,
)
val_dl = DataLoader(
    dataset=val_train_ds,
    batch_size=2,
    shuffle=True,
    collate_fn=pad_collate,
)

In [36]:
# Disabled experiment, kept for reference:
#resnet = Resnet()
# The detector is constructed from the training dataset's `categories_num`.
yolo = YoloV1(coco_train_ds.categories_num)

In [37]:
from src.object_detection.yolov1 import YoloV1Loss

In [38]:
# Loss object later handed to ObjectDetectionModule; built with default arguments.
yolov1_loss = YoloV1Loss()

In [39]:
from collections import defaultdict

In [40]:
def transpos_dict(dict_list):
    """Turn a list of dicts into a dict of lists.

    Each key seen in any input dict maps to the list of values found under that key,
    in input order. The result is a ``defaultdict(list)``.
    """
    merged = defaultdict(list)
    for mapping in dict_list:
        for key in mapping:
            merged[key].append(mapping[key])
    return merged

In [41]:
def pad_images_by_hw(img, max_h, max_w):
    """Zero-pad a 3-D image so its last two dimensions become ``(max_h, max_w)``.

    Padding is appended at the end of each of the two dimensions (bottom and right);
    the existing values keep their positions.
    """
    _, height, width = img.shape
    # F.pad expects (left, right, top, bottom) for the last two dimensions.
    padding = (0, max_w - width, 0, max_h - height)
    return F.pad(img, pad=padding, mode="constant", value=0)

In [42]:
from src.object_detection.yolov1 import YoloV1Loss

In [43]:
# NOTE(review): second YoloV1Loss instance; the module built below uses `yolov1_loss`,
# and no visible cell reads `yv1_loss` — likely a leftover.
yv1_loss = YoloV1Loss()

In [44]:
from light_torch.train import Trainer

In [46]:
# Bundle the model, the loss and the dataset's id-to-name map into one module;
# `iou_threshold=0.2` is passed through (its exact use is defined in ObjectDetectionModule).
obj_detection = ObjectDetectionModule(yolo, yolov1_loss, coco_train_ds.id2name_map, iou_threshold=0.2)

In [47]:
# Plain SGD over every parameter exposed by the module, learning rate 1e-3.
optimizer = torch.optim.SGD(obj_detection.parameters(), lr=0.001)

In [48]:
# Configure the light_torch trainer for ten epochs.
# NOTE(review): `accumulation=4` presumably means gradient accumulation over four
# batches — confirm against light_torch.
trainer = Trainer(
    obj_detection, 
    epochs=10,
    accumulation=4,
    optimizer=optimizer,
    name="yolov1",
)

In [49]:
# The cell output is the Trainer repr, i.e. `train()` returns the trainer itself
# (presumably it only switches to training mode, like `nn.Module.train` — confirm).
trainer.train()

Trainer(
  (module): ObjectDetectionModule(
    (model): YoloV1(
      (backbone): YoloV1Backbone(
        (st_conv): Conv2dSamePadding(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        (st_maxpol): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
        (net): Sequential(
          (conv2): Conv2dSamePadding(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (activation2): LeakyReLU(negative_slope=0.01)
          (pool2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
          (identity_conv31): Conv2dSamePadding(192, 128, kernel_size=(1, 1), stride=(1, 1))
          (conv31): Conv2dSamePadding(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (activation31): LeakyReLU(negative_slope=0.01)
          (identity_conv32): Conv2dSamePadding(256, 256, kernel_size=(1, 1), stride=(1, 1))
          (conv32): Conv2dSamePadding(256, 512, kernel_size=(3, 3), stride=(1, 1), pad

In [50]:
# Show which loggers the trainer has attached.
trainer.loggers

[CliLog()]

In [51]:
# Train on `train_dl`, validating on `val_dl`.
# NOTE(review): the recorded run logged the first train epoch and then stopped with
# "AttributeError: 'ObjectDetectionModule' object has no attribute '_init'".
trainer.fit(train_dl, val_data=val_dl)

Train epoch  001:   0%|          | 0/1227 [00:00<?, ?it/s]

STAGE TRAIN: Epoch:  001
{'box_loss': 30.94063367703909, 'classification_loss': 7.594014616083493, 'confidence_loss': 3.495257728081881, 'loss': 42.02990602120437}


AttributeError: 'ObjectDetectionModule' object has no attribute '_init'

In [51]:
# No earlier cell defines `batch`, so take one collated batch from the validation loader
# here; this keeps the cell runnable after a kernel restart.
# NOTE(review): confirm that `val_step` expects the `(images, targets)` pair the loader yields.
batch = next(iter(val_dl))
with torch.no_grad():  # gradients are not needed for a validation step
    obj_detection.val_step(batch)

In [52]:
# Manually invoke the module's end-of-validation hook after the single validation step.
obj_detection.on_val_end()

In [54]:
# Inspect the per-batch log; the recorded output holds the loss terms and mAP at
# IoU thresholds 0.5 through 0.95.
obj_detection.get_batch_log()

{'box_loss': 49.54795295827014,
 'classification_loss': 11.735513687133789,
 'confidence_loss': 12.811930656433105,
 'loss': 74.09539730183704,
 'mAP_0.5': 0.0,
 'mAP_0.55': 0.0,
 'mAP_0.6': 0.0,
 'mAP_0.65': 0.0,
 'mAP_0.7': 0.0,
 'mAP_0.75': 0.0,
 'mAP_0.8': 0.0,
 'mAP_0.85': 0.0,
 'mAP_0.9': 0.0,
 'mAP_0.95': 0.0}

In [53]:
# Inspect the per-epoch log; in the recorded run it matches the batch log, as only one
# validation step was executed.
obj_detection.get_epoch_log()

{'box_loss': 49.54795295827014,
 'classification_loss': 11.735513687133789,
 'confidence_loss': 12.811930656433105,
 'loss': 74.09539730183704,
 'mAP_0.5': 0.0,
 'mAP_0.55': 0.0,
 'mAP_0.6': 0.0,
 'mAP_0.65': 0.0,
 'mAP_0.7': 0.0,
 'mAP_0.75': 0.0,
 'mAP_0.8': 0.0,
 'mAP_0.85': 0.0,
 'mAP_0.9': 0.0,
 'mAP_0.95': 0.0}