In [1]:
import math
import importlib

import torch
import timm
import cv2
import albumentations as A
import matplotlib.pyplot as plt
from torch import optim
from albumentations.pytorch import ToTensorV2
from torch.optim import lr_scheduler 


import Custom_dataset
import anchor
import backbone
from CustomAugment import Cutmix
from configuration import dataset_config
from configuration import dataloader_config
from configuration import OptimizerConfig
from configuration import augmentation_config
from configuration import training_config




  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Start the run from a clean GPU memory state: drop cached allocator blocks
# and reset the peak-memory counters.
# Guarded so the notebook also runs on a CPU-only machine, where resetting
# CUDA statistics raises. `reset_peak_memory_stats()` is the non-deprecated
# replacement for `reset_max_memory_allocated()`.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()


# Dataset locations and label metadata are read from the shared configuration.
image_dir = dataset_config.image_dir
label_dir = dataset_config.label_dir
classes = dataset_config.classes
img_size = dataset_config.image_size

# Base dataset; its default transform also comes from the configuration module.
image_test = Custom_dataset.ListDataset(
    image_dir,
    label_dir,
    classes,
    transform=augmentation_config.transform,
)


def _yolo_bbox_params():
    """Return a fresh BboxParams for YOLO-format boxes labelled via 'class_labels'."""
    return A.BboxParams(
        format='yolo',                   # input box format
        label_fields=['class_labels'],   # name of the label argument
    )


# First transform handed to add_agumentation: bbox-safe random crop resized to
# img_size x img_size, followed by random flips.
# NOTE(review): the original note said the crop keeps boxes bigger than 1% of
# the image size - not verifiable from this cell, confirm.
train_transform = A.Compose(
    [
        A.RandomSizedBBoxSafeCrop(width=img_size, height=img_size, erosion_rate=0.8),
        A.HorizontalFlip(p=0.3),
        A.VerticalFlip(p=0.2),
    ],
    bbox_params=_yolo_bbox_params(),
)

# Last transform handed to add_agumentation: noise, colour jitter, random
# shift/rotation, ImageNet-style normalisation and conversion to a tensor.
final_transform = A.Compose(
    [
        A.AdditiveNoise(
            noise_type="gaussian",
            spatial_mode="constant",
            noise_params={"mean_range": (0.0, 0.0), "std_range": (0.05, 0.15)},
        ),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=1.0),
        # Random shift of -20%..+20% along both x and y.
        A.Affine(
            translate_percent={'x': (-0.2, 0.2), 'y': (-0.2, 0.2)},
            p=0.3,
        ),
        A.Affine(rotate=(-20, 20), p=0.5),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ],
    bbox_params=_yolo_bbox_params(),
)

# Extra augmentation parameters read from the configuration
# (presumably consumed by the CutMix step - confirm in Custom_dataset).
aug_para = {
    "alpha": augmentation_config.alpha,
    "lambda": augmentation_config.lamda,
    "prob": augmentation_config.prob,
}

image_test.add_agumentation(train_transform, aug_para, final_transform)

# 80/20 random split of the dataset indices into training and validation parts.
set_size = len(image_test)
train_dataset_size = int(0.8 * set_size)
test_dataset_size = set_size - train_dataset_size

train_indces, valid_indces = torch.utils.data.random_split(
    range(set_size), [train_dataset_size, test_dataset_size]
)
print("set size: {0}, test_set size : {1}".format(train_dataset_size, test_dataset_size))

# Each subset must use its own index set. The validation subset was previously
# built from the training indices, so validation ran on training samples and
# the held-out 20% was never used.
# NOTE(review): both subsets wrap the same `image_test` object, so whatever
# augmentation is configured on it presumably applies to validation samples
# too - confirm in Custom_dataset.
train_dataset = torch.utils.data.Subset(image_test, train_indces)
valid_dataset = torch.utils.data.Subset(image_test, valid_indces)
# This problem is caused by a data type mismatch between array and tensor

# Loader settings come from the shared configuration.
batch_size = dataloader_config.batch_size
num_workers = dataloader_config.num_workers

# Options shared by both loaders; batches are assembled by the dataset's own
# collate function.
loader_kwargs = {
    "batch_size": batch_size,
    "num_workers": num_workers,
    "collate_fn": image_test.collate_fn,
}

# Only the training loader takes the configured shuffle flag; the validation
# loader keeps the DataLoader default.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=dataloader_config.shuffle,
    **loader_kwargs,
)

valid_dataloader = torch.utils.data.DataLoader(valid_dataset, **loader_kwargs)

  


# Build the detector with one output class per configured dataset class.
model = backbone.YoloV4Model(num_classes=len(dataset_config.classes),
                             to_vector=True)

# Smoke test: push the first training batch through the model once and keep
# the batch (`gt_datas`) and the raw model output (`output`) for the cells below.
model.train()
for epoch in range(training_config.epochs):
    for i, data in enumerate(train_dataloader):
        print(f"batch shape: {data['image'].shape}")
        print(f"bboxes shape: {data['bboxes'].shape}")
        print(f"class_labels shape: {data['class_labels'].shape}")
        print(f"image shape: {data['image'].shape}")
        print("\n")
        gt_datas = data
        output = model(data['image'])
        break  # one batch is enough for the check
    break  # ...and only the first epoch

# Disabled encoder check, kept for reference. It used to be a bare
# triple-quoted string; as the last expression of the cell its repr was echoed
# as the cell output, so it is now a real comment block.
#
# for i, data in enumerate(train_dataset):
#     print(f"\n🌀 step : {i}")
#     print(f"bboxes shape: {data['bboxes'].shape}")
#     print(f"class_labels shape: {data['class_labels'].shape}")
#
#     result_bboxes, result_labels = image_test.encoder.encoder(data['bboxes'], data['class_labels'])
#
#     print(f"result_bboxes shape: {result_bboxes.shape}")
#     print(f"result_labels shape: {result_labels.shape}")
#     if i == 5:
#         break





set size: 1084, test_set size : 272
batch shape: torch.Size([32, 3, 448, 448])
bboxes shape: torch.Size([32, 12348, 4])
class_labels shape: torch.Size([32, 12348])
image shape: torch.Size([32, 3, 448, 448])


backbone output : [torch.Size([32, 32, 448, 448]), torch.Size([32, 64, 224, 224]), torch.Size([32, 128, 112, 112]), torch.Size([32, 256, 56, 56]), torch.Size([32, 512, 28, 28]), torch.Size([32, 1024, 14, 14])]
P_large_output : torch.Size([32, 512, 14, 14])
route_3 interpolation  : torch.Size([32, 256, 28, 28])
before route_2  : torch.Size([32, 512, 28, 28])
after route_2  : torch.Size([32, 256, 28, 28])
torch.Size([32, 256, 28, 28])
large : torch.Size([32, 21, 14, 14]), mid : torch.Size([32, 21, 28, 28]), small : torch.Size([32, 21, 56, 56])


'\nfor i, data in enumerate(train_dataset):\n    print(f"\n🌀 step : {i}")\n    print(f"bboxes shape: {data[\'bboxes\'].shape}")\n    print(f"class_labels shape: {data[\'class_labels\'].shape}")\n    \n    result_bboxes, result_labels = image_test.encoder.encoder(data[\'bboxes\'], data[\'class_labels\'])\n    \n    print(f"result_bboxes shape: {result_bboxes.shape}")\n    print(f"result_labels shape: {result_labels.shape}")\n    if i == 5:\n        break\n'

In [3]:
# Show the shape of each of the three model outputs.
for scale_idx in range(3):
    print(output[scale_idx].shape)

# Join the three outputs along dim 1 into a single prediction tensor.
result = torch.cat([output[0], output[1], output[2]], dim=1)
print(result.shape)
# TODO: first thing tomorrow morning, double-check the size of the `result` tensor.

torch.Size([32, 588, 7])
torch.Size([32, 2352, 7])
torch.Size([32, 9408, 7])
torch.Size([32, 12348, 7])


In [4]:

# Number of batches per epoch.
# NOTE: despite its name this is NOT a batch size. The name is kept so that
# any later cell relying on it keeps working.
total_batch_size = math.ceil(train_dataset_size / dataloader_config.batch_size)

nbs = 64  # nominal batch size
# Gradient accumulation and weight-decay scaling are defined relative to the
# per-step batch size. They were previously computed from the number of
# batches per epoch, which made the weight decay grow with the dataset size.
accumulate = max(round(nbs / dataloader_config.batch_size), 1)  # accumulate loss before optimizing
modified_weight_decay = (OptimizerConfig.weight_decay
                         * dataloader_config.batch_size * accumulate / nbs)  # scale weight_decay
optimizer = optim.Adam(params=model.parameters(),
                       lr=OptimizerConfig.lr0,
                       weight_decay=modified_weight_decay,  # L2 regularization
                       betas=(0.9, 0.999))  # adjust beta1 to momentum


# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
# NOTE(review): LinearLR's start_factor/end_factor are multipliers applied to
# the optimizer's base lr, not absolute learning rates. Passing lr0 as
# start_factor makes warmup start at lr0 * lr0 - confirm this is intended.
warmup_end_lr = (1 - OptimizerConfig.lrf) / 2 + OptimizerConfig.lrf  # warmup end factor
warmup_scheduler = lr_scheduler.LinearLR(optimizer,
                                         start_factor=OptimizerConfig.lr0,
                                         end_factor=warmup_end_lr,
                                         total_iters=training_config.warmup_epochs)

# Cosine decay after warmup. Clamped to at least one epoch so the cosine
# factor never divides by zero when warmup covers the whole run.
epochs_cosine = max(training_config.epochs - training_config.warmup_epochs, 1)


def lf(x):
    """Cosine factor going from 1.0 at x = 0 down to lrf at x = epochs_cosine."""
    return ((1 + math.cos(x * math.pi / epochs_cosine)) / 2) * (1 - OptimizerConfig.lrf) + OptimizerConfig.lrf


# NOTE(review): both schedulers are bound to the same optimizer and nothing in
# this cell chains them (e.g. through lr_scheduler.SequentialLR) - confirm the
# training loop steps the right one in each epoch.
cosine_scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)


In [5]:
from loss import DetectionLoss

In [6]:
from loss import DetectionLoss

# Slice the prediction tensor along its last axis: the first four channels,
# channel 4, and the remaining channels (named by the author as location,
# objectness and class scores).
pred_loc = result[..., :4]
pred_obj = result[..., 4]
prod_cls = result[..., 5:]  # NOTE(review): name looks like a typo for `pred_cls`

# Ground truth: give the class labels a trailing axis of size 1 so they can be
# appended to the boxes along the last dimension.
gt_bboxes = gt_datas['bboxes']
gt_class_labels = gt_datas['class_labels']
gt_class_labels = gt_class_labels.view(*gt_class_labels.shape[:2], 1)
gt = torch.cat((gt_bboxes, gt_class_labels), dim=2)
print(f"gt shape: {gt.shape}")
print(f"result shape: {result.shape}")

# NOTE(review): the saved run of this cell ends with
# "RuntimeError: result type Float can't be cast to the desired output type Long",
# apparently raised inside the loss call below - check the dtype handling in
# DetectionLoss.
loss = DetectionLoss(num_classes=len(dataset_config.classes))
loss_value = loss.forward(result, gt)



gt shape: torch.Size([32, 12348, 5])
result shape: torch.Size([32, 12348, 7])
iou shape: torch.Size([32, 12348])
c shape: torch.Size([32, 12348])
d shape: torch.Size([32, 12348])
v shape: torch.Size([32, 12348])
v shape: torch.Size([32, 12348])
alpha shape: torch.Size([32, 12348])


RuntimeError: result type Float can't be cast to the desired output type Long