## Modifizierter YOLO Algorithmus
Valentino Golob

##### Aufbereitung der Daten
- Hier sollte man noch eine geschicktere Aufteilung wählen.
- Eine einfache Möglichkeit wäre, sich hier an der Uhrzeit zu orientieren; dies setzt voraus, dass die Lichtverhältnisse mit der Uhrzeit korrelieren.  

In [None]:
import random

# Shuffle the image list with a fixed seed so the split is repeatable, then
# write the first 1600 entries as the training set and the rest as the test set.
# Every file is opened in a `with` block so the handle is closed (and the data
# flushed) as soon as it has been read or written.
random.seed(3)
num_train = 1600

with open('PathImages.txt') as list_file:
    lines = list_file.readlines()

random.shuffle(lines)

with open('PathImages_train.txt', 'w') as train_file:
    train_file.writelines(lines[:num_train])

with open('PathImages_test.txt', 'w') as test_file:
    test_file.writelines(lines[num_train:])

##### Umwandlung von absoluten Bezügen in ein relatives, normiertes Bezugssystem
Die Positionsbestimmung der Bounding Boxes (BB) in dem unbearbeiteten Datensatz erfolgt in absoluten Pixelkoordinaten. Um die Anwendung und Implementierung bestehender Algorithmen zu erleichtern, werden die Werte zur Bestimmung der lokalen Position und Größe der BB in einem Preprocessing-Schritt in relative Koordinaten umgewandelt.

In [None]:
"""
Create the label file (bounding boxes) for every annotated image.

All box values are normalised by the image size, i.e. expressed as fractions
of the image width / height.
example:
    image name: 2020_06_26_06_02_33.png
    label name: 2020_06_26_06_02_33.txt
structure of the 2020_06_26_06_02_33.txt (label file):
    - one row per bounding box
    - 0 0.6807191568505889 0.21293800539083557 0.030998140111593304 0.16711590296495957
    - class_id, x_center, y_center, width, height

Images without annotations get no label file.
"""

import os
import json

# Key under which the manifest stores the annotations and the image size.
label_job = 'bee-labeling-2k-batch-01'
label_dir = '../data/labels'

# Make sure the output directory exists before the first label file is written.
os.makedirs(label_dir, exist_ok=True)

# The manifest holds one JSON document per line.
with open('label.txt', 'r') as file_manifest:
    for json_file in file_manifest:
        if not json_file.strip():
            # tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue

        record = json.loads(json_file)
        annotations = record[label_job]['annotations']
        if not annotations:
            continue

        # Swap only the real extension; a plain str.replace('png', 'txt') would
        # also rewrite "png" inside the stem and leave other extensions as-is.
        image_name = record['source-ref'].split('/')[-1].strip()
        file_name = os.path.splitext(image_name)[0] + '.txt'

        image_size = record[label_job]['image_size'][0]
        image_width = image_size['width']
        image_height = image_size['height']

        path_file_name = os.path.join(label_dir, file_name)
        with open(path_file_name, 'w') as file_label:
            for annotation in annotations:
                # absolute (left, top, width, height) -> normalised centre format
                width = annotation['width'] / image_width
                height = annotation['height'] / image_height
                xc = (annotation['left'] + (annotation['width'] / 2)) / image_width
                yc = (annotation['top'] + (annotation['height'] / 2)) / image_height
                file_label.write("{} {} {} {} {}\n".format(
                    int(annotation['class_id']), xc, yc, width, height))

##### .txt-Datei mit den Dateipfaden zu den Bildern

In [None]:
# Write one image path per line to PathImages.txt.
path = os.path.join('../data/images')

# os.listdir returns entries in arbitrary order; sorting makes the list (and
# therefore any seeded shuffle of it) reproducible across machines and runs.
dirList = sorted(os.listdir(path))

# NOTE(review): the directory is listed relative to '../data/images' but the
# entries are written with a './data/images/' prefix -- presumably the list is
# consumed from a different working directory; confirm.
with open('PathImages.txt', 'w') as f:
    for filename in dirList:
        f.write('./data/images/' + filename + '\n')

##### Import notwendiger Module

In [None]:
import os
import sys
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from PIL import Image
import torchvision.transforms.functional as TF
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from collections import Counter
import pandas as pd
import optuna
import csv

##### Convert data labels

In [None]:
"""
Create the label file (bounding boxes) for every annotated image.

All box values are normalised by the image size, i.e. expressed as fractions
of the image width / height.
example:
    image name: 2020_06_26_06_02_33.png
    label name: 2020_06_26_06_02_33.txt
structure of the 2020_06_26_06_02_33.txt (label file):
    - one row per bounding box
    - 0 0.6807191568505889 0.21293800539083557 0.030998140111593304 0.16711590296495957
    - class_id, x_center, y_center, width, height

Images without annotations get no label file.
"""

import os
import json

# Key under which the manifest stores the annotations and the image size.
label_job = 'bee-labeling-2k-batch-01'
label_dir = '../data/labels'

# Make sure the output directory exists before the first label file is written.
os.makedirs(label_dir, exist_ok=True)

# The manifest holds one JSON document per line.
with open('label.txt', 'r') as file_manifest:
    for json_file in file_manifest:
        if not json_file.strip():
            # tolerate blank lines (e.g. a trailing newline at the end of the file)
            continue

        record = json.loads(json_file)
        annotations = record[label_job]['annotations']
        if not annotations:
            continue

        # Swap only the real extension; a plain str.replace('png', 'txt') would
        # also rewrite "png" inside the stem and leave other extensions as-is.
        image_name = record['source-ref'].split('/')[-1].strip()
        file_name = os.path.splitext(image_name)[0] + '.txt'

        image_size = record[label_job]['image_size'][0]
        image_width = image_size['width']
        image_height = image_size['height']

        path_file_name = os.path.join(label_dir, file_name)
        with open(path_file_name, 'w') as file_label:
            for annotation in annotations:
                # absolute (left, top, width, height) -> normalised centre format
                width = annotation['width'] / image_width
                height = annotation['height'] / image_height
                xc = (annotation['left'] + (annotation['width'] / 2)) / image_width
                yc = (annotation['top'] + (annotation['height'] / 2)) / image_height
                file_label.write("{} {} {} {} {}\n".format(
                    int(annotation['class_id']), xc, yc, width, height))

##### Parameter

In [None]:
# Release cached GPU memory held by PyTorch's allocator from earlier cells.
torch.cuda.empty_cache()

# `is_available` has to be *called*: the bare function object is always truthy,
# which selected "cuda:0" even on machines without a GPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 8 # 64 in original paper but I don't have that much vram
#WEIGHT_DECAY = 0
EPOCHS = 500
NUM_WORKERS = 2
PIN_MEMORY = True
LOAD_MODEL = False
LOAD_MODEL_FILE = "overfit.pth.tar"
# Number of grid cells as (rows S1, columns S2).
SPLIT_SIZE = (12, 48)

##### Model - angepasst nach Quelle: [Aladdin Persson - YOLO/model.py](https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/YOLO/model.py)

In [None]:
class CNNBlock(nn.Module):
    """Convolution followed by batch normalisation and LeakyReLU(0.1).

    Extra keyword arguments (kernel_size, stride, padding, ...) are forwarded
    unchanged to ``nn.Conv2d``.
    """

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()
        # No conv bias: the batch norm that follows has its own shift term.
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.batchnorm = nn.BatchNorm2d(out_channels)
        self.leakyrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        features = self.conv(x)
        normalised = self.batchnorm(features)
        return self.leakyrelu(normalised)


class Yolov1(nn.Module):
    """YOLOv1-style detector: a convolutional backbone plus a two-layer head.

    Parameters:
        hidden_size_FFN (int): width of the hidden fully connected layer
        architecture_config (list): backbone description, where
            a tuple is (kernel_size, out_channels, stride, padding),
            a str stands for a 2x2 max pooling with stride 2, and
            a list is [conv_tuple_1, conv_tuple_2, num_repeats]
        in_channels (int): number of channels of the input image
        **kwargs: forwarded to ``_create_fcs`` (split_size, num_boxes, num_classes)
    """

    def __init__(self, hidden_size_FFN, architecture_config, in_channels=3, **kwargs):
        super().__init__()
        self.hidden_size_FFN = hidden_size_FFN
        self.architecture = architecture_config
        self.in_channels = in_channels
        # The backbone must be built first: it records its output channel
        # count, which sizes the first fully connected layer.
        self.darknet = self._create_conv_layers(self.architecture)
        self.fcs = self._create_fcs(**kwargs)

    def forward(self, x):
        x = self.darknet(x)
        return self.fcs(torch.flatten(x, start_dim=1))

    def _create_conv_layers(self, architecture):
        """Build the backbone as ``nn.Sequential`` and remember its output channels."""
        layers = []
        in_channels = self.in_channels

        for x in architecture:
            if isinstance(x, tuple):
                layers.append(
                    CNNBlock(
                        in_channels, x[1], kernel_size=x[0], stride=x[2], padding=x[3],
                    )
                )
                in_channels = x[1]

            elif isinstance(x, str):
                layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))

            elif isinstance(x, list):
                conv1, conv2, num_repeats = x[0], x[1], x[2]

                for _ in range(num_repeats):
                    layers.append(
                        CNNBlock(
                            in_channels,
                            conv1[1],
                            kernel_size=conv1[0],
                            stride=conv1[2],
                            padding=conv1[3],
                        )
                    )
                    layers.append(
                        CNNBlock(
                            conv1[1],
                            conv2[1],
                            kernel_size=conv2[0],
                            stride=conv2[2],
                            padding=conv2[3],
                        )
                    )
                    in_channels = conv2[1]

        # Channel count of the final feature map (previously hard-coded as 256).
        self._backbone_out_channels = in_channels
        return nn.Sequential(*layers)

    def _create_fcs(self, split_size, num_boxes, num_classes):
        """Build the head mapping the flattened S1 x S2 feature map to S1*S2*(C + 5*B) values."""
        B, C = num_boxes, num_classes
        S1, S2 = split_size

        # The layer order (Flatten, Linear, Dropout, LeakyReLU, Linear) is kept
        # so that state_dict keys of existing checkpoints still match.
        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(self._backbone_out_channels * S1 * S2, self.hidden_size_FFN),
            nn.Dropout(0),
            nn.LeakyReLU(0.1),
            nn.Linear(self.hidden_size_FFN, S1 * S2 * (C + B * 5)),
        )

##### Loss function - angepasst nach Quelle: [Aladdin Persson - YOLO/loss.py](https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/YOLO/loss.py)

In [None]:
"""
Implementation of Yolo Loss Function from the original yolo paper
"""

class YoloLoss(nn.Module):
    """
    Sum-of-squares YOLO loss for a rectangular S1 x S2 grid.

    The last axis of predictions is read as
    [class, conf_1, box_1 (4 values), conf_2, box_2 (4 values)];
    the target uses the first six of these entries.
    """

    def __init__(self, S=(7,7), B=2, C=1):
        """
        S is the split size of the image as (S1, S2) (7x7 in the paper),
        B is the number of boxes per cell (2 in the paper),
        C is the number of classes (1 in the bee project).
        """
        super().__init__()
        self.mse = nn.MSELoss(reduction="sum")

        self.S1, self.S2 = S
        self.B = B
        self.C = C

        # Weights from the YOLO paper for the no-object confidence term
        # (noobj) and for the box coordinate term (coord).
        self.lambda_noobj = 0.5
        self.lambda_coord = 5

    def forward(self, predictions, target):
        # The flat network output becomes (N, S1, S2, C + B*5).
        preds = predictions.reshape(-1, self.S1, self.S2, self.C + self.B * 5)

        true_boxes = target[..., 2:6]
        first_boxes = preds[..., 2:6]
        second_boxes = preds[..., 7:11]

        # IoU of each predicted box with the target box, stacked on a new
        # leading axis so the argmax over it is 0 (first box) or 1 (second box).
        candidate_ious = torch.stack(
            (
                intersection_over_union(first_boxes, true_boxes),
                intersection_over_union(second_boxes, true_boxes),
            ),
            dim=0,
        )
        iou_maxes, bestbox = torch.max(candidate_ious, dim=0)

        # 1 where the cell holds an object, 0 otherwise (Iobj_i in the paper).
        exists_box = target[..., 1].unsqueeze(3)

        # ---------------- box coordinates ----------------
        # Keep only the responsible box and zero it in cells without an object.
        responsible_box = bestbox * second_boxes + (1 - bestbox) * first_boxes
        box_predictions = exists_box * responsible_box
        box_targets = exists_box * true_boxes

        # Width and height enter the loss as square roots; predictions keep
        # their sign because raw network outputs may be negative.
        pred_wh = box_predictions[..., 2:4]
        signed_root = torch.sign(pred_wh) * torch.sqrt(torch.abs(pred_wh + 1e-6))
        box_predictions = torch.cat((box_predictions[..., :2], signed_root), dim=-1)
        box_targets = torch.cat(
            (box_targets[..., :2], torch.sqrt(box_targets[..., 2:4])), dim=-1
        )

        box_loss = self.mse(
            torch.flatten(box_predictions, end_dim=-2),
            torch.flatten(box_targets, end_dim=-2),
        )

        # ---------------- object confidence ----------------
        # Confidence of the box with the highest IoU.
        first_conf = preds[..., 1:2]
        second_conf = preds[..., 6:7]
        true_conf = target[..., 1:2]
        pred_box = bestbox * second_conf + (1 - bestbox) * first_conf

        object_loss = self.mse(
            torch.flatten(exists_box * pred_box),
            torch.flatten(exists_box * true_conf),
        )

        # ---------------- no-object confidence ----------------
        # Both predicted confidences are penalised in cells without an object.
        empty_cell = 1 - exists_box
        empty_target = torch.flatten(empty_cell * true_conf, start_dim=1)
        no_object_loss = self.mse(
            torch.flatten(empty_cell * first_conf, start_dim=1), empty_target
        ) + self.mse(
            torch.flatten(empty_cell * second_conf, start_dim=1), empty_target
        )

        # ---------------- class ----------------
        class_loss = self.mse(
            torch.flatten(exists_box * preds[..., :1], end_dim=-2),
            torch.flatten(exists_box * target[..., :1], end_dim=-2),
        )

        return (
            self.lambda_coord * box_loss  # first two rows in the paper
            + object_loss  # third row
            + self.lambda_noobj * no_object_loss  # fourth row
            + class_loss  # fifth row
        )

##### Bienen Datensatz - Code teilweise aus PyTorch Computer Vision Cookbook - Kapitel [Single-Object-Detection](https://github.com/PacktPublishing/PyTorch-Computer-Vision-Cookbook/blob/master/Chapter04/Ch4_Scirpts.ipynb) und Kapitel [Multi-Object-Detection](https://github.com/PacktPublishing/PyTorch-Computer-Vision-Cookbook/blob/master/Chapter05/Chapter%205.ipynb) und [Aladdin Persson - YOLO/dataset.py](https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/YOLO/dataset.py)

In [None]:
class BeeDataset(Dataset):
    """Bee images with YOLO-style targets encoded on an S1 x S2 grid.

    Parameters:
        path2listFile (str): text file with one image path per line
        S (tuple): grid size as (rows S1, columns S2)
        B (int): number of boxes per cell (only sizes the target tensor)
        C (int): number of classes
        transform (callable): optional, called as transform(img, labels, trans_params)
            and expected to return (img, labels)
        trans_params: passed through to transform

    The label path is derived from the image path by replacing "images" with
    "labels" and the .png/.jpg extension with .txt.
    """

    def __init__(self, path2listFile, S=(7,7), B=2, C=1, transform=None, trans_params=None):
        with open(path2listFile, "r") as file:
            # Skip blank lines (e.g. a trailing newline); they are not samples.
            self.path2imgs = [line for line in file if line.strip()]

        self.path2labels = [
            path.replace("images", "labels").replace(".png", ".txt").replace(".jpg", ".txt")
            for path in self.path2imgs]

        self.trans_params = trans_params
        self.transform = transform
        self.S1, self.S2 = S
        self.B = B
        self.C = C

    def __len__(self):
        return len(self.path2imgs)

    def __getitem__(self, index):
        """Return (image, label_matrix); label_matrix has shape (S1, S2, C + 5*B)."""
        path2img = self.path2imgs[index].rstrip()
        img = Image.open(path2img).convert('RGB')

        path2label = self.path2labels[index].rstrip()
        if os.path.exists(path2label):
            # each row: class_id, x_center, y_center, width, height
            labels = np.loadtxt(path2label).reshape(-1, 5)
        else:
            # No label file: treat the image as containing no boxes instead of
            # passing None on, which crashed further down.
            print(path2label)
            labels = np.zeros((0, 5))

        if self.transform:
            img, labels = self.transform(img, labels, self.trans_params)

        # convert to cells
        label_matrix = torch.zeros((self.S1, self.S2, self.C + 5 * self.B)) # here we could remove * self.B
        for box in labels:
            class_label = int(box[0])
            x, y, w, h = (float(value) for value in box[1:5])

            # i, j are the cell row and cell column; the clamp keeps a centre
            # lying exactly on the right/bottom border inside the grid.
            i = min(int(self.S1 * y), self.S1 - 1)
            j = min(int(self.S2 * x), self.S2 - 1)
            x_cell, y_cell = self.S2 * x - j, self.S1 * y - i

            # Width and height relative to one cell:
            #   width_pixels = w * image_width, cell_pixels = image_width / S2
            #   => width_pixels / cell_pixels = w * S2   (same for the height)
            width_cell, height_cell = w * self.S2, h * self.S1

            # Only the first box that falls into a cell is kept, i.e. the
            # encoding is restricted to ONE object per cell.
            if label_matrix[i, j, 1] == 0:
                # mark that the cell contains an object
                label_matrix[i, j, 1] = 1

                # box coordinates
                label_matrix[i, j, 2:6] = torch.tensor(
                    [x_cell, y_cell, width_cell, height_cell]
                )

                # one hot encoding for class_label
                label_matrix[i, j, class_label] = 1

        return img, label_matrix
        

def hflip(image, labels):
    """Flip the image horizontally and mirror column 1 of labels.

    Column 1 of labels is replaced in place by ``1 - value``
    (presumably the normalised x centre -- see the label file format).
    Returns the flipped image and the same labels object.
    """
    flipped = TF.hflip(image)
    labels[:, 1] = 1 - labels[:, 1]
    return flipped, labels
    
 
def vflip(image, labels):
    """Flip the image vertically and mirror column 2 of labels.

    Column 2 of labels is replaced in place by ``1 - value``
    (presumably the normalised y centre -- see the label file format).
    Returns the flipped image and the same labels object.
    """
    flipped = TF.vflip(image)
    labels[:, 2] = 1 - labels[:, 2]
    return flipped, labels
    
def grayscale(image):
    """Return the image converted to grayscale, still with three channels."""
    return TF.to_grayscale(image, num_output_channels=3)

def pad_to_square(img, boxes, pad_value=0, normalized_labels=True):
    """Pad the shorter image side so the image becomes square and adapt the boxes.

    Parameters:
        img: image exposing ``.size`` as (width, height)
        boxes: 2-D array whose columns 1..4 hold x_center, y_center, width,
            height; it is modified in place
        pad_value: fill value for the added border
        normalized_labels (bool): True if the box values are fractions of the
            image size, False if they are given in pixels
    Returns:
        (padded image, boxes) with box values as fractions of the padded size
    """
    w, h = img.size
    w_factor, h_factor = (w, h) if normalized_labels else (1, 1)

    # Split the missing pixels between both sides; the second side gets the
    # extra pixel when the difference is odd.
    dim_diff = abs(h - w)
    pad1 = dim_diff // 2
    pad2 = dim_diff - pad1

    if h <= w:
        left, top, right, bottom = 0, pad1, 0, pad2
    else:
        left, top, right, bottom = pad1, 0, pad2, 0

    img_padded = TF.pad(img, padding=(left, top, right, bottom), fill=pad_value)
    w_padded, h_padded = img_padded.size

    # Only the left/top padding moves the image content. Adding the right /
    # bottom padding to the far box corner (as before) shifted the centre by
    # half a pixel whenever the padding was odd.
    x_center = w_factor * boxes[:, 1] + left
    y_center = h_factor * boxes[:, 2] + top

    boxes[:, 1] = x_center / w_padded
    boxes[:, 2] = y_center / h_padded
    boxes[:, 3] *= w_factor / w_padded
    boxes[:, 4] *= h_factor / h_padded

    return img_padded, boxes

def transformer(image, labels, params):
    """Apply the configured preprocessing / augmentation to an image and its boxes.

    Parameters:
        image: input image
        labels: numpy array with one row per box
            (class_id, x_center, y_center, width, height), or None for an
            image without boxes
        params (dict): reads "pad2square", "target_size", "p_hflip",
            "p_vflip" and "grayscale"
    Returns:
        (image tensor, labels as a torch tensor)
    """
    if labels is None:
        # An image without a label file has no boxes.
        labels = np.zeros((0, 5))

    if params["pad2square"] is True:
        image, labels = pad_to_square(image, labels)

    image = TF.resize(image, params["target_size"])

    # Each flip is applied with its own probability.
    if random.random() < params["p_hflip"]:
        image, labels = hflip(image, labels)

    if random.random() < params["p_vflip"]:
        image, labels = vflip(image, labels)

    if params["grayscale"] is True:
        image = grayscale(image)

    image = TF.to_tensor(image)
    targets = torch.from_numpy(labels)

    return image, targets

##### Utils - Code aus [Aladdin Persson - YOLO/utils.py](https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/Pytorch/object_detection/YOLO/utils.py), teilweise angepasst

In [None]:
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Calculates intersection over union
    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (..., 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (..., 4)
        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
    Returns:
        tensor: Intersection over union for all examples, shape (..., 1)
    Raises:
        ValueError: if box_format is neither "midpoint" nor "corners"
    """

    if box_format == "midpoint":
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]
    else:
        # Previously an unknown format surfaced as an UnboundLocalError below.
        raise ValueError(
            "box_format must be 'midpoint' or 'corners', got {!r}".format(box_format)
        )

    # corners of the intersection rectangle
    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # .clamp(0) is for the case when they do not intersect
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    # the small constant avoids a division by zero for two empty boxes
    return intersection / (box1_area + box2_area - intersection + 1e-6)


def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Does Non Max Suppression given bboxes
    Parameters:
        bboxes (list): list of boxes, each given as
        [class_pred, prob_score, followed by four box coordinates]
        iou_threshold (float): a box of the same class whose IoU with an
        already kept box is not below this value is dropped
        threshold (float): boxes whose prob_score is not above this value
        are dropped up front (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes
    Returns:
        list: the kept boxes, ordered by descending prob_score
    """

    assert type(bboxes) is list

    candidates = sorted(
        (candidate for candidate in bboxes if candidate[1] > threshold),
        key=lambda candidate: candidate[1],
        reverse=True,
    )
    survivors = []

    while candidates:
        # The highest-scoring remaining box is always kept ...
        top, remaining = candidates[0], candidates[1:]
        candidates = []

        # ... and removes every same-class box that overlaps it too much.
        for other in remaining:
            if other[0] != top[0]:
                candidates.append(other)
                continue

            overlap = intersection_over_union(
                torch.tensor(top[2:]),
                torch.tensor(other[2:]),
                box_format=box_format,
            )
            if overlap < iou_threshold:
                candidates.append(other)

        survivors.append(top)

    return survivors

def mean_average_precision(
        pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=1, plot=False
):
    """
    Calculates mean average precision
    Parameters:
        pred_boxes (list): list of boxes, each given as
        [train_idx, class_prediction, prob_score, followed by four box coordinates]
        true_boxes (list): same layout as pred_boxes, holding the ground truth
        iou_threshold (float): a prediction counts as correct when its best
        IoU with a ground truth box of the same image exceeds this value
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes
        plot (bool): if True, draw the precision-recall curve of every
        evaluated class and print the mAP
    Returns:
        tensor: mAP averaged over all classes that have ground truth boxes;
        0 if no class has any
    """

    # one AP value (and one precision-recall curve) per evaluated class
    average_precisions = []
    pr_curves = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        # only predictions and targets of the current class
        detections = [detection for detection in pred_boxes if detection[1] == c]
        ground_truths = [true_box for true_box in true_boxes if true_box[1] == c]

        # a class without ground truth does not contribute to the mean
        if not ground_truths:
            continue

        # Group the ground truth by image once instead of filtering the whole
        # list again for every detection.
        gts_by_image = {}
        for gt in ground_truths:
            gts_by_image.setdefault(gt[0], []).append(gt)

        # One flag per ground truth box: 1 once a detection has claimed it,
        # e.g. {0: tensor([0, 0, 0]), 1: tensor([0, 0, 0, 0, 0])}.
        matched = {
            image_idx: torch.zeros(len(image_gts))
            for image_idx, image_gts in gts_by_image.items()
        }

        # most confident detections first (prob_score is index 2)
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))
        total_true_bboxes = len(ground_truths)

        for detection_idx, detection in enumerate(detections):
            best_iou = 0
            best_gt_idx = None

            # compare only against the ground truth of the same image
            for idx, gt in enumerate(gts_by_image.get(detection[0], [])):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            if best_iou > iou_threshold:
                # every ground truth box may be detected only once
                if matched[detection[0]][best_gt_idx] == 0:
                    TP[detection_idx] = 1
                    matched[detection[0]][best_gt_idx] = 1
                else:
                    FP[detection_idx] = 1
            else:
                # IoU too low (or no ground truth in this image)
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = torch.div(TP_cumsum, (TP_cumsum + FP_cumsum + epsilon))
        # start the curve at (recall 0, precision 1); float literals keep the
        # dtype equal to that of the computed values
        precisions = torch.cat((torch.tensor([1.0]), precisions))
        recalls = torch.cat((torch.tensor([0.0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))
        pr_curves.append((recalls, precisions))

    # The mean is taken after ALL classes were processed; the former early
    # return inside the loop ignored every class after the first evaluated one
    # and fell through to None when no class had ground truth.
    if average_precisions:
        mean_avg_prec = sum(average_precisions) / len(average_precisions)
    else:
        mean_avg_prec = torch.tensor(0.0)

    # precision-recall curve plot(s)
    if plot:
        for recalls, precisions in pr_curves:
            fig, ax = plt.subplots()
            ax.plot(recalls, precisions)
            ax.set(xlabel='Recall', ylabel='Precision')
            ax.grid(which='both')
        print("mAP:{}".format(mean_avg_prec))
        plt.show()

    return mean_avg_prec


def plot_image(image, boxes):
    """Show the image and draw every box in boxes as a red rectangle.

    The first two entries of a box are skipped; the remaining four are read
    as x midpoint, y midpoint, width and height and are scaled by the image
    width / height before drawing.
    """
    pixels = np.array(image)
    img_height, img_width, _ = pixels.shape

    # one figure with a single axes showing the image
    _, axes = plt.subplots(1)
    axes.imshow(pixels)

    for entry in boxes:
        coords = entry[2:]
        assert len(coords) == 4, "Got more values than in x, y, w, h, in a box!"

        # the rectangle is anchored at its upper left corner
        upper_left_x = coords[0] - coords[2] / 2
        upper_left_y = coords[1] - coords[3] / 2
        axes.add_patch(
            patches.Rectangle(
                (upper_left_x * img_width, upper_left_y * img_height),
                coords[2] * img_width,
                coords[3] * img_height,
                linewidth=1,
                edgecolor="r",
                facecolor="none",
            )
        )

    plt.show()


def get_bboxes(loader, model, iou_threshold, threshold, pred_format="cells", box_format="midpoint", device="cuda", S=(7, 7)):
    """Collect predicted and ground truth boxes for every sample of loader.

    The model is switched to eval mode for the pass and to train mode
    afterwards. Predictions go through non max suppression; ground truth
    boxes are kept when their score entry exceeds threshold. Every returned
    box is prefixed with a running sample index.

    Returns:
        (all_pred_boxes, all_true_boxes): two lists of boxes
    """
    grid_rows, grid_cols = S  # not used further; S itself is passed on

    collected_preds = []
    collected_truths = []

    # the model must be in eval mode while the boxes are collected
    model.eval()
    sample_idx = 0

    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        with torch.no_grad():
            outputs = model(images)

        truth_per_sample = cellboxes_to_boxes(targets, S=S)
        preds_per_sample = cellboxes_to_boxes(outputs, S=S)

        for local_idx in range(images.shape[0]):
            kept = non_max_suppression(
                preds_per_sample[local_idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )
            collected_preds.extend([sample_idx] + kept_box for kept_box in kept)

            # most target cells are empty and are filtered out here
            collected_truths.extend(
                [sample_idx] + true_box
                for true_box in truth_per_sample[local_idx]
                if true_box[1] > threshold
            )

            sample_idx += 1

    model.train()
    return collected_preds, collected_truths


def convert_cellboxes(predictions, S=(7, 7)):
    """
    Converts cell-relative boxes of an S1 x S2 grid into whole-image ratios.

    Every cell is read as 11 values
    [class, conf_1, box_1 (4 values), conf_2, box_2 (4 values)];
    the box with the higher confidence is kept per cell.

    Returns a tensor of shape (batch, S1, S2, 6) holding
    [predicted class, best confidence, x, y, w, h] for each cell.
    """
    rows, cols = S
    preds = predictions.to("cpu")
    n = preds.shape[0]
    preds = preds.reshape(n, rows, cols, 11)

    first_conf = preds[..., 1]
    second_conf = preds[..., 6]

    # 0 where the first box is more confident, 1 where the second one is
    pick_second = torch.stack((first_conf, second_conf), dim=0).argmax(0).unsqueeze(-1)
    chosen = preds[..., 2:6] * (1 - pick_second) + pick_second * preds[..., 7:11]

    # column / row index of every cell, shaped to broadcast against the boxes
    col_idx = torch.arange(cols).repeat(n, rows, 1).unsqueeze(-1)
    row_idx = torch.arange(rows).repeat(n, cols, 1).unsqueeze(-1).permute(0, 2, 1, 3)

    # cell-relative values -> fractions of the whole image
    x = 1 / cols * (chosen[..., :1] + col_idx)
    y = 1 / rows * (chosen[..., 1:2] + row_idx)
    w = 1 / cols * chosen[..., 2:3]
    h = 1 / rows * chosen[..., 3:4]
    image_boxes = torch.cat((x, y, w, h), dim=-1)

    predicted_class = preds[..., :1].argmax(-1).unsqueeze(-1)
    best_confidence = torch.max(first_conf, second_conf).unsqueeze(-1)

    return torch.cat((predicted_class, best_confidence, image_boxes), dim=-1)


def cellboxes_to_boxes(out, S=(7, 7)):
    """Convert grid output into plain Python lists of boxes.

    Returns one list per example holding S1 * S2 boxes, each a list of
    floats as produced by convert_cellboxes.
    """
    rows, cols = S
    num_examples = out.shape[0]

    per_cell = convert_cellboxes(out, S=S).reshape(num_examples, rows * cols, -1)
    # write the class entry back as a whole number
    per_cell[..., 0] = per_cell[..., 0].long()

    return [per_cell[example].tolist() for example in range(num_examples)]


def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    """Announce the save on stdout and write state to filename with torch.save."""
    print("=> Saving checkpoint")
    torch.save(obj=state, f=filename)


def load_checkpoint(checkpoint, model, optimizer):
    """Restore model and optimizer from a checkpoint dict.

    Reads checkpoint["state_dict"] into the model first, then
    checkpoint["optimizer"] into the optimizer.
    """
    print("=> Loading checkpoint")
    for receiver, key in ((model, "state_dict"), (optimizer, "optimizer")):
        receiver.load_state_dict(checkpoint[key])
    
    
def csv_writer(filename, input_size, hidden_size_FNN, learning_rate, train_loss, val_loss, AP, epoch_counter):
    """Append one result row to the CSV file filename.

    The row holds, in this order: input_size, hidden_size_FNN, learning_rate,
    train_loss, val_loss, AP, epoch_counter. The file is created if missing.
    """
    line = [input_size, hidden_size_FNN, learning_rate, train_loss, val_loss, AP, epoch_counter]
    # newline='' lets the csv module control the line endings itself; without
    # it every row is followed by an empty line on Windows.
    with open(filename, mode='a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(line)

##### Weitere Hilfsdateien

In [None]:
def csv_writer(filename, input_size, hidden_size_FNN, learning_rate, train_loss, val_loss, AP, epoch_counter,
               data_aug=None, dropout_rate=None, weight_decay=None):
    """Append one result row to the CSV file ``filename``.

    The row always starts with ``input_size, hidden_size_FNN, learning_rate,
    train_loss, val_loss, AP, epoch_counter``. ``data_aug``, ``dropout_rate``
    and ``weight_decay`` are optional; the ones that are not ``None`` are
    appended after these seven columns, in that order. They exist because
    several ``run_training`` variants in this notebook pass them as keywords.
    """
    row = [input_size, hidden_size_FNN, learning_rate, train_loss, val_loss, AP, epoch_counter]
    row.extend(extra for extra in (data_aug, dropout_rate, weight_decay) if extra is not None)
    # newline="" leaves line endings to the csv module (no blank rows on Windows).
    with open(filename, mode='a', newline='') as file:
        csv.writer(file).writerow(row)
        
def txt_writer(filename, input_size, hidden_size_FNN, learning_rate, train_loss_list, val_loss_list,
               data_aug=None, dropout_rate=None, weight_decay=None):
    """Append the loss history of one run to the text file ``filename``.

    Writes a blank line, a header line naming the hyperparameters, and then
    one ``epoch_index,train_loss,val_loss`` line per entry of
    ``train_loss_list``. ``data_aug``, ``dropout_rate`` and ``weight_decay``
    are optional; the ones that are not ``None`` are added to the end of the
    header. They exist because several ``run_training`` variants in this
    notebook pass them as keywords.

    Raises:
        IndexError: if ``val_loss_list`` is shorter than ``train_loss_list``.
    """
    header = ("input_size: " + str(input_size) + " hidden_size: " + str(hidden_size_FNN)
              + " learning_rate: " + str(learning_rate))
    optional_fields = (("data_aug", data_aug), ("dropout_rate", dropout_rate), ("weight_decay", weight_decay))
    for label, value in optional_fields:
        if value is not None:
            header += " " + label + ": " + str(value)

    with open(filename, mode='a') as file:
        file.write('\n' + header + '\n')
        for i, train_loss in enumerate(train_loss_list):
            file.write(str(i) + "," + str(train_loss) + "," + str(val_loss_list[i]) + '\n')
        
def architecture_fn(params):
    """
    Build the layer configuration list for ``params["input_size"]``.

    Entry formats:
    - tuple: (kernel_size, filters, stride, padding)
    - "M": maxpooling with stride 2x2 and kernel 2x2
    - list: tuples followed by an int giving the number of repeats

    An extra "M" follows the 192-filter entry unless the input size is 48,
    and another "M" follows the first 512-filter entry only for input size 192.
    """
    input_size = params["input_size"]
    pool_after_192 = ["M"] if input_size != 48 else []
    pool_after_512 = ["M"] if input_size == 192 else []
    return [
        (7, 64, 2, 3),
        "M",
        (3, 192, 1, 1),
        *pool_after_192,
        (3, 512, 1, 1),
        *pool_after_512,
        [(1, 256, 1, 0), (3, 512, 1, 1), 3],
        (1, 256, 1, 0),
    ]

##### Engine - Codestrukturierung aus dem YouTube-Video von Abhishek Thakur [End-to-End: Automated Hyperparameter Tuning For Deep Neural Networks](https://www.youtube.com/watch?v=4MK_OJJ82YI&ab_channel=AbhishekThakur)

In [None]:
class Engine:
    """Bundle a model, optimizer and loss function for epoch-wise training and evaluation."""

    def __init__(self, model, optimizer, loss_fn, device):
        """Store the collaborators; batches are moved to ``device`` before use."""
        self.model = model
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.device = device

    def train(self, data_loader):
        """Run one optimisation pass over ``data_loader``.

        Puts the model into training mode and performs one optimizer step per
        batch.

        Returns:
            The mean of the per-batch loss values.

        Raises:
            ZeroDivisionError: if ``data_loader`` yields no batches.
        """
        self.model.train()
        mean_loss = []
        for x, y in data_loader:
            x, y = x.to(self.device), y.to(self.device)
            self.optimizer.zero_grad()
            score = self.model(x)
            loss = self.loss_fn(score, y)
            mean_loss.append(loss.item())
            loss.backward()
            self.optimizer.step()

        return sum(mean_loss) / len(mean_loss)

    def evaluation(self, data_loader):
        """Compute the mean loss over ``data_loader`` without updating the model.

        Puts the model into evaluation mode. The forward passes run under
        ``torch.no_grad()`` so no autograd graph is built for them.

        Returns:
            The mean of the per-batch loss values.

        Raises:
            ZeroDivisionError: if ``data_loader`` yields no batches.
        """
        self.model.eval()
        mean_loss = []
        with torch.no_grad():
            for x, y in data_loader:
                x, y = x.to(self.device), y.to(self.device)
                score = self.model(x)
                loss = self.loss_fn(score, y)
                mean_loss.append(loss.item())

        return sum(mean_loss) / len(mean_loss)

## Hyperparameter anpassen
 <font color='red'>Ab diesem Abschnitt werden einzelne Codes teilweise wiederholt im Jupyter Notebook wiedergegeben.</font> Für die entsprechenden Hyperparametervariationen wurden folgende Funktionen angepasst: 
- **objective()**
- **run_training()**
- **main()** *Code zum Ausführen von run_training()*

**Die jeweiligen Überschriften in den Markdown-Zellen orientieren sich an den Kapitelüberschriften im Bericht.**
Der Code wurde teilweise aus dem YouTube-Video von Abhishek Thakur [End-to-End: Automated Hyperparameter Tuning For Deep Neural Networks](https://www.youtube.com/watch?v=4MK_OJJ82YI&ab_channel=AbhishekThakur) entnommen und entsprechend angepasst.

##### Architektur (siehe Überschrift Bericht)
First Gridsearch: learning_rate, input_size, hidden_size

In [None]:
"""
Objective function for Gridsearch
"""


def objective(trial):
    """Objective for the architecture grid search.

    Asks ``trial`` for the input size, hidden layer size and learning rate
    and returns the value produced by ``run_training`` for that combination.
    """
    input_size = trial.suggest_categorical("input_size", [96, 192])
    hidden_size = trial.suggest_categorical("hidden_size_FFN", [256, 496])
    learning_rate = trial.suggest_categorical("learning_rate", [0.00001, 0.0001, 0.001, 0.01])
    params = {
        "input_size": input_size,
        "hidden_size_FFN": hidden_size,
        "learning_rate": learning_rate,
    }
    return run_training(params, save_model=False)

In [None]:
"""
Function wrapper to run the Hyperparameteroptimization
"""


def run_training(params, save_model=False, plot=False):
    """Train one Yolov1 configuration of the architecture grid search.

    Args:
        params: Mapping with the keys ``"input_size"`` (48, 96 or 192),
            ``"hidden_size_FFN"`` and ``"learning_rate"``.
        save_model: If true, store the model and optimizer state reached at
            the end of training in ``LOAD_MODEL_FILE``.
        plot: If true, plot predictions for up to five images of the first
            training batch and print the mAP on the training loader;
            otherwise append the loss history to ``tmp_export.txt``.

    Returns:
        The lowest mean validation loss of any epoch (``np.inf`` if no epoch
        ran).
    """
    target_size_dic = {48: (48, 192), 96: (96, 384), 192: (192, 768)}
    # Flips are disabled in this experiment, so both datasets share this configuration.
    trans_params_train = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": 0.0,
        "p_vflip": 0.0,
    }

    train_dataset = BeeDataset("HelperFiles/PathImages_train.txt", S=SPLIT_SIZE,
                               transform=transformer, trans_params=trans_params_train)
    test_dataset = BeeDataset("HelperFiles/PathImages_test.txt", S=SPLIT_SIZE,
                              transform=transformer, trans_params=trans_params_train)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                              pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                             pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)

    architecture = architecture_fn(params=params)

    model = Yolov1(hidden_size_FFN=params["hidden_size_FFN"], architecture_config=architecture,
                   split_size=SPLIT_SIZE, num_boxes=2, num_classes=1)
    model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss(S=SPLIT_SIZE)

    eng = Engine(model, optimizer, loss_fn, device=DEVICE)

    best_loss = np.inf
    best_train_loss = np.inf  # only needed by the (currently disabled) CSV export below
    early_stopping_iter = 50
    early_stopping_counter = 0

    train_loss_list = []
    val_loss_list = []
    for epoch in range(EPOCHS):
        train_loss = eng.train(data_loader=train_loader)
        val_loss = eng.evaluation(data_loader=test_loader)
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)
        print(f"Epoch: {epoch}, Train Loss: {train_loss}, Validation Loss: {val_loss}")
        if train_loss < best_train_loss:
            best_train_loss = train_loss
        if val_loss < best_loss:
            best_loss = val_loss
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
        # Stop once the validation loss failed to improve for more than
        # early_stopping_iter consecutive epochs.
        if early_stopping_counter > early_stopping_iter:
            break

    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(checkpoint, filename=LOAD_MODEL_FILE)

    if plot:
        for x, _ in train_loader:
            x = x.to(DEVICE)
            # Run the model once for the batch (not once per plotted image);
            # gradients are not needed for plotting.
            with torch.no_grad():
                batch_bboxes = cellboxes_to_boxes(model(x), S=SPLIT_SIZE)
            # Plot at most five images, fewer if the batch is smaller.
            for idx in range(min(5, len(x))):
                bboxes = non_max_suppression(batch_bboxes[idx], iou_threshold=0.5, threshold=0.4,
                                             box_format="midpoint")
                plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
            break

        pred_boxes, target_boxes = get_bboxes(train_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        print("Train mAP:{}".format(mean_avg_prec))
    else:
        # AP computation and CSV export are disabled in this variant:
        #pred_boxes, target_boxes = get_bboxes(test_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        #mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        #csv_writer(filename="tmp_export.csv", input_size=params["input_size"], hidden_size_FNN=params["hidden_size_FFN"],
                   #learning_rate=params["learning_rate"], train_loss=best_train_loss, val_loss=best_loss,
                   #AP=mean_avg_prec.item(), epoch_counter=epoch)
        txt_writer(filename="tmp_export.txt", input_size=params["input_size"], hidden_size_FNN=params["hidden_size_FFN"],
                   learning_rate=params["learning_rate"], train_loss_list=train_loss_list, val_loss_list=val_loss_list)

    return best_loss

In [None]:
def main():
    """Run the architecture grid search and retrain the best configuration.

    Evaluates every combination of the search space with ``objective``,
    exports the trial table to ``export_dataframe.csv``, prints the best
    trial, retrains it with saving and plotting enabled, and prints the
    elapsed time.
    """
    # Start timing when main() runs, not when the cell defining it is executed.
    tic = time.perf_counter()

    # Grid Search
    search_space = {
        "input_size": [96, 192],
        "hidden_size_FFN": [256, 496],
        "learning_rate": [0.00001, 0.0001, 0.001, 0.01]
        }
    # One trial per grid point (2 * 2 * 4 = 16 for the space above).
    n_trials = 1
    for choices in search_space.values():
        n_trials *= len(choices)

    study = optuna.create_study(sampler=optuna.samplers.GridSampler(search_space), direction="minimize")
    study.optimize(objective, n_trials=n_trials)
    df = study.trials_dataframe().drop(['state', 'datetime_start', 'datetime_complete'], axis=1)
    df.to_csv(r'export_dataframe.csv', index=False, header=True)

    print("best trial:")
    trial_ = study.best_trial
    print(trial_.value)
    print(trial_.params)

    score = run_training(trial_.params, save_model=True, plot=True)
    print(score)

    toc = time.perf_counter()
    print(f"Runtime: {toc - tic:0.4f} seconds")

#####  Data Augmentation (siehe Überschrift Bericht)
Perform an evaluation to assess the influence of data augmentation: no_aug, h_flip, v_flip, hv_flip, gray, hv_flip_gray

In [None]:
"""
Objective function for Gridsearch
"""


def objective(trial):
    """Objective for the data-augmentation comparison.

    Input size, hidden size and learning rate each have a single choice; only
    the augmentation mode varies. Returns the value of ``run_training``.
    """
    choices = {
        "input_size": [192],
        "hidden_size_FFN": [496],
        "learning_rate": [0.0001],
        "data_augmentation": ["no_aug", "gray", "h_flip", "v_flip", "hv_flip", "hv_flip_gray"],
    }
    # Ask the trial for every parameter, in the order listed above.
    params = {name: trial.suggest_categorical(name, options) for name, options in choices.items()}
    return run_training(params, save_model=True, plot=False)

In [None]:
def run_training(params, save_model=False, plot=False):
    """Train one Yolov1 configuration of the data-augmentation comparison.

    Args:
        params: Mapping with the keys ``"input_size"`` (48, 96 or 192),
            ``"hidden_size_FFN"``, ``"learning_rate"`` and
            ``"data_augmentation"``.
        save_model: If true, store the model and optimizer state reached at
            the end of training in ``"<data_augmentation>.pth.tar"``.
        plot: If true, plot predictions for up to five images of the first
            test batch, print the AP on the test loader and append the loss
            history to ``tmp_export.txt``; otherwise additionally append a
            summary row to ``tmp_export.csv``.

    Returns:
        The lowest mean validation loss of any epoch (``np.inf`` if no epoch
        ran).
    """
    target_size_dic = {48: (48, 192), 96: (96, 384), 192: (192, 768)}
    aug_hflip, aug_vflip, aug_grayscale = get_augData(params=params)

    # NOTE(review): aug_grayscale is never used and "grayscale" is fixed to False,
    # so the "gray"/"hv_flip_gray" options presumably train like their colour
    # counterparts - confirm against get_augData and the transformer.
    trans_params_train = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": aug_hflip,
        "p_vflip": aug_vflip,
        "grayscale": False,
    }

    trans_params_test = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": 0.0,
        "p_vflip": 0.0,
        "grayscale": False,
    }

    train_dataset = BeeDataset("HelperFiles/PathImages_train.txt", S=SPLIT_SIZE,
                               transform=transformer, trans_params=trans_params_train)
    test_dataset = BeeDataset("HelperFiles/PathImages_test.txt", S=SPLIT_SIZE,
                              transform=transformer, trans_params=trans_params_test)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                              pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                             pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)

    architecture = architecture_fn(params=params)

    model = Yolov1(hidden_size_FFN=params["hidden_size_FFN"], architecture_config=architecture,
                   split_size=SPLIT_SIZE, num_boxes=2, num_classes=1)
    model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss(S=SPLIT_SIZE)

    eng = Engine(model, optimizer, loss_fn, device=DEVICE)

    best_loss = np.inf
    best_train_loss = np.inf
    early_stopping_iter = 100
    early_stopping_counter = 0

    train_loss_list = []
    val_loss_list = []
    epoch = -1  # keeps the CSV export below defined even if EPOCHS is 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(data_loader=train_loader)
        val_loss = eng.evaluation(data_loader=test_loader)
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)
        print(f"Epoch: {epoch}, Train Loss: {train_loss}, Validation Loss: {val_loss}")
        if train_loss < best_train_loss:
            best_train_loss = train_loss
        if val_loss < best_loss:
            best_loss = val_loss
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
        # Stop once the validation loss failed to improve for more than
        # early_stopping_iter consecutive epochs.
        if early_stopping_counter > early_stopping_iter:
            break

    if save_model:
        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        Filename = str(params["data_augmentation"]) + ".pth.tar"
        save_checkpoint(checkpoint, filename=Filename)

    if plot:
        for x, _ in test_loader:
            x = x.to(DEVICE)
            # Run the model once for the batch (not once per plotted image);
            # gradients are not needed for plotting.
            with torch.no_grad():
                batch_bboxes = cellboxes_to_boxes(model(x), S=SPLIT_SIZE)
            # Plot at most five images, fewer if the batch is smaller.
            for idx in range(min(5, len(x))):
                bboxes = non_max_suppression(batch_bboxes[idx], iou_threshold=0.5, threshold=0.4,
                                             box_format="midpoint")
                plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
            break

        pred_boxes, target_boxes = get_bboxes(test_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        print("AP:{}".format(mean_avg_prec))
        txt_writer(filename="tmp_export.txt", input_size=params["input_size"], hidden_size_FNN=params["hidden_size_FFN"],
                   learning_rate=params["learning_rate"], train_loss_list=train_loss_list, val_loss_list=val_loss_list)
    else:
        pred_boxes, target_boxes = get_bboxes(test_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        csv_writer(filename="tmp_export.csv", input_size=params["input_size"], hidden_size_FNN=params["hidden_size_FFN"],
                   learning_rate=params["learning_rate"], data_aug=params["data_augmentation"],
                   train_loss=best_train_loss, val_loss=best_loss,
                   AP=mean_avg_prec.item(), epoch_counter=epoch)
        txt_writer(filename="tmp_export.txt", input_size=params["input_size"], hidden_size_FNN=params["hidden_size_FFN"],
                   learning_rate=params["learning_rate"], data_aug=params["data_augmentation"],
                   train_loss_list=train_loss_list, val_loss_list=val_loss_list)

    return best_loss

In [None]:
def main():
    """Run the data-augmentation grid search and report the best trial.

    One trial is run per augmentation mode, the trial table is exported to
    ``export_dataframe.csv`` and the elapsed time is printed at the end.
    """
    tic = time.perf_counter()

    # Only the augmentation mode varies; the other parameters are pinned.
    grid = {
        "input_size": [192],
        "hidden_size_FFN": [496],
        "learning_rate": [0.0001],
        "data_augmentation": ["no_aug", "gray", "h_flip", "v_flip", "hv_flip", "hv_flip_gray"]
    }
    sampler = optuna.samplers.GridSampler(grid)
    study = optuna.create_study(sampler=sampler, direction="minimize")
    study.optimize(objective, n_trials=6)

    trials = study.trials_dataframe()
    trials = trials.drop(['state', 'datetime_start', 'datetime_complete'], axis=1)
    trials.to_csv(r'export_dataframe.csv', index=False, header=True)

    print("best trial:")
    best = study.best_trial
    print(best.value)
    print(best.params)

    # Retraining the best configuration is disabled in this variant:
    #score = run_training(best.params, save_model=True, plot=True)
    #print(score)

    toc = time.perf_counter()
    print(f"Runtime: {toc - tic:0.4f} seconds")

##### Dropout und Weight Decay (siehe Überschrift Bericht)

In [None]:
def objective(trial):
    """Objective for the dropout / weight-decay grid search.

    Architecture, learning rate and augmentation each have a single choice;
    weight decay and dropout rate vary. Returns the value of ``run_training``.
    """
    pinned = (
        ("input_size", [192]),
        ("hidden_size_FFN", [496]),
        ("learning_rate", [0.0001]),
        ("data_augmentation", ["hv_flip"]),
    )
    varied = (
        ("weight_decay", [0.0, 0.001, 0.01, 0.1]),
        ("dropout_rate", [0.0, 0.2, 0.5]),
    )
    params = {}
    for name, options in pinned + varied:
        params[name] = trial.suggest_categorical(name, options)

    return run_training(params, save_model=True, plot=False)

In [None]:
"""
Function wrapper to run the Hyperparameteroptimization
"""

def run_training(params, save_model=False, plot=False):
    """Train one Yolov1 configuration of the dropout / weight-decay search.

    Args:
        params: Mapping with the keys ``"input_size"`` (48, 96 or 192),
            ``"hidden_size_FFN"``, ``"learning_rate"``,
            ``"data_augmentation"``, ``"dropout_rate"`` and
            ``"weight_decay"``.
        save_model: If true, write a checkpoint named
            ``"<weight_decay>_<dropout_rate>.pth.tar"`` whenever the
            validation loss improves after epoch 40.
        plot: If true, plot predictions for up to five images of the first
            test batch, print the AP and append the loss history to
            ``tmp_export.txt``. Otherwise reload the saved checkpoint (when
            one was written during this run), compute the AP and export a
            summary row to ``tmp_export.csv`` plus the loss history.

    Returns:
        The lowest mean validation loss of any epoch (``np.inf`` if no epoch
        ran).
    """
    target_size_dic = {48: (48, 192), 96: (96, 384), 192: (192, 768)}
    # The grayscale value is not used by this variant ("grayscale" is fixed to False).
    aug_hflip, aug_vflip, aug_grayscale = get_augData(params=params)

    trans_params_train = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": aug_hflip,
        "p_vflip": aug_vflip,
        "grayscale": False,
    }

    trans_params_test = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": 0.0,
        "p_vflip": 0.0,
        "grayscale": False,
    }

    train_dataset = BeeDataset("HelperFiles/PathImages_train.txt", S=SPLIT_SIZE,
                               transform=transformer, trans_params=trans_params_train)
    test_dataset = BeeDataset("HelperFiles/PathImages_test.txt", S=SPLIT_SIZE,
                              transform=transformer, trans_params=trans_params_test)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                              pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                             pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)

    architecture = architecture_fn(params=params)

    model = Yolov1(hidden_size_FFN=params["hidden_size_FFN"], architecture_config=architecture,
                   dropout_rate=params["dropout_rate"], split_size=SPLIT_SIZE, num_boxes=2, num_classes=1)
    model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])
    loss_fn = YoloLoss(S=SPLIT_SIZE)

    eng = Engine(model, optimizer, loss_fn, device=DEVICE)

    best_loss = np.inf
    best_train_loss = np.inf
    early_stopping_iter = 50
    early_stopping_counter = 0

    # The checkpoint name is fixed up-front and a flag records whether a file
    # was actually written, so the reload further down never touches an
    # unbound name or a checkpoint from a different run.
    Filename = str(params["weight_decay"]) + "_" + str(params["dropout_rate"]) + ".pth.tar"
    checkpoint_saved = False

    train_loss_list = []
    val_loss_list = []
    epoch = -1  # keeps the CSV export below defined even if EPOCHS is 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(data_loader=train_loader)
        val_loss = eng.evaluation(data_loader=test_loader)
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)
        print(f"Epoch: {epoch}, Train Loss: {train_loss}, Validation Loss: {val_loss}")
        if val_loss < best_loss:
            best_loss = val_loss
            best_train_loss = train_loss
            early_stopping_counter = 0
            # Checkpoints are only written for improvements after epoch 40.
            if save_model and epoch > 40:
                checkpoint = {
                    "state_dict": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                }
                save_checkpoint(checkpoint, filename=Filename)
                checkpoint_saved = True
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break

    # Hyperparameters reported by both exports.
    export_info = {
        "input_size": params["input_size"],
        "hidden_size_FNN": params["hidden_size_FFN"],
        "learning_rate": params["learning_rate"],
        "data_aug": params["data_augmentation"],
        "dropout_rate": params["dropout_rate"],
        "weight_decay": params["weight_decay"],
    }

    if plot:
        for x, _ in test_loader:
            x = x.to(DEVICE)
            # Run the model once for the batch (not once per plotted image);
            # gradients are not needed for plotting.
            with torch.no_grad():
                batch_bboxes = cellboxes_to_boxes(model(x), S=SPLIT_SIZE)
            # Plot at most five images, fewer if the batch is smaller.
            for idx in range(min(5, len(x))):
                bboxes = non_max_suppression(batch_bboxes[idx], iou_threshold=0.5, threshold=0.4,
                                             box_format="midpoint")
                plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
            break

        pred_boxes, target_boxes = get_bboxes(test_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        print("AP:{}".format(mean_avg_prec))
        txt_writer(filename="tmp_export.txt", train_loss_list=train_loss_list, val_loss_list=val_loss_list,
                   **export_info)
    else:
        if checkpoint_saved:
            load_checkpoint(torch.load(Filename), model, optimizer)
        else:
            print("=> No checkpoint was saved during this run; evaluating the final model state")
        pred_boxes, target_boxes = get_bboxes(test_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        csv_writer(filename="tmp_export.csv", train_loss=best_train_loss, val_loss=best_loss,
                   AP=mean_avg_prec.item(), epoch_counter=epoch, **export_info)
        txt_writer(filename="tmp_export.txt", train_loss_list=train_loss_list, val_loss_list=val_loss_list,
                   **export_info)

    return best_loss

In [None]:
def main():
    """Run the dropout / weight-decay grid search and report the best trial.

    The trial table is exported to ``export_dataframe.csv`` and the elapsed
    time is printed at the end.
    """
    tic = time.perf_counter()

    # Parameters with a single choice, then the two parameters that vary.
    pinned = {
        "input_size": [192],
        "hidden_size_FFN": [496],
        "learning_rate": [0.0001],
        "data_augmentation": ["hv_flip"],
    }
    varied = {
        "weight_decay": [0.0, 0.001, 0.01, 0.1],
        "dropout_rate": [0.0, 0.2, 0.5],
    }
    grid = {**pinned, **varied}

    sampler = optuna.samplers.GridSampler(grid)
    study = optuna.create_study(sampler=sampler, direction="minimize")
    # NOTE(review): the grid has 4 * 3 = 12 combinations but only one trial is
    # requested here - confirm whether this is intentional.
    study.optimize(objective, n_trials=1)

    trials = study.trials_dataframe()
    trials = trials.drop(['state', 'datetime_start', 'datetime_complete'], axis=1)
    trials.to_csv(r'export_dataframe.csv', index=False, header=True)

    print("best trial:")
    best = study.best_trial
    print(best.value)
    print(best.params)

    # Retraining the best configuration is disabled in this variant:
    #score = run_training(best.params, save_model=True, plot=True)
    #print(score)

    toc = time.perf_counter()
    print(f"Runtime: {toc - tic:0.4f} seconds")

##### Bayesian optimization for weight decay parameter
siehe Kapitel Dropout und Weight Decay im Bericht

In [None]:
def objective(trial):
    """Objective for the weight-decay search with a float-valued range.

    All categorical parameters have a single choice; ``weight_decay`` is
    requested as a float between 0.1 and 10. Returns the value of
    ``run_training`` (no checkpointing, early stopping enabled).
    """
    params = {}
    single_choice = (
        ("input_size", 192),
        ("hidden_size_FFN", 496),
        ("learning_rate", 0.0001),
        ("data_augmentation", "hv_flip"),
        ("dropout_rate", 0.0),
    )
    for name, only_value in single_choice:
        params[name] = trial.suggest_categorical(name, [only_value])
    params["weight_decay"] = trial.suggest_float("weight_decay", 0.1, 10)

    return run_training(params, save_model=False, plot=False, early_stopping=True)

In [None]:
"""
Function wrapper to run the Hyperparameteroptimization
"""


def run_training(params, save_model=False, plot=False, early_stopping=True):
    """Train one Yolov1 configuration of the weight-decay search.

    Args:
        params: Mapping with the keys ``"input_size"`` (48, 96 or 192),
            ``"hidden_size_FFN"``, ``"learning_rate"``,
            ``"data_augmentation"``, ``"dropout_rate"`` and
            ``"weight_decay"``.
        save_model: If true, write a checkpoint named
            ``"weightdecay_<weight_decay>.pth.tar"`` whenever the validation
            loss improves after epoch 40.
        plot: If true, plot predictions for up to five images of the first
            test batch, print the AP and append the loss history to
            ``tmp_export.txt``. Otherwise reload the saved checkpoint (when
            one was written during this run), compute the AP and export a
            summary row to ``tmp_export.csv`` plus the loss history.
        early_stopping: If true, stop once the validation loss failed to
            improve for more than 50 consecutive epochs; if false, always run
            all ``EPOCHS`` epochs.

    Returns:
        The lowest mean validation loss of any epoch (``np.inf`` if no epoch
        ran).
    """
    target_size_dic = {48: (48, 192), 96: (96, 384), 192: (192, 768)}
    # The grayscale value is not used by this variant ("grayscale" is fixed to False).
    aug_hflip, aug_vflip, aug_grayscale = get_augData(params=params)

    trans_params_train = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": aug_hflip,
        "p_vflip": aug_vflip,
        "grayscale": False,
    }

    trans_params_test = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": 0.0,
        "p_vflip": 0.0,
        "grayscale": False,
    }

    train_dataset = BeeDataset("HelperFiles/PathImages_train.txt", S=SPLIT_SIZE,
                               transform=transformer, trans_params=trans_params_train)
    test_dataset = BeeDataset("HelperFiles/PathImages_test.txt", S=SPLIT_SIZE,
                              transform=transformer, trans_params=trans_params_test)

    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                              pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                             pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)

    architecture = architecture_fn(params=params)

    model = Yolov1(hidden_size_FFN=params["hidden_size_FFN"], architecture_config=architecture,
                   dropout_rate=params["dropout_rate"], split_size=SPLIT_SIZE, num_boxes=2, num_classes=1)
    model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])
    loss_fn = YoloLoss(S=SPLIT_SIZE)

    eng = Engine(model, optimizer, loss_fn, device=DEVICE)

    best_loss = np.inf
    best_train_loss = np.inf
    early_stopping_iter = 50
    early_stopping_counter = 0

    # The checkpoint name is fixed up-front and a flag records whether a file
    # was actually written, so the reload further down never touches an
    # unbound name or a checkpoint from a different run.
    Filename = "weightdecay_" + str(params["weight_decay"]) + ".pth.tar"
    checkpoint_saved = False

    train_loss_list = []
    val_loss_list = []
    epoch = -1  # keeps the CSV export below defined even if EPOCHS is 0
    for epoch in range(EPOCHS):
        train_loss = eng.train(data_loader=train_loader)
        val_loss = eng.evaluation(data_loader=test_loader)
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)
        print(f"Epoch: {epoch}, Train Loss: {train_loss}, Validation Loss: {val_loss}")
        if val_loss < best_loss:
            best_loss = val_loss
            best_train_loss = train_loss
            early_stopping_counter = 0
            # Checkpoints are only written for improvements after epoch 40.
            if save_model and epoch > 40:
                checkpoint = {
                    "state_dict": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                }
                save_checkpoint(checkpoint, filename=Filename)
                checkpoint_saved = True
        elif early_stopping:
            # Non-improving epochs only count when early stopping is enabled.
            early_stopping_counter += 1
            if early_stopping_counter > early_stopping_iter:
                break

    # Hyperparameters reported by both exports.
    export_info = {
        "input_size": params["input_size"],
        "hidden_size_FNN": params["hidden_size_FFN"],
        "learning_rate": params["learning_rate"],
        "data_aug": params["data_augmentation"],
        "dropout_rate": params["dropout_rate"],
        "weight_decay": params["weight_decay"],
    }

    if plot:
        for x, _ in test_loader:
            x = x.to(DEVICE)
            # Run the model once for the batch (not once per plotted image);
            # gradients are not needed for plotting.
            with torch.no_grad():
                batch_bboxes = cellboxes_to_boxes(model(x), S=SPLIT_SIZE)
            # Plot at most five images, fewer if the batch is smaller.
            for idx in range(min(5, len(x))):
                bboxes = non_max_suppression(batch_bboxes[idx], iou_threshold=0.5, threshold=0.4,
                                             box_format="midpoint")
                plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
            break
        pred_boxes, target_boxes = get_bboxes(test_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        print("AP:{}".format(mean_avg_prec))
        txt_writer(filename="tmp_export.txt", train_loss_list=train_loss_list, val_loss_list=val_loss_list,
                   **export_info)
    else:
        if checkpoint_saved:
            load_checkpoint(torch.load(Filename), model, optimizer)
        elif save_model:
            print("=> No checkpoint was saved during this run; evaluating the final model state")
        pred_boxes, target_boxes = get_bboxes(test_loader, model, iou_threshold=0.5, threshold=0.4, S=SPLIT_SIZE)
        mean_avg_prec = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5, box_format="midpoint")
        csv_writer(filename="tmp_export.csv", train_loss=best_train_loss, val_loss=best_loss,
                   AP=mean_avg_prec.item(), epoch_counter=epoch, **export_info)
        txt_writer(filename="tmp_export.txt", train_loss_list=train_loss_list, val_loss_list=val_loss_list,
                   **export_info)

    return best_loss

In [None]:
def main():
    """Search the weight decay with a TPE sampler, then retrain the best trial.

    Runs 50 trials, exports the trial table to ``export_dataframe.csv``,
    prints the best trial, retrains it with checkpointing enabled and early
    stopping disabled, and prints the elapsed time.
    """
    tic = time.perf_counter()

    sampler = optuna.samplers.TPESampler()
    study = optuna.create_study(sampler=sampler, direction="minimize")
    study.optimize(objective, n_trials=50)

    trials = study.trials_dataframe()
    trials = trials.drop(['state', 'datetime_start', 'datetime_complete'], axis=1)
    trials.to_csv(r'export_dataframe.csv', index=False, header=True)

    print("best trial:")
    best = study.best_trial
    print(best.value)
    print(best.params)

    # Final run with the best parameters: full epoch budget, checkpoints on.
    final_loss = run_training(best.params, save_model=True, plot=False, early_stopping=False)
    print(final_loss)

    toc = time.perf_counter()
    print(f"Runtime: {toc - tic:0.4f} seconds")

##### Frames per second (FPS) messen

In [None]:
def objective(trial):
    """Objective used for the frames-per-second measurement.

    Collects the model hyperparameters from ``trial`` and forwards them to
    ``run_training`` with ``ap=True``.
    """
    augmentation_modes = ["no_aug", "gray", "h_flip", "v_flip", "hv_flip", "hv_flip_gray"]
    options_by_name = (
        ("input_size", [192]),
        ("hidden_size_FFN", [496]),
        ("learning_rate", [0.0001]),
        ("data_augmentation", augmentation_modes),
        ("dropout_rate", [0.0]),
        ("weight_decay", [0.2]),
    )
    params = {name: trial.suggest_categorical(name, options) for name, options in options_by_name}

    return run_training(params, save_model=True, plot=False, early_stopping=True, ap=True)


"""
Function wrapper to run the Hyperparameteroptimization
"""


def run_training(params, save_model=False, plot=False, early_stopping=True, ap=True):
    """Measure the inference throughput (FPS) of a saved model on the test set.

    Despite its name, this variant performs no training. With ``ap`` true it
    loads the checkpoint ``weightdecay_0_2.pth.tar``, runs the model and
    non-max suppression over every test batch, prints the elapsed time, the
    number of processed images and the resulting frames per second, and then
    terminates the interpreter via ``sys.exit()``.

    Args:
        params: Mapping with the keys ``"input_size"`` (48, 96 or 192),
            ``"hidden_size_FFN"``, ``"learning_rate"``, ``"dropout_rate"``
            and ``"weight_decay"``.
        save_model: Unused; kept for signature compatibility with the other
            ``run_training`` variants.
        plot: Unused; kept for signature compatibility.
        early_stopping: Unused; kept for signature compatibility.
        ap: If true, run the measurement described above.

    Returns:
        ``np.inf`` when ``ap`` is false (no loss is measured here). When
        ``ap`` is true the function does not return.
    """
    import sys
    import time

    target_size_dic = {48: (48, 192), 96: (96, 384), 192: (192, 768)}

    trans_params_test = {
        "target_size": target_size_dic[params["input_size"]],
        "pad2square": False,
        "p_hflip": 0.0,
        "p_vflip": 0.0,
        "grayscale": False,
    }

    # Only the test data is needed for the throughput measurement.
    test_dataset = BeeDataset("HelperFiles/PathImages_test.txt", S=SPLIT_SIZE,
                              transform=transformer, trans_params=trans_params_test)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS,
                             pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)

    architecture = architecture_fn(params=params)

    model = Yolov1(hidden_size_FFN=params["hidden_size_FFN"], architecture_config=architecture,
                   dropout_rate=params["dropout_rate"], split_size=SPLIT_SIZE, num_boxes=2, num_classes=1).to(DEVICE)
    # The optimizer is only created because load_checkpoint restores its state too.
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])

    # Nothing is trained in this variant, so there is no measured loss to return.
    best_loss = np.inf

    if ap:
        load_checkpoint(torch.load("weightdecay_0_2.pth.tar"), model, optimizer)
        # Inference mode: the model is freshly constructed and would otherwise
        # still be in training mode.
        model.eval()

        ################################################################################################
        ##### Important Part ###########################################################################
        ################################################################################################
        image_count = 0
        tic = time.perf_counter()
        with torch.no_grad():
            for x, _ in test_loader:
                x = x.to(DEVICE)
                # One forward pass per batch, then non-max suppression per image.
                batch_bboxes = cellboxes_to_boxes(model(x), S=SPLIT_SIZE)
                for image_bboxes in batch_bboxes:
                    non_max_suppression(image_bboxes, iou_threshold=0.5, threshold=0.4, box_format="midpoint")
                # Count the images actually processed instead of assuming a batch size.
                image_count += len(x)
        toc = time.perf_counter()
        print(f"Time {toc - tic:0.4f} seconds")
        print(image_count)
        print(f"FPS: {image_count / (toc - tic)}")
        sys.exit()
        ###############################################################################################
    return best_loss


def main():
    """Drive the FPS measurement through a single-point grid search.

    NOTE(review): the ``objective`` in this cell calls ``run_training`` with
    ``ap=True``, which ends in ``sys.exit()``; the statements after
    ``study.optimize`` are therefore presumably never reached - confirm.
    """
    tic = time.perf_counter()

    # Every parameter has exactly one value, so the grid is a single point.
    grid = {
        "input_size": [192],
        "hidden_size_FFN": [496],
        "learning_rate": [0.0001],
        "data_augmentation": ["hv_flip"],
        "dropout_rate": [0.0],
        "weight_decay": [0.2],
    }
    sampler = optuna.samplers.GridSampler(grid)
    study = optuna.create_study(sampler=sampler, direction="minimize")
    study.optimize(objective, n_trials=1)

    trials = study.trials_dataframe()
    trials = trials.drop(['state', 'datetime_start', 'datetime_complete'], axis=1)
    trials.to_csv(r'export_dataframe.csv', index=False, header=True)

    print("best trial:")
    best = study.best_trial
    print(best.value)
    print(best.params)

    # Retraining the best configuration is disabled in this variant:
    #score = run_training(best.params, save_model=True, plot=False, early_stopping=False)
    #print(score)

    toc = time.perf_counter()
    print(f"Runtime: {toc - tic:0.4f} seconds")