In [1]:
import os
import json
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

from torchvision.models.detection import SSD300_VGG16_Weights
from torchvision.models.vgg import VGG16_Weights
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision.models.detection import ssd
from PIL import Image
from tqdm import tqdm

import PIL.Image
import torchvision.transforms.functional as F

In [2]:
root_dir = r"C:\Users\Domi\Documents\GitHub\Deep-Vision-sta\Datasets\Face Mask Detection Dataset\Medical mask\Medical mask\Medical Mask"


MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
NORMALIZE = False
BATCH_SIZE = 1
NUM_EPOCHS = 10

RESIZE = (300, 300)
ROUND_RESIZED_BBOXES = False
LEARNING_RATE = 0.00001
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
NESTEROV = True
TEST_SIZE = 0.2

ALLOWED_LABELS = [3, 4, 5, 6]
class_mapping = {
    "hijab_niqab": 0,
    "mask_colorful": 1,
    "mask_surgical": 2,
    "face_no_mask": 3,
    "face_with_mask_incorrect": 4,
    "face_with_mask": 5,
    "face_other_covering": 6,
    "scarf_bandana": 7,
    "balaclava_ski_mask": 8,
    "face_shield": 9,
    "other": 10,
    "gas_mask": 11,
    "turban": 12,
    "helmet": 13,
    "sunglasses": 14,
    "eyeglasses": 15,
    "hair_net": 16,
    "hat": 17,
    "goggles": 18,
    "hood": 19
}

## DATACLASSES

In [3]:
class MaskDetectionDataset(Dataset):
    def __init__(self, root_dir, target_size=(600, 900)):
        self.root_dir = root_dir
        self.annotations = []
        self.target_size = target_size
        self.load_annotations()

    def load_annotations(self):
        annotation_files = os.listdir(f"{self.root_dir}/annotations")
        for file_name in annotation_files:
            with open(f"{self.root_dir}/annotations/{file_name}", "r") as f:
                annotation_data = json.load(f)
                annotations = annotation_data["Annotations"]
                file_name = annotation_data["FileName"]
                self.annotations.append((annotations, file_name))
                # Check if the boxes are valid
                for annotation in annotations:
                    boxes = annotation["BoundingBox"]
                    if boxes[0] >= boxes[2] or boxes[1] >= boxes[3]:
                        print("Invalid bounding box coordinates in file:", file_name)
                        break

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        annotations = self.annotations[idx][0]
        file_name = self.annotations[idx][1]
        image_path = f"{self.root_dir}/images/{file_name}"
        image = PIL.Image.open(image_path).convert("RGB")
        original_image_width, original_image_height = image.size
        image = F.resize(image, self.target_size)
        image = F.to_tensor(image)
        print(image)
        if NORMALIZE:
            image = F.normalize(image, MEAN, STD)
        print(image)

        boxes = []
        labels = []
        for annotation in annotations:
            box = annotation["BoundingBox"]
            if box[0] < box[2] and box[1] < box[3]:
                # Resize the bounding box coordinates
                box_resized = [
                    box[0] * self.target_size[0] / original_image_width,
                    box[1] * self.target_size[1] / original_image_height,
                    box[2] * self.target_size[0] / original_image_width,
                    box[3] * self.target_size[1] / original_image_height
                ]
                boxes.append(box_resized)
                class_name = annotation["classname"]
                # Get the class label based on the class name
                class_label = self.get_class_label(class_name)
                labels.append(class_label)

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])

        return image, target

    
    def get_class_label(self, class_name):
        return class_mapping.get(class_name, -1)  # Return -1 if class_name is not found


## Model Setup

In [4]:
import utils

def setup_model(batch_size, lr, momentum, weight_decay, nesterov, test_size, weights_backbone=None, weights=None,):
    # Modell initialisieren
    model = ssd.ssd300_vgg16(weights=weights, weights_backbone=weights_backbone)

    # Daten in Trainings- und Testdaten aufteilen    
    dataset = MaskDetectionDataset(root_dir, RESIZE)
    train_size = int((1-test_size) * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    # Trainingsdaten vorbereiten und DataLoader erstellen
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=utils.collate_fn)

    # Testdaten vorbereiten und DataLoader erstellen
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, collate_fn=utils.collate_fn)

    # Optimizer erstellen
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, nesterov=nesterov)    

    return model, train_dataloader, test_dataloader, optimizer

## Draw Images

In [5]:
def visualize_sample(dataloader, x):
    # Rufen Sie das x-te Element aus dem Dataloader ab
    x -=1 
    images, annotations = next(iter(dataloader))
    image = transforms.ToPILImage(images[x])
    boxes = annotations[x]['annotations']
    labels = [box['label'] for box in boxes]
    print(labels)
    # Erstellen Sie eine neue Figur und Achse
    fig, ax = plt.subplots(1)
    # Zeigen Sie das Bild in der Achse an
    ax.imshow(image.permute(1, 2, 0))
    # Iterieren Sie über die Bounding Boxes und zeichnen Sie sie als Rechtecke in der Achse
    for box, label in (boxes, labels):
        x_min, y_min, x_max, y_max = box['bbox']
        width = x_max - x_min
        height = y_max - y_min
        #print(width, height)
        rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        
        #ax.text(x_min, y_min, f"Label: {class_mapping[label]}", color='r', fontsize=8, bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))
        ax.text(x_min, y_min, f"{label}", color='r', fontsize=8, bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))
    # Zeigen Sie die visualisierten Bounding Boxes an
    plt.show()


def draw_image_with_boxes(image, target):    
    # Unnormalize the image
    if NORMALIZE:
        image = transforms.Normalize(mean=[-m / s for m, s in zip(MEAN, STD)], std=[1 / s for s in STD])(image)
    image_pil = transforms.ToPILImage()(image)

    # Kopiere die Bounding-Box-Koordinaten auf die CPU und konvertiere sie in numpy-Arrays
    boxes = target["boxes"]
    labels = target["labels"]
    boxes = boxes.cpu().numpy()
    labels = labels.cpu().numpy()
        
    # Erstelle eine neue Figur und Achse
    fig, ax = plt.subplots(1)    
    # Zeige das Bild in der Achse
    ax.imshow(image_pil)
    print(target)
    
    # Iteriere über die Bounding-Boxen und zeichne sie als Rechtecke in der Achse
    for box, label in zip(boxes, labels):
        for label in ALLOWED_LABELS:
            x_min, y_min, x_max, y_max = box
            width = x_max - x_min
            height = y_max - y_min
            rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            ax.text(x_min, y_min, f"Label: {class_mapping[label]}", color='r', fontsize=8, bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))
    # Zeige die Achse
    plt.show()


def visualize_prediction(images, model, confidence_threshold, counter = 10):
    # Set the model to evaluation mode
    model.eval() 
    # Perform inference
    with torch.no_grad():
        predictions = model(images)
        #good = torch.argwhere(scores > confidence_threshold)

    for image, prediction in zip(images, predictions):
        if NORMALIZE:
            # Unnormalize the image
            image = transforms.Normalize(mean=[-m / s for m, s in zip(MEAN, STD)], std=[1 / s for s in STD])(image)
        image_pil = transforms.ToPILImage()(image)

        # Get the predicted bounding boxes, labels, and scores
        boxes = prediction['boxes'].cpu().numpy()
        labels = prediction['labels'].cpu().numpy()
        scores = prediction['scores'].cpu().numpy()

        # Visualize the image and predicted bounding boxes
        fig, ax = plt.subplots(1)
        ax.imshow(image_pil)
        allowed_labels = [3, 4, 5, 6]

        for box, label, score in zip(boxes, labels, scores):
            if label in allowed_labels and score > confidence_threshold and counter%10 == 0:
                x_min, y_min, x_max, y_max = box
                width = x_max - x_min
                height = y_max - y_min
                class_name = list(class_mapping.keys())[list(class_mapping.values()).index(label)]
                rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
                ax.add_patch(rect)
                ax.text(x_min, y_min, f"{class_name}: {score}", color='r', fontsize=8, bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))
                counter=0
        plt.show()
    model.train()

In [6]:
#visualize_sample(train_dataloader, 1)

## Evaluation

In [7]:
%matplotlib inline

def plot_loss(train_losses):
    plt.plot(train_losses)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.show()

def plot_metrics(ap_values, ar_values):
    # Convert the arrays to numpy arrays for easier plotting
    ap_values = np.array(ap_values)
    ar_values = np.array(ar_values)

    iou_thresholds_available = ["0.50:0.95", "0.50", "0.75", "0.50:0.95_small", "0.50:0.95_medium", "0.50:0.95_large"]

    # Plot the average precisions over epochs
    plt.figure(figsize=(10, 5))
    for i, iou_thresh in enumerate(iou_thresholds_available):
        plt.plot(ap_values[:, i], label=f"IoU={iou_thresh}")
    plt.xlabel("Epochs")
    plt.ylabel("Average Precision")
    plt.title("Average Precision vs. Epochs")
    plt.legend()
    plt.show()

    # Plot the average recalls over epochs
    plt.figure(figsize=(10, 5))
    for i, iou_thresh in enumerate(iou_thresholds_available):
        plt.plot(ar_values[:, i], label=f"IoU={iou_thresh}")
    plt.xlabel("Epochs")
    plt.ylabel("Average Recall")
    plt.title("Average Recall vs. Epochs")
    plt.legend()
    plt.show()

def visualize_prediction(images, model, confidence_threshold, device, allowed_labels = [3, 4, 5, 6]):
    # Set the model to evaluation mode
    model.eval()

    # Preprocess the images
    ims = list(image.to(device) for image in images)
    # Perform inference
    with torch.no_grad():
        predictions = model(ims)
        #good = torch.argwhere(scores > confidence_threshold)
        #print(predictions)

    for image, prediction in zip(images, predictions):
        if NORMALIZE:
            # Unnormalize the image
            image = F.normalize(mean=[-m / s for m, s in zip(MEAN, STD)], std=[1 / s for s in STD])(image)
        # Convert the image tensor to a PIL Image
        image_pil = transforms.ToPILImage()(image)

        # Get the predicted bounding boxes, labels, and scores
        boxes = prediction['boxes'].cpu().numpy()
        labels = prediction['labels'].cpu().numpy()
        scores = prediction['scores'].cpu().numpy()

        # Visualize the image and predicted bounding boxes
        fig, ax = plt.subplots(1)
        ax.imshow(image_pil)

        for box, label, score in zip(boxes, labels, scores):
            if label in allowed_labels and score > confidence_threshold:
                x_min, y_min, x_max, y_max = box
                width = x_max - x_min
                height = y_max - y_min
                class_name = list(class_mapping.keys())[list(class_mapping.values()).index(label)]
                rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
                ax.add_patch(rect)
                ax.text(x_min, y_min, f"{class_name}", color='r', fontsize=8, bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))

        plt.show()
    model.train()

## Training

In [8]:
from engine import train_one_epoch, evaluate

def start_training(model, train_dataloader, test_dataloader, optimizer, device, num_epochs=2):
    # # Trainingsschleife
    model.to(device)

    # Define empty arrays to collect metrics
    ap_values = []
    ar_values = []
    losses = []

    for epoch in range(num_epochs):
        # training for one epoch
        train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=1, losses_out=losses)

        # update the learning rate
        # lr_scheduler.step()
        
        # evaluate on the test dataset        
        evaluator = evaluate(model, test_dataloader, device=device)

        # Extract the metrics from the evaluator
        iou_thresholds = evaluator.coco_eval['bbox'].params.iouThrs
        average_precisions = evaluator.coco_eval['bbox'].stats[:6]
        average_recalls = evaluator.coco_eval['bbox'].stats[6:]

        # Append the metrics to the arrays
        ap_values.append(average_precisions)
        ar_values.append(average_recalls)
    
    return ap_values, ar_values, losses

In [9]:
import itertools

# Definiere verschiedene Werte für die Hyperparameter
normalize_options = [True, False]
batch_size_options = [1, 2, 4, 8, 16]
momentum_options = [0.85, 0.9, 0.95]
nesterov_options = [True, False]

# Erzeuge alle möglichen Kombinationen der Hyperparameter
hyperparameter_combinations = list(itertools.product(normalize_options, batch_size_options, momentum_options, nesterov_options))

best_accuracy = 0.0
best_hyperparameters = None

# Durchlaufe alle Kombinationen und trainiere das Modell mit jeder Kombination
for normalize, batch_size, momentum, nesterov_options in hyperparameter_combinations:
    # Setze die Hyperparameter auf die aktuellen Werte
    NORMALIZE = normalize
    BATCH_SIZE = batch_size
    MOMENTUM = momentum
    NESTEROV = nesterov_options
    print(f"Normalize: {normalize}, Batch Size: {batch_size}, Momentum: {momentum}, Nesterov: {nesterov_options}")
    # Erstelle das Modell und den Optimizer mit den aktuellen Hyperparametern
    model, train_dataloader, test_dataloader, optimizer = setup_model(BATCH_SIZE, 
                                                       weights_backbone=VGG16_Weights.DEFAULT, 
                                                       weights=SSD300_VGG16_Weights.DEFAULT,                                                        
                                                       lr=LEARNING_RATE,
                                                       momentum=MOMENTUM,
                                                       weight_decay=WEIGHT_DECAY,
                                                       nesterov=NESTEROV,
                                                       test_size=TEST_SIZE)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    ap_values, ar_values, losses = start_training(model, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=NUM_EPOCHS)
    plot_loss(losses)
    plot_metrics(ap_values, ar_values)

    # Bewerte die Leistung des Modells (z. B. Genauigkeit)
    accuracy = np.max(ar_values)

    # Speichere die besten Hyperparameter
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_hyperparameters = (normalize, batch_size, momentum)

print("Beste Hyperparameter: ", best_hyperparameters)


Normalize: True, Batch Size: 1, Momentum: 0.85, Nesterov: True
tensor([[[0.3098, 0.3098, 0.3098,  ..., 0.1922, 0.1922, 0.1922],
         [0.3098, 0.3098, 0.3098,  ..., 0.1882, 0.1882, 0.1882],
         [0.3098, 0.3098, 0.3098,  ..., 0.1882, 0.1882, 0.1882],
         ...,
         [0.5647, 0.1843, 0.1137,  ..., 0.1333, 0.1137, 0.0941],
         [0.6627, 0.3490, 0.1490,  ..., 0.1373, 0.1255, 0.1020],
         [0.7098, 0.6118, 0.3294,  ..., 0.1333, 0.1412, 0.1098]],

        [[0.3608, 0.3608, 0.3608,  ..., 0.1961, 0.1961, 0.1961],
         [0.3608, 0.3608, 0.3608,  ..., 0.1922, 0.1922, 0.1922],
         [0.3608, 0.3608, 0.3608,  ..., 0.1922, 0.1922, 0.1922],
         ...,
         [0.4745, 0.1059, 0.0667,  ..., 0.1294, 0.1098, 0.0902],
         [0.5412, 0.2510, 0.0824,  ..., 0.1333, 0.1216, 0.0980],
         [0.5765, 0.5020, 0.2549,  ..., 0.1294, 0.1373, 0.1059]],

        [[0.3843, 0.3843, 0.3843,  ..., 0.1765, 0.1765, 0.1765],
         [0.3843, 0.3843, 0.3843,  ..., 0.1725, 0.1725, 0.17

KeyboardInterrupt: 

print("Beste Genauigkeit: ", best_accuracy)
print("Beste Hyperparameter: ", best_hyperparameters)

In [None]:
model, train_dataloader, test_dataloader, optimizer = setup_model(BATCH_SIZE, 
                                                       weights_backbone=VGG16_Weights.DEFAULT, 
                                                       weights=SSD300_VGG16_Weights.DEFAULT,                                                        
                                                       lr=LEARNING_RATE,
                                                       momentum=MOMENTUM,
                                                       weight_decay=WEIGHT_DECAY,
                                                       nesterov=NESTEROV,
                                                       test_size=TEST_SIZE)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(model, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=NUM_EPOCHS)

In [None]:
plot_loss(losses)
plot_metrics(ap_values, ar_values)

In [None]:
samples = next(iter(test_dataloader))
visualize_prediction(samples[0], model, 0.5, [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19])