In [41]:
import os
import json
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

from torchvision.models.detection import SSD300_VGG16_Weights
from torchvision.models.vgg import VGG16_Weights
from torch.utils.data import Dataset
from torchvision.models.detection import ssd
from PIL import Image
from torchvision.models.detection import SSDLite320_MobileNet_V3_Large_Weights
from torchvision.models import MobileNet_V3_Large_Weights
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from engine import train_one_epoch, evaluate

import utils
import PIL.Image
import torchvision.transforms.functional as F
import xml.etree.ElementTree as ET
import warnings

In [42]:
# Dataset root directories.
# NOTE(review): these are absolute paths on one developer's machine; adjust them
# before running the notebook anywhere else.
root_dir_json = r"C:\Users\Domi\Documents\GitHub\Deep-Vision-sta\Datasets\Face Mask Detection Dataset\Medical mask\Medical mask\Medical Mask"
root_dir_xml = r"C:\Users\Domi\Documents\GitHub\Deep-Vision-sta\Datasets\Kaggle Face Mask Detection Full"


# Hyperparameter combinations noted down from earlier runs:
#Normalize: True, Batch Size: 1, Momentum: 0.9, Nesterov: True
#Normalize: True, Batch Size: 1, Momentum: 0.95, Nesterov: True
#Normalize: True, Batch Size: 1, Momentum: 0.95, Nesterov: False
#Normalize: True, Batch Size: 2, Momentum: 0.95, Nesterov: True


# Per-channel mean/std used by the dataset classes when NORMALIZE is True
# (presumably the ImageNet statistics - TODO confirm).
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
# Read as a module-level global by the datasets and by the drawing helpers.
NORMALIZE = False
BATCH_SIZE = 1
NUM_EPOCHS = 10

# Target image size; setup_model passes this global on as target_size.
RESIZE = (300, 300)
# NOTE(review): ROUND_RESIZED_BBOXES and WEIGHT_DECAY are not referenced by any
# code visible in this notebook.
ROUND_RESIZED_BBOXES = False
LEARNING_RATE = 0.00001
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
NESTEROV = True
# Fraction of the dataset that setup_model puts into the test split.
TEST_SIZE = 0.2

# Ids 3-6 are the four face_* entries of CLASS_MAPPING below.
ALLOWED_LABELS = [3, 4, 5, 6]
# Class name -> integer label for all 20 annotation classes.
# NOTE(review): id 0 is a real class here, while the mappings defined in the
# training cells reserve 0 for "background"/"empty".
CLASS_MAPPING = {
    "hijab_niqab": 0,
    "mask_colorful": 1,
    "mask_surgical": 2,
    "face_no_mask": 3,
    "face_with_mask_incorrect": 4,
    "face_with_mask": 5,
    "face_other_covering": 6,
    "scarf_bandana": 7,
    "balaclava_ski_mask": 8,
    "face_shield": 9,
    "other": 10,
    "gas_mask": 11,
    "turban": 12,
    "helmet": 13,
    "sunglasses": 14,
    "eyeglasses": 15,
    "hair_net": 16,
    "hat": 17,
    "goggles": 18,
    "hood": 19
}

# Dataset classes

## Dataset class for JSON annotations

In [43]:
class JsonDataset(Dataset):
    """Detection dataset that reads one JSON annotation file per image.

    Layout, as implied by the paths built below: annotation files live in
    ``ROOT/annotations`` and images in ``ROOT/images``. Every annotation file
    provides a ``"FileName"`` and an ``"Annotations"`` list whose entries carry
    a ``"classname"`` and a ``"BoundingBox"`` read as
    ``[x_min, y_min, x_max, y_max]`` in pixels of the original image.

    Args:
        root_dir: Dataset root containing ``annotations/`` and ``images/``.
        class_mapping: Dict mapping class name -> integer label. Annotations
            whose class name is not a key of this dict are discarded.
        only_single_faces: Drop images that contain more than one face annotation.
        only_multiple_faces: Drop images that contain fewer than two face annotations.
        target_size: ``(height, width)`` every image is resized to.

    Raises:
        ValueError: If ``only_single_faces`` and ``only_multiple_faces`` are both set.
    """

    # Class names that count as "a face" for the single/multiple face filters.
    _FACE_CLASSES = ("face_no_mask", "face_with_mask_incorrect", "face_with_mask", "face_other_covering")

    def __init__(self, root_dir, class_mapping, only_single_faces=False, only_multiple_faces=False, target_size=(300,300)):
        self.root_dir = root_dir
        self.class_label_map = class_mapping
        self.annotations = []
        self.target_size = target_size
        self.only_single_faces = only_single_faces
        self.only_multiple_faces = only_multiple_faces
        if(self.only_single_faces and self.only_multiple_faces):
            raise ValueError("only_single_faces and only_multiple_faces cannot both be true\n Only one of them can be true")

        self.load_annotations(self.class_label_map.values())

    def load_annotations(self, allowed_classes):
        """Fill ``self.annotations`` with ``(annotations, image_file_name)`` tuples.

        Args:
            allowed_classes: Iterable of integer labels; only annotations whose
                class maps to one of them are kept.
        """
        # Loop-invariant: resolve the allowed labels to class names once.
        allowed_labels = set(allowed_classes)
        allowed_classnames = {name for name, label in self.class_label_map.items() if label in allowed_labels}

        annotation_dir = os.path.join(self.root_dir, "annotations")
        # Sorted so that sample indices do not depend on directory listing order.
        for annotation_file in sorted(os.listdir(annotation_dir)):
            # Binary mode lets json.load detect the file encoding itself instead
            # of relying on the platform default.
            with open(os.path.join(annotation_dir, annotation_file), "rb") as f:
                annotation_data = json.load(f)

            image_name = annotation_data["FileName"]
            annotations = [a for a in annotation_data["Annotations"] if a["classname"] in allowed_classnames]

            # An image without any usable annotation yields an empty target;
            # skip it in every filter mode (previously only in the default mode).
            if not annotations:
                #warnings.warn(f"File {image_name} has no annotations")
                continue

            face_count = sum(1 for a in annotations if a["classname"] in self._FACE_CLASSES)
            if self.only_single_faces and face_count > 1:
                continue
            if self.only_multiple_faces and face_count <= 1:
                continue

            # Report degenerate boxes once per file; __getitem__ drops them.
            if any(not self._is_valid_box(a["BoundingBox"]) for a in annotations):
                print("Invalid bounding box coordinates in file:", image_name)

            self.annotations.append((annotations, image_name))

    @staticmethod
    def _is_valid_box(box):
        """Return True when the box has positive width and height."""
        return box[0] < box[2] and box[1] < box[3]

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``image`` is a float tensor resized to ``target_size`` (and normalised
        with MEAN/STD when the module-level NORMALIZE flag is set). ``target``
        is a dict with ``boxes`` (N x 4, scaled to the resized image),
        ``labels``, ``area``, ``iscrowd`` and ``image_id``.
        """
        annotations, file_name = self.annotations[idx]
        image_path = f"{self.root_dir}/images/{file_name}"
        # Context manager closes the underlying file; convert() returns a loaded copy.
        with PIL.Image.open(image_path) as image_file:
            image = image_file.convert("RGB")
        original_image_width, original_image_height = image.size
        image = F.resize(image, self.target_size)
        image = F.to_tensor(image)
        if NORMALIZE:
            image = F.normalize(image, MEAN, STD)

        # F.resize reads a size pair as (height, width), so x scales with
        # target_size[1] and y with target_size[0].
        target_height, target_width = self.target_size

        boxes = []
        labels = []
        for annotation in annotations:
            box = annotation["BoundingBox"]
            if not self._is_valid_box(box):
                continue
            boxes.append([
                box[0] * target_width / original_image_width,
                box[1] * target_height / original_image_height,
                box[2] * target_width / original_image_width,
                box[3] * target_height / original_image_height,
            ])
            labels.append(self.get_class_label(annotation["classname"]))

        # reshape keeps the (N, 4) layout even when no valid box is left, so the
        # column indexing below cannot fail on an empty sample.
        boxes = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])

        return image, target

    def get_class_label(self, class_name):
        """Map a class name to its integer label; -1 if the name is unknown."""
        return self.class_label_map.get(class_name, -1)  # Return -1 if class_name is not found

## Dataset class for XML annotations

In [44]:
class XMLDataset(Dataset):
    """Detection dataset that reads one XML annotation file per image.

    Layout, as implied by the paths built below: annotation files live in
    ``ROOT/annotations``; images in ``ROOT/images`` or, when
    ``use_dark_images`` is set, in ``ROOT/dark/images``. Each XML file provides
    a ``filename`` element and ``object`` elements with a ``name`` and a
    ``bndbox`` holding integer ``xmin``/``ymin``/``xmax``/``ymax``.

    Args:
        root_dir: Dataset root directory.
        class_mapping: Dict mapping class name -> integer label.
        target_size: ``(height, width)`` every image is resized to.
        use_dark_images: Load images from ``dark/images`` instead of ``images``.

    NOTE(review): unlike JsonDataset, objects whose class name is missing from
    ``class_mapping`` are kept and receive label -1 - confirm this is intended
    before training on them.
    """

    def __init__(self, root_dir, class_mapping, target_size=RESIZE, use_dark_images=False):
        self.root_dir = root_dir
        self.class_mapping = class_mapping
        self.annotations = []
        self.target_size = target_size
        self.use_dark_images = use_dark_images
        self.load_annotations()

    def load_annotations(self):
        """Fill ``self.annotations`` with ``(annotations, image_file_name)`` tuples."""
        annotation_dir = os.path.join(self.root_dir, "annotations")
        # Sorted so that sample indices do not depend on directory listing order.
        for annotation_file in sorted(os.listdir(annotation_dir)):
            # Handing the path to the parser (instead of a text-mode file
            # object) lets it honour the encoding declared inside the XML file.
            root = ET.parse(os.path.join(annotation_dir, annotation_file)).getroot()
            annotations = []
            for obj in root.findall('object'):
                bndbox = obj.find('bndbox')
                annotations.append({
                    "BoundingBox": [
                        int(bndbox.find('xmin').text),
                        int(bndbox.find('ymin').text),
                        int(bndbox.find('xmax').text),
                        int(bndbox.find('ymax').text),
                    ],
                    "classname": obj.find('name').text,
                })
            image_name = root.find('filename').text
            self.annotations.append((annotations, image_name))

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``image`` is a float tensor resized to ``target_size`` (and normalised
        with MEAN/STD when the module-level NORMALIZE flag is set). ``target``
        is a dict with ``boxes`` (N x 4, scaled to the resized image),
        ``labels``, ``area``, ``iscrowd`` and ``image_id``.
        """
        annotations, file_name = self.annotations[idx]
        image_path = f"{self.root_dir}/images/{file_name}"
        if(self.use_dark_images):
            image_path = f"{self.root_dir}/dark/images/{file_name}"
        # Context manager closes the underlying file; convert() returns a loaded copy.
        with PIL.Image.open(image_path) as image_file:
            image = image_file.convert("RGB")
        original_image_width, original_image_height = image.size
        image = F.resize(image, self.target_size)
        image = F.to_tensor(image)
        if NORMALIZE:
            image = F.normalize(image, MEAN, STD)

        # F.resize reads a size pair as (height, width), so x scales with
        # target_size[1] and y with target_size[0].
        target_height, target_width = self.target_size

        boxes = []
        labels = []
        for annotation in annotations:
            box = annotation["BoundingBox"]
            # Skip degenerate boxes (zero or negative width/height).
            if box[0] < box[2] and box[1] < box[3]:
                boxes.append([
                    box[0] * target_width / original_image_width,
                    box[1] * target_height / original_image_height,
                    box[2] * target_width / original_image_width,
                    box[3] * target_height / original_image_height,
                ])
                labels.append(self.get_class_label(annotation["classname"]))

        # reshape keeps the (N, 4) layout even for an image without valid boxes,
        # so the column indexing below cannot fail.
        boxes = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        target["iscrowd"] = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])

        return image, target

    def get_class_label(self, class_name):
        """Map a class name to its integer label; -1 if the name is unknown."""
        return self.class_mapping.get(class_name, -1)  # Return -1 if class_name is not found

# Draw Images

In [45]:
def draw_image_with_boxes(image, target, class_mapping):
    """Show an image tensor with its ground-truth boxes and class names.

    Args:
        image: Image tensor as produced by the dataset classes (normalised
            with MEAN/STD when the module-level NORMALIZE flag is set).
        target: Dict with ``"boxes"`` (rows of x_min, y_min, x_max, y_max) and
            ``"labels"`` tensors.
        class_mapping: Dict mapping class name -> integer label; used in
            reverse to print the class name next to each box.
    """
    # Undo the normalisation: x * std + mean, expressed as a Normalize call.
    if NORMALIZE:
        image = transforms.Normalize(mean=[-m / s for m, s in zip(MEAN, STD)], std=[1 / s for s in STD])(image)
        # Rounding can leave values marginally outside [0, 1]; clamp so the
        # conversion to 8-bit pixels cannot wrap around.
        image = image.clamp(0, 1)
    image_pil = transforms.ToPILImage()(image)

    # Move boxes and labels to the CPU and convert them to numpy arrays.
    boxes = target["boxes"].cpu().numpy()
    labels = target["labels"].cpu().numpy()

    # Reverse lookup built once; setdefault keeps the first name per label,
    # matching the previous list.index() behaviour for duplicate labels.
    label_names = {}
    for name, label_id in class_mapping.items():
        label_names.setdefault(label_id, name)

    # Create a new figure/axis and show the image in it.
    fig, ax = plt.subplots(1)
    ax.imshow(image_pil)

    # Draw every bounding box as a rectangle with a text label.
    for box, label in zip(boxes, labels):
        x_min, y_min, x_max, y_max = box
        width = x_max - x_min
        height = y_max - y_min
        # Labels missing from the mapping (e.g. the -1 fallback of the dataset
        # classes) are shown as "unknown" instead of raising.
        class_name = label_names.get(int(label), "unknown")
        rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        ax.text(x_min, y_min, f"{class_name}, {label}", color='r', fontsize=8, bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))

    # Display the figure.
    plt.show()

# Model Setup

In [46]:
def setup_model(batch_size, lr, momentum, nesterov, test_size, dataformat, model, class_mapping, use_dark_images=False):
    """Build the detector, the train/test data loaders and the SGD optimizer.

    Args:
        batch_size: Batch size for both data loaders.
        lr: Learning rate for SGD.
        momentum: Momentum for SGD.
        nesterov: Whether SGD uses Nesterov momentum.
        test_size: Fraction of the dataset put into the test split.
        dataformat: ``'json'`` (JsonDataset on root_dir_json) or ``'xml'``
            (XMLDataset on root_dir_xml).
        model: ``'ssd300'`` (SSD300 with VGG16) or ``'ssd320lite'``
            (SSDLite320 with MobileNetV3-Large).
        class_mapping: Dict mapping class name -> integer label, handed to the dataset.
        use_dark_images: Forwarded to XMLDataset; ignored for ``'json'``.

    Returns:
        Tuple ``(model, train_dataloader, test_dataloader, optimizer)``.

    Raises:
        ValueError: If ``model`` or ``dataformat`` is not one of the supported values.

    Notes:
        The images are resized to the module-level ``RESIZE`` at call time.
        No ``num_classes`` is passed to the model builders, so the detection
        head keeps the size that comes with the pretrained weights.
        NOTE(review): the module-level WEIGHT_DECAY is not passed to the optimizer.
    """
    # Validate up front: otherwise an unknown name surfaces later as an
    # unrelated UnboundLocalError / AttributeError, after weights were loaded.
    if model not in ('ssd300', 'ssd320lite'):
        raise ValueError(f"Unknown model {model!r}; expected 'ssd300' or 'ssd320lite'")
    if dataformat not in ('json', 'xml'):
        raise ValueError(f"Unknown dataformat {dataformat!r}; expected 'json' or 'xml'")

    # Initialise the model.
    if model == 'ssd300':
        detector = ssd.ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT, weights_backbone=VGG16_Weights.DEFAULT)
    else:
        detector = ssdlite320_mobilenet_v3_large(weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT, weights_backbone=MobileNet_V3_Large_Weights.DEFAULT)

    # Load the dataset and split it into training and test data.
    if dataformat == 'json':
        dataset = JsonDataset(root_dir_json, class_mapping, target_size=RESIZE)
    else:
        dataset = XMLDataset(root_dir_xml, class_mapping, target_size=RESIZE, use_dark_images=use_dark_images)

    train_size = int((1-test_size) * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    # Data loader for the training split.
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=utils.collate_fn)

    # Data loader for the test split (shuffled, so next(iter(...)) gives varying samples).
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True, collate_fn=utils.collate_fn)

    # Optimizer.
    optimizer = torch.optim.SGD(detector.parameters(), lr=lr, momentum=momentum, nesterov=nesterov)

    return detector, train_dataloader, test_dataloader, optimizer

In [47]:
def save_model(model, path, name, class_mapping, ap_values, ar_values, losses, hyperparameters):
    """Store a model's weights plus a JSON file describing the training run.

    Both files go into the directory ``path/name`` (created if missing):
    ``name.pth`` with ``model.state_dict()`` and ``name.json`` with the class
    mapping, metrics, losses and hyperparameters.

    Args:
        model: Module whose ``state_dict()`` is saved.
        path: Parent directory for the model folder.
        name: Folder name and file stem.
        class_mapping: JSON-serialisable class name -> label dict.
        ap_values: Sequence of objects offering ``.tolist()`` (one per epoch).
        ar_values: Sequence of objects offering ``.tolist()`` (one per epoch).
        losses: Sequence of values convertible with ``float()``.
        hyperparameters: JSON-serialisable description of the run settings.
    """
    # One folder per model, named after it.
    target_dir = os.path.join(path, name)
    os.makedirs(target_dir, exist_ok=True)

    # Weights go into <name>.pth.
    torch.save(model.state_dict(), os.path.join(target_dir, name + ".pth"))

    # Arrays and scalar types are turned into plain lists/floats for JSON.
    metadata = dict(
        class_mapping=class_mapping,
        ap_values=list(map(lambda epoch_ap: epoch_ap.tolist(), ap_values)),
        ar_values=list(map(lambda epoch_ar: epoch_ar.tolist(), ar_values)),
        losses=list(map(float, losses)),
        hyperparameters=hyperparameters,
    )

    # Metadata goes into <name>.json next to the weights.
    with open(os.path.join(target_dir, name + ".json"), "w") as handle:
        json.dump(metadata, handle)


In [48]:
def create_val_dataset(filter_classes, dataformat = 'json', class_mapping=CLASS_MAPPING):
    """Build a validation dataset that covers the whole JSON or XML dataset.

    Args:
        filter_classes: Collection of class names and/or integer labels to
            keep for the JSON dataset; a falsy value keeps every class of
            ``class_mapping``. NOTE(review): interpreted here as an allow-list
            because JsonDataset has no ``filter_classes`` parameter - confirm.
            As before, it has no effect for ``dataformat='xml'``.
        dataformat: ``'json'`` or ``'xml'``.
        class_mapping: Dict mapping class name -> integer label.

    Returns:
        A ``torch.utils.data.Subset`` containing every sample of the dataset
        in random order.

    Raises:
        ValueError: If ``dataformat`` is neither ``'json'`` nor ``'xml'``.
    """
    # Keyword arguments: the previous positional calls handed RESIZE to the
    # class_mapping parameter of both dataset constructors.
    if dataformat == 'json':
        if filter_classes:
            wanted = set(filter_classes)
            class_mapping = {
                class_name: label
                for class_name, label in class_mapping.items()
                if class_name in wanted or label in wanted
            }
        dataset = JsonDataset(root_dir_json, class_mapping, target_size=RESIZE)
    elif dataformat == 'xml':
        dataset = XMLDataset(root_dir_xml, class_mapping, target_size=RESIZE)
    else:
        raise ValueError(f"Unknown dataformat {dataformat!r}; expected 'json' or 'xml'")

    # A 0 / len split puts every sample into the second (validation) subset.
    _, val_dataset = torch.utils.data.random_split(dataset, [0, len(dataset)])
    return val_dataset

# Training

In [49]:
def start_training(model, train_dataloader, test_dataloader, optimizer, device, num_epochs=2):
    """Train ``model`` and evaluate it after every epoch.

    Args:
        model: Detection model; moved to ``device`` before training.
        train_dataloader: Loader passed to ``train_one_epoch``.
        test_dataloader: Loader passed to ``evaluate``.
        optimizer: Optimizer passed to ``train_one_epoch``.
        device: Device used for training and evaluation.
        num_epochs: Number of train/evaluate rounds.

    Returns:
        Tuple ``(ap_values, ar_values, losses)``: per epoch the first six and
        the remaining entries of the bbox evaluation ``stats`` vector, plus the
        list handed to ``train_one_epoch`` as ``losses_out``.
    """
    model.to(device)

    # Metric containers, one AP/AR entry per epoch.
    ap_values = []
    ar_values = []
    # Presumably filled with per-iteration losses by the project's
    # train_one_epoch via losses_out - verify against engine.py.
    losses = []

    for epoch in range(num_epochs):
        # Train for one epoch.
        train_one_epoch(model, optimizer, train_dataloader, device, epoch, print_freq=1, losses_out=losses)

        # No learning-rate scheduler is used.
        # lr_scheduler.step()

        # Evaluate on the test dataset and split the summary statistics into
        # the precision part (first six values) and the recall part (the rest).
        evaluator = evaluate(model, test_dataloader, device=device)
        bbox_stats = evaluator.coco_eval['bbox'].stats
        ap_values.append(bbox_stats[:6])
        ar_values.append(bbox_stats[6:])

    return ap_values, ar_values, losses

# Evaluation

In [50]:
def plot_loss(train_losses):
    """Plot the training loss per iteration and print its mean and last value.

    The curve is drawn on the current matplotlib figure; ``plt.show()`` is not
    called here.

    Args:
        train_losses: Sequence of numeric loss values, one per iteration.
    """
    # Nothing to plot or average (e.g. training was interrupted immediately);
    # avoids ZeroDivisionError / IndexError below.
    if len(train_losses) == 0:
        print("No losses recorded - nothing to plot.")
        return

    plt.plot(train_losses)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    average_loss = sum(train_losses)/len(train_losses)
    print("Average Loss: ", average_loss)
    print("last Loss: ", train_losses[-1])



def plot_metrics(ap_values, ar_values):
    """Plot average precision and average recall over the epochs.

    Args:
        ap_values: Per-epoch sequences holding the six precision values
            (``stats[:6]`` as collected by ``start_training``).
        ar_values: Per-epoch sequences holding the six recall values
            (``stats[6:]`` as collected by ``start_training``).
    """
    # Convert to 2-D arrays (epochs x metrics) for column-wise plotting.
    ap_values = np.array(ap_values)
    ar_values = np.array(ar_values)

    # Without a single finished epoch there is nothing to index or plot.
    if ap_values.size == 0 or ar_values.size == 0:
        print("No evaluation metrics recorded - nothing to plot.")
        return

    # Order of the COCO summary: the six precision entries vary the IoU
    # threshold / object area, while the six recall entries vary the number of
    # detections (1, 10, 100) and then the object area.
    precision_labels = ["0.50:0.95", "0.50", "0.75", "0.50:0.95_small", "0.50:0.95_medium", "0.50:0.95_large"]
    recall_labels = ["0.50:0.95, maxDets=1", "0.50:0.95, maxDets=10", "0.50:0.95, maxDets=100",
                     "0.50:0.95_small", "0.50:0.95_medium", "0.50:0.95_large"]

    # Average precision over epochs.
    plt.figure(figsize=(10, 5))
    for label, series in zip(precision_labels, ap_values.T):
        plt.plot(series, label=f"IoU={label}")
    plt.xlabel("Epochs")
    plt.ylabel("Average Precision")
    plt.title("Average Precision vs. Epochs")
    plt.legend()
    plt.show()

    # Average recall over epochs.
    plt.figure(figsize=(10, 5))
    for label, series in zip(recall_labels, ar_values.T):
        plt.plot(series, label=f"IoU={label}")
    plt.xlabel("Epochs")
    plt.ylabel("Average Recall")
    plt.title("Average Recall vs. Epochs")
    plt.legend()
    plt.show()

    # Values of the last epoch and the overall maxima.
    print("Average Precisions: ", ap_values[-1])
    print("Max Precision: ", ap_values.max())
    print("Average Recalls: ", ar_values[-1])
    print("Max Recall: ", ar_values.max())

    

def visualize_prediction(images, model, confidence_threshold, device, allowed_labels, class_mapping):
    """Run the model on a batch of images and draw the confident detections.

    Args:
        images: Sequence of image tensors (normalised with MEAN/STD when the
            module-level NORMALIZE flag is set).
        model: Detection model; it is switched to eval mode and left there.
        confidence_threshold: Only detections with a score above this value are drawn.
        device: Device the images are moved to for inference.
        allowed_labels: Collection of integer labels that may be drawn.
        class_mapping: Dict mapping class name -> integer label; used in
            reverse to print the class name next to each box.
    """
    model.eval()

    # One code path for any batch size: the model takes a list of image tensors.
    batch = [image.to(device) for image in images]
    with torch.no_grad():
        predictions = model(batch)

    # Reverse lookup built once; setdefault keeps the first name per label,
    # matching the previous list.index() behaviour for duplicate labels.
    label_names = {}
    for name, label_id in class_mapping.items():
        label_names.setdefault(label_id, name)
    drawable_labels = set(allowed_labels)

    for image, prediction in zip(images, predictions):
        if NORMALIZE:
            # Undo the normalisation (x * std + mean) and clamp so rounding
            # residue cannot wrap around in the 8-bit conversion.
            image = F.normalize(image, mean=[-m / s for m, s in zip(MEAN, STD)], std=[1 / s for s in STD])
            image = image.clamp(0, 1)
        # Convert the image tensor to a PIL image.
        image_pil = transforms.ToPILImage()(image)

        # Predicted bounding boxes, labels and scores as numpy arrays.
        boxes = prediction['boxes'].cpu().numpy()
        labels = prediction['labels'].cpu().numpy()
        scores = prediction['scores'].cpu().numpy()

        # Show the image and overlay the accepted detections.
        fig, ax = plt.subplots(1)
        ax.imshow(image_pil)

        for box, label, score in zip(boxes, labels, scores):
            if int(label) not in drawable_labels or not score > confidence_threshold:
                continue
            x_min, y_min, x_max, y_max = box
            width = x_max - x_min
            height = y_max - y_min
            # An allowed label without a name in the mapping is shown as
            # "unknown" instead of raising.
            class_name = label_names.get(int(label), "unknown")
            rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            ax.text(x_min, y_min, f"{class_name}, {label}", color='r', fontsize=8, bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))

        plt.show()


In [None]:
# Four face classes; id 0 is reserved for the background entry.
class_label_mapping = {
    "background": 0, # background
    "face_no_mask": 1,
    "face_with_mask_incorrect": 2,
    "face_with_mask": 3,
    "face_other_covering": 4,
}

# Cell-level overrides of the configuration globals. RESIZE is read by
# setup_model and NORMALIZE by the datasets, so they must be set before the call.
BATCH_SIZE = 2
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = False
RESIZE = (320, 320)

# SSDLite320 / MobileNetV3 on the JSON dataset.
model, train_dataloader, test_dataloader, optimizer = setup_model(batch_size=BATCH_SIZE,                                                                                                                        
                                                                lr=LEARNING_RATE,
                                                                momentum=MOMENTUM,                                                       
                                                                nesterov=NESTEROV,
                                                                test_size=TEST_SIZE,
                                                                class_mapping=class_label_mapping,                                                                
                                                                dataformat='json', 
                                                                model='ssd320lite')

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(model, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=12)

# Hyperparameter optimization

In [None]:
import itertools
def optimize_params(normalize_options = [True, False], batch_size_options = [1], momentum_options = [0.9, 0.95], nesterov_options = [True, False],
                    dataformat='json', model='ssd300', class_mapping=None):
    """Grid-search over normalisation, batch size, momentum and Nesterov.

    Every combination is trained for NUM_EPOCHS epochs with LEARNING_RATE and
    TEST_SIZE; the combination with the highest recorded recall value wins.

    Args:
        normalize_options: Values tried for the module-level NORMALIZE flag.
        batch_size_options: Batch sizes to try.
        momentum_options: SGD momentum values to try.
        nesterov_options: Nesterov settings to try.
        dataformat: Forwarded to ``setup_model`` (``'json'`` or ``'xml'``).
        model: Forwarded to ``setup_model`` (``'ssd300'`` or ``'ssd320lite'``).
        class_mapping: Class name -> label dict forwarded to ``setup_model``;
            defaults to the module-level CLASS_MAPPING.
            NOTE(review): CLASS_MAPPING assigns id 0 to a real class, whereas
            the mappings used in the training cells reserve 0 for
            background/empty - confirm which one is wanted.

    Returns:
        Tuple ``(best_hyperparameters, best_accuracy)`` where
        ``best_hyperparameters`` is ``(normalize, batch_size, momentum, nesterov)``
        or ``None`` if no combination scored above 0.
    """
    # The dataset classes read the module-level NORMALIZE flag, so a plain
    # local assignment would never reach them.
    global NORMALIZE

    if class_mapping is None:
        class_mapping = CLASS_MAPPING

    # All combinations of the hyperparameters.
    hyperparameter_combinations = list(itertools.product(batch_size_options, momentum_options, nesterov_options, normalize_options))

    best_accuracy = 0.0
    best_hyperparameters = None
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    previous_normalize = NORMALIZE
    try:
        # Train one model per combination.
        for batch_size, momentum, nesterov, normalize in hyperparameter_combinations:
            NORMALIZE = normalize
            print(f"Normalize: {normalize}, Batch Size: {batch_size}, Momentum: {momentum}, Nesterov: {nesterov}")

            # Fresh model, loaders and optimizer for the current combination.
            detector, train_dataloader, test_dataloader, optimizer = setup_model(batch_size=batch_size,
                                                            lr=LEARNING_RATE,
                                                            momentum=momentum,
                                                            nesterov=nesterov,
                                                            test_size=TEST_SIZE,
                                                            dataformat=dataformat,
                                                            model=model,
                                                            class_mapping=class_mapping)

            ap_values, ar_values, losses = start_training(detector, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=NUM_EPOCHS)
            plot_loss(losses)
            plot_metrics(ap_values, ar_values)

            # Score of this run: the highest recall value over all epochs.
            accuracy = np.max(ar_values)

            # Remember the best combination so far.
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_hyperparameters = (normalize, batch_size, momentum, nesterov)
            print(f"Beste Hyperparameter: {best_hyperparameters} \n Best Accuracy: {best_accuracy}")
    finally:
        # Do not leak the last tried setting into the rest of the notebook.
        NORMALIZE = previous_normalize

    print(f"FINAL!!!!\nBeste Hyperparameter: {best_hyperparameters} \n Best Accuracy: {best_accuracy}")
    return best_hyperparameters, best_accuracy

# Setup and Train SSD300_VGG16 on Json Model on 4 classes only Resolution 640 x 640

In [None]:
# Cell-level overrides of the configuration globals. RESIZE is read by
# setup_model and NORMALIZE by the datasets, so they must be set before the call.
BATCH_SIZE = 1
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = True
RESIZE = (640, 640)

# Four face classes; id 0 is reserved ("empty").
class_label_mapping = {
    "empty": 0,
    "face_no_mask": 1,
    "face_with_mask_incorrect": 2,
    "face_with_mask": 3,
    "face_other_covering": 4,
}

# SSD300 / VGG16 on the JSON dataset.
model, train_dataloader, test_dataloader, optimizer = setup_model(BATCH_SIZE,                                                                                                                        
                                                                lr=LEARNING_RATE,
                                                                momentum=MOMENTUM,                                                       
                                                                nesterov=NESTEROV,
                                                                test_size=TEST_SIZE,
                                                                class_mapping=class_label_mapping,                                                                
                                                                dataformat='json', 
                                                                model='ssd300')


# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(model, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=6)

# Loss curve and AP/AR curves of this run.
plot_loss(losses)
plot_metrics(ap_values, ar_values)

In [None]:
# One batch from the (shuffled) test loader; presumably (images, targets) as
# assembled by utils.collate_fn - verify.
samples = next(iter(test_dataloader))

# Predictions with a score above 0.5, then the ground truth of the first sample.
visualize_prediction(samples[0], model, 0.5, device, class_label_mapping.values(), class_label_mapping)
draw_image_with_boxes(samples[0][0], samples[1][0], class_label_mapping)

# Setup and Train SSD300_VGG16 on Json Model on 4 classes only

Normalize: True, Batch Size: 1, Momentum: 0.9, Nesterov: True
Normalize: True, Batch Size: 1, Momentum: 0.95, Nesterov: True
Normalize: True, Batch Size: 1, Momentum: 0.95, Nesterov: False
Normalize: True, Batch Size: 2, Momentum: 0.95, Nesterov: True

In [None]:
# Cell-level overrides of the configuration globals. RESIZE is read by
# setup_model and NORMALIZE by the datasets, so they must be set before the call.
BATCH_SIZE = 1
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = True
RESIZE = (300, 300)

# Four face classes; id 0 is reserved ("empty").
class_label_mapping = {
    "empty": 0,
    "face_no_mask": 1,
    "face_with_mask_incorrect": 2,
    "face_with_mask": 3,
    "face_other_covering": 4,
}

# SSD300 / VGG16 on the JSON dataset.
model, train_dataloader, test_dataloader, optimizer = setup_model(BATCH_SIZE,                                                                                                                        
                                                                lr=LEARNING_RATE,
                                                                momentum=MOMENTUM,                                                       
                                                                nesterov=NESTEROV,
                                                                test_size=TEST_SIZE,
                                                                class_mapping=class_label_mapping,                                                                
                                                                dataformat='json', 
                                                                model='ssd300')


In [None]:
# Train for 6 epochs on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(model, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=6)

In [None]:
# One batch from the (shuffled) test loader; presumably (images, targets) as
# assembled by utils.collate_fn - verify.
samples = next(iter(test_dataloader))

# Predictions with a score above 0.5, then the ground truth of the first sample.
visualize_prediction(samples[0], model, 0.5, device, class_label_mapping.values(), class_label_mapping)
draw_image_with_boxes(samples[0][0], samples[1][0], class_label_mapping)


In [None]:
# Loss curve and AP/AR curves collected by start_training.
plot_loss(losses)
plot_metrics(ap_values, ar_values)

In [None]:
# Store weights and run metadata in models/ssd300_vgg16_4_classes/.
save_model(model, "models/", "ssd300_vgg16_4_classes", class_label_mapping, ap_values, ar_values, losses, hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE))

# Setup SSD300_VGG16 Model on Json Dataset with all classes

In [None]:
# All 20 annotation classes shifted by one so that id 0 stays reserved ("empty").
class_mapping = {
    "empty": 0,
    "hijab_niqab": 1,
    "mask_colorful": 2,
    "mask_surgical": 3,
    "face_no_mask": 4,
    "face_with_mask_incorrect": 5,
    "face_with_mask": 6,
    "face_other_covering": 7,
    "scarf_bandana": 8,
    "balaclava_ski_mask": 9,
    "face_shield": 10,
    "other": 11,
    "gas_mask": 12,
    "turban": 13,
    "helmet": 14,
    "sunglasses": 15,
    "eyeglasses": 16,
    "hair_net": 17,
    "hat": 18,
    "goggles": 19,
    "hood": 20
}


# Cell-level overrides of the configuration globals. RESIZE is read by
# setup_model and NORMALIZE by the datasets, so they must be set before the call.
NORMALIZE = True
BATCH_SIZE = 1
MOMENTUM = 0.9
NESTEROV = True
RESIZE = (300, 300)

# SSD300 / VGG16 on the JSON dataset; the learning rate is given literally here
# (same value as LEARNING_RATE in the configuration cell).
model, train_dataloader, test_dataloader, optimizer = setup_model(batch_size=BATCH_SIZE,                                                                                                                        
                                                                lr=0.00001,
                                                                momentum=MOMENTUM,                                                       
                                                                nesterov=NESTEROV,
                                                                test_size=TEST_SIZE,
                                                                class_mapping=class_mapping,                                                                
                                                                dataformat='json', 
                                                                model='ssd300')

In [None]:
# Train for 12 epochs on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(model, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=12)

In [None]:
# One batch from the (shuffled) test loader; presumably (images, targets) as
# assembled by utils.collate_fn - verify.
samples = next(iter(test_dataloader))
# Predictions with a score above 0.5, then the ground truth of the first sample.
visualize_prediction(samples[0], model, 0.5, device, class_mapping.values(), class_mapping)
draw_image_with_boxes(samples[0][0], samples[1][0], class_mapping)

In [None]:
# Loss curve and AP/AR curves collected by start_training.
plot_loss(losses)
plot_metrics(ap_values, ar_values)

In [None]:
# Store weights and run metadata in models/ssd300_vgg16_all_classes/.
save_model(model, "models/", "ssd300_vgg16_all_classes", class_mapping, ap_values, ar_values, losses, hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE))

# Setup SSD320lite_MobileNetV3 Model on Json Dataset with 4 classes

In [None]:
# Four face classes; id 0 is reserved ("empty").
class_label_mapping = {
    "empty": 0,
    "face_no_mask": 1,
    "face_with_mask_incorrect": 2,
    "face_with_mask": 3,
    "face_other_covering": 4,
}

# Cell-level overrides of the configuration globals. RESIZE is read by
# setup_model and NORMALIZE by the datasets, so they must be set before the call.
BATCH_SIZE = 4
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = False
RESIZE = (320, 320)

# SSDLite320 / MobileNetV3 on the JSON dataset.
model, train_dataloader, test_dataloader, optimizer = setup_model(batch_size=BATCH_SIZE,                                                                                                                        
                                                                lr=LEARNING_RATE,
                                                                momentum=MOMENTUM,                                                       
                                                                nesterov=NESTEROV,
                                                                test_size=TEST_SIZE,
                                                                class_mapping=class_label_mapping,                                                                
                                                                dataformat='json', 
                                                                model='ssd320lite')

In [17]:
# Train for up to 12 epochs on the GPU when one is available, otherwise on the CPU.
# The saved output of this cell ends in a KeyboardInterrupt, i.e. the recorded
# run was stopped manually.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(model, train_dataloader, test_dataloader, optimizer, device=device, num_epochs=12)

Test:  [200/216]  eta: 0:00:05  model_time: 0.3095 (0.2691)  evaluator_time: 0.0113 (0.0159)  time: 0.4073  data: 0.0665  max mem: 768
Test:  [215/216]  eta: 0:00:00  model_time: 0.2850 (0.2694)  evaluator_time: 0.0119 (0.0157)  time: 0.3880  data: 0.0863  max mem: 768
Test: Total time: 0:01:13 (0.3388 s / it)
Averaged stats: model_time: 0.2850 (0.2694)  evaluator_time: 0.0119 (0.0157)
Accumulating evaluation results...
DONE (t=0.84s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.218
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.336
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.245
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.179
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.363
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe

KeyboardInterrupt: 

In [None]:
# Take the first batch of the test loader and show predictions next to the
# annotated boxes of its first image.
samples = next(iter(test_dataloader))
batch_images, batch_targets = samples[0], samples[1]

# NOTE(review): 0.5 is presumably the score threshold for drawn detections — confirm
# against visualize_prediction's signature.
visualize_prediction(
    batch_images,
    model,
    0.5,
    device,
    class_label_mapping.values(),
    class_label_mapping,
)
draw_image_with_boxes(batch_images[0], batch_targets[0], class_label_mapping)

In [None]:
# Plot the losses and the AP/AR values returned by start_training.
# NOTE(review): the recorded run of the training cell ended in KeyboardInterrupt,
# so these names are only fresh after training has run to completion.
plot_loss(losses)
plot_metrics(ap_values, ar_values)

In [None]:
# Persist the model together with its label mapping, the collected metrics and
# the hyperparameters used for this run.
save_model(
    model,
    "models/",
    "ssd320lite_MobielNetV3_4_classes",
    class_label_mapping,
    ap_values,
    ar_values,
    losses,
    hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE),
)

# Setup SSD320lite_MobileNetV3 Model on JSON Dataset with all classes

In [29]:
# Label names for the full JSON dataset. Ids are assigned by position, so
# "empty" is 0 and "hood" is 20.
class_mapping = {
    label: index
    for index, label in enumerate(
        [
            "empty",
            "hijab_niqab",
            "mask_colorful",
            "mask_surgical",
            "face_no_mask",
            "face_with_mask_incorrect",
            "face_with_mask",
            "face_other_covering",
            "scarf_bandana",
            "balaclava_ski_mask",
            "face_shield",
            "other",
            "gas_mask",
            "turban",
            "helmet",
            "sunglasses",
            "eyeglasses",
            "hair_net",
            "hat",
            "goggles",
            "hood",
        ]
    )
}

# Run configuration. NORMALIZE and RESIZE are not passed to setup_model below.
# NOTE(review): presumably the helpers read them as module-level globals — confirm.
BATCH_SIZE = 2
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = True
RESIZE = (320, 320)

model, train_dataloader, test_dataloader, optimizer = setup_model(
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    nesterov=NESTEROV,
    test_size=TEST_SIZE,
    class_mapping=class_mapping,
    dataformat="json",
    model="ssd320lite",
)

In [30]:
# Prefer the GPU when CUDA is available; otherwise train on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# start_training returns the AP values, AR values and losses it collected.
ap_values, ar_values, losses = start_training(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    device=device,
    num_epochs=12,
)

Epoch: [0]  [   0/1730]  eta: 0:01:16  lr: 0.000000  loss: 15.1695 (15.1695)  bbox_regression: 3.6219 (3.6219)  classification: 11.5476 (11.5476)  time: 0.0440  data: 0.0200  max mem: 768
Epoch: [0]  [   1/1730]  eta: 0:01:10  lr: 0.000000  loss: 12.1717 (13.6706)  bbox_regression: 1.8562 (2.7390)  classification: 10.3155 (10.9315)  time: 0.0408  data: 0.0178  max mem: 768
Epoch: [0]  [   2/1730]  eta: 0:01:23  lr: 0.000000  loss: 14.9044 (14.0819)  bbox_regression: 1.9461 (2.4747)  classification: 11.5476 (11.6071)  time: 0.0482  data: 0.0255  max mem: 768
Epoch: [0]  [   3/1730]  eta: 0:01:21  lr: 0.000000  loss: 14.9044 (14.8577)  bbox_regression: 1.9461 (2.5812)  classification: 11.5476 (12.2765)  time: 0.0471  data: 0.0244  max mem: 768
Epoch: [0]  [   4/1730]  eta: 0:01:31  lr: 0.000000  loss: 14.9044 (14.7541)  bbox_regression: 2.6189 (2.5887)  classification: 11.7211 (12.1654)  time: 0.0531  data: 0.0297  max mem: 768
Epoch: [0]  [   5/1730]  eta: 0:01:38  lr: 0.000000  loss: 1

KeyboardInterrupt: 

In [None]:
# Take the first batch of the test loader and show predictions next to the
# annotated boxes of its first image.
samples = next(iter(test_dataloader))
batch_images, batch_targets = samples[0], samples[1]

# NOTE(review): 0.5 is presumably the score threshold for drawn detections — confirm
# against visualize_prediction's signature.
visualize_prediction(
    batch_images,
    model,
    0.5,
    device,
    class_mapping.values(),
    class_mapping,
)
draw_image_with_boxes(batch_images[0], batch_targets[0], class_mapping)

In [None]:
# Plot the losses and the AP/AR values returned by start_training.
# NOTE(review): the recorded run of the training cell ended in KeyboardInterrupt,
# so these names are only fresh after training has run to completion.
plot_loss(losses)
plot_metrics(ap_values, ar_values)

In [None]:
# Persist the model together with its label mapping, the collected metrics and
# the hyperparameters used for this run.
save_model(
    model,
    "models/",
    "ssd320lite_MobielNetV3_all_classes",
    class_mapping,
    ap_values,
    ar_values,
    losses,
    hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE),
)

# Setup SSD300_VGG16 Model on XML Dataset

In [31]:
# Label ids for the XML-format dataset; id 0 is the "empty" entry.
class_mapping_xml = {
    "empty": 0,
    "without_mask": 1,
    "with_mask": 2,
    "mask_weared_incorrect": 3,
}

# Run configuration. BATCH_SIZE, MOMENTUM and NESTEROV are passed to setup_model
# below; NORMALIZE and RESIZE are not passed anywhere in this cell.
# NOTE(review): presumably the helpers read NORMALIZE/RESIZE as module-level globals — confirm.
BATCH_SIZE = 1
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = True
RESIZE = (300, 300)

# batch_size is passed by keyword, matching the JSON-dataset cells, so the call
# no longer depends on setup_model's parameter order.
model, train_dataloader, test_dataloader, optimizer = setup_model(
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    nesterov=NESTEROV,
    test_size=TEST_SIZE,
    class_mapping=class_mapping_xml,
    dataformat="xml",
    model="ssd300",
)

# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    device=device,
    num_epochs=12,
)

# Show predictions for the first test batch next to the annotated boxes of its
# first image.
samples = next(iter(test_dataloader))
visualize_prediction(samples[0], model, 0.5, device, class_mapping_xml.values(), class_mapping_xml)
draw_image_with_boxes(samples[0][0], samples[1][0], class_mapping_xml)

plot_loss(losses)
plot_metrics(ap_values, ar_values)

# Persist the model with its label mapping, metrics and hyperparameters.
save_model(
    model,
    "models/",
    "ssd300_vgg16_XML_Dataset",
    class_mapping_xml,
    ap_values,
    ar_values,
    losses,
    hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE),
)

Epoch: [0]  [  0/682]  eta: 0:02:59  lr: 0.000000  loss: 15.4241 (15.4241)  bbox_regression: 2.8523 (2.8523)  classification: 12.5717 (12.5717)  time: 0.2639  data: 0.0120  max mem: 768
Epoch: [0]  [  1/682]  eta: 0:01:41  lr: 0.000000  loss: 14.0080 (14.7161)  bbox_regression: 2.8523 (3.9267)  classification: 9.0070 (10.7894)  time: 0.1492  data: 0.0100  max mem: 768
Epoch: [0]  [  2/682]  eta: 0:01:15  lr: 0.000000  loss: 15.4241 (16.4002)  bbox_regression: 3.6262 (3.8265)  classification: 12.5717 (12.5737)  time: 0.1111  data: 0.0098  max mem: 768
Epoch: [0]  [  3/682]  eta: 0:01:03  lr: 0.000000  loss: 14.0080 (15.2519)  bbox_regression: 2.8523 (3.2494)  classification: 10.2890 (12.0025)  time: 0.0932  data: 0.0103  max mem: 768
Epoch: [0]  [  4/682]  eta: 0:00:55  lr: 0.000000  loss: 15.4241 (15.3426)  bbox_regression: 2.8523 (3.0026)  classification: 12.5717 (12.3400)  time: 0.0822  data: 0.0104  max mem: 768
Epoch: [0]  [  5/682]  eta: 0:00:50  lr: 0.000000  loss: 15.4241 (17.39

KeyboardInterrupt: 

# Setup SSD320lite_MobileNetV3 Model on XML Dataset

In [32]:
# Label ids for the XML-format dataset; id 0 is the "empty" entry.
class_mapping_xml = {
    "empty": 0,
    "without_mask": 1,
    "with_mask": 2,
    "mask_weared_incorrect": 3,
}

# Run configuration. BATCH_SIZE, MOMENTUM and NESTEROV are passed to setup_model
# below; NORMALIZE and RESIZE are not passed anywhere in this cell.
# NOTE(review): presumably the helpers read NORMALIZE/RESIZE as module-level globals — confirm.
BATCH_SIZE = 2
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = True
RESIZE = (320, 320)

# batch_size is passed by keyword, matching the JSON-dataset cells, so the call
# no longer depends on setup_model's parameter order.
model, train_dataloader, test_dataloader, optimizer = setup_model(
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    nesterov=NESTEROV,
    test_size=TEST_SIZE,
    class_mapping=class_mapping_xml,
    dataformat="xml",
    model="ssd320lite",
)

# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    device=device,
    num_epochs=12,
)

plot_loss(losses)
plot_metrics(ap_values, ar_values)

# Persist the model with its label mapping, metrics and hyperparameters.
save_model(
    model,
    "models/",
    "ssd320lite_MobielNetV3_XML_Dataset",
    class_mapping_xml,
    ap_values,
    ar_values,
    losses,
    hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE),
)

Epoch: [0]  [  0/341]  eta: 0:01:43  lr: 0.000000  loss: 16.1263 (16.1263)  bbox_regression: 2.1271 (2.1271)  classification: 13.9992 (13.9992)  time: 0.3044  data: 0.0254  max mem: 837
Epoch: [0]  [  1/341]  eta: 0:00:59  lr: 0.000000  loss: 14.5401 (15.3332)  bbox_regression: 1.7695 (1.9483)  classification: 12.7706 (13.3849)  time: 0.1757  data: 0.0232  max mem: 859
Epoch: [0]  [  2/341]  eta: 0:00:44  lr: 0.000000  loss: 14.5401 (14.9623)  bbox_regression: 2.1271 (2.6102)  classification: 12.7706 (12.3521)  time: 0.1321  data: 0.0228  max mem: 859
Epoch: [0]  [  3/341]  eta: 0:00:37  lr: 0.000000  loss: 14.5401 (15.0887)  bbox_regression: 2.1271 (2.5470)  classification: 12.7706 (12.5417)  time: 0.1111  data: 0.0226  max mem: 859
Epoch: [0]  [  4/341]  eta: 0:00:32  lr: 0.000000  loss: 14.5401 (14.2480)  bbox_regression: 2.3574 (2.6050)  classification: 12.7706 (11.6430)  time: 0.0965  data: 0.0211  max mem: 859
Epoch: [0]  [  5/341]  eta: 0:00:29  lr: 0.000000  loss: 14.5401 (14.3

KeyboardInterrupt: 

In [None]:
# Take the first batch of the test loader and show predictions next to the
# annotated boxes of its first image.
samples = next(iter(test_dataloader))
batch_images, batch_targets = samples[0], samples[1]

# NOTE(review): 0.5 is presumably the score threshold for drawn detections — confirm
# against visualize_prediction's signature.
visualize_prediction(
    batch_images,
    model,
    0.5,
    device,
    class_mapping_xml.values(),
    class_mapping_xml,
)
draw_image_with_boxes(batch_images[0], batch_targets[0], class_mapping_xml)

# Setup SSD300_VGG16 Model on Dark XML Dataset

In [33]:
# Label ids for the XML-format dataset; id 0 is the "empty" entry.
class_mapping_xml = {
    "empty": 0,
    "without_mask": 1,
    "with_mask": 2,
    "mask_weared_incorrect": 3,
}

# Run configuration. BATCH_SIZE, MOMENTUM and NESTEROV are passed to setup_model
# below; NORMALIZE and RESIZE are not passed anywhere in this cell.
# NOTE(review): presumably the helpers read NORMALIZE/RESIZE as module-level globals — confirm.
BATCH_SIZE = 1
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = True
RESIZE = (300, 300)

# batch_size is passed by keyword, matching the JSON-dataset cells, so the call
# no longer depends on setup_model's parameter order.
# use_dark_images=True selects the "dark" variant of the data handled by setup_model.
model, train_dataloader, test_dataloader, optimizer = setup_model(
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    nesterov=NESTEROV,
    test_size=TEST_SIZE,
    class_mapping=class_mapping_xml,
    dataformat="xml",
    model="ssd300",
    use_dark_images=True,
)

# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    device=device,
    num_epochs=12,
)

plot_loss(losses)
plot_metrics(ap_values, ar_values)

# Persist the model with its label mapping, metrics and hyperparameters.
save_model(
    model,
    "models/",
    "ssd300_vgg16_Dark_XML_Dataset",
    class_mapping_xml,
    ap_values,
    ar_values,
    losses,
    hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE),
)

Epoch: [0]  [  0/682]  eta: 0:02:07  lr: 0.000000  loss: 12.6428 (12.6428)  bbox_regression: 2.1716 (2.1716)  classification: 10.4712 (10.4712)  time: 0.1870  data: 0.0108  max mem: 876
Epoch: [0]  [  1/682]  eta: 0:01:34  lr: 0.000000  loss: 12.6428 (17.8004)  bbox_regression: 2.1029 (2.1372)  classification: 10.4712 (15.6632)  time: 0.1386  data: 0.0099  max mem: 1086
Epoch: [0]  [  2/682]  eta: 0:01:10  lr: 0.000000  loss: 12.6428 (15.9526)  bbox_regression: 2.1716 (2.1945)  classification: 10.4712 (13.7581)  time: 0.1039  data: 0.0100  max mem: 1086
Epoch: [0]  [  3/682]  eta: 0:00:59  lr: 0.000000  loss: 12.6428 (17.5831)  bbox_regression: 2.1716 (4.7518)  classification: 10.0506 (12.8313)  time: 0.0870  data: 0.0092  max mem: 1086
Epoch: [0]  [  4/682]  eta: 0:00:51  lr: 0.000000  loss: 22.2375 (18.5139)  bbox_regression: 2.3089 (4.5033)  classification: 10.4712 (14.0106)  time: 0.0764  data: 0.0090  max mem: 1086
Epoch: [0]  [  5/682]  eta: 0:00:47  lr: 0.000000  loss: 13.1410 (

KeyboardInterrupt: 

In [None]:
# Take the first batch of the test loader and show predictions next to the
# annotated boxes of its first image.
samples = next(iter(test_dataloader))
batch_images, batch_targets = samples[0], samples[1]

# NOTE(review): 0.5 is presumably the score threshold for drawn detections — confirm
# against visualize_prediction's signature.
visualize_prediction(
    batch_images,
    model,
    0.5,
    device,
    class_mapping_xml.values(),
    class_mapping_xml,
)
draw_image_with_boxes(batch_images[0], batch_targets[0], class_mapping_xml)

# Setup SSD320lite_MobileNetV3 Model on Dark XML Dataset

In [40]:
# Label ids for the XML-format dataset; id 0 is the "empty" entry.
class_mapping_xml = {
    "empty": 0,
    "without_mask": 1,
    "with_mask": 2,
    "mask_weared_incorrect": 3,
}

# Run configuration. BATCH_SIZE, MOMENTUM and NESTEROV are passed to setup_model
# below; NORMALIZE and RESIZE are not passed anywhere in this cell.
# NOTE(review): presumably the helpers read NORMALIZE/RESIZE as module-level globals — confirm.
BATCH_SIZE = 2
MOMENTUM = 0.9
NESTEROV = True
NORMALIZE = True
RESIZE = (320, 320)

# batch_size is passed by keyword, matching the JSON-dataset cells, so the call
# no longer depends on setup_model's parameter order.
# use_dark_images=True selects the "dark" variant of the data handled by setup_model.
model, train_dataloader, test_dataloader, optimizer = setup_model(
    batch_size=BATCH_SIZE,
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    nesterov=NESTEROV,
    test_size=TEST_SIZE,
    class_mapping=class_mapping_xml,
    dataformat="xml",
    model="ssd320lite",
    use_dark_images=True,
)

# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ap_values, ar_values, losses = start_training(
    model,
    train_dataloader,
    test_dataloader,
    optimizer,
    device=device,
    num_epochs=12,
)

plot_loss(losses)
plot_metrics(ap_values, ar_values)

# Persist the model with its label mapping, metrics and hyperparameters.
save_model(
    model,
    "models/",
    "ssd320lite_MobielNetV3_Dark_XML_Dataset",
    class_mapping_xml,
    ap_values,
    ar_values,
    losses,
    hyperparameters=(BATCH_SIZE, MOMENTUM, NESTEROV, NORMALIZE),
)

Epoch: [0]  [  0/341]  eta: 0:00:59  lr: 0.000000  loss: 13.3293 (13.3293)  bbox_regression: 3.6108 (3.6108)  classification: 9.7184 (9.7184)  time: 0.1758  data: 0.0090  max mem: 1086
Epoch: [0]  [  1/341]  eta: 0:00:50  lr: 0.000000  loss: 12.6593 (12.9943)  bbox_regression: 2.0392 (2.8250)  classification: 9.7184 (10.1693)  time: 0.1498  data: 0.0087  max mem: 1086
Epoch: [0]  [  2/341]  eta: 0:00:38  lr: 0.000000  loss: 13.3293 (14.2761)  bbox_regression: 3.6108 (3.8438)  classification: 10.6201 (10.4323)  time: 0.1123  data: 0.0087  max mem: 1086
Epoch: [0]  [  3/341]  eta: 0:00:31  lr: 0.000000  loss: 13.3293 (14.2170)  bbox_regression: 2.0392 (3.3521)  classification: 10.6201 (10.8649)  time: 0.0923  data: 0.0089  max mem: 1086
Epoch: [0]  [  4/341]  eta: 0:00:27  lr: 0.000000  loss: 14.0399 (15.0693)  bbox_regression: 3.6108 (3.7835)  classification: 10.9583 (11.2858)  time: 0.0805  data: 0.0087  max mem: 1086
Epoch: [0]  [  5/341]  eta: 0:00:24  lr: 0.000000  loss: 13.3293 (14

KeyboardInterrupt: 

In [None]:
# Take the first batch of the test loader and show predictions next to the
# annotated boxes of its first image.
samples = next(iter(test_dataloader))
batch_images, batch_targets = samples[0], samples[1]

# NOTE(review): 0.5 is presumably the score threshold for drawn detections — confirm
# against visualize_prediction's signature.
visualize_prediction(
    batch_images,
    model,
    0.5,
    device,
    class_mapping_xml.values(),
    class_mapping_xml,
)
draw_image_with_boxes(batch_images[0], batch_targets[0], class_mapping_xml)