In [None]:
import os
import json
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor

from torchvision.models.detection import ssd
from PIL import Image

from tqdm import tqdm


class DatasetLoader:
    """Load image/annotation pairs from a dataset directory.

    ``root_dir`` must contain an ``images`` folder and an ``annotations``
    folder. Every annotation is a JSON file with a ``FileName`` entry and an
    ``Annotations`` list whose items carry ``BoundingBox`` (four numbers) and
    ``classname``.
    """

    # File extensions (lower case) that are treated as images.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")
    # Legacy annotation suffixes appended to the image stem; adjust as needed.
    ANNOTATION_EXTENSIONS = (".jpg.json", ".jpeg.json", ".png.json")

    def __init__(self, root_dir):
        self.root_dir = root_dir
        self.images_dir = os.path.join(root_dir, "images")
        self.annotations_dir = os.path.join(root_dir, "annotations")

    def load_dataset(self):
        """Return a list of ``(image, annotations)`` tuples.

        Files in ``images`` that are not images are ignored. Images without a
        matching annotation file are skipped (a single summary warning is
        emitted) instead of aborting the whole load.
        """
        import warnings  # local import: only needed for the skip summary

        dataset = []
        skipped = 0

        # os.listdir() order is arbitrary; sorting keeps the dataset order (and
        # therefore any seeded train/test split) reproducible.
        for filename in sorted(os.listdir(self.images_dir)):
            if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
                continue

            annotation_path = self._find_annotation_path(filename)
            if annotation_path is None:
                skipped += 1
                continue

            image_path = os.path.join(self.images_dir, filename)
            dataset.append(self._read_data(image_path, annotation_path))

        if skipped:
            warnings.warn(
                f"Skipped {skipped} image(s) without an annotation file in "
                f"{self.annotations_dir}"
            )

        return dataset

    def _find_annotation_path(self, filename):
        """Return the annotation path for ``filename`` or ``None`` if absent."""
        stem = os.path.splitext(filename)[0]
        # Try the exact "<image file name>.json" first so that e.g. "x.png"
        # never picks up the annotation that belongs to "x.jpg"; then fall
        # back to the stem-based candidates.
        candidates = [filename + ".json"]
        candidates.extend(stem + extension for extension in self.ANNOTATION_EXTENSIONS)

        for candidate in candidates:
            annotation_path = os.path.join(self.annotations_dir, candidate)
            if os.path.isfile(annotation_path):
                return annotation_path
        return None

    def _read_data(self, image_path, annotation_path):
        """Read one image and its annotation file.

        Returns:
            ``(image, image_annotations)`` where ``image`` is an RGB PIL image
            and ``image_annotations`` is
            ``{"filename": ..., "annotations": [{"bbox": [xmin, ymin, xmax, ymax],
            "label": ...}, ...]}``.
        """
        # The context manager closes the file handle; convert() returns a
        # fully loaded copy and guarantees three channels so that images can
        # be stacked into one batch later on.
        with Image.open(image_path) as image_file:
            image = image_file.convert("RGB")

        with open(annotation_path, 'r', encoding="utf-8") as f:
            annotations = json.load(f)

        image_annotations = {
            "filename": annotations["FileName"],
            "annotations": []
        }

        for annotation in annotations["Annotations"]:
            # Unpacking verifies that the box really has four coordinates.
            xmin, ymin, xmax, ymax = annotation["BoundingBox"]
            image_annotations["annotations"].append({
                "bbox": [xmin, ymin, xmax, ymax],
                "label": annotation["classname"]
            })

        return image, image_annotations
    


# Machine-specific dataset root; DatasetLoader reads its "images" and
# "annotations" sub-folders.
root_dir = r"C:\Users\Domi\Documents\GitHub\Deep-Vision-sta\Datasets\Face Mask Detection Dataset\Medical mask\Medical mask\Medical Mask"

# Load all (image, annotations) pairs into a list.
dataset_loader = DatasetLoader(root_dir)
dataset = dataset_loader.load_dataset()
# Unpack the first sample; this raises IndexError when the dataset is empty.
image, annotations = dataset[0]
print(len(dataset))





class MyCustomDataset(Dataset):
    """Map-style dataset over a sequence of ``(image, annotations)`` samples.

    The image of each sample is passed through ``ToTensor``; the annotations
    are handed back unchanged (the very same object, not a copy).
    """

    def __init__(self, dataset):
        self.dataset = dataset
        self.transform = ToTensor()

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        sample = self.dataset[index]
        # sample[0] is the image, sample[1] its annotations.
        return self.transform(sample[0]), sample[1]
    



def collate_fn(batch, target_size=(300, 300)):
    """Resize every image of a batch to one size and rescale its boxes.

    Args:
        batch: iterable of ``(image, annotation)`` pairs. ``image`` is a float
            tensor of shape ``(C, H, W)``; ``annotation`` is a dict whose
            ``'annotations'`` list holds dicts with a ``'bbox'`` entry in
            ``[x_min, y_min, x_max, y_max]`` pixel coordinates.
        target_size: ``(height, width)`` of the resized images.

    Returns:
        ``(images, annotations)``: ``images`` is a tensor of shape
        ``(N, C, height, width)``; ``annotations`` is a list of new annotation
        dicts whose boxes are scaled to the resized image and kept in
        ``[x_min, y_min, x_max, y_max]`` order, which is what the drawing
        helpers in this file and the detection targets read.

    The input annotation dicts are never modified. Rescaling them in place
    would scale the same boxes again every time a sample is drawn (i.e. once
    per epoch).
    """
    target_height, target_width = target_size
    images = []
    annotations = []

    for image, annotation in batch:
        height, width = image.shape[-2:]

        # Resize the tensor directly; a round trip through PIL is not needed.
        # antialias=True keeps down-scaled photos free of aliasing artefacts.
        resized = torch.nn.functional.interpolate(
            image.unsqueeze(0),
            size=(target_height, target_width),
            mode="bilinear",
            align_corners=False,
            antialias=True,
        ).squeeze(0)
        images.append(resized)

        # Scale the corner coordinates to the new image size.
        width_ratio = target_width / width
        height_ratio = target_height / height
        scaled_boxes = []
        for bbox_dict in annotation['annotations']:
            x_min, y_min, x_max, y_max = bbox_dict['bbox']
            scaled_boxes.append({
                **bbox_dict,
                'bbox': [
                    x_min * width_ratio,
                    y_min * height_ratio,
                    x_max * width_ratio,
                    y_max * height_ratio,
                ],
            })

        # Copy the annotation so that all other keys (e.g. the file name) are
        # preserved while the dataset's original dict stays untouched.
        annotations.append({**annotation, 'annotations': scaled_boxes})

    # All images share one size now and can be stacked into a single tensor.
    images = torch.stack(images)

    return images, annotations







# Initialise the model.
# torchvision detection models count the background as class 0, so the 20
# object classes of this dataset need 21 outputs.
# NOTE(review): the class_mapping in the training loop starts at 0, which the
# SSD loss treats as background -- shift those ids to 1..20 to make use of the
# additional output.
model = ssd.ssd300_vgg16(num_classes=21)

# Split the data into a training and a test set.
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Prepare the training data and create its DataLoader.
train_dataset = MyCustomDataset(train_data)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)

# Prepare the test data and create its DataLoader.
test_dataset = MyCustomDataset(test_data)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=collate_fn)

# Define the optimizer and the loss function.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# NOTE: the training loop sums the loss dict returned by the model itself, so
# this criterion is not used there.
criterion = nn.CrossEntropyLoss()





def visualize_sample(dataloader, x):
    """Show the x-th sample yielded by ``dataloader`` with its bounding boxes.

    Args:
        dataloader: iterable of ``(images, annotations)`` batches, where
            ``annotations[i]['annotations']`` is a list of dicts with a
            ``'bbox'`` entry in ``[x_min, y_min, x_max, y_max]`` order.
        x: index of the sample, counted across batches in iteration order.

    Raises:
        IndexError: if the dataloader yields fewer than ``x + 1`` samples.
    """
    # Walk through the batches until the one containing sample x is reached,
    # so that x is not limited to the size of the first batch.
    remaining = x
    for images, annotations in dataloader:
        if remaining < len(images):
            image = images[remaining]
            boxes = annotations[remaining]['annotations']
            break
        remaining -= len(images)
    else:
        raise IndexError(f"sample index {x} is out of range for the dataloader")

    # Create a new figure and axis.
    fig, ax = plt.subplots(1)

    # imshow expects (H, W, C); the tensor is (C, H, W).
    ax.imshow(image.permute(1, 2, 0))

    # Draw every bounding box as a rectangle outline.
    for box in boxes:
        x_min, y_min, x_max, y_max = box['bbox']
        width = x_max - x_min
        height = y_max - y_min
        rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

    # Display the image with its boxes.
    plt.show()

# Sanity check: display the sample at index 4 of a training batch with its boxes.
visualize_sample(train_dataloader, 4)




def draw_image_with_boxes(image, target):
    """Plot ``image`` together with the labelled boxes stored in ``target``.

    Args:
        image: tensor in (C, H, W) layout; it is moved to the CPU for plotting.
        target: dict with the tensors ``"boxes"`` (rows of
            ``x_min, y_min, x_max, y_max``) and ``"labels"``.

    The target dict is also printed to stdout.
    """
    # matplotlib wants an (H, W, C) numpy array.
    pixels = image.cpu().permute(1, 2, 0).numpy()

    # Fetch both entries first, then bring them to the CPU as numpy arrays.
    box_tensor, label_tensor = target["boxes"], target["labels"]
    box_array = box_tensor.cpu().numpy()
    label_array = label_tensor.cpu().numpy()

    _, axis = plt.subplots(1)
    axis.imshow(pixels)
    print(target)

    # One red outline plus a caption per box.
    for (x_min, y_min, x_max, y_max), label in zip(box_array, label_array):
        outline = patches.Rectangle(
            (x_min, y_min),
            x_max - x_min,
            y_max - y_min,
            linewidth=2,
            edgecolor='r',
            facecolor='none',
        )
        axis.add_patch(outline)
        axis.text(
            x_min,
            y_min,
            f"Label: {label}",
            color='r',
            fontsize=8,
            bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'),
        )

    plt.show()





# Training loop
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.train()

# Class name -> numeric label. Built once instead of once per batch.
# NOTE(review): torchvision detection models reserve label 0 for the
# background, so boxes labelled "hijab_niqab" are currently not trained as
# objects. Fixing this means shifting all ids by one AND creating the model
# with num_classes = len(class_mapping) + 1 -- change both together.
class_mapping = {
    "hijab_niqab": 0,
    "mask_colorful": 1,
    "mask_surgical": 2,
    "face_no_mask": 3,
    "face_with_mask_incorrect": 4,
    "face_with_mask": 5,
    "face_other_covering": 6,
    "scarf_bandana": 7,
    "balaclava_ski_mask": 8,
    "face_shield": 9,
    "other": 10,
    "gas_mask": 11,
    "turban": 12,
    "helmet": 13,
    "sunglasses": 14,
    "eyeglasses": 15,
    "hair_net": 16,
    "hat": 17,
    "goggles": 18,
    "hood": 19
}

for epoch in range(num_epochs):
    pbar = tqdm(train_dataloader, total=len(train_dataloader))
    total_loss = 0.0

    # Count the batches explicitly: pbar.n is tqdm's display counter and is
    # not guaranteed to be updated on every iteration, so dividing by it can
    # distort the running average.
    for batch_index, (images, annotations) in enumerate(pbar, start=1):
        images = images.to(device)

        # Convert the annotation dicts into the target format of the model.
        targets = []
        for annotation in annotations:
            boxes = annotation["annotations"]
            # Translate the class names into numeric labels.
            labels = [class_mapping[box["label"]] for box in boxes]
            bboxes = [box["bbox"] for box in boxes]
            target = {
                # reshape keeps the (N, 4) layout even when an image has no boxes.
                "boxes": torch.tensor(bboxes, dtype=torch.float32).reshape(-1, 4).to(device),
                # Explicit dtype: an empty list would otherwise become float32.
                "labels": torch.tensor(labels, dtype=torch.int64).to(device)
            }
            targets.append(target)

        optimizer.zero_grad()
        # In training mode the model returns a dict of losses; their sum is
        # the quantity that is optimised.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
        total_loss += losses.item()
        average_loss = total_loss / batch_index

        pbar.set_description(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}")
        # Debugging aid: uncomment to inspect the first sample of the batch.
        #draw_image_with_boxes(images[0], targets[0])
    pbar.close()

