In [5]:
# Install necessary dependencies
#!pip install torch torchvision pycocotools

# Import required libraries
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import torchvision.transforms as T
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [6]:
# RGBA colours used in the segmentation masks, listed in class-id order:
# the position of a colour in this tuple is the class id it maps to.
_CLASS_COLORS = (
    (0, 0, 255, 255),      # 0: A-Building
    (0, 255, 0, 255),      # 1: B-Building
    (255, 0, 0, 255),      # 2: C-Building
    (255, 255, 255, 255),  # 3: E-Building
    (255, 235, 4, 255),    # 4: F-Building
    (128, 128, 128, 255),  # 5: G-Building
    (255, 32, 98, 255),    # 6: H-Building
    (255, 25, 171, 255),   # 7: I-Building
    (93, 71, 255, 255),    # 8: L-Building
    (255, 73, 101, 255),   # 9: M-Building
    (145, 255, 114, 255),  # 10: N-Building
    (153, 168, 255, 255),  # 11: O-Building
    (64, 0, 75, 255),      # 12: R-Building
    (18, 178, 0, 255),     # 13: Z-Building
    (255, 169, 0, 255),    # 14: Other
)

# Lookup table from an RGBA mask colour to its integer class id.
COLOR_TO_ID = {rgba: class_id for class_id, rgba in enumerate(_CLASS_COLORS)}

In [7]:
from tqdm import tqdm  # Fortschrittsbalken hinzufügen

class BuildingSegmentationDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(image, target)`` pairs in torchvision detection format.

    Every entry of ``root`` is treated as one sequence directory. A sequence
    contributes one sample when it contains both ``step0.camera.png`` and
    ``step0.camera.semantic segmentation.png``.

    The segmentation PNG is colour-coded. Colours are translated to class ids
    via ``color_to_id``; because label 0 is reserved for background in the
    target, class id ``c`` is emitted as label ``c + 1``. Pixels whose colour
    is not in the mapping count as background.

    The mask is a per-class (semantic) mask, so all pixels of one class in an
    image form a single object with a single bounding box.

    Args:
        root: Directory containing one sub-directory per sequence.
        transforms: Optional callable applied to the PIL image only (the
            target is not passed to it).
        color_to_id: Optional mapping from RGBA tuple to class id. Defaults to
            the module-level ``COLOR_TO_ID``.
    """

    def __init__(self, root, transforms=None, color_to_id=None):
        self.root = root
        self.transforms = transforms
        self.color_to_id = COLOR_TO_ID if color_to_id is None else color_to_id

        # Collect the list of all sequences.
        self.sequences = sorted(os.listdir(root))
        self.imgs = []
        self.masks = []

        print("Lade Sequenzen...")
        # Walk through every sequence and keep those that have both files.
        for seq in tqdm(self.sequences, desc="Fortschritt", unit="seq"):
            seq_path = os.path.join(root, seq)
            img_file = os.path.join(seq_path, "step0.camera.png")
            mask_file = os.path.join(seq_path, "step0.camera.semantic segmentation.png")
            if os.path.exists(img_file) and os.path.exists(mask_file):
                self.imgs.append(img_file)
                self.masks.append(mask_file)

    @staticmethod
    def _build_target(mask_np, idx, color_to_id):
        """Convert an RGBA mask array of shape (H, W, 4) into a target dict.

        Returns a dict with ``boxes`` (N, 4) float32 in ``[xmin, ymin, xmax,
        ymax]`` order, ``labels`` (N,) int64, ``masks`` (N, H, W) uint8,
        ``image_id`` (1,), ``area`` (N,) and ``iscrowd`` (N,) int64.
        N may be 0; all tensors are then correctly shaped and empty.
        """
        # 0 marks background, so class id c is stored as c + 1. Without the
        # offset, class id 0 would be indistinguishable from background.
        label_map = np.zeros(mask_np.shape[:2], dtype=np.int64)
        for color, class_id in color_to_id.items():
            label_map[np.all(mask_np == np.array(color), axis=-1)] = class_id + 1

        # One binary mask per label present in the image.
        obj_ids = np.unique(label_map)
        obj_ids = obj_ids[obj_ids > 0]
        masks = label_map == obj_ids[:, None, None]

        # Bounding boxes; objects whose box has zero width or height are
        # dropped because the detection model rejects degenerate boxes.
        boxes = []
        keep = np.zeros(len(obj_ids), dtype=bool)
        for i in range(len(obj_ids)):
            ys, xs = np.where(masks[i])
            xmin, xmax = int(xs.min()), int(xs.max())
            ymin, ymax = int(ys.min()), int(ys.max())
            if xmax > xmin and ymax > ymin:
                boxes.append([xmin, ymin, xmax, ymax])
                keep[i] = True
        obj_ids = obj_ids[keep]
        masks = masks[keep]

        # Explicit (N, 4) shape so an image without objects yields (0, 4)
        # instead of a 1-D empty tensor.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4)
        labels = torch.as_tensor(obj_ids, dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(obj_ids),), dtype=torch.int64)

        return {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": torch.tensor([idx]),
            "area": area,
            "iscrowd": iscrowd,
        }

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)``."""
        # The context managers release the file handles as soon as the pixel
        # data has been converted.
        with Image.open(self.imgs[idx]) as img_file:
            img = img_file.convert("RGB")
        with Image.open(self.masks[idx]) as mask_file:
            mask_np = np.array(mask_file.convert("RGBA"))

        target = self._build_target(mask_np, idx, self.color_to_id)

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Number of sequences that have both an image and a mask."""
        return len(self.imgs)


In [8]:
# Define transformations
def get_transform(train):
    """Build the image preprocessing pipeline.

    Args:
        train: Whether the pipeline is for the training split. Kept for
            interface compatibility; it currently does not change the result.

    Returns:
        A ``T.Compose`` that converts a PIL image to a tensor.
    """
    # No RandomHorizontalFlip here on purpose: this pipeline is applied to the
    # image only, so a random flip would mirror the picture while its boxes
    # and masks stay unflipped, corrupting the labels of the flipped samples.
    # Flip augmentation must be done by a transform that receives the target too.
    return T.Compose([T.ToTensor()])

In [10]:
#from google.colab import drive
#drive.mount('/content/drive/')

# Load the dataset
# The dataset location can be overridden with the BUILDING_DATASET_ROOT
# environment variable, so the notebook also runs on other machines; the
# fallback is the original local path.
root_dir = os.environ.get(
    "BUILDING_DATASET_ROOT",
    "C:/Users/Lukas/AppData/LocalLow/DefaultCompany/Fuwa_HDRP/solo_1/",
)
# Two dataset objects over the same files: one with the training transforms
# and one with the evaluation transforms.
dataset = BuildingSegmentationDataset(root=root_dir, transforms=get_transform(train=True))
dataset_test = BuildingSegmentationDataset(root=root_dir, transforms=get_transform(train=False))

Lade Sequenzen...


Fortschritt: 100%|██████████| 10005/10005 [00:01<00:00, 6766.94seq/s]


Lade Sequenzen...


Fortschritt: 100%|██████████| 10005/10005 [00:01<00:00, 8086.19seq/s]


In [None]:
# Split dataset into train and test
NUM_TEST_SAMPLES = 50  # the last 50 shuffled indices are held out for testing

# This cell rebinds `dataset` / `dataset_test`. Unwrap an existing Subset
# first so that re-running the cell reproduces the same split instead of
# building a Subset of a Subset with out-of-range indices.
if isinstance(dataset, torch.utils.data.Subset):
    dataset = dataset.dataset
if isinstance(dataset_test, torch.utils.data.Subset):
    dataset_test = dataset_test.dataset

torch.manual_seed(1)  # fixed seed -> reproducible permutation
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-NUM_TEST_SAMPLES])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-NUM_TEST_SAMPLES:])

In [12]:
# DataLoader
def collate_fn(batch):
    """Regroup a list of ``(image, target)`` samples into ``(images, targets)``.

    The samples are kept as tuples rather than stacked into one tensor.
    """
    return tuple(zip(*batch))


# On Windows, DataLoader workers are separate spawned processes that must
# unpickle the dataset and the collate function. A lambda cannot be pickled,
# and objects defined inside a notebook cell are not importable by the
# workers, so load in the main process there.
NUM_WORKERS = 0 if os.name == "nt" else 4

data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True,
                                          num_workers=NUM_WORKERS, collate_fn=collate_fn)
data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1, shuffle=False,
                                               num_workers=NUM_WORKERS, collate_fn=collate_fn)

In [13]:
# Load the pre-trained Mask R-CNN model
def get_model_instance_segmentation(num_classes):
    """Return a pre-trained Mask R-CNN whose heads predict ``num_classes`` classes.

    The box predictor and the mask predictor of the pre-trained network are
    replaced with new ones sized for ``num_classes``; the rest of the network
    is left as loaded.
    """
    net = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    heads = net.roi_heads

    # New box classification head with the same input width as the old one.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # New mask head: same input channels as the old one, 256 hidden channels.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, 256, num_classes)

    return net

In [14]:
# Define model and optimizer
num_classes = len(COLOR_TO_ID) + 1  # 15 classes (0-14) + background
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = get_model_instance_segmentation(num_classes)
model.to(device)

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to C:\Users\Lukas/.cache\torch\hub\checkpoints\maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
100.0%


In [None]:
# # Training and evaluation utilities
# from engine import train_one_epoch, evaluate
# import utils

# # Training loop
# num_epochs = 10
# for epoch in range(num_epochs):
#     train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
#     lr_scheduler.step()
#     evaluate(model, data_loader_test, device=device)

# # Save the model
# torch.save(model.state_dict(), "mask_rcnn_finetuned.pth")


import time
import torch
from tqdm import tqdm

# Simple training loop
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq, num_epochs=None):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Model that returns a dict of losses when called as
            ``model(images, targets)`` in training mode.
        optimizer: Optimizer stepping the model parameters.
        data_loader: Iterable of ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts of tensors.
        device: Device the batch is moved to.
        epoch: Zero-based epoch index (used for display only).
        print_freq: The loss is printed every ``print_freq`` iterations.
        num_epochs: Total number of epochs, used only in the progress-bar
            label. When omitted, a module-level ``num_epochs`` is used if one
            exists; otherwise the label shows only the current epoch.
    """
    if num_epochs is None:
        # Backward compatibility: earlier versions read the global directly,
        # which raised NameError when it had not been defined yet.
        num_epochs = globals().get("num_epochs")
    if num_epochs is not None:
        desc = f"Epoch {epoch+1}/{num_epochs}"
    else:
        desc = f"Epoch {epoch+1}"

    model.train()
    for i, (images, targets) in enumerate(tqdm(data_loader, desc=desc, ncols=100)):
        # Move images and targets to the device (GPU)
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass: in training mode the model returns a dict of losses
        loss_dict = model(images, targets)

        # Total loss is the sum of the individual loss terms
        losses = sum(loss_dict.values())

        # Backward pass and optimize
        losses.backward()
        optimizer.step()

        # Print the loss periodically
        if i % print_freq == 0:
            print(f"Epoch {epoch+1}, Iteration {i}, Loss: {losses.item()}")

# Evaluation loop
def _to_cpu(obj):
    """Recursively move every tensor inside nested dicts/lists/tuples to the CPU."""
    if torch.is_tensor(obj):
        return obj.cpu()
    if isinstance(obj, dict):
        return {key: _to_cpu(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [_to_cpu(value) for value in obj]
    if isinstance(obj, tuple):
        return tuple(_to_cpu(value) for value in obj)
    return obj


def evaluate(model, data_loader, device):
    """Run the model in eval mode over ``data_loader`` and collect its outputs.

    Args:
        model: Model called as ``model(images)`` in eval mode.
        data_loader: Iterable of ``(images, targets)`` batches; the targets
            are not used.
        device: Device the images are moved to for inference.

    Returns:
        A list with one entry per batch holding the model output for that
        batch, with all tensors moved to the CPU.
    """
    model.eval()
    results = []
    with torch.no_grad():
        # Targets are not needed for inference, so they stay where they are.
        for images, _targets in tqdm(data_loader, desc="Evaluating", ncols=100):
            images = [image.to(device) for image in images]
            prediction = model(images)

            # Collect results (e.g., use COCO evaluation here). Moving them to
            # the CPU keeps predictions from piling up in device memory over
            # the whole evaluation set.
            results.append(_to_cpu(prediction))

    # Return the evaluation results (you can modify this part to use COCO metrics, etc.)
    return results

# Training loop
# Runs `num_epochs` epochs; each epoch trains on `data_loader`, advances the
# learning-rate scheduler once, and then runs inference on `data_loader_test`.
num_epochs = 10
for epoch in range(num_epochs):
    # The loss is printed every 10 iterations.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # The scheduler is stepped once per epoch, after the optimizer updates.
    lr_scheduler.step()
    # evaluate() returns the raw predictions; the return value is discarded here.
    evaluate(model, data_loader_test, device)

Epoch 1/10:   0%|                                                          | 0/4976 [00:00<?, ?it/s]