In [12]:
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from torchvision.datasets import CocoDetection
from pycocotools.coco import COCO
from torch.utils.data import DataLoader

data_dir = '../data/Bamberg_coco2048/coco2048'
img_folder = f'{data_dir}/train2023'
ann_file = f'{data_dir}/annotations/instances_tree_train2023.json'

In [13]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

def get_train_augs():
    return A.Compose([
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Resize(512, 512),
        A.Normalize(),
        ToTensorV2()
    ], bbox_params=A.BboxParams(format='coco'))

def get_val_augs():
    return A.Compose([
        A.Resize(512, 512),
        A.Normalize(),
        ToTensorV2()
    ], bbox_params=A.BboxParams(format='coco'))

def transform(image: Image, target: dict, is_train: bool):
    image_np = np.array(image)
    
    # Извлеките аннотации (маски, bboxes)
    bboxes = [ann['bbox'] for ann in target]
    masks = [ann['segmentation'] for ann in target]
    
    # Выберите аугментации в зависимости от режима
    if is_train:
        aug = get_train_augs()
    else:
        aug = get_val_augs()
    
    # Примените преобразования
    transformed = aug(image=image_np, bboxes=bboxes, masks=masks)
    return transformed['image'], transformed['bboxes'], transformed['masks']

In [14]:
from torchvision.datasets import CocoDetection

class CustomCocoDataset(CocoDetection):
    def __init__(self, root, annFile, is_train: bool = True):
        super().__init__(root, annFile)
        self.is_train = is_train

    def __getitem__(self, idx):
        image, target = super().__getitem__(idx)
        return transform(image, target, self.is_train)

In [15]:
train_dataset = CustomCocoDataset(
    root=img_folder,
    annFile=ann_file,
    is_train=True
)

# Валидационный датасет без аугментаций
val_dataset = CustomCocoDataset(
    root=img_folder,
    annFile=ann_file,
    is_train=False
)

loading annotations into memory...
Done (t=1.40s)
creating index...
index created!
loading annotations into memory...
Done (t=1.40s)
creating index...
index created!


In [16]:
# Пример для тренировочного изображения
image, bboxes, masks = train_dataset[0]
print("Train image shape:", image.shape)

# Пример для валидационного изображения
image, bboxes, masks = val_dataset[0]
print("Val image shape:", image.shape)

TypeError: All elements in masks must be numpy arrays

In [20]:
from torchvision.datasets import CocoDetection
from torchvision.transforms import ToTensor, ToPILImage
from torch.utils.data import DataLoader


def transform(image: Image, target: dict):
    return image, target  # Apply ToTensor only to the image

def collate_fn(batch):
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]
    return images, targets

# Create a dataset
train_dataset = CocoDetection(
    root=img_folder, 
    annFile=ann_file, 
    transforms=transform
)

val_dataset = CocoDetection(
    root=img_folder, 
    annFile=ann_file, 
    transforms=transform
)

train_loader = DataLoader(train_dataset, batch_size=2, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=2, collate_fn=collate_fn)

loading annotations into memory...
Done (t=1.42s)
creating index...
index created!
loading annotations into memory...
Done (t=1.42s)
creating index...
index created!


In [None]:
image, target = train_dataset[0]
print(image.())
print(target[0])

image, target = val_dataset[0]
print(image.shape)
print(image.mean())
print(target[0])

AttributeError: 'Image' object has no attribute 'mean'

In [19]:
label2id = {"background": 0, "tree": 1}
id2label = {v: k for k, v in label2id.items()}


In [23]:
from transformers import (
    AutoImageProcessor,
    AutoModelForUniversalSegmentation,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
)

model = AutoModelForUniversalSegmentation.from_pretrained(
        'facebook/mask2former-swin-tiny-coco-instance',
        label2id=label2id,
        id2label=id2label,
        ignore_mismatched_sizes=True,

    )

image_processor = AutoImageProcessor.from_pretrained(
        'facebook/mask2former-swin-tiny-coco-instance',
        do_resize=True,
        size={"height": 2048, "width": 2048},
        use_fast=True
    )

Some weights of Mask2FormerForUniversalSegmentation were not initialized from the model checkpoint at facebook/mask2former-swin-tiny-coco-instance and are newly initialized because the shapes did not match:
- class_predictor.bias: found shape torch.Size([81]) in the checkpoint and torch.Size([3]) in the model instantiated
- class_predictor.weight: found shape torch.Size([81, 256]) in the checkpoint and torch.Size([3, 256]) in the model instantiated
- criterion.empty_weight: found shape torch.Size([81]) in the checkpoint and torch.Size([3]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
`use_fast` is set to `True` but the image processor class does not have a fast version.  Falling back to the slow version.


In [24]:
train_augment_and_transform = A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.HueSaturationValue(p=0.1),
        ],
    )
validation_transform = A.Compose(
    [A.NoOp()],
)
