In [None]:
import torch
import torchvision
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from pycocotools.coco import COCO
import os
from PIL import Image

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
class CustomCocoDataset(torch.utils.data.Dataset):
    """COCO-format instance-segmentation dataset that yields Mask R-CNN style targets.

    Every item is a pair ``(img, target)``. ``img`` is the RGB PIL image (or
    whatever ``transform`` returns for it) and ``target`` is a dict holding:

    * ``boxes``  -- float32 tensor ``[N, 4]`` in ``(x1, y1, x2, y2)`` format,
    * ``labels`` -- int64 tensor ``[N]`` with the raw COCO ``category_id`` values,
    * ``masks``  -- uint8 tensor ``[N, H, W]``, one mask per kept annotation.

    Args:
        root: Directory containing the image files.
        annFile: Path to the COCO annotation JSON file.
        transform: Optional callable ``(img, target) -> (img, target)``.
    """

    def __init__(self, root, annFile, transform=None):
        self.root = root
        self.coco = COCO(annFile)
        # Sorted ids give a deterministic index -> image mapping.
        self.ids = sorted(self.coco.imgs.keys())
        self.transform = transform

    def __getitem__(self, index):
        """Load the image at position ``index`` and build its target dict.

        Annotations whose box has non-positive width or height are skipped,
        because torchvision detection models reject such boxes in training.
        Images with no (remaining) annotations still get correctly shaped
        empty tensors: ``boxes`` is ``[0, 4]`` and ``masks`` is ``[0, H, W]``.
        """
        coco = self.coco
        img_id = self.ids[index]
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        file_name = coco.loadImgs(img_id)[0]['file_name']

        # Image
        img = Image.open(os.path.join(self.root, file_name)).convert("RGB")

        # Annotations
        boxes = []
        labels = []
        masks = []
        for ann in anns:
            x, y, w, h = ann['bbox']  # COCO stores boxes as [x, y, width, height]
            if w <= 0 or h <= 0:
                continue  # degenerate box, see docstring
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])
            # Convert each mask on its own and stack later; building one tensor
            # from a list of ndarrays is the slow path PyTorch warns about.
            masks.append(torch.as_tensor(coco.annToMask(ann), dtype=torch.uint8))

        # reshape(-1, 4) keeps the [0, 4] shape when there are no boxes.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        if masks:
            masks = torch.stack(masks)
        else:
            width, height = img.size  # PIL reports (width, height)
            masks = torch.zeros((0, height, width), dtype=torch.uint8)

        target = {
            'boxes': boxes,
            'labels': labels,
            'masks': masks,
        }

        if self.transform is not None:
            img, target = self.transform(img, target)

        return img, target

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)


In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="aN3GiBEKIwWclb6IUSNG")
project = rf.workspace("dovgal-vladislav").project("vineyards-erbnx")
version = project.version(1)
dataset = version.download("coco")

Collecting roboflow
  Downloading roboflow-1.1.37-py3-none-any.whl.metadata (9.4 kB)
Collecting chardet==4.0.0 (from roboflow)
  Downloading chardet-4.0.0-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting python-dotenv (from roboflow)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting requests-toolbelt (from roboflow)
  Downloading requests_toolbelt-1.0.0-py2.py3-none-any.whl.metadata (14 kB)
Collecting filetype (from roboflow)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading roboflow-1.1.37-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading chardet-4.0.0-py2.py3-none-any.whl (178 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.7/178.7 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)


Downloading Dataset Version Zip in vineyards-1 to coco:: 100%|██████████| 24449/24449 [00:00<00:00, 28049.94it/s]





Extracting Dataset Version Zip to vineyards-1 in coco:: 100%|██████████| 399/399 [00:00<00:00, 2229.10it/s]


In [None]:
# Each split lives in its own sub-directory of the export, next to its
# `_annotations.coco.json`; build the train split first, then the valid split.
data_dir = "vineyards-1"
train_dataset, val_dataset = (
    CustomCocoDataset(
        root=os.path.join(data_dir, split),
        annFile=os.path.join(data_dir, split, "_annotations.coco.json"),
    )
    for split in ("train", "valid")
)

loading annotations into memory...
Done (t=0.35s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


In [None]:
def collate_fn(batch):
    """Collate ``(image, target)`` samples into a batch for a detection model.

    Images are returned as a *list* of tensors rather than one stacked tensor:
    torchvision detection models accept a list, and stacking would fail as soon
    as two images differ in size. Nothing is moved to the compute device here;
    the training/inference loops do that themselves, and touching CUDA inside
    a collate function breaks ``DataLoader`` worker processes.

    Args:
        batch: Sequence of ``(image, target)`` pairs. ``image`` is a PIL image
            (or ndarray) or an already converted tensor; ``target`` is a dict
            of tensors.

    Returns:
        ``(images, targets)`` -- a list of image tensors and the list of
        target dicts, in the same order as ``batch``.
    """
    if not batch:
        return [], []

    # Split images from targets
    images, targets = zip(*batch)

    # Convert images to tensors; leave samples that a dataset transform has
    # already converted untouched (F.to_tensor raises on tensor input).
    images = [
        img if isinstance(img, torch.Tensor) else F.to_tensor(img)
        for img in images
    ]

    return images, list(targets)

In [None]:
# Batches of four samples; only the training split is reshuffled each epoch.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=4,
    collate_fn=collate_fn,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=4,
    collate_fn=collate_fn,
    shuffle=False,
)

In [None]:
# Build the model: COCO-pretrained Mask R-CNN with its prediction heads resized
# for this dataset. `weights=` replaces the deprecated `pretrained=True`; per the
# deprecation warning COCO_V1 is exactly what `pretrained=True` used to load.
model = maskrcnn_resnet50_fpn(
    weights=torchvision.models.detection.MaskRCNN_ResNet50_FPN_Weights.COCO_V1
)

# Labels are raw COCO category ids, so the heads need an output for every id
# from 0 up to the largest one. Never smaller than the previous
# `len(ids) + 1` (background included), and also safe for non-contiguous ids.
cat_ids = train_dataset.coco.getCatIds()
num_classes = max(len(cat_ids), max(cat_ids, default=0)) + 1

# Box head: the classifier AND the per-class box regressor must agree on the
# number of classes. Replacing only `cls_score` leaves `bbox_pred` with the 91
# COCO classes, which pairs scores with the wrong boxes at inference time.
box_predictor = model.roi_heads.box_predictor
in_features = box_predictor.cls_score.in_features
box_predictor.cls_score = torch.nn.Linear(in_features, num_classes)
box_predictor.bbox_pred = torch.nn.Linear(in_features, num_classes * 4)

# Mask head: one output mask channel per class.
mask_predictor = model.roi_heads.mask_predictor
mask_in_channels = mask_predictor.mask_fcn_logits.in_channels
mask_predictor.mask_fcn_logits = torch.nn.Conv2d(mask_in_channels, num_classes, kernel_size=(1, 1))

model.to(device)
model.train();  # trailing ';' keeps the full module repr out of the cell output

The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=MaskRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.
Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
100%|██████████| 170M/170M [00:01<00:00, 148MB/s]


MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [None]:
# Optimizer: SGD with momentum and weight decay over every parameter that
# still requires gradients. The losses themselves are computed by the model.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

In [None]:
# Training
num_epochs = 10

# The model only returns a loss dict in training mode. Set it here so this cell
# still works when it is re-run after the inference cell switched to eval().
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0

    for images, targets in train_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of loss terms; optimise their sum.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    # Report the epoch mean instead of the last batch only; max(..., 1) avoids a
    # division by zero (and the former NameError) when the loader is empty.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss:.4f}")

# Save the trained weights
torch.save(model.state_dict(), "maskrcnn_model.pth")


Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:274.)


In [None]:
# Inference: switch to evaluation mode and run the validation batches with
# gradient tracking disabled.
model.eval()
with torch.no_grad():
    for images, _ in val_loader:
        images = [image.to(device) for image in images]
        outputs = model(images)
        # post-process the inference results here