# Ищем мусор на побережье Камчатки по фоткам с дрона

Мы обучили бейзлайн-модель для поиска и сегментации мусора. Она была обучена на смеси датасета, который предоставлен вам как обучающий, и наших дополнительных данных.
Этот пример предназначен только для того, чтобы продемонстрировать, как собрать данные для отправки; вы не обязаны использовать ту же архитектуру/фреймворк/whatsoever. Более того, так как у вас нет наших весов и части функций, этот пример невозможно будет просто взять и запустить на вашей машине: модель придётся менять.

### Импорты, функции

In [1]:
import torch, torchvision

In [2]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

<Logger detectron2 (DEBUG)>

In [3]:
import numpy as np
import os, json, cv2, random, yaml

In [4]:
from tqdm.auto import tqdm
from matplotlib import pyplot as plt
%matplotlib inline

In [5]:
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.utils.visualizer import ColorMode
from detectron2.engine import default_setup

In [6]:
def setup(path_to_subconfig, load_from_checkpoint=None):
    """
    Build the detectron2 config used by this notebook and run default setup.

    The model-zoo Mask R-CNN R50-FPN 3x config is used as the base, the YAML
    file at ``path_to_subconfig`` is merged on top of it, and
    ``default_setup`` is called on the result.

    Args:
        path_to_subconfig: path to a YAML file merged over the base config.
        load_from_checkpoint: optional model-zoo config path; when given,
            ``MODEL.WEIGHTS`` is set to the URL returned by
            ``model_zoo.get_checkpoint_url`` for it.

    Returns:
        The merged config object.
    """
    base_config_file = model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    )

    cfg = get_cfg()
    cfg.merge_from_file(base_config_file)

    # Extra backbone keys are created before the sub-config is merged
    # (presumably so that the sub-config is allowed to set them -- TODO confirm).
    backbone_cfg = cfg.MODEL.BACKBONE
    backbone_cfg.IN_CHANNELS = None
    backbone_cfg.NUM_CLASSES = None
    backbone_cfg.TYPE = None
    cfg.merge_from_file(path_to_subconfig)

    if load_from_checkpoint is not None:
        checkpoint_url = model_zoo.get_checkpoint_url(load_from_checkpoint)
        cfg.MODEL.WEIGHTS = checkpoint_url

    # No parsed command-line arguments are available here, hence ``None``.
    default_setup(cfg, None)
    return cfg

In [7]:
from skimage.io import imread

In [8]:
def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False):
    """
    Array of IoU for each (non ignored) class
    """
    if not per_image:
        preds, labels = (preds,), (labels,)
    ious = []
    for pred, label in zip(preds, labels):
        iou = []
        ran = C if isinstance(C, list) else range(C)
        for i in ran:
            if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes)
                intersection = ((label == i) & (pred == i)).sum()
                union = ((label == i) | ((pred == i) & (label != ignore))).sum()
                if not union:
                    iou.append(EMPTY)
                else:
                    iou.append(float(intersection) / float(union))
        ious.append(iou)
    ious = [np.mean(iou) for iou in zip(*ious)] # mean accross images if per_image
    return 100 * np.array(ious)

def get_iou(preds, labels, label_to_calculate=None):
    """
    Mean IoU (percent) between score maps and ground-truth labels.

    ``preds`` is reduced with ``argmax`` over axis 1, whose length is taken as
    the class count; ``labels`` is indexed as ``labels[:, 0]``.

    Args:
        preds: per-class scores with the class axis at position 1.
        labels: ground-truth labels; index 0 along axis 1 is used.
        label_to_calculate: when given, only this class id is scored;
            otherwise every class except index 0 is averaged.

    Returns:
        The mean of the selected per-class IoU values.
    """
    num_classes = preds.shape[1]
    hard_preds = torch.argmax(preds, 1)
    targets = labels[:, 0]

    if label_to_calculate is None:
        # Class 0 is treated as background and left out of the average.
        per_class = iou(hard_preds, targets, num_classes)
        return per_class[1:].mean()

    return iou(hard_preds, targets, [label_to_calculate,]).mean()

### Конфигурируем модель и параметры датасета

In [9]:
# Build the config from the local sub-config file; the second argument is the
# model-zoo entry whose checkpoint URL setup() stores in cfg.MODEL.WEIGHTS.
cfg = setup('detectron_config.yaml', 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')
# Alternative: same sub-config with the R101 checkpoint.
# cfg = setup('detectron_config.yaml', 'COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml')

[32m[12/05 09:49:15 detectron2]: [0mRank of current process: 0. World size: 1
[32m[12/05 09:49:15 detectron2]: [0mRank of current process: 0. World size: 1
[32m[12/05 09:49:16 detectron2]: [0mEnvironment info:
----------------------  ----------------------------------------------------------------------
sys.platform            linux
Python                  3.9.5 (default, Jun  4 2021, 12:28:51) [GCC 7.5.0]
numpy                   1.19.2
detectron2              0.6 @/home/miniconda3/lib/python3.9/site-packages/detectron2
Compiler                GCC 8.4
CUDA compiler           not available
DETECTRON2_ENV_MODULE   <not set>
PyTorch                 1.10.0+cu102 @/home/miniconda3/lib/python3.9/site-packages/torch
PyTorch debug build     False
GPU available           Yes
GPU 0,1                 Tesla V100S-PCIE-32GB (arch=7.0)
Driver version          470.57.02
CUDA_HOME               None - invalid!
Pillow                  8.4.0
torchvision             0.11.1+cu102 @/home/miniconda3/l

In [10]:
# Attach the four class names to the "waste" metadata entry and keep a handle
# to it; later cells use waste_metadata.thing_classes for class id <-> name.
MetadataCatalog.get("waste").set(thing_classes=['metall', 'net', 'plastic', 'wood'])
waste_metadata = MetadataCatalog.get("waste")

Критически важная функция для отправки своих результатов: Run Length Encoding (RLE) масок.

In [11]:
def rle(inarray):
    """Run-length encode a sequence (partial credit to the R ``rle`` function).

    The input is converted with ``np.asarray``, so non-numpy sequences of any
    comparable dtype are accepted.

    Returns:
        A tuple ``(run_lengths, start_positions, values)`` with one entry per
        run, or ``(None, None, None)`` when the input is empty.
    """
    arr = np.asarray(inarray)
    size = len(arr)
    if size == 0:
        return (None, None, None)

    # True wherever an element differs from its successor (string safe).
    changes = arr[1:] != arr[:-1]
    # Index of the last element of every run; the final element always ends one.
    run_ends = np.append(np.where(changes), size - 1)
    # Distance between consecutive run ends, counting from a virtual end at -1.
    run_lengths = np.diff(np.append(-1, run_ends))
    # A run starts ``length - 1`` positions before it ends.
    run_starts = run_ends - run_lengths + 1
    return (run_lengths, run_starts, arr[run_ends])

def rlencode_mask(mask):
    """
    Encode the foreground of a mask as a run-length string.

    The mask is flattened in numpy's default (row-major) order and every run
    of truthy pixels is written as ``"<start> <length>"`` with a zero-based
    start; pairs are joined by single spaces.

    Args:
        mask: array-like mask; any non-zero value counts as foreground.

    Returns:
        The encoded string, or ``'0 0'`` when the mask has no foreground
        (including an empty mask).
    """
    # Force a boolean view: the run values are used below as a boolean
    # selector, and an integer mask (0/1, 0/255) would otherwise be treated
    # as fancy indices.
    flat = np.asarray(mask).astype(bool).ravel()

    lengths, starts, values = rle(flat)
    if lengths is None:
        # rle() reports an empty input as (None, None, None).
        return '0 0'

    # Keep only the runs whose value is True, i.e. the foreground runs.
    lengths, starts = lengths[values], starts[values]
    encoded = ' '.join(f'{start} {length}' for start, length in zip(starts, lengths))
    return encoded or '0 0'

### Грузим веса, инициализируем инференс-класс

In [12]:
# Point the config at a local checkpoint (this replaces the model-zoo URL that
# setup() stored in MODEL.WEIGHTS) and build the inference wrapper from it.
cfg.MODEL.WEIGHTS = "model_final.pth" # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

[32m[12/05 09:49:20 fvcore.common.checkpoint]: [0m[Checkpointer] Loading from model_final.pth ...


### convert binary imgs to COCO format and train

In [13]:
# import cv2

# im = cv2.imread('data/00_net.png', 0)

In [14]:
# img_paths

In [15]:
# cfg.get("DEVICE")

In [16]:
# import json
# import numpy as np
# from pycocotools import mask
# from skimage import measure
# from glob import glob
# from detectron2.structures import BoxMode

# class_convert = {'metall':0, 'net':1, 'plastic':2, 'wood':3}
# img_paths = [img_path for img_path in glob('data/*') if 'JPG' in img_path]
# dataset = []
# count = 112
# for im_path in img_paths:
#     im_num = im_path.split('/')[-1].split('_')[0]
#     img_masks_paths = [img_path for img_path in glob('data/*') if 'png' in img_path and im_num in img_path]
#     annotation_base = {
#             "file_name": im_path, 
#             "image_id": count,
#             "annotations": []\
#     }
#     for mask_path in img_masks_paths:
#         class_name = mask_path.split('_')[-1].split('.')[0]
#         im = cv2.imread(mask_path, 0)
#         ground_truth_binary_mask = im

#         fortran_ground_truth_binary_mask = np.asfortranarray(ground_truth_binary_mask)
#         encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
#         ground_truth_area = mask.area(encoded_ground_truth)
#         ground_truth_bounding_box = mask.toBbox(encoded_ground_truth)
#         print(ground_truth_bounding_box)
#         break
#     break
#         contours = measure.find_contours(ground_truth_binary_mask, 0.5)

#         orig_im_path = im_path.split('_')[0]+'_'+'image.JPG'

#         annotation = {
#                     "segmentation": [],
#                     "area": ground_truth_area.tolist(),
#                     "iscrowd": 0,
#                     "bbox": ground_truth_bounding_box.tolist(),
#                     'bbox_mode': BoxMode.XYXY_ABS,
#                     "category_id": class_convert[class_name],
#                     }

#         for contour in contours:
#             contour = np.flip(contour, axis=1)
#             segmentation = contour.ravel().tolist()
#             annotation["segmentation"].append(segmentation)
#         annotation_base['annotations'].append(annotation)
    
#     # dataset.append(json.dumps(annotation_base, indent=4))
#     dataset.append(annotation_base)
#     # with open('coco_format_masks/'+json_name, 'w') as f:
#     #     f.write(json.dumps(annotation, indent=4))
#     # print()
#     count += 1

In [17]:
import json

def load_data(t="train"):
    """
    Load the annotation JSON that belongs to a dataset name.

    Args:
        t: dataset name, either ``"waste_train"`` or ``"waste_test"``.
            Note that the default value ``"train"`` is not one of them, so
            the name has to be passed explicitly.

    Returns:
        The parsed JSON content of the matching file.

    Raises:
        ValueError: if ``t`` is not a known dataset name.
    """
    json_by_name = {
        "waste_train": "image-to-coco-json-converter/output/train.json",
        "waste_test": "image-to-coco-json-converter/output/val.json",
    }
    try:
        json_path = json_by_name[t]
    except (KeyError, TypeError):
        # Fail loudly instead of returning None, which only surfaces later
        # as an unrelated error at the first subscript of the result.
        known = ", ".join(sorted(json_by_name))
        raise ValueError(f"unknown dataset name {t!r}; expected one of: {known}") from None

    # JSON files are UTF-8; do not depend on the locale's default encoding.
    with open(json_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [18]:
from detectron2.structures import BoxMode

def _build_detectron_records(coco):
    """
    Turn a loaded annotation JSON into a list of per-image records.

    Every entry of ``coco['images']`` is updated in place: ``'img'`` is
    replaced by ``'image'`` in its ``file_name`` and it receives an
    ``'annotations'`` list holding the entries of ``coco['annotations']``
    whose ``image_id`` equals the image's position in ``coco['images']``.
    Matched annotations get ``category_id`` shifted down by one and a
    ``bbox_mode`` field.

    Args:
        coco: dict with ``'images'`` and ``'annotations'`` lists.

    Returns:
        The list of updated image records, in file order.
    """
    # Group the annotations once instead of rescanning them for every image.
    annots_by_image = {}
    for annot in coco['annotations']:
        annots_by_image.setdefault(annot['image_id'], []).append(annot)

    records = []
    # NOTE(review): images are matched by position, which assumes image ids
    # are exactly 0..N-1 in file order -- confirm against the converter.
    for index, record in enumerate(coco['images']):
        record['file_name'] = record['file_name'].replace('img', 'image')
        annots_list = annots_by_image.get(index, [])
        for annot in annots_list:
            annot['category_id'] = annot['category_id'] - 1
            # NOTE(review): COCO JSON conventionally stores bbox as
            # [x, y, width, height] (BoxMode.XYWH_ABS); confirm that the
            # converter really emits corner coordinates.
            annot['bbox_mode'] = BoxMode.XYXY_ABS
        # Always set the key, even when the image has no annotations at all.
        record['annotations'] = annots_list
        records.append(record)
    return records


data = load_data('waste_train')
train_dataset = _build_detectron_records(data)

data = load_data('waste_test')
val_dataset = _build_detectron_records(data)

In [19]:
import json
from detectron2.data import MetadataCatalog, DatasetCatalog

# Register both splits under their names. Each loader is a lambda that returns
# the module-level list built above; its ``d`` default argument is not used.
# The name is looked up when the lambda is called, not when it is registered.
DatasetCatalog.register('waste_train', lambda d='waste_train': train_dataset)
DatasetCatalog.register('waste_test', lambda d='waste_test': val_dataset)
# Same class names, in the same order, for both splits.
MetadataCatalog.get('waste_train').set(thing_classes=['metall', 'net', 'plastic', 'wood'])
MetadataCatalog.get('waste_test').set(thing_classes=['metall', 'net', 'plastic', 'wood'])
# metadata = MetadataCatalog.get("waste_train")

namespace(name='waste_test',
          thing_classes=['metall', 'net', 'plastic', 'wood'])

In [22]:
# Echo the merged config so the effective settings can be inspected.
cfg

CfgNode({'VERSION': 2, 'MODEL': CfgNode({'LOAD_PROPOSALS': False, 'MASK_ON': True, 'KEYPOINT_ON': False, 'DEVICE': 'cuda', 'META_ARCHITECTURE': 'GeneralizedRCNN', 'WEIGHTS': 'model_final.pth', 'PIXEL_MEAN': [103.53, 116.28, 123.675], 'PIXEL_STD': [1.0, 1.0, 1.0], 'BACKBONE': CfgNode({'NAME': 'build_resnet_fpn_backbone', 'FREEZE_AT': 2, 'IN_CHANNELS': None, 'NUM_CLASSES': None, 'TYPE': None}), 'FPN': CfgNode({'IN_FEATURES': ['res2', 'res3', 'res4', 'res5'], 'OUT_CHANNELS': 256, 'NORM': '', 'FUSE_TYPE': 'sum'}), 'PROPOSAL_GENERATOR': CfgNode({'NAME': 'RPN', 'MIN_SIZE': 0}), 'ANCHOR_GENERATOR': CfgNode({'NAME': 'DefaultAnchorGenerator', 'SIZES': [[32], [64], [128], [256], [512]], 'ASPECT_RATIOS': [[0.5, 1.0, 2.0]], 'ANGLES': [[-90, 0, 90]], 'OFFSET': 0.0}), 'RPN': CfgNode({'HEAD_NAME': 'StandardRPNHead', 'IN_FEATURES': ['p2', 'p3', 'p4', 'p5', 'p6'], 'BOUNDARY_THRESH': -1, 'IOU_THRESHOLDS': [0.3, 0.7], 'IOU_LABELS': [0, -1, 1], 'BATCH_SIZE_PER_IMAGE': 256, 'POSITIVE_FRACTION': 0.5, 'BBOX_

In [20]:
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.engine import DefaultTrainer

  
# Re-apply the class names to both splits; ``metadata`` ends up bound to the
# entry of the last name in the list ("waste_test").
if __name__ == '__main__':
    for d in ["waste_train", "waste_test"]:
        # DatasetCatalog.register(d, lambda d=d: load_data(d))
        MetadataCatalog.get(d).set(thing_classes=['metall', 'net', 'plastic', 'wood'])
        metadata = MetadataCatalog.get(d)
        
# NOTE: the trainer is created and run outside the ``__main__`` guard above.
# It is built from the same ``cfg`` object the predictor cell modified, so
# MODEL.WEIGHTS is "model_final.pth" at this point.
trainer = DefaultTrainer(cfg)
# resume=False: presumably start from cfg.MODEL.WEIGHTS rather than from the
# last checkpoint in the output directory -- see detectron2 docs.
trainer.resume_or_load(resume=False)
trainer.train()

[32m[12/05 09:49:21 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[12/05 09:49:52 d2.utils.events]: [0m eta: 0:47:16  iter: 19  total_loss: 9.567  loss_cls: 1.161  loss_box_reg: 0.06839  loss_mask: 4.353  loss_rpn_cls: 3.096  loss_rpn_loc: 0.6894  time: 1.4373  data_time: 0.7628  lr: 4.9953e-06  max_mem: 8785M
[32m[12/05 09:49:52 d2.utils.events]: [0m eta: 0:47:16  iter: 19  total_loss: 9.567  loss_cls: 1.161  loss_box_reg: 0.06839  loss_mask: 4.353  loss_rpn_cls: 3.096  loss_rpn_loc: 0.6894  time: 1.4373  data_time: 0.7628  lr: 4.9953e-06  max_mem: 8785M
[32m[12/05 09:50:21 d2.utils.events]: [0m eta: 0:47:01  iter: 39  total_loss: 4.307  loss_cls: 0.9235  loss_box_reg: 0.08494  loss_mask: 0.9748  loss_rpn_cls: 1.223  loss_rpn_loc: 0.669  time: 1.4403  data_time: 0.6884  lr: 9.9902e-06  max_mem: 8785M
[32m[12/05 09:50:21 d2.utils.events]: [0m eta: 0:47:01  iter: 39  total_loss: 4.307  loss_cls: 0.9235  loss_box_reg: 0.08494  loss_mask: 0.9748  loss_rpn_cls: 1.223  loss_rpn_loc: 0.669  time: 1.4403  data_time: 0.6884  lr: 9.9902e-06  max_m

In [21]:
# Save only the trained model's state_dict. NOTE(review): this writes
# "./mymodel.pth", while the predictor cell loads "model_final.pth".
torch.save(trainer.model.state_dict(), "./mymodel.pth")

### Просматриваем, тестируем и формируем посылку
Ниже три секции, которые делают следующее:
1. Первая просто визуализирует результаты.
2. Вторая выводит рядом на двух панелях предсказанную и истинную маски для отладки и показывает IoU по классам (он не равен Dice, но тесно с ним связан).
3. Третья формирует csv-файл посылки.

Критически важной для вас является только третья ячейка; остальные оставлены здесь, чтобы можно было визуально оценить работу предложенного бейзлайна.

In [None]:
# Folder holding the "<NN>_image.JPG" photos and "<NN>_<class>.png" masks
# that the cells below read.
data_folder = './data/'

In [None]:
# from glob import glob

# for i in sorted(glob(data_folder+'/*'))[:3]:
#     im = cv2.imread(i) 
    
#     outputs = predictor(im)  
#     v = Visualizer(im[:, :, ::-1],
#                    metadata=waste_metadata, 
#                    scale=0.5, 
#                    instance_mode=ColorMode.IMAGE_BW
#     )
#     out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
#     plt.figure(figsize=(16, 16))
#     plt.xticks([])
#     plt.yticks([])
#     plt.imshow(out.get_image()[:, :, ::-1])
#     plt.show()

In [None]:
# Draw the predicted instances on the first three images.
for i in range(0, 3):
    image_path = os.path.join(data_folder, f'{i:02}_image.JPG')
    im = cv2.imread(image_path)  # OpenCV loads images in BGR channel order
    if im is None:
        # cv2.imread returns None for a missing or unreadable file; skip it
        # instead of passing None to the predictor.
        print(f'Skipping {image_path}: image could not be read')
        continue

    outputs = predictor(im)
    # The visualizer works on RGB images, hence the channel flip.
    v = Visualizer(im[:, :, ::-1],
                   metadata=waste_metadata,
                   scale=0.5,
                   instance_mode=ColorMode.IMAGE_BW
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=(16, 16))
    plt.xticks([])
    plt.yticks([])
    # get_image() already returns RGB, which is what plt.imshow expects;
    # flipping the channels again would swap red and blue in the overlay.
    plt.imshow(out.get_image())
    plt.show()

In [None]:
# Debug view: predicted vs. ground-truth label maps side by side, plus
# per-class IoU, for the first three images.
for i in tqdm(range(0, 3)):
    im = cv2.imread(os.path.join(data_folder, f'{i:02}_image.JPG'))
    outputs = predictor(im)
    instances = outputs['instances']
    pred_classes = instances.pred_classes.detach().cpu().numpy()
    pred_masks = instances.pred_masks.detach().cpu().numpy()

    # Label maps use 0 for "nothing" and class index + 1 for each class.
    map_shape = im.shape[:-1]
    predicted_mask = np.zeros(map_shape, np.uint8)
    for c, m in zip(pred_classes, pred_masks):
        # Later instances overwrite earlier ones where their masks overlap.
        predicted_mask[m] = c+1

    expected_mask = np.zeros(map_shape, np.uint8)
    for class_id, class_name in enumerate(waste_metadata.thing_classes, start=1):
        addr = os.path.join(data_folder, f'{i:02}_{class_name}.png')
        if not os.path.exists(addr):
            # No mask file for this class on this image.
            continue
        # A pixel is foreground when its values sum to more than zero over
        # the last axis (assumes the PNG has a channel axis -- TODO confirm).
        foreground = imread(addr).sum(-1) > 0
        expected_mask[foreground] = class_id

    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16,10))
    ax1.imshow(predicted_mask)
    ax2.imshow(expected_mask)
    plt.show()
    print(iou(predicted_mask, expected_mask, C=[1,2,3,4]))

In [None]:
# Build the submission: one "<class>_<image index>,<RLE>" row per class for
# every image index in 0..31, preceded by a header row.
lines = ['Type_Id,Mask\n']
for i in tqdm(range(0, 32)):
    im = cv2.imread(os.path.join(data_folder, f'{i:02}_image.JPG'))

    if im is None:
        # Image could not be read: emit the empty encoding for every class.
        lines.extend(f'{class_name}_{i},0 0\n'
                     for class_name in waste_metadata.thing_classes)
        continue

    outputs = predictor(im)
    instances = outputs['instances']
    pred_classes = instances.pred_classes.detach().cpu().numpy()
    pred_masks = instances.pred_masks.detach().cpu().numpy()

    # Single label map per image: 0 = nothing, class index + 1 otherwise.
    predicted_mask = np.zeros(im.shape[:-1], np.uint8)
    for c, m in zip(pred_classes, pred_masks):
        predicted_mask[m] = c+1

    for class_id, class_name in enumerate(waste_metadata.thing_classes, start=1):
        class_rle = rlencode_mask(predicted_mask == class_id)
        lines.append(f'{class_name}_{i},{class_rle}\n')

with open('baseline_solution.csv', 'w') as f:
    f.writelines(lines)