# Ищем мусор на побережье Камчатки по фоткам с дрона

Мы обучили бейзлайн-модель для поиска и сегментации мусора. Она была обучена на смеси датасета, который предоставлен вам как обучающий и на наших дополнительных данных.
Этот пример предназначен только для того, чтобы продемонстрировать как собрать данные для отправки, вы не обязаны использовать архитектуру/фреймворк/whatsoever. Более того, так как у вас нет наших весов и части функций, этот пример невозможно будет просто взять и завести на вашей машине, модель придется менять.

### Импорты, функции

In [3]:
import torch, torchvision

In [4]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

<Logger detectron2 (DEBUG)>

In [5]:
import numpy as np
import os, json, cv2, random, yaml

In [6]:
from tqdm.auto import tqdm
from matplotlib import pyplot as plt
%matplotlib inline

In [7]:
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.utils.visualizer import ColorMode
from detectron2.engine import default_setup

In [8]:
def setup(path_to_subconfig, load_from_checkpoint=None):
    """
    Create configs and perform basic setups.
    """

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    
    cfg.MODEL.BACKBONE.IN_CHANNELS = None
    cfg.MODEL.BACKBONE.NUM_CLASSES = None
    cfg.MODEL.BACKBONE.TYPE = None
    cfg.merge_from_file(path_to_subconfig)

    if load_from_checkpoint is not None:
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(load_from_checkpoint)

    default_setup(cfg, None)
    return cfg

In [9]:
from skimage.io import imread

In [10]:
def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False):
    """
    Array of IoU for each (non ignored) class
    """
    if not per_image:
        preds, labels = (preds,), (labels,)
    ious = []
    for pred, label in zip(preds, labels):
        iou = []
        ran = C if isinstance(C, list) else range(C)
        for i in ran:
            if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes)
                intersection = ((label == i) & (pred == i)).sum()
                union = ((label == i) | ((pred == i) & (label != ignore))).sum()
                if not union:
                    iou.append(EMPTY)
                else:
                    iou.append(float(intersection) / float(union))
        ious.append(iou)
    ious = [np.mean(iou) for iou in zip(*ious)] # mean accross images if per_image
    return 100 * np.array(ious)

def get_iou(preds, labels, label_to_calculate=None):
    C = preds.shape[1]
    preds = torch.argmax(preds, 1)
    if label_to_calculate is not None:
        return iou(preds, labels[:, 0], [label_to_calculate,]).mean()
    else:
        return iou(preds, labels[:, 0], C)[1:].mean() # ignoiring background label.

### Конфигурируем модель и параметры датасета

In [11]:
cfg = setup('detectron_config.yaml', 'COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml')

[32m[12/05 10:54:01 detectron2]: [0mRank of current process: 0. World size: 1
[32m[12/05 10:54:01 detectron2]: [0mRank of current process: 0. World size: 1
[32m[12/05 10:54:02 detectron2]: [0mEnvironment info:
----------------------  ----------------------------------------------------------------------
sys.platform            linux
Python                  3.9.5 (default, Jun  4 2021, 12:28:51) [GCC 7.5.0]
numpy                   1.19.2
detectron2              0.6 @/home/miniconda3/lib/python3.9/site-packages/detectron2
Compiler                GCC 8.4
CUDA compiler           not available
DETECTRON2_ENV_MODULE   <not set>
PyTorch                 1.10.0+cu102 @/home/miniconda3/lib/python3.9/site-packages/torch
PyTorch debug build     False
GPU available           Yes
GPU 0,1                 Tesla V100S-PCIE-32GB (arch=7.0)
Driver version          470.57.02
CUDA_HOME               None - invalid!
Pillow                  8.4.0
torchvision             0.11.1+cu102 @/home/miniconda3/l

In [12]:
MetadataCatalog.get("waste").set(thing_classes=['metall', 'net', 'plastic', 'wood'])
waste_metadata = MetadataCatalog.get("waste")

Критично важная функция для отправки своих результатов: Run Length Encoding масок.

In [13]:
def rle(inarray):
    """ run length encoding. Partial credit to R rle function. 
        Multi datatype arrays catered for including non Numpy
        returns: tuple (runlengths, startpositions, values) """
    ia = np.asarray(inarray)                # force numpy
    n = len(ia)
    if n == 0: 
        return (None, None, None)
    else:
        y = ia[1:] != ia[:-1]               # pairwise unequal (string safe)
        i = np.append(np.where(y), n - 1)   # must include last element posi
        z = np.diff(np.append(-1, i))       # run lengths
        p = np.cumsum(np.append(0, z))[:-1] # positions
        return(z, p, ia[i])

def rlencode_mask(mask):
    l,s,v = rle(mask.flatten()) # length, starts, values
    l,s = l[v], s[v]
    encoded = ' '.join([' '.join(map(str, e)) for e in zip(s, l)])
    if not encoded:
        encoded = '0 0'
    return encoded

### Грузим веса, инициализируем инференс-класс

In [14]:
cfg.MODEL.WEIGHTS = "model_final.pth" # path to the model we just trained
# cfg.MODEL.WEIGHTS = "mymodel.pth" # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
cfg.MODEL.DEVICE = 'cuda:1'
predictor = DefaultPredictor(cfg)

[32m[12/05 10:54:08 fvcore.common.checkpoint]: [0m[Checkpointer] Loading from model_final.pth ...


In [49]:
predictor.model

GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res2): Sequential(
        (0): BottleneckBlock

In [None]:
GeneralizedRCNNWithTTA(cfg, model)

### Просматриваем, тестируем и формируем посылку
Ниже три секции, которые делают следующее:
1. Просто визуализирует результаты
2. Визуализирует на два фрейма предсказанную и истинную маски для отладки, показывает IoU скор по классам. (Он не равен Dice но они тесно связаны).
3. Формирует csv файл посылки.

Критично важной для вас является только третья ячейка, остальные оставлены здесь, для возможности визуальной оценки работы предложенного бейзлайна.

In [16]:
data_folder = './metall/'
# data_folder = './data/'

In [29]:
def convert(size, box):
    dw = 1./size[0]
    dh = 1./size[1]
    x = (box[0] + box[1])/2.0
    y = (box[2] + box[3])/2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)

# im=Image.open(img_path)
# w= int(im.size[0])
# h= int(im.size[1])


# print(xmin, xmax, ymin, ymax) #define your x,y coordinates
# b = (xmin, xmax, ymin, ymax)
# bb = convert((3648,5472), b)

In [46]:
# from glob import glob

# for i in sorted(glob(data_folder+'/*JPG')):
#     print(i)
    im = cv2.imread(i) 
    print(im.shape)
    
    outputs = predictor(im)  
    v = Visualizer(im[:, :, ::-1],
                   metadata=waste_metadata, 
                   scale=0.5, 
                   instance_mode=ColorMode.IMAGE_BW
    )
    # print(outputs["instances"])
    # print(list(np.array(np.array(outputs["instances"].pred_boxes)[0].to('cpu'))))
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
#     # plt.figure(figsize=(16, 16))
#     # plt.xticks([])
#     # plt.yticks([])
#     # plt.imshow(out.get_image()[:, :, ::-1])
#     # plt.show()
#     with open(i.replace('JPG', 'txt')) as f:
#         data = f.read()
#     with open(i.replace('JPG', 'txt'), 'a') as f:
#         for class_num, bbox in zip(list(np.array(outputs["instances"].pred_classes.to('cpu'))), 
#                                    np.array(outputs["instances"].pred_boxes)):
#             # print(bbox[1], bbox[3])
#             yolo_coords = [str(coord) for coord in convert((5472,3648), [bbox[0].to('cpu').item(), bbox[2].to('cpu').item(), 
#                                                    bbox[1].to('cpu').item(), bbox[3].to('cpu').item()])]
#             print(str(class_num) +' '+ ' '.join(yolo_coords)+'\n')
            
#             f.write(str(class_num) +' '+ ' '.join(yolo_coords)+'\n')
    

./metall/21_image.JPG
(3648, 5472, 3)
3 0.9190564964249817 0.035097488185815644 0.0072983412714729525 0.018663895757574784

./metall/22_image.JPG
(3648, 5472, 3)
3 0.4722965084321317 0.6386618697852419 0.027770906861065422 0.0173170524731017

3 0.20254925398798712 0.149352981333147 0.04919905411569696 0.026382370998984887

3 0.3238252450151053 0.6170890875029982 0.011230892605251735 0.03965177034076891

3 0.31001540513066517 0.7386626862643058 0.008367214983666849 0.03484284250359786

3 0.2215366586607102 0.2860223703217088 0.024317758125171327 0.016174349868506716

3 0.28480977063987684 0.20690393447875977 0.020614780180635506 0.015234445270739102

3 0.3261647252311483 0.6496932715700383 0.02361906620494106 0.017276027746367873

2 0.5465155484383566 0.44048148707339635 0.007309272275333516 0.011827836956894188

3 0.03195318980523717 0.9765766545345909 0.03893834945054082 0.04004943579958196

3 0.5729184959367004 0.35830226697419815 0.03541431092379386 0.019550992731462442

3 0.2532137

In [47]:
!cp -r ./metall/*JPG ./yolo/train/images
!cp -r ./metall/*txt ./yolo/train/labels

In [18]:
for i in range(0, 3):
    im = cv2.imread(os.path.join(data_folder, f'{i:02}_image.JPG')) 
    
    outputs = predictor(im)  
    v = Visualizer(im[:, :, ::-1],
                   metadata=waste_metadata, 
                   scale=0.5, 
                   instance_mode=ColorMode.IMAGE_BW
    )
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=(16, 16))
    plt.xticks([])
    plt.yticks([])
    plt.imshow(out.get_image()[:, :, ::-1])
    plt.show()

AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
# for i in tqdm(range(0, 3)):
#     im = cv2.imread(os.path.join(data_folder, f'{i:02}_image.JPG'))  
#     outputs = predictor(im)
    
#     predicted_mask = np.zeros(im.shape[:-1], np.uint8)
#     for c, m in zip(outputs['instances'].pred_classes.detach().cpu().numpy(), 
#                     outputs['instances'].pred_masks.detach().cpu().numpy()):
#         predicted_mask[m] = c+1
    
#     expected_mask = np.zeros(im.shape[:-1], np.uint8)
#     for class_id, class_name in enumerate(waste_metadata.thing_classes, start=1):
#         addr = os.path.join(data_folder, f'{i:02}_{class_name}.png')
#         if os.path.exists(addr):
#             img = imread(addr)
#             mask = (img.sum(-1) > 0)
#             expected_mask[mask] = class_id
#     fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(16,10))
#     ax1.imshow(predicted_mask)
#     ax2.imshow(expected_mask)
#     plt.show()
#     print(iou(predicted_mask, expected_mask, C=[1,2,3,4]))

In [18]:
# lines = ['Type_Id,Mask\n']
# for i in tqdm(range(0, 32)):
#     im = cv2.imread(os.path.join(data_folder, f'{i:02}_image.JPG'))
#     if im is not None:
#         outputs = predictor(im)
#         predicted_mask = np.zeros(im.shape[:-1], np.uint8)
#         for c, m in zip(outputs['instances'].pred_classes.detach().cpu().numpy(), 
#                         outputs['instances'].pred_masks.detach().cpu().numpy()):
#             predicted_mask[m] = c+1
#     for class_id, class_name in enumerate(waste_metadata.thing_classes, start=1):
#         if im is not None:
#             encoded = f'{class_name}_{i},' + rlencode_mask(predicted_mask == class_id) + f'\n'
#         else:
#             encoded = f'{class_name}_{i},' + '0 0' + f'\n'
#         lines.append(encoded)

# with open('baseline_solution.csv', 'w') as f:
#     f.writelines(lines)

  0%|          | 0/32 [00:00<?, ?it/s]