# Install required packages

In [0]:
!pip install mmcv
!git clone https://github.com/open-mmlab/mmdetection.git
%cd mmdetection/
!pip install -v -e .
# !pip uninstall terminaltables
# !pip install terminaltables
# !pip uninstall imagecorruptions
# !pip install imagecorruptions

# Download pretrained weights, test images, and the class/submission tables

In [0]:
# download submission file, class table
!rm -r /content/submission
!mkdir /content/submission
!wget -O /content/submission/challenge-2019-classes-description-segmentable.csv --no-check-certificate "https://drive.google.com/uc?export=download&id=1z65gsTMJrSDAv_UMkTpphwfcRixn-dUr"
!wget -O /content/submission/sample_empty_submission.csv --no-check-certificate "https://drive.google.com/uc?export=download&id=1n_xfLkbX2a1oQiYl9b3agpjdh5kzs7b4"

# download pretrained model
!rm -rf /content/model
!mkdir /content/model
# # !wget -O /content/model/htc_r101_fpn_20e.pth https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r101_fpn_20e_20190408-a2e586db.pth    
# # !wget -O /content/model/htc_r101_fpn_20e.pth https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_64x4d_fpn_20e_20190408-497f2561.pth
# # !wget -O /content/model/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.pth https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e_20190408-0e50669c.pth
# !wget -O /content/model/htc_x101_32x4d_fpn_20e_16gpu.pth "https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_x101_32x4d_fpn_20e_20190408-9eae4d0b.pth"
!wget -O /content/model/htc_r50_fpn_1x.pth "https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/htc/htc_r50_fpn_1x_20190408-878c1712.pth"

# download test dataset
!rm -rf /content/data
!mkdir /content/data
!mkdir /content/data/test
!wget -O /content/data/test.zip https://datasets.figure-eight.com/figure_eight_datasets/open-images/test_challenge.zip
!unzip -qq /content/data/test.zip -d /content/data/test
!rm /content/data/test.zip

In [0]:
# Mount Google Drive into the Colab runtime at /content/drive.
# The inference cell writes its periodic checkpoint CSVs under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# Import libraries and load the model

In [0]:
# import required libraries
%matplotlib inline
import os, glob, time
import numpy as np
import pandas as pd
import mmcv
from mmdet.apis import init_detector, inference_detector, show_result
import base64
from pycocotools import _mask as coco_mask
import pycocotools.mask as maskUtils
import typing as t
import zlib
import torch
from matplotlib import pyplot as plt
from functools import reduce
from tqdm import tqdm_notebook 

In [0]:
# read images path
def image_id_from_path(path):
  """Return the file name of `path` without its directory and final extension.

  Uses os.path.splitext rather than slicing off a fixed four characters, so
  extensions of any length (".jpeg", ".png", ...) are stripped correctly.
  """
  return os.path.splitext(os.path.basename(path))[0]

# Every file with an extension in the unpacked test folder.
pic_list = glob.glob("/content/data/test/challenge2018/*.*")
file_names = [image_id_from_path(path) for path in pic_list]
print("[Test Image]: ",len(pic_list))

[Test Image]:  99999


In [0]:
def encode_binary_mask(mask: np.ndarray) -> bytes:
  """Convert a binary mask into the OID challenge encoding (base64 ASCII bytes).

  The mask is RLE-encoded with the COCO mask API, zlib-compressed and then
  base64-encoded.

  Args:
    mask: boolean array that is two-dimensional once singleton axes are squeezed out.

  Returns:
    The base64 encoding as `bytes`; callers that need text must `.decode()` it.

  Raises:
    ValueError: if `mask` is not of boolean dtype, or is not 2-D after squeezing.
  """
  # check input mask --
  # np.bool_ exists on every NumPy release, whereas the bare `np.bool` alias was
  # deprecated in NumPy 1.20 and removed in 1.24.
  if mask.dtype != np.bool_:
    raise ValueError(
       "encode_binary_mask expects a binary mask, received dtype == %s" %
       mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    # Wrap the shape in a 1-tuple: `"%s" % mask.shape` would unpack the shape
    # tuple as format arguments and raise TypeError instead of this ValueError.
    raise ValueError(
       "encode_binary_mask expects a 2d mask, received shape == %s" %
       (mask.shape,))

  # convert input mask to expected COCO API input --
  # (height, width, 1) uint8 array in Fortran (column-major) memory order
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)

  return base64_str

In [0]:
# Load the detector used for inference.
# The checkpoint and the config must describe the same architecture; the
# commented lines are alternative pairs (their weights must be downloaded first).

# checkpoint_file = '/content/model/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.pth'
# checkpoint_file = '/content/model/htc_x101_32x4d_fpn_20e_16gpu.pth'
checkpoint_file = '/content/model/htc_r50_fpn_1x.pth'

# config_file = '/content/mmdetection/configs/htc/htc_dconv_c3-c5_mstrain_400_1400_x101_64x4d_fpn_20e.py'
# config_file = '/content/mmdetection/configs/htc/htc_x101_32x4d_fpn_20e_16gpu.py'
config_file = '/content/mmdetection/configs/htc/htc_r50_fpn_1x.py'

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

model = init_detector(config_file, checkpoint_file, device=device)

In [0]:
# Sample submission: provides the ImageID rows that the inference cell fills in.
empty_submission_df = pd.read_csv("/content/submission/sample_empty_submission.csv")

# Class table: read without a header row, then name its two columns.
class_lookup_df = pd.read_csv("/content/submission/challenge-2019-classes-description-segmentable.csv", header=None)
class_lookup_df.columns = ["encoded_label","label"]

# TODO: we have to convert coco classes to this competition's classes (COCO: 80, competition: 300)
# Normalize the human-readable names to lower case with underscores instead of spaces.
class_lookup_df['label'] = class_lookup_df['label'].str.lower().str.replace(' ','_')

In [0]:
# Report how many competition classes were loaded, then list their normalized names.
print(
    "Target classes [%d]"%len(class_lookup_df),
    np.array(class_lookup_df['label'].values),
    sep="\n",
)

Target classes [300]
['screwdriver' 'light_switch' 'doughnut' 'toilet_paper' 'wrench' 'toaster'
 'tennis_ball' 'radish' 'pomegranate' 'kite' 'table_tennis_racket'
 'hamster' 'barge' 'shower' 'printer' 'snowmobile' 'fire_hydrant'
 'limousine' 'whale' 'microwave_oven' 'asparagus' 'lion' 'spatula' 'torch'
 'volleyball' 'ambulance' 'chopsticks' 'raccoon' 'blue_jay' 'lynx' 'dice'
 'filing_cabinet' 'ruler' 'power_plugs_and_sockets' 'bell_pepper'
 'binoculars' 'pretzel' 'hot_dog' 'missile' 'common_fig' 'croissant'
 'adhesive_tape' 'slow_cooker' 'dog_bed' 'harpsichord' 'billiard_table'
 'alpaca' 'harbor_seal' 'grape' 'nail' 'paper_towel' 'alarm_clock'
 'guacamole' 'starfish' 'zebra' 'segway' 'sea_turtle' 'scissors'
 'rhinoceros' 'kangaroo' 'jaguar' 'leopard' 'dumbbell' 'envelope'
 'winter_melon' 'teapot' 'camel' 'beaker' 'brown_bear' 'toilet'
 'teddy_bear' 'briefcase' 'stop_sign' 'tiger' 'cabbage' 'giraffe'
 'polar_bear' 'shark' 'rabbit' 'swim_cap' 'pressure_cooker'
 'kitchen_knife' 'submarine

In [0]:
# Report how many classes the loaded checkpoint predicts, then list them alphabetically.
print(
    "HTC Pretrained model Classes [%d]"%len(model.CLASSES),
    np.array(sorted(model.CLASSES)),
    sep="\n",
)

HTC Pretrained model Classes [80]
['airplane' 'apple' 'backpack' 'banana' 'baseball_bat' 'baseball_glove'
 'bear' 'bed' 'bench' 'bicycle' 'bird' 'boat' 'book' 'bottle' 'bowl'
 'broccoli' 'bus' 'cake' 'car' 'carrot' 'cat' 'cell_phone' 'chair' 'clock'
 'couch' 'cow' 'cup' 'dining_table' 'dog' 'donut' 'elephant'
 'fire_hydrant' 'fork' 'frisbee' 'giraffe' 'hair_drier' 'handbag' 'horse'
 'hot_dog' 'keyboard' 'kite' 'knife' 'laptop' 'microwave' 'motorcycle'
 'mouse' 'orange' 'oven' 'parking_meter' 'person' 'pizza' 'potted_plant'
 'refrigerator' 'remote' 'sandwich' 'scissors' 'sheep' 'sink' 'skateboard'
 'skis' 'snowboard' 'spoon' 'sports_ball' 'stop_sign' 'suitcase'
 'surfboard' 'teddy_bear' 'tennis_racket' 'tie' 'toaster' 'toilet'
 'toothbrush' 'traffic_light' 'train' 'truck' 'tv' 'umbrella' 'vase'
 'wine_glass' 'zebra']


In [0]:
# Run the detector on every image listed in the sample submission and fill in
# ImageWidth / ImageHeight / PredictionString row by row.

# Not used by the loop below; kept because cells outside this view may read them.
ImageID_list = []
ImageWidth_list = []
ImageHeight_list = []
PredictionString_list = []
score_thr = 0.3  # detections whose confidence is not above this are dropped

IMAGE_DIR = '/content/data/test/challenge2018/'
submission = empty_submission_df.copy()

# Build the label -> encoded label lookup once instead of filtering the
# DataFrame twice per detection. Model labels missing from it have no
# competition class and are skipped.
encoded_label_by_name = dict(zip(class_lookup_df["label"], class_lookup_df["encoded_label"]))

for table_index, imageID in enumerate(empty_submission_df['ImageID'].values):
  img_path = "%s%s.jpg"%(IMAGE_DIR,imageID)
  start = time.time()
  result = inference_detector(model, img_path)
  end = time.time()

  # A tuple result carries (bbox_result, segm_result); anything else is bboxes only.
  if isinstance(result, tuple):
    bbox_result, segm_result = result
  else:
    bbox_result, segm_result = result, None
  bboxes = np.vstack(bbox_result)  # the last column is the confidence score

  # Reset per image so nothing leaks from the previous iteration. -1 x -1 is
  # written only when no detection passes the score threshold.
  width, height = -1, -1
  prediction_parts = []

  if segm_result is not None:
    segms = mmcv.concat_list(segm_result)
    # bbox_result holds one array per class; expand it into a class index per detection.
    labels = np.concatenate([
        np.full(bbox.shape[0], class_index, dtype=np.int32)
        for class_index, bbox in enumerate(bbox_result)
    ])
    inds = np.where(bboxes[:, -1] > score_thr)[0]
    if len(inds) > 0:
      # NOTE(review): assumes every mask of one image reports the same 'size' - confirm.
      height, width = segms[inds[0]]['size']

    for i in inds:
      labelname = model.CLASSES[labels[i]]
      encoded_label = encoded_label_by_name.get(labelname)
      if encoded_label is None:  # mismatch between coco label and Open Image label
        continue
      confidence = bboxes[i][-1]
      # np.bool_ instead of the `np.bool` alias, which NumPy 1.24 removed.
      mask = maskUtils.decode(segms[i]).astype(np.bool_)
      encoded_mask = encode_binary_mask(mask)
      # One "<label> <confidence> <mask> " triple per detection (trailing space kept).
      prediction_parts.append("%s %s %s " % (encoded_label, confidence, encoded_mask.decode()))

  prediction_string = "".join(prediction_parts)
  submission.loc[table_index,'ImageWidth'] = width
  submission.loc[table_index,'ImageHeight'] = height
  submission.loc[table_index,'PredictionString'] = prediction_string
  # index, inference time, total time for this image (seconds)
  print("[%d] %.4f  %.4f"%(table_index,end-start,time.time()-start))
  if table_index % 500 == 0:
    # Periodic checkpoint on the mounted drive so a disconnected runtime loses
    # at most 500 images of work.
    # NOTE(review): to_csv does not create missing directories - make sure
    # /content/drive/submission exists on the mounted drive.
    submission.to_csv("/content/drive/submission/submission_%d.csv"%(table_index),index=False,sep=',')
submission.to_csv("/content/submission.csv",index=False,sep=',')

    


[0] 1.2040  1.2212
[1] 1.2234  1.2471
[2] 0.6856  0.6956
[3] 1.1914  1.2096
[4] 1.2225  1.2468
[5] 0.7301  0.7470
[6] 1.1457  1.1598
[7] 1.1530  1.1643
[8] 1.1826  1.2016
[9] 0.9670  0.9774
[10] 1.1289  1.1426
[11] 0.9728  0.9871
[12] 1.1438  1.1599
