In [None]:
import numpy as np 
import pandas as pd

import os
# Walk the Kaggle input directory and print the full path of every file found,
# so the available dataset files are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

## Install required packages

In [None]:
!pip install numpy==1.17.0
!pip install tensorflow==1.15.2
!pip install keras==2.1.0

In [None]:
!git clone https://www.github.com/matterport/Mask_RCNN.git

In [None]:
!ls

In [None]:
# Enter the cloned repository only when we are not already inside it, so that
# re-running this cell does not fail with FileNotFoundError.
if os.path.basename(os.getcwd()) != "Mask_RCNN":
    os.chdir("Mask_RCNN/")

In [None]:
!pip install -r requirements.txt

In [None]:
!python setup.py -q install

In [None]:
!pip uninstall pycocotools -y
!pip install -q git+https://github.com/waleedka/coco.git#subdirectory=PythonAPI

## Import packages

In [None]:
ROOT_DIR = ""

In [None]:
import sys
sys.path.append(os.path.join(".", "Mask_RCNN"))
sys.path.append(ROOT_DIR)
import re
import random
import pandas as pd
import numpy as np
import mrcnn.model as modellib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
import matplotlib
import math
import logging
import json
import itertools
import glob
import cv2
from tqdm import tqdm
from pycocotools.cocoeval import COCOeval
from pycocotools.coco import COCO
from pycocotools import mask as maskUtils
from mrcnn.model import log
from mrcnn.config import Config
from mrcnn import visualize
from mrcnn import utils
from collections import Counter, defaultdict

In [None]:
ROOT_DIR = os.path.abspath(".")
ROOT_DIR

## Constant variables

In [None]:
DATA_TRAIN_DIR = "/kaggle/input/food-recognition/train/"

In [None]:
DATA_VAL_DIR = "/kaggle/input/food-recognition/val/"

In [None]:
DATA_DIR = "/kaggle/input/food-recognition/"

## Defining DatasetClass and Config

In [None]:
class FoodChallengeDataset(utils.Dataset):
    """Mask R-CNN dataset backed by a COCO-format annotation file.

    ``load_dataset`` registers every category and image of the annotation file
    under the source name ``SOURCE_NAME``; ``load_mask`` decodes the instance
    masks of one registered image.
    """

    # Source name under which classes and images are registered.
    SOURCE_NAME = "crowdai_food_challenge"

    def load_dataset(self, dataset_dir, load_small=False, return_coco=True):
        """Register the classes and images described by the annotation file.

        Args:
            dataset_dir: Directory holding the annotation JSON and an
                ``images`` sub-directory.
            load_small: If True read ``annotations-small.json`` instead of
                ``annotations.json``.
            return_coco: If True return the loaded ``COCO`` object.

        Returns:
            The ``COCO`` object when ``return_coco`` is true, otherwise None.

        Raises:
            AssertionError: If the annotation file, the image directory or any
                referenced image file does not exist.
        """
        self.load_small = load_small

        annotation_file = "annotations-small.json" if self.load_small else "annotations.json"
        self.annotation_path = os.path.join(dataset_dir, annotation_file)

        image_dir = os.path.join(dataset_dir, "images")
        print("Annotation path", self.annotation_path)
        print("Image Dir", image_dir)

        assert os.path.exists(self.annotation_path) and os.path.exists(image_dir), \
            "Missing annotation file or image directory under {}".format(dataset_dir)

        self.coco = COCO(self.annotation_path)
        self.image_dir = image_dir

        class_ids = self.coco.getCatIds()
        image_ids = list(self.coco.imgs.keys())

        # Register classes
        for _class_id in class_ids:
            self.add_class(self.SOURCE_NAME, _class_id, self.coco.loadCats(_class_id)[0]["name"])

        # Register images together with their annotations
        for _img_id in image_ids:
            img = self.coco.imgs[_img_id]
            path = os.path.join(self.image_dir, img["file_name"])
            assert os.path.exists(path), "Image file not found: {}".format(path)

            annotation_ids = self.coco.getAnnIds(imgIds=_img_id, catIds=class_ids, iscrowd=None)
            self.add_image(
                self.SOURCE_NAME, image_id=_img_id,
                path=path,
                width=img["width"],
                height=img["height"],
                annotations=self.coco.loadAnns(annotation_ids)
            )

        if return_coco:
            return self.coco

    def load_mask(self, image_id):
        """Decode the instance masks of one registered image.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            A ``(mask, class_ids)`` pair: ``mask`` stacks one decoded mask per
            kept annotation along axis 2 and ``class_ids`` is the matching
            int32 array of internal class ids. When no annotation yields a
            usable mask, the base-class ``load_mask`` result is returned.

        Raises:
            AssertionError: If the image was not registered under
                ``SOURCE_NAME``.
        """
        # Use one consistently named local; the previous code bound `image_infor`
        # but read `image_info`, which only resolved via an unrelated global.
        image_info = self.image_info[image_id]
        assert image_info["source"] == self.SOURCE_NAME

        instance_masks = []
        class_ids = []

        for annotation in image_info["annotations"]:
            class_id = self.map_source_class_id(
                "{}.{}".format(self.SOURCE_NAME, annotation["category_id"]))

            # A falsy id (0) is skipped, as in the original `if class_id:` check.
            if not class_id:
                continue

            m = self.annToMask(annotation, image_info["height"], image_info["width"])

            # Skip annotations whose decoded mask is empty.
            if m.max() < 1:
                continue

            instance_masks.append(m)
            class_ids.append(class_id)

        if class_ids:
            mask = np.stack(instance_masks, axis=2)
            # int32 matches the dtype the base-class fallback returns.
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids

        return super(FoodChallengeDataset, self).load_mask(image_id)

    def image_reference(self, image_id):
        """Return a printable reference string for ``image_id``."""
        return "crowai-food-challenge::{}".format(image_id)

    def annToRLE(self, ann, height, width):
        """Convert an annotation's ``segmentation`` field to RLE.

        Handles three encodings: a list of polygons (merged into one RLE), an
        uncompressed RLE dict whose ``counts`` entry is a list, and an
        already-encoded RLE dict (returned unchanged).
        """
        segm = ann["segmentation"]

        if isinstance(segm, list):
            # Polygons: one object may consist of several parts, merge them.
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm["counts"], list):
            # Uncompressed RLE. The COCO key is "counts" (the previous "count"
            # lookup raised KeyError for every RLE annotation).
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            rle = segm

        return rle

    def annToMask(self, ann, height, width):
        """Decode an annotation into a mask array via its RLE form."""
        rle = self.annToRLE(ann, height, width)
        return maskUtils.decode(rle)

In [None]:
dataset_train = FoodChallengeDataset()

In [None]:
dataset_train.load_dataset(dataset_dir=DATA_TRAIN_DIR, load_small=False)
dataset_train.prepare()

In [None]:
dataset_val = FoodChallengeDataset()
dataset_val.load_dataset(dataset_dir=DATA_VAL_DIR, load_small=False, return_coco=True)
dataset_val.prepare()

In [None]:
class FoodChallengeConfig(Config):
    """Training configuration overrides for the food-recognition run."""

    NAME = "crowai-food-challenge"

    # Batch layout: GPU_COUNT * IMAGES_PER_GPU = 2 images per step.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2

    # Network and input size.
    BACKBONE = 'resnet50'
    NUM_CLASSES = 62 # n_classes + background
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256

    # Optimisation schedule. The divisor 2 equals the per-step image count
    # above (presumably intentional, so one epoch covers the dataset once).
    LEARNING_RATE = 0.001
    STEPS_PER_EPOCH = len(dataset_train.image_ids) // 2
    VALIDATION_STEPS = len(dataset_val.image_ids) // 2

In [None]:
config = FoodChallengeConfig()

In [None]:
config.display()

## Data Augmentation

In [None]:
import imgaug as ia
from imgaug import augmenters as iaa

In [None]:
DATA_AUG_SEQUENCE = None
DATA_AUG_NAME_LOADED = None

In [None]:
def load_aug_geometric():
    """Build the geometric augmenter.

    Returns an ``iaa.OneOf`` over six alternatives: a flip pair, three crop
    ranges, a crop-or-pad, and an affine transform followed by an occasional
    heavy crop.
    """
    flips = iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.2)])
    crop_light = iaa.Crop(percent=(0.0, 0.1))
    crop_medium = iaa.Crop(percent=(0.1, 0.3))
    crop_heavy = iaa.Crop(percent=(0.3, 0.5))
    crop_or_pad = iaa.CropAndPad(
        percent=(-0.05, 0.1),
        pad_cval=(0, 255),
        pad_mode='constant',
    )

    affine = iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-45, 45),
        shear=(-16, 16),
        order=[0,1],
        mode="constant",
        cval=(0, 255),
    )
    occasional_heavy_crop = iaa.Sometimes(0.3, iaa.Crop(percent=(0.3, 0.5)))
    affine_then_crop = iaa.Sequential([affine, occasional_heavy_crop])

    alternatives = [
        flips,
        crop_light,
        crop_medium,
        crop_heavy,
        crop_or_pad,
        affine_then_crop,
    ]
    return iaa.OneOf(alternatives)

In [None]:
def load_aug_non_geometric():
    """Build the pixel-level augmenter.

    Returns an ``iaa.Sequential`` of four ``iaa.Sometimes`` wrappers around
    brightness scaling, Gaussian blur, grayscale blending and embossing.
    """
    brightness = iaa.Sometimes(0.3, iaa.Multiply((0.5, 1.5), per_channel=0.5))
    blur = iaa.Sometimes(0.3, iaa.GaussianBlur(sigma=(0, 3.0)))
    grayscale = iaa.Sometimes(0.2, iaa.Grayscale(alpha=(0, 1.0)))
    emboss = iaa.Sometimes(0.3, iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)))

    return iaa.Sequential([brightness, blur, grayscale, emboss])

In [None]:
def load_aug_all():
    """Combine both augmenter families.

    Wraps the geometric augmenter in ``Sometimes(0.5, ...)`` and the
    non-geometric one in ``Sometimes(0.3, ...)``, chained in that order.
    """
    geometric_stage = iaa.Sometimes(0.5, load_aug_geometric())
    pixel_stage = iaa.Sometimes(0.3, load_aug_non_geometric())
    return iaa.Sequential([geometric_stage, pixel_stage])

In [None]:
def load_aug_all2():
    """Build the heavy augmentation pipeline.

    Returns an ``iaa.Sequential`` (``random_order=True``) whose children are
    the flip pair, three ``Crop`` ranges, a ``CropAndPad``, an affine block,
    and a ``SomeOf((0, 5), ...)`` group of pixel-level and warping augmenters.

    Unlike ``load_aug_geometric``, the geometric children are passed straight
    to ``Sequential`` instead of ``OneOf``, so all of them are chained.

    NOTE(review): the pipeline contains warping augmenters
    (``ElasticTransformation``, ``PerspectiveTransform``) and pixel-only ones.
    Confirm how the training code applies augmentation to masks, so that
    images and masks stay aligned -- TODO confirm.
    NOTE(review): ``ContrastNormalization`` may be deprecated in newer imgaug
    releases -- verify against the installed version.
    """
    def sometime(aug):
        # Apply `aug` with probability 0.5.
        return iaa.Sometimes(0.5, aug)
    
    return iaa.Sequential([
        # --- geometric children (same building blocks as load_aug_geometric) ---
        iaa.Sequential([iaa.Fliplr(0.5), iaa.Flipud(0.2)]),
        iaa.Crop(percent=(0.0, 0.1)),
        iaa.Crop(percent=(0.1, 0.3)),
        iaa.Crop(percent=(0.3, 0.5)),
        iaa.CropAndPad(percent=(-0.05, 0.1),
                       pad_cval=(0, 255),
                       pad_mode='constant'),
        iaa.Sequential([
            iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                rotate=(-45, 45),
                shear=(-16, 16),
                order=[0,1],
                mode="constant",
                cval=(0, 255)
            ),
            iaa.Sometimes(0.3, iaa.Crop(percent=(0.3, 0.5)))
        ]),
        # --- between 0 and 5 of the following, in random order ---
        iaa.SomeOf((0, 5), [
            sometime(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))),
            # one of three blur types
            iaa.OneOf([
                iaa.GaussianBlur(sigma=(0, 3.0)),
                iaa.AverageBlur(k=(2, 7)),
                iaa.MedianBlur(k=(3, 11))
            ]),
            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
            iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)),
            # edge detection blended in through a simplex-noise alpha mask
            iaa.SimplexNoiseAlpha(iaa.OneOf([
               iaa.EdgeDetect(alpha=(0.5, 1.0)),
               iaa.DirectedEdgeDetect(direction=(0.0, 1.0))
            ])),
            iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
            # per-pixel or coarse dropout
            iaa.OneOf([
                iaa.Dropout((0.01, 0.1), per_channel=0.5),
                iaa.CoarseDropout((0.03, 0.15), size_percent=(
                    0.02, 0.05), per_channel=0.2),
            ]),
            iaa.Invert(0.05, per_channel=True),
            iaa.Add((-10, 10), per_channel=0.5),
            iaa.AddToHueAndSaturation((-20, 20)),
            # brightness scaling, either plain or blended with contrast normalisation
            iaa.OneOf([
                iaa.Multiply(
                            (0.5, 1.5), per_channel=0.5),
                iaa.FrequencyNoiseAlpha(
                    exponent=(-4, 0),
                    first=iaa.Multiply(
                        (0.5, 1.5), per_channel=True),
                    second=iaa.ContrastNormalization(
                        (0.5, 2.0))
                )
            ]),
            iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
            iaa.Grayscale(alpha=(0.0, 1.0)),
            # warping augmenters, each wrapped in Sometimes(0.5)
            sometime(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)),
            sometime(iaa.PiecewiseAffine(scale=(0.01, 0.05))),
            sometime(iaa.PerspectiveTransform(scale=(0.01, 0.1)))
        ], random_order=True)
    ], random_order=True)

In [None]:
# Registry mapping an augmentation name to the zero-argument factory that builds
# it; load_aug() looks names up here.
list_aug_support = {
    "aug_all": load_aug_all,
    "aug_geo": load_aug_geometric,
    "aug_non_geo": load_aug_non_geometric,
    "aug_all_2": load_aug_all2,
}

In [None]:
def load_aug(aug_name="aug_all_2"):
    """Return the augmentation pipeline registered under ``aug_name``.

    The pipeline is built on first use and cached in the module-level
    ``DATA_AUG_SEQUENCE`` / ``DATA_AUG_NAME_LOADED`` pair. It is rebuilt only
    when a different name is requested.

    Args:
        aug_name: Key into ``list_aug_support``.

    Returns:
        The augmenter produced by the registered factory.

    Raises:
        KeyError: If ``aug_name`` is not a key of ``list_aug_support``.
    """
    # Both names must be declared global: without the declaration the
    # assignment below made DATA_AUG_SEQUENCE a local, so every cache hit
    # raised UnboundLocalError at the return statement.
    global DATA_AUG_SEQUENCE, DATA_AUG_NAME_LOADED

    if DATA_AUG_SEQUENCE is None or DATA_AUG_NAME_LOADED != aug_name:
        DATA_AUG_SEQUENCE = list_aug_support[aug_name]()
        DATA_AUG_NAME_LOADED = aug_name

    return DATA_AUG_SEQUENCE

## Dataset Exploration

In [None]:
from collections import Counter

class_counts = Counter()

In [None]:
# Count annotated instances per category id. A fresh Counter is built on every
# run (instead of mutating `class_counts`, which this cell rebinds to a
# DataFrame below), so the cell can be re-executed without errors.
instance_counter = Counter()
for image_info in dataset_train.image_info:
    instance_counter.update(a["category_id"] for a in image_info["annotations"])

# Category id -> class name, taken from the registered class metadata.
class_mapping = {info["id"]: info["name"] for info in dataset_train.class_info}

# One row per class, most frequent first.
class_counts = pd.DataFrame(
    [(class_mapping[category_id], n) for category_id, n in instance_counter.most_common()],
    columns=["class", "count"],
)

fig, ax = plt.subplots(figsize=(12, 12))
ax.barh(class_counts['class'], class_counts['count'])
ax.set(title='Counts of classes of objects', xlabel='count', ylabel='class');

In [None]:
print(f'We have {class_counts.shape[0]} classes!')

In [None]:
# Pick one random training image, load its pixels and instance masks, derive
# bounding boxes from the masks, log each array, and draw the annotated image.
image_id = random.choice(dataset_train.image_ids)
image = dataset_train.load_image(image_id)
plt.imshow(image)
mask, class_ids = dataset_train.load_mask(image_id)
bbox = utils.extract_bboxes(mask)

print("image", image_id, dataset_train.image_reference(image_id))
log("mask", mask)
log("class_ids", class_ids)
log("image", image)
log("bbox", bbox)
visualize.display_instances(image, bbox, mask, class_ids, dataset_train.class_names, figsize=(12, 12))

In [None]:
dataset_train.image_info[0]

In [None]:
# Index: category id -> list of positions in dataset_train.image_info that contain
# at least one annotation of that category (a position repeats once per annotation).
class_images = defaultdict(list)

for ind, image_info in enumerate(dataset_train.image_info):
    ann = image_info["annotations"]
    
    for i in ann:
        class_images[i['category_id']].append(ind)

In [None]:
# NOTE(review): `image_ids` is not used anywhere in this cell -- confirm whether a
# later cell needs it.
image_ids = np.random.choice(dataset_train.image_ids, 4)

# For 10 randomly drawn categories, show the top masks of one random image
# containing that category.
for class_id in np.random.choice(list(class_images.keys()), 10):
    image_id = np.random.choice(class_images[class_id], 1)[0]
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

## Bounding Boxes

In [None]:
# For 10 randomly drawn categories, draw one random image of that category with
# its masks and the bounding boxes derived from them. `idx` is not used in the body.
for idx, class_id in enumerate(np.random.choice(list(class_images.keys()), 10)):
    image_id = np.random.choice(list(class_images[class_id]), 1)[0]
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    bbox = utils.extract_bboxes(mask)
    visualize.display_instances(image, bbox, mask, class_ids, dataset_train.class_names, figsize=(12, 12))

## Anchors

In [None]:
# Compute the backbone feature-map shapes for the configured image shape, then
# generate the anchors for all pyramid levels from the RPN settings in `config`.
backbone_shape = modellib.compute_backbone_shapes(config, config.IMAGE_SHAPE)
anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                         config.RPN_ANCHOR_RATIOS,
                                         backbone_shape,
                                         config.BACKBONE_STRIDES,
                                         config.RPN_ANCHOR_STRIDE)

In [None]:
num_levels = len(backbone_shape)
anchors_per_cell = len(config.RPN_ANCHOR_RATIOS)

In [None]:
print("levels", num_levels)
print("scales", config.RPN_ANCHOR_SCALES)
print("ratio", config.RPN_ANCHOR_RATIOS)
print("anchors per cell", anchors_per_cell)
print("number anchors", anchors.shape[0])

In [None]:
# Number of anchors contributed by each pyramid level:
# anchors_per_cell * (cells in the level's feature map) // RPN_ANCHOR_STRIDE**2.
anchors_per_level = []

for l in range(num_levels):
    num_cells = backbone_shape[l][0] * backbone_shape[l][1]
    anchors_per_level.append(anchors_per_cell * num_cells // config.RPN_ANCHOR_STRIDE**2)
    
    print("Anchors per level ", l, anchors_per_level[l])

In [None]:
# Load one random training image through the model's ground-truth loader and
# draw, for every pyramid level, the anchors of the cell at the centre of that
# level's feature map.
image_id = np.random.choice(dataset_train.image_ids, 1)[0]
image, image_meta, _, _, _ = modellib.load_image_gt(dataset_train, config, image_id)
fig, ax = plt.subplots(1, figsize=(10, 10))
ax.imshow(image)

levels = len(backbone_shape)

for level in range(levels):
    # A new colour list is generated on every iteration; only colors[level] is used.
    colors = visualize.random_colors(levels)
    # Anchors of this level follow those of all previous levels in `anchors`.
    level_start = sum(anchors_per_level[:level])
    level_anchors = anchors[level_start:level_start+anchors_per_level[level]]
    print("Level {}. Anchors {:6} Feature map shape: {}".format(level, level_anchors.shape[0], backbone_shape[level]))
    
    # Row-major index of the centre cell of this level's feature map.
    center_cell = backbone_shape[level] // 2
    center_cell_index = (center_cell[0] * backbone_shape[level][1] + center_cell[1])
    
    # This first value of level_center is overwritten below by the
    # stride-adjusted computation.
    level_center = center_cell_index * anchors_per_cell 
    center_anchor = anchors_per_cell * (
        (center_cell[0] * backbone_shape[level][1] / config.RPN_ANCHOR_STRIDE**2) \
        + center_cell[1] / config.RPN_ANCHOR_STRIDE)
    level_center = int(center_anchor)
    
    # Draw the anchors of the centre cell; the edge colour is scaled by
    # (i+1)/anchors_per_cell so each anchor of the cell gets a different shade.
    for i, rect in enumerate(level_anchors[level_center:level_center+anchors_per_cell]):
        y1, x1, y2, x2 = rect
        p = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, facecolor='none',
                              edgecolor=(i+1)*np.array(colors[level]) / anchors_per_cell)
        ax.add_patch(p)

## ROI

In [None]:
# random_rois = 2000
# dataset.class_ids.astype("int")

# g = modellib.data_generator(
#         dataset, config, shuffle=True, random_rois=random_rois, 
#         batch_size=4,
#         detection_targets=True
#     )

# # Get Next Image
# if random_rois:
#     [normalized_images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks, rpn_rois, rois], \
#     [mrcnn_class_ids, mrcnn_bbox, mrcnn_mask] = next(g)
# else:
#     [normalized_images, image_meta, rpn_match, rpn_bbox, gt_boxes, gt_masks], _ = next(g)
    
# image_id = modellib.parse_image_meta(image_meta)["image_id"][0]

# mrcnn_class_ids = mrcnn_class_ids[:,:,0]

# b = 0

# # Restore original image (reverse normalization)
# sample_image = modellib.unmold_image(normalized_images[b], config)

# # Compute anchor shifts.
# indices = np.where(rpn_match[b] == 1)[0]
# refined_anchors = utils.apply_box_deltas(anchors[indices], rpn_bbox[b, :len(indices)] * config.RPN_BBOX_STD_DEV)

# # Get list of positive anchors
# positive_anchor_ids = np.where(rpn_match[b] == 1)[0]
# negative_anchor_ids = np.where(rpn_match[b] == -1)[0]
# neutral_anchor_ids = np.where(rpn_match[b] == 0)[0]

# # ROI breakdown by class
# for c, n in zip(dataset.class_names, np.bincount(mrcnn_class_ids[b].flatten())):
#     if n:
#         print("{:23}: {}".format(c[:20], n))

# # Show positive anchors
# visualize.draw_boxes(sample_image, boxes=anchors[positive_anchor_ids], 
#                      refined_boxes=refined_anchors)

In [None]:
# if random_rois:
#     # Class aware bboxes
#     bbox_specific = mrcnn_bbox[b, np.arange(mrcnn_bbox.shape[1]), mrcnn_class_ids[b], :]

#     # Refined ROIs
#     refined_rois = utils.apply_box_deltas(rois[b].astype(np.float32), bbox_specific[:,:4] * config.BBOX_STD_DEV)

#     # Class aware masks
#     mask_specific = mrcnn_mask[b, np.arange(mrcnn_mask.shape[1]), :, :, mrcnn_class_ids[b]]

#     visualize.draw_rois(sample_image, rois[b], refined_rois, mask_specific, mrcnn_class_ids[b], dataset.class_names)
    
#     # Any repeated ROIs?
#     rows = np.ascontiguousarray(rois[b]).view(np.dtype((np.void, rois.dtype.itemsize * rois.shape[-1])))
#     _, idx = np.unique(rows, return_index=True)
    
#     print("Unique ROIs: {} out of {}".format(len(idx), rois.shape[1]))

## Modeling with MaskRCNN

In [None]:
!mkdir pretrained

In [None]:
# Location of the pretrained weights file and of the training logs/checkpoints.
PRETRAINED_MODEL_PATH = os.path.join("pretrained", "mask_rcnn_coco.h5")
LOGS_DIRECTORY = os.path.join(ROOT_DIR, "logs")

# Make sure the target directory exists before downloading; exist_ok keeps the
# cell safe to re-run.
os.makedirs(os.path.dirname(PRETRAINED_MODEL_PATH), exist_ok=True)

# Download the weights only when they are not already on disk.
if not os.path.exists(PRETRAINED_MODEL_PATH):
    utils.download_trained_weights(PRETRAINED_MODEL_PATH)

In [None]:
import keras.backend as K

In [None]:
if K.backend() == "tensorflow":
    K.common.image_dim_ordering()

In [None]:
model = modellib.MaskRCNN(mode="training", config=config, model_dir=LOGS_DIRECTORY)
model_path = PRETRAINED_MODEL_PATH
model.load_weights(model_path, by_name=True, exclude=[
    "mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"
])

In [None]:
# class_names = dataset_train.class_names
# assert len(class_names)==62, "Please check DatasetConfig"
# dataset_train.class_names

In [None]:
from keras.callbacks import ModelCheckpoint

file_path = "logs/best_weight.hdf5"

# Keep only the best checkpoint, judged by the lowest validation loss.
# The previous setting monitored "val_acc" with mode="min", which would keep
# the *worst* accuracy; Mask R-CNN training also logs loss terms rather than
# an accuracy metric, so "val_loss" is the quantity to minimise.
checkpoint = ModelCheckpoint(filepath=file_path, mode="min", verbose=1, monitor="val_loss", save_best_only=True)

In [None]:
config.LEARNING_RATE

In [None]:
from keras.callbacks import LearningRateScheduler

In [None]:
def lr_schedule(epoch):
    """Step-decay learning rate for ``epoch``.

    Starts at ``config.LEARNING_RATE`` and multiplies it by 0.5 once for every
    full block of 10 counted in ``epoch + 1``.
    """
    halving_period = 10
    decay = 0.5

    completed_drops = np.floor((epoch + 1) / halving_period)
    return config.LEARNING_RATE * decay ** completed_drops

In [None]:
print("Training network")

# Train with layers='heads' for 45 epochs, starting from config.LEARNING_RATE.
# The best-checkpoint callback and the step-decay LearningRateScheduler are
# passed as custom callbacks; augmentation uses load_aug()'s default pipeline.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=45,
            layers='heads', custom_callbacks=[checkpoint, LearningRateScheduler(lr_schedule)],
            augmentation=load_aug())

In [None]:
!rm logs/crowai-food-challenge20200427T0506/mask_rcnn_crowai-food-challenge_0039.h5

In [None]:
model.epoch = 45

## Prediction

In [None]:
class InferenceConfig(FoodChallengeConfig):
    """Inference-time overrides of ``FoodChallengeConfig``."""

    NAME = "Food"

    # One image per step: GPU_COUNT * IMAGES_PER_GPU = 1.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Same class count and input size as the training configuration.
    NUM_CLASSES = 62
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256

    # Confidence threshold set to 0.
    DETECTION_MIN_CONFIDENCE = 0

In [None]:
inference_config = InferenceConfig()

In [None]:
inference_config.display()

In [None]:
model_path = 