In [None]:
# # https://github.com/maxw1489/Mask_RCNN (tensorflow 2.9.1)

In [None]:
import os
# Set TensorFlow's C++ log level before the `import tensorflow` line below.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # https://stackoverflow.com/questions/35911252/disable-tensorflow-debugging-information


import tensorflow as tf
from tensorflow.python.client import device_lib

# Report the TensorFlow version and how many GPUs it can see.
print(tf.__version__)
print("Num GPUs Available: ",
len(tf.config.list_physical_devices('GPU')))
# Last expression of the cell: the local device list is shown as the cell output.
device_lib.list_local_devices()

In [None]:
import logging
import os
import json
import cv2
import sys
import math
import random
import warnings

import cv2

import numpy as np
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

dir_root = os.path.abspath("./")

# sys.path.append(os.path.join(dir_root, "maskrcnn/Mask_RCNN-master/"))  # To find local version
# sys.path.append(os.path.join(dir_root, "maskrcnn/Mask_RCNN-master_tf2p9/"))  # To find local version
# sys.path.append(os.path.join(dir_root, "maskrcnn/mask-rcnn-tf2-us-main/"))  # To find local version

# sys.path.append(os.path.join(dir_root, "maskrcnn/mask-rcnn-tf2-us-main/samples/coco/"))  # To find local version


from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
from mrcnn.config import Config

import imgaug as ia
import imgaug.augmenters as iaa



# Suppress Python logging calls of severity WARNING and below.
logging.disable(logging.WARNING)
# Same value that the first cell already set before importing TensorFlow.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    # 0 = all messages are logged (default behavior)
    # 1 = INFO messages are not printed
    # 2 = INFO and WARNING messages are not printed
    # 3 = INFO, WARNING, and ERROR messages are not printed


# Hide every warning raised through the `warnings` module.
warnings.filterwarnings('ignore')

In [None]:
# https://www.tensorflow.org/api_docs/python/tf/config/experimental/set_memory_growth
# Enable memory growth on every GPU that TensorFlow lists.
physical_devices = tf.config.list_physical_devices("GPU") 
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device=device, enable=True)

# Limit intra-op parallelism to a single thread.
tf.config.threading.set_intra_op_parallelism_threads(1)

In [None]:
# Number of synthetic training images to generate.
NUM_OF_IMAGES = 10000
# Validation set size: 10% of the training set size, truncated to an integer.
NUM_OF_VAL_IMAGES = int(NUM_OF_IMAGES*0.1)

In [None]:
class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset (here with the "shvit" backbone).
    """
    # Give the configuration a recognizable name
    NAME = "60_shapes_shvit"
    BACKBONE = "shvit" # "resnet101"

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a SHViT backbone.
    BACKBONE_STRIDES = [8, 16, 32, 64, 128] # 128 added as they add 64 in the paper (original strides were [4,8,16,32] -> [4,8,16,32,64], maybe because P5 was upsampled by factor 2??)
    # BACKBONE_STRIDES= [4, 8, 16, 32, 64] # resnet


    # Train on 1 GPU with 4 images per GPU.
    # Batch size is 4 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4

    # Number of classes (including background)
    NUM_CLASSES = 1 + 3  # background + 3 shapes

    # Set the limits of the small side and the large side; that determines
    # the image shape. Both are 512 here (128 is kept as the earlier value).
    IMAGE_MIN_DIM = 512 # 128
    IMAGE_MAX_DIM = 512 # 128

    # Anchor scales: the SHViT values are active, the ResNet values are kept
    # commented out for reference.
    # RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels resnet
    RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256) # anchor side in pixels shvit

    # Training ROIs per image (32 is kept as the earlier value).
    # Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 128 # 32

    # Steps per epoch = number of training images / images per GPU
    # (GPU_COUNT is 1, so this is images / batch size).
    STEPS_PER_EPOCH = int(NUM_OF_IMAGES / IMAGES_PER_GPU)

    # Validation steps = number of validation images / images per GPU.
    VALIDATION_STEPS = int(NUM_OF_VAL_IMAGES / IMAGES_PER_GPU)

    # NOTE(review): presumably controls eager vs. graph execution in this
    # Mask R-CNN fork - confirm against its Config/model code.
    RUN_EAGERLY = False    



# Instantiate the training configuration and print its values.
config = ShapesConfig()
config.display()

In [None]:
class ShapesDataset(utils.Dataset):
    """Generates the shapes synthetic dataset. The dataset consists of simple
    shapes (triangles, squares, circles) placed randomly on a blank surface.
    The images are generated on the fly. No file access required.
    """

    def load_shapes(self, count, height, width):
        """Generate the requested number of synthetic images.
        count: number of images to generate.
        height, width: the size of the generated images.
        """
        # Add classes
        self.add_class("shapes", 1, "square")
        self.add_class("shapes", 2, "circle")
        self.add_class("shapes", 3, "triangle")

        # Add images
        # Generate random specifications of images (i.e. color and
        # list of shapes sizes and locations). This is more compact than
        # actual images. Images are generated on the fly in load_image().
        for i in range(count):
            bg_color, shapes = self.random_image(height, width)
            self.add_image("shapes", image_id=i, path=None,
                           width=width, height=height,
                           bg_color=bg_color, shapes=shapes)

    def load_image(self, image_id):
        """Generate an image from the specs of the given image ID.
        Typically this function loads the image from a file, but
        in this case it generates the image on the fly from the
        specs in image_info.
        Returns a uint8 array of shape [height, width, 3].
        """
        info = self.image_info[image_id]
        # Fill the canvas with the background color, then paint each shape
        # in list order (later shapes are drawn over earlier ones).
        bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
        image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
        image = image * bg_color.astype(np.uint8)
        for shape, color, dims in info['shapes']:
            image = self.draw_shape(image, shape, dims, color)
        return image

    def image_reference(self, image_id):
        """Return the shapes data of the image.

        For images whose source is not "shapes", defer to the parent class
        and return its result.
        """
        info = self.image_info[image_id]
        if info["source"] == "shapes":
            return info["shapes"]
        # Was `super(self.__class__).image_reference(self, image_id)`: the
        # one-argument super() object is unbound, so the attribute lookup
        # failed, and the result was never returned either.
        return super().image_reference(image_id)

    def load_mask(self, image_id):
        """Generate instance masks for shapes of the given image ID.
        Returns:
        * mask: uint8 array [height, width, instance count], one channel
                per shape, with occluded pixels removed.
        * class_ids: int32 array with the class ID of each instance.
        """
        info = self.image_info[image_id]
        shapes = info['shapes']
        count = len(shapes)
        mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
        for i, (shape, _, dims) in enumerate(shapes):
            mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
                                                  shape, dims, 1)
        # Handle occlusions: walk from the last-drawn (topmost) shape down
        # and clear the pixels already claimed by shapes drawn later.
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
        for i in range(count - 2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(
                occlusion, np.logical_not(mask[:, :, i]))
        # Map class names to class IDs.
        class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
        return mask, class_ids.astype(np.int32)

    def draw_shape(self, image, shape, dims, color):
        """Draws a shape from the given specs.
        image: array to draw on.
        shape: "square", "circle" or "triangle"; any other name leaves the
               image unchanged.
        dims: (x, y, s) - center coordinates and size.
        color: fill color handed to OpenCV.
        """
        # Get the center x, y and the size s
        x, y, s = dims
        if shape == 'square':
            image = cv2.rectangle(image, (x - s, y - s),
                                  (x + s, y + s), color, -1)
        elif shape == "circle":
            image = cv2.circle(image, (x, y), s, color, -1)
        elif shape == "triangle":
            points = np.array([[(x, y - s),
                                (x - s / math.sin(math.radians(60)), y + s),
                                (x + s / math.sin(math.radians(60)), y + s),
                                ]], dtype=np.int32)
            image = cv2.fillPoly(image, points, color)
        return image

    def random_shape(self, height, width):
        """Generates specifications of a random shape that lies within
        the given height and width boundaries.
        Returns a tuple of three values:
        * The shape name (square, circle, ...)
        * Shape color: a tuple of 3 values, RGB.
        * Shape dimensions: A tuple of values that define the shape size
                            and location. Differs per shape type.
        """
        # Shape
        shape = random.choice(["square", "circle", "triangle"])
        # Color
        color = tuple([random.randint(0, 255) for _ in range(3)])
        # Center x, y (kept at least `buffer` pixels away from the border)
        buffer = 20
        y = random.randint(buffer, height - buffer - 1)
        x = random.randint(buffer, width - buffer - 1)
        # Size
        s = random.randint(buffer, height // 4)
        return shape, color, (x, y, s)

    def random_image(self, height, width):
        """Creates random specifications of an image with multiple shapes.
        Returns the background color of the image and a list of shape
        specifications that can be used to draw the image.
        """
        # Pick random background color
        bg_color = np.array([random.randint(0, 255) for _ in range(3)])
        # Generate a few random shapes and record their
        # bounding boxes
        shapes = []
        boxes = []
        N = random.randint(1, 4)
        for _ in range(N):
            shape, color, dims = self.random_shape(height, width)
            shapes.append((shape, color, dims))
            x, y, s = dims
            boxes.append([y - s, x - s, y + s, x + s])
        # Apply non-max suppression with 0.3 threshold to avoid
        # shapes covering each other
        keep_ixs = utils.non_max_suppression(
            np.array(boxes), np.arange(N), 0.3)
        shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
        return bg_color, shapes

In [None]:
# Both datasets are generated at the height/width of the configured image shape.
image_height, image_width = config.IMAGE_SHAPE[:2]

# Training split
dataset_train = ShapesDataset()
dataset_train.load_shapes(NUM_OF_IMAGES, image_height, image_width)
dataset_train.prepare()

# Validation split
dataset_val = ShapesDataset()
dataset_val.load_shapes(NUM_OF_VAL_IMAGES, image_height, image_width)
dataset_val.prepare()

In [None]:
# Sanity check: largest value in the instance masks of validation image 2.
mask, idx = dataset_val.load_mask(2)
mask.max()

In [None]:
# Load and display random samples
# Create the output folder first: nothing else in this notebook creates it,
# and cv2.imwrite does not create missing directories.
os.makedirs("./shape_test_images", exist_ok=True)
image_ids = np.random.choice(dataset_train.image_ids, 5)
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    # Saved copies are read back with cv2.imread in the inference cell below.
    cv2.imwrite(f"./shape_test_images/shape_test_image_{image_id}.png", image)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names, limit=3)

## Create Model and Load Trained Weights

In [None]:
def sometimes(aug):
    """Wrap `aug` in iaa.Sometimes with probability 0.5."""
    return iaa.Sometimes(0.5, aug)


# Augmentation pipeline: horizontal flip, vertical flip, then an occasional
# rotation between -45 and 45 degrees.
# Options tried earlier and currently disabled: Crop(percent=(0, 0.1)) and the
# Affine arguments scale, translate_percent, shear, order, cval, mode.
augmentation = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Flipud(0.5),
    sometimes(iaa.Affine(rotate=(-45, 45))),
])

In [None]:
# Directory handed to MaskRCNN as `model_dir`.
dir_model = os.path.join(dir_root, "logs/")

#COCO_MODEL_PATH = os.path.join(dir_root, "shoes/mask_rcnn_coco.h5")    
#if not os.path.exists(COCO_MODEL_PATH):
#    utils.download_trained_weights(COCO_MODEL_PATH)
    
# Create model object in training mode.
model = modellib.MaskRCNN(mode="training", config=config, model_dir=dir_model)

# Loading weights trained on MS-COCO is disabled; no weights are loaded in this cell.
#model.load_weights(COCO_MODEL_PATH, by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])

In [None]:
# Epoch values passed to model.train(): EPOCHS[0] after compile(layers="head"),
# EPOCHS[1] after compile(layers="all"). The third entry is not used in the
# cells shown here.
EPOCHS = [1,100,500]
# Learning rate reached at the end of the warmup phase.
initial_learning_rate = 1e-3
# Number of epochs over which the learning rate is ramped up linearly.
warmup_epochs = 10


def lr_warmup(epoch, initial_learning_rate, warmup_epochs):
    """Linear learning-rate warmup.

    epoch: zero-based epoch index.
    initial_learning_rate: rate reached in the last warmup epoch.
    warmup_epochs: number of epochs the warmup lasts.

    Returns the warmup learning rate while `epoch < warmup_epochs`,
    otherwise None to signal that the warmup phase is over.
    """
    warmup_finished = not (epoch < warmup_epochs)
    if warmup_finished:
        return None
    completed_steps = epoch + 1
    return initial_learning_rate * completed_steps / warmup_epochs


class WarmupSchedulerWithReduceLROnPlateau(tf.keras.callbacks.Callback):
    """Linear learning-rate warmup followed by ReduceLROnPlateau.

    During the first `warmup_epochs` epochs the optimizer's learning rate is
    set from `lr_warmup`. Afterwards the wrapped `reduce_lr_on_plateau`
    callback takes over.

    The wrapped callback is not registered with Keras itself, so this class
    forwards the model, the training params and the relevant hooks to it.
    Without that forwarding it has no model and its `on_epoch_end` (where
    ReduceLROnPlateau does its work) never runs.

    initial_learning_rate: rate reached at the end of the warmup.
    warmup_epochs: number of warmup epochs.
    reduce_lr_on_plateau: a tf.keras.callbacks.ReduceLROnPlateau instance.
    """

    def __init__(self, initial_learning_rate, warmup_epochs, reduce_lr_on_plateau):
        super(WarmupSchedulerWithReduceLROnPlateau, self).__init__()
        self.initial_learning_rate = initial_learning_rate
        self.warmup_epochs = warmup_epochs
        self.reduce_lr_on_plateau = reduce_lr_on_plateau

    def set_model(self, model):
        """Give the wrapped callback the same model Keras hands to this one."""
        super().set_model(model)
        self.reduce_lr_on_plateau.set_model(model)

    def set_params(self, params):
        """Give the wrapped callback the same training params."""
        super().set_params(params)
        self.reduce_lr_on_plateau.set_params(params)

    def on_train_begin(self, logs=None):
        """Let the wrapped callback reset its plateau state for each run."""
        self.reduce_lr_on_plateau.on_train_begin(logs)

    def on_epoch_begin(self, epoch, logs=None):
        """Apply the warmup learning rate while the warmup phase lasts."""
        new_lr = lr_warmup(epoch, self.initial_learning_rate, self.warmup_epochs)
        if new_lr is not None:
            tf.keras.backend.set_value(self.model.optimizer.lr, new_lr)
            print(f"Epoch {epoch+1}: Learning rate is {new_lr:.6f} (warmup phase).")
        else:
            # Delegate to ReduceLROnPlateau's functionality
            self.reduce_lr_on_plateau.on_epoch_begin(epoch, logs)

    def on_epoch_end(self, epoch, logs=None):
        """Run the plateau check once the warmup phase is over."""
        if epoch < self.warmup_epochs:
            # ReduceLROnPlateau records the current rate under logs["lr"].
            # Do the same during warmup so every epoch logs the same keys.
            if logs is not None:
                logs["lr"] = tf.keras.backend.get_value(self.model.optimizer.lr)
            return
        self.reduce_lr_on_plateau.on_epoch_end(epoch, logs)


# Plateau scheduler monitoring val_loss; it is handed to the warmup wrapper below.
reduce_lr_on_plateau  = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=10, min_lr=1e-8)
# Warmup wrapper built from the two schedule constants and the plateau scheduler.
warmup_with_reduce_lr_callback = WarmupSchedulerWithReduceLROnPlateau(initial_learning_rate, warmup_epochs, reduce_lr_on_plateau)
# Early stopping on val_loss with a patience of 50 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50)

# Both CSV loggers write to the same file: the "head" logger starts it fresh
# (append=False), the "all" logger appends to it (append=True).
csv_logger_head = tf.keras.callbacks.CSVLogger(filename=config.NAME + "_training.log", append=False)
csv_logger_all = tf.keras.callbacks.CSVLogger(filename=config.NAME + "_training.log", append=True)

In [None]:
# Show the training-log filename (same expression the CSVLogger callbacks use).
filename=config.NAME + "_training.log"
filename

In [None]:
# Stage 1 compile.
# NOTE(review): layers="head" presumably restricts training to the head
# layers - confirm against this fork's MaskRCNN.compile.
model.compile(layers="head")
model.summary()

In [None]:
# Stage 1 training run.
# Validate on dataset_val rather than on the training data: val_loss drives
# EarlyStopping and ReduceLROnPlateau, and VALIDATION_STEPS is derived from the
# validation set size.
# NOTE(review): assumes the second positional argument of train() is the
# validation dataset, as in upstream Mask R-CNN - confirm for this fork.
model.train(dataset_train, dataset_val,
            epochs=EPOCHS[0],
            augmentations=None, #augmentation,
            custom_callbacks=[stop_early, warmup_with_reduce_lr_callback, csv_logger_head])
# Keep the per-epoch metrics of this run; the next stage appends to them.
history = model.keras_model.history.history

In [None]:
# Stage 2 compile.
# NOTE(review): layers="all" presumably makes every layer trainable - confirm
# against this fork's MaskRCNN.compile.
model.compile(layers="all")
model.summary()

In [None]:
# Stage 2 training run.
# Validate on dataset_val rather than on the training data: val_loss drives
# EarlyStopping and ReduceLROnPlateau, and VALIDATION_STEPS is derived from the
# validation set size.
# NOTE(review): assumes the second positional argument of train() is the
# validation dataset, as in upstream Mask R-CNN - confirm for this fork.
model.train(dataset_train, dataset_val,
            epochs=EPOCHS[1],
            augmentations=None, #augmentation,
            custom_callbacks=[stop_early, warmup_with_reduce_lr_callback, csv_logger_all])
new_history = model.keras_model.history.history
# Append this run's metrics to the stage-1 history. `.get` tolerates keys that
# only show up in this run instead of raising KeyError.
for k in new_history:
    history[k] = history.get(k, []) + new_history[k]



In [None]:
# 1-based epoch numbers for the x-axis.
epochs = tuple(range(1, len(list(history["loss"])) + 1))

plt.figure(figsize=(18, 6))

# One row per panel:
# (subplot position, extra subplot kwargs, train key, validation key,
#  train label, validation label)
loss_panels = (
    (131, {"title": "losses"}, "loss", "val_loss",
     "train loss", "valid loss"),
    (132, {}, "mrcnn_class_loss", "val_mrcnn_class_loss",
     "train class loss", "valid class loss"),
    (133, {}, "mrcnn_mask_loss", "val_mrcnn_mask_loss",
     "train mask loss", "valid mask loss"),
)

for position, subplot_kwargs, train_key, val_key, train_label, val_label in loss_panels:
    plt.subplot(position, **subplot_kwargs)
    plt.plot(epochs, history[train_key], label=train_label)
    plt.plot(epochs, history[val_key], label=val_label)
    plt.xticks(range(0, epochs[-1]+1, 20))
    plt.yscale("log")
    # Dashed guides at the first and the last epoch.
    for xpos in (epochs[0], epochs[-1]):
        plt.axvline(x=xpos, color="gray", linestyle="--", linewidth=1)
    plt.legend()

plt.show()

In [None]:
# Same settings as ShapesConfig, but with one image per GPU on one GPU.
class InferenceConfig(ShapesConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode (rebinds `model`, replacing the training model)
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=dir_model)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(dir_root, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
# Image written earlier by the sample-display cell. Those ids are drawn at
# random, so change the name if this particular file was not produced.
test_image_path = "./shape_test_images/shape_test_image_8.png"
image = cv2.imread(test_image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
# Fail here with a clear message rather than deep inside model.detect().
if image is None:
    raise FileNotFoundError(
        f"Could not read {test_image_path!r}; choose one of the files in ./shape_test_images/")

# Run detection
results = model.detect([image], verbose=1)
res = results[0]
print(res["rois"])
print(res["class_ids"])
print(res["scores"])
print(dataset_val.class_names)

# Visualize results
visualize.display_instances(image, res["rois"], res["masks"], res["class_ids"], dataset_val.class_names, res["scores"], figsize=(8, 8))

In [None]:
# Pick a random validation image and load it with its ground truth
image_id = random.choice(dataset_val.image_ids)
(original_image, image_meta, gt_class_id, gt_bbox, gt_mask) = modellib.load_image_gt(
    dataset_val, inference_config, image_id, use_mini_mask=False)

# Log each returned array; the gt_* entries are the ground truth.
for label, value in (
    ("original_image", original_image),
    ("image_meta", image_meta),
    ("gt_class_id", gt_class_id),
    ("gt_bbox", gt_bbox),
    ("gt_mask", gt_mask),
):
    log(label, value)

# visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
#                             dataset_val.class_names, figsize=(8, 8))