In [25]:
import pandas as pd
import numpy as np
from mrcnn.utils import Dataset
import os

### Let's read the CSV from the dataset

In [5]:

# Load the training bounding-box annotations (path is relative to the working directory).
df = pd.read_csv("datasets/car_object_detection/data/train_solution_bounding_boxes (1).csv")

In [6]:
# Preview the first rows: an image file name plus the xmin/ymin/xmax/ymax of one box.
df.head()

Unnamed: 0,image,xmin,ymin,xmax,ymax
0,vid_4_1000.jpg,281.259045,187.035071,327.727931,223.225547
1,vid_4_10000.jpg,15.163531,187.035071,120.329957,236.43018
2,vid_4_10040.jpg,239.192475,176.764801,361.968162,236.43018
3,vid_4_10020.jpg,496.483358,172.363256,630.02026,231.539575
4,vid_4_10060.jpg,16.63097,186.54601,132.558611,238.386422


In [24]:
# There are 559 rows, one per annotated bounding box (several rows can share the same image)
df.shape

(559, 5)

In [10]:
# Distinct values per column; fewer distinct image names than rows means some images repeat.
df.nunique()

image    355
xmin     440
ymin     149
xmax     438
ymax     184
dtype: int64

In [14]:
# Some image names are repeated, presumably because a single image can contain several annotated cars
df["image"].value_counts()

vid_4_26460.jpg    7
vid_4_6240.jpg     6
vid_4_6280.jpg     5
vid_4_26380.jpg    5
vid_4_26420.jpg    5
                  ..
vid_4_18840.jpg    1
vid_4_18820.jpg    1
vid_4_18340.jpg    1
vid_4_18360.jpg    1
vid_4_9980.jpg     1
Name: image, Length: 355, dtype: int64

## 1.- Mask R-CNN for car detection and segmentation:
The Mask R-CNN library requires the data to be managed through a `Dataset` object (`mrcnn.utils.Dataset`).
Let's create our own class that inherits from it. We need to register the classes and images with the built-in `add_class()` and `add_image()` functions, and define methods to load the dataset, to load the mask of an image, and to return an image reference.


In [29]:
# This class defines and loads the car_object_detection Dataset with all the images and bboxes
class CarsDataset(Dataset):
    """Car-detection dataset for Mask R-CNN, backed by the notebook-level ``df``.

    ``df`` must hold one row per bounding box with the columns ``image``,
    ``xmin``, ``ymin``, ``xmax`` and ``ymax``. Every image is registered under
    the source name ``"dataset"`` with its file name as ``id``, and the single
    foreground class is ``"car"`` (class id 1).
    """

    # Annotation rows with a position below this value belong to the training
    # split; the remaining rows form the validation split.
    TRAIN_ROWS = 500
    # Image size in pixels, assumed identical for every image in the dataset.
    # NOTE(review): box x-coordinates go beyond 380, so 676 has to be the
    # width -- confirm against the actual image files.
    IMAGE_WIDTH = 676
    IMAGE_HEIGHT = 380

    def load_dataset(self, dataset_dir="datasets/car_object_detection/data", mode='train'):
        """Register the ``car`` class and the images of the requested split.

        Args:
            dataset_dir: Directory containing ``training_images/`` and
                ``testing_images/``.
            mode: ``'train'`` or ``'val'`` to register annotated images listed
                in ``df``, or ``'test'`` to register every file found in
                ``testing_images/``.

        Raises:
            ValueError: If ``mode`` is not one of the three supported values.
        """
        if mode not in ('train', 'val', 'test'):
            raise ValueError("mode must be 'train', 'val' or 'test', got %r" % (mode,))

        self.add_class("dataset", 1, "car")

        if mode == 'test':
            images_dir = os.path.join(dataset_dir, "testing_images")
            # Sorted so that image ids are assigned in a reproducible order.
            for filename in sorted(os.listdir(images_dir)):
                self.add_image("dataset", image_id=filename,
                               path=os.path.join(images_dir, filename))
            return

        images_dir = os.path.join(dataset_dir, "training_images")
        # An image with several cars appears on several rows. Register it once
        # only, and decide its split from its first row so that the same image
        # can never end up in both the training and the validation set.
        wants_train = (mode == 'train')
        seen = set()
        for row, image_name in enumerate(df["image"].tolist()):
            if image_name in seen:
                continue
            seen.add(image_name)
            if (row < self.TRAIN_ROWS) == wants_train:
                self.add_image("dataset", image_id=image_name,
                               path=os.path.join(images_dir, image_name))

    def extract_boxes(self, image_id):
        """Return every annotated box of an image together with the image size.

        Args:
            image_id: Image file name as stored in the ``image`` column of ``df``.

        Returns:
            ``(boxes, width, height)`` where ``boxes`` is a list of
            ``[xmin, ymin, xmax, ymax]`` integer lists (empty when the image
            has no annotation) and ``width``/``height`` are in pixels.
        """
        rows = df.loc[df["image"] == image_id, ["xmin", "ymin", "xmax", "ymax"]]
        # Coordinates are stored as floats; truncate them to pixel indices.
        boxes = [[int(value) for value in box]
                 for box in rows.itertuples(index=False, name=None)]
        return boxes, self.IMAGE_WIDTH, self.IMAGE_HEIGHT

    def load_mask(self, image_id):
        """Build one rectangular mask per bounding box of the image.

        Args:
            image_id: Integer index into ``self.image_info``.

        Returns:
            ``(masks, class_ids)``: ``masks`` is a ``uint8`` array of shape
            ``[height, width, n_boxes]`` that is 1 inside each box and 0
            elsewhere; ``class_ids`` is an ``int32`` array with the ``car``
            class id repeated once per box.
        """
        image_name = self.image_info[image_id]["id"]
        boxes, width, height = self.extract_boxes(image_name)
        # Rows of the array are the y axis and columns are the x axis.
        masks = np.zeros([height, width, len(boxes)], dtype='uint8')
        class_ids = []
        for i, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            masks[ymin:ymax, xmin:xmax, i] = 1
            class_ids.append(self.class_names.index('car'))
        return masks, np.asarray(class_ids, dtype='int32')

    def image_reference(self, image_id):
        """Return the file path stored for the image at index ``image_id``."""
        return self.image_info[image_id]["path"]
    

#### Now let's load different datasets and train