# Cup detection

### To build and tune a simple detection model I will use the following libraries:

In [None]:
import cv2
import imutils as imt
from matplotlib import pyplot as plt
import pandas as pd
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
import math
import numpy as np

## Preparing

I prepared the dataset from an example video: I cut frames out of the video with ffmpeg and then marked the cups in them with the labelImg application (https://github.com/tzutalin/labelImg). These annotations are used to tune the detection model.

## Read dataset

In [None]:
ROOT_PATH = "training/data/cut_video"


def xml_to_df(path):
    """Collect the bounding boxes of every ``*.xml`` annotation in *path*.

    Each ``<object>`` element of each file becomes one row. Fields are looked
    up by tag name (``filename``, ``size/width``, ``size/height``,
    ``object/name``, ``object/bndbox/{xmin,ymin,xmax,ymax}``) rather than by
    child position, so the result does not depend on the order in which the
    annotation tool wrote the elements.

    Args:
        path: Directory containing the annotation XML files.

    Returns:
        pandas.DataFrame with columns ``filename``, ``width``, ``height``,
        ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``; empty (but with
        these columns) when no annotation is found.
    """
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    box_tags = ('xmin', 'ymin', 'xmax', 'ymax')

    xml_list = []
    # Sorted so that positional access such as ``df.iloc[3]`` always refers to
    # the same annotation; glob itself returns files in arbitrary order.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # Nothing annotated in this frame: contributes no rows.
            continue

        filename = root.find('filename').text
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)

        for member in members:
            bndbox = member.find('bndbox')
            corners = tuple(int(bndbox.find(tag).text) for tag in box_tags)
            xml_list.append((filename, width, height, member.find('name').text) + corners)

    return pd.DataFrame(xml_list, columns=column_name)

### Let's see one of the samples.

In [None]:
def show_row(row):
    """Plot the frame referenced by *row* with its annotated box outlined.

    Args:
        row: Record exposing ``filename``, ``xmin``, ``ymin``, ``xmax`` and
            ``ymax`` attributes. The image is looked up under ``ROOT_PATH``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image file.
    """
    image_path = ROOT_PATH + "/" + row.filename
    img = cv2.imread(image_path, 1)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of an opaque cvtColor error.
        raise FileNotFoundError("Cannot read image: " + image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Cast to built-in ints: the row values may be NumPy scalars, which not
    # every OpenCV build accepts as point coordinates.
    lt_point = (int(row.xmin), int(row.ymin))
    rb_point = (int(row.xmax), int(row.ymax))
    COLOR = (0, 255, 0)
    cv2.rectangle(img, lt_point, rb_point, COLOR, 4)
    plt.imshow(img)

In [None]:
# Parse all annotation files of the dataset into a single table.
df = xml_to_df(path=ROOT_PATH + "/annotations")

# Preview the annotation stored at position 3 with its box drawn on the frame.
show_row(row=df.iloc[3])

## Detection models

### Simple HSV threshold model.

I will use information about the cup's color. The cup can be found in the HSV color model by selecting a color interval. To do that, I apply a symmetric pair of threshold functions (a lower and an upper bound around the target value) to each color dimension, and then combine the per-dimension results with a logical AND. I will call the resulting matrix the color mask matrix.

In [None]:
def thresh_holding(rgb, color, weight):
    """Build a binary mask of the pixels whose HSV values lie near *color*.

    Args:
        rgb: Image in RGB channel order.
        color: Target value per HSV channel (indexable, 3 items).
        weight: Half-width of the accepted interval per HSV channel
            (indexable, 3 items).

    Returns:
        Single-channel mask: 255 where every HSV channel is above
        ``color - weight`` and not above ``color + weight``, 0 elsewhere.
    """
    hsv = cv2.cvtColor(rgb, cv2.COLOR_RGB2HSV)

    def channel_band(idx):
        # Two one-sided thresholds ANDed together select one interval.
        plane = hsv[:, :, idx]
        above_low = cv2.threshold(plane, color[idx] - weight[idx], 255,
                                  cv2.THRESH_BINARY)[1]
        below_high = cv2.threshold(plane, color[idx] + weight[idx], 255,
                                   cv2.THRESH_BINARY_INV)[1]
        return cv2.bitwise_and(above_low, below_high)

    bands = [channel_band(idx) for idx in range(3)]

    # A pixel survives only if it is inside the interval on all channels.
    mask = cv2.bitwise_and(bands[0], bands[1])
    mask = cv2.bitwise_and(bands[2], mask)
    return mask


### Let's see the color mask matrix.

In [None]:
# Load one annotated frame as RGB, threshold it with hand-picked HSV bounds
# and display the resulting color mask.
img = cv2.cvtColor(
    cv2.imread(ROOT_PATH + "/" + df.iloc[3].filename, cv2.IMREAD_COLOR),
    cv2.COLOR_BGR2RGB,
)
img = thresh_holding(img, color=(125, 70, 70), weight=(20, 40, 40))
plt.imshow(img, cmap="gray")

### Prehandling

I apply adaptive histogram equalization to the input picture. It helps to reduce the problems caused by differing brightness and contrast between frames. OpenCV provides CLAHE for this.

In [None]:
def prehandling(bgr, clipLimit=20.0, tileGridSize=(6,6)):
    """Equalize local contrast by applying CLAHE to the lightness channel.

    The image is converted to LAB, CLAHE is applied to the first (L) plane
    only, and the result is converted back to BGR.

    Args:
        bgr: Image in BGR channel order.
        clipLimit: Forwarded to ``cv2.createCLAHE``.
        tileGridSize: Forwarded to ``cv2.createCLAHE``.

    Returns:
        The contrast-equalized image in BGR channel order.
    """
    clahe = cv2.createCLAHE(clipLimit=clipLimit, tileGridSize=tileGridSize)
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    # cv2.split returns a tuple in current OpenCV releases, so the planes are
    # unpacked rather than assigned into the result in place (which raises
    # TypeError on a tuple).
    lightness, a_plane, b_plane = cv2.split(lab)
    lab = cv2.merge((clahe.apply(lightness), a_plane, b_plane))
    return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

In [None]:
# Show the same frame after CLAHE equalization (converted to RGB for display).
img = prehandling(
    cv2.imread(ROOT_PATH + "/" + df.iloc[3].filename, cv2.IMREAD_COLOR),
    clipLimit=5,
)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img, cmap="gray")

### Let's see the effect of prehandling to color mask matrix.

In [None]:
# Equalize the frame first, then threshold it, to compare the color mask with
# the one obtained without pre-processing.
img = prehandling(
    cv2.imread(ROOT_PATH + "/" + df.iloc[3].filename, cv2.IMREAD_COLOR),
    clipLimit=5,
    tileGridSize=(6, 6),
)
img = thresh_holding(
    cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
    color=(125, 60, 70),
    weight=(20, 40, 40),
)
plt.imshow(img, cmap="gray")

## Detecting process

### Area overlap calculation function

To avoid duplicate bounding boxes I have to estimate the overlap area of two boxes.

In [None]:
def overlap(rect1, rect2):
    """Return how strongly two axis-aligned rectangles overlap, in [0, 1].

    The score is ``2 * intersection_area / (area1 + area2)``: 1 for identical
    rectangles, 0 for rectangles that do not intersect.

    Args:
        rect1: ``((left, top), (right, bottom))`` corner points.
        rect2: Same layout as *rect1*.

    Returns:
        The overlap score as a float; 0.0 when both rectangles have zero
        area (the ratio would otherwise be a division by zero).
    """
    (left1, top1), (right1, bottom1) = rect1
    (left2, top2), (right2, bottom2) = rect2

    delta_x = min(right1, right2) - max(left1, left2)
    delta_y = min(bottom1, bottom2) - max(top1, top2)
    # A non-positive extent on either axis means the rectangles are disjoint
    # (or only touch), so they share no area.
    common = delta_x * delta_y if delta_x > 0 and delta_y > 0 else 0

    area1 = abs(left1 - right1) * abs(top1 - bottom1)
    area2 = abs(left2 - right2) * abs(top2 - bottom2)
    total = area1 + area2
    if total == 0:
        return 0.0
    return 2.0 * common / total


### Detection function

To find bounding boxes I will use the OpenCV matchTemplate function. I will fill a template with the maximum value and then match it against the color mask matrix. To avoid duplicates I will merge rectangles that have a large overlap area.

In [None]:
def _merge_overlapping(boxes, overlap_threshhold):
    """Fuse boxes whose ``overlap`` score exceeds *overlap_threshhold*."""
    merged = list(boxes)
    i = 0
    while i < len(merged):
        j = 0
        while j < len(merged):
            if j != i and overlap(merged[i], merged[j]) > overlap_threshhold:
                (ax0, ay0), (ax1, ay1) = merged[i]
                (bx0, by0), (bx1, by1) = merged[j]
                merged[i] = ((min(ax0, bx0), min(ay0, by0)),
                             (max(ax1, bx1), max(ay1, by1)))
                # Delete by index: list.remove() deletes the first *equal*
                # item, which can be the freshly merged box itself.
                del merged[j]
                if j < i:
                    # Everything after j moved one slot to the left.
                    i -= 1
                # The box at i grew, so compare it against all boxes again
                # instead of skipping the one that slid into slot j.
                j = 0
            else:
                j += 1
        i += 1
    return merged


def windows_fill_calc(color_mask_matrix,  window_size=250, threshhold=0.5, overlap_threshhold=0.75):
    """Find square windows of the mask that are (almost) completely filled.

    An all-255 square template is matched against the mask with
    ``cv2.TM_CCORR_NORMED``. The response is rescaled so that its maximum is 1,
    and peaks above *threshhold* are collected greedily. Heavily overlapping
    windows are finally merged into their common bounding rectangle.

    Args:
        color_mask_matrix: Single-channel mask to search.
        window_size: Side length of the square template, in pixels.
        threshhold: Minimum rescaled response for a window to be kept.
            NOTE(review): a negative value never stops the peak search,
            because suppressed cells are reset to 0.
        overlap_threshhold: ``overlap`` score above which two windows are
            merged.

    Returns:
        List of ``((left, top), (right, bottom))`` rectangles; empty when the
        mask produces no positive response.
    """
    # The template must share the mask's dtype (the original referenced an
    # unrelated global here).
    template = np.full((window_size, window_size), 255, dtype=color_mask_matrix.dtype)

    match_result = cv2.matchTemplate(color_mask_matrix, template, cv2.TM_CCORR_NORMED)
    peak = cv2.minMaxLoc(match_result)[1]
    if not peak > 0:
        # Nothing correlates with the template; also avoids dividing by zero.
        return []

    # Handmade normalization: the best window gets a score of exactly 1.
    res = match_result / peak

    results = []
    while True:
        _, max_val, _, max_loc = cv2.minMaxLoc(res)
        if not max_val > threshhold:
            break
        # max_loc is (x, y), i.e. (column, row) of the window's top-left corner.
        bottom_right = (max_loc[0] + window_size, max_loc[1] + window_size)
        results.append((max_loc, bottom_right))

        # Clear the region so the same place is not found the next time.
        res[max_loc[1]:bottom_right[1], max_loc[0]:bottom_right[0]] = 0

    return _merge_overlapping(results, overlap_threshhold)
       
        

### Let's see an example of the detector working with untuned parameters.

In [None]:
def draw_detections(img, detectins):
    """Draw every detected rectangle onto *img* in place.

    Args:
        img: Image to draw on; it is modified by ``cv2.rectangle``.
        detectins: Iterable of ``(top_left, bottom_right)`` point pairs.
            The misspelled name is kept so existing keyword calls still work.
    """
    COLOR = (255, 0, 0)
    # Iterate the argument itself; the original looped over a module-level
    # ``detections`` variable and ignored what the caller passed in.
    for detected in detectins:
        cv2.rectangle(img, detected[0], detected[1], COLOR, 3)

# Run the detection steps by hand on the frame at position 9 of the table:
# read (BGR) -> CLAHE equalization -> RGB -> HSV color mask -> window search.
img_source = cv2.imread(ROOT_PATH +"/" + df.iloc[9].filename,1)
img = prehandling(img_source, clipLimit=5, tileGridSize=(6,6))
# Keep the equalized RGB frame separately: it is what gets drawn on and shown.
img_pre = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
# From here on `img` holds the binary color mask, not a color picture.
img = thresh_holding(img_pre, (125,60,70), (20,40,40))
detections = windows_fill_calc(img, threshhold=0.98, window_size=200)
draw_detections(img_pre, detections)
plt.imshow(img_pre)

## Entire model

Let's compose a model from these steps, i.e. build a detection pipeline.

In [None]:
class SimpleHSVDetector:
    """Detection pipeline: CLAHE equalization, HSV color mask, window search."""

    def __init__(self, color=(125,70,70), weight_color=(20,40,40),  window_size=250,
                 threshhold=0.98, overlap_threshhold=0.75,
                 clipLimit=5.0, tileGridSize=(6,6)):
        """Store the pipeline parameters.

        Args:
            color: Target color in HSV, passed to ``thresh_holding``.
            weight_color: Per-channel tolerance around *color*, passed to
                ``thresh_holding``.
            window_size: Search window side, passed to ``windows_fill_calc``.
            threshhold: Response threshold, passed to ``windows_fill_calc``.
            overlap_threshhold: Merge threshold, passed to
                ``windows_fill_calc``.
            clipLimit: CLAHE parameter, passed to ``prehandling``.
            tileGridSize: CLAHE parameter, passed to ``prehandling``.
        """
        self.color, self.weight_color, self.window_size = color, weight_color, window_size
        self.threshhold, self.overlap_threshhold = threshhold, overlap_threshhold
        self.clipLimit, self.tileGridSize = clipLimit, tileGridSize

    def detect(self, bgr):
        """Return the rectangles detected in the BGR image *bgr*.

        Returns:
            Whatever ``windows_fill_calc`` returns for the color mask of the
            equalized image.
        """
        equalized = prehandling(bgr, clipLimit=self.clipLimit, tileGridSize=self.tileGridSize)
        rgb = cv2.cvtColor(equalized, cv2.COLOR_BGR2RGB)
        color_mask = thresh_holding(rgb, self.color, self.weight_color)

        # Search the mask computed above (not a module-level image) and pass
        # each stored parameter to the argument it belongs to.
        return windows_fill_calc(color_mask,
                                 window_size=self.window_size,
                                 threshhold=self.threshhold,
                                 overlap_threshhold=self.overlap_threshhold)


## Estimation process.

Here I want to calculate the error of the detection model on the dataset.

In [None]:
#TODO: Estimate function

## Training the model parameters.

We can improve the quality of the model by changing the next parameters:

* target color
* range (tolerance) around the target color
* thresholds
* window size
* CLAHE parameters

To find the best parameters we can run a grid search over these parameters.