# C1: Content Based Image Retrieval (Week 2)
## Team 7
* Marco Cordón
* Iñaki Lacunza
* Cristian Gutiérrez

#### Summary of the methods we have applied for Week 2 tests:
* **Method 1**: CieLAB + 2D & 1D Histograms of 32 bins + Histogram Intersection
* **Method 2**: RGB + 3D Histograms of 16 bins + Histogram_Intersection

In [1]:
import cv2
import os
import skimage
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import math

In [2]:
def get_dict_from_data(path, extension=".jpg"):
    dictionary = dict()
    for file in sorted(os.listdir(path)):
        if file.endswith(extension):
            key = os.path.splitext(file)[0].split('_').pop()
            dictionary[key] = cv2.imread(os.path.join(path, file))
            
    return dictionary

In [3]:
BBDD_PATH = './BBDD/'
QSD2_W1_PATH = './qsd2_w1/'
QSD1_W2_PATH = './qsd1_w2/'
QSD2_W2_PATH = './qsd2_w2/'

bbdd = get_dict_from_data(BBDD_PATH)
qsd2_w1 = get_dict_from_data(QSD2_W1_PATH)
qsd1_w2 = get_dict_from_data(QSD1_W2_PATH)
qsd2_w2 = get_dict_from_data(QSD2_W2_PATH)
qsd2_w2_masks = get_dict_from_data(QSD2_W2_PATH, extension=".png")

In [4]:
QST1_W2_PATH = './qst1_w2/'
qst1_w2 = get_dict_from_data(QST1_W2_PATH)

QST2_W2_PATH = './qst2_w2/'
qst2_w2 = get_dict_from_data(QST2_W2_PATH)

## Task 1: Multi-resolution Block-based Histograms

Each level of multi-resolution L will divide our image in 2^(L-1) blocks for each side x and y.

We have implemented both 3D and 2D + 1D. The later will be mostly used for the CieLAB first channel L that represents Luminosity.

In [5]:
def get_2d1d_hist(img, bins, mask = None):
    
    full_histogram = np.array([])
    
    if mask is not None: hist_1d = cv2.calcHist([img], [0], mask, [bins], [0, 256])
    else: hist_1d = cv2.calcHist([img], [0], None, [bins], [0, 256])
    if np.sum(hist_1d != 0): hist_1d /= np.sum(hist_1d)
    
    if mask is not None: hist_2d = cv2.calcHist([img, img], [1, 2], mask, [bins, bins], [0, 256, 0, 256])
    else: hist_2d = cv2.calcHist([img, img], [1, 2], None, [bins, bins], [0, 256, 0, 256])
    if np.sum(hist_2d != 0): hist_2d /= np.sum(hist_2d)
    
    hist = np.concatenate((hist_1d.flatten(), hist_2d.flatten()))

    return hist    

In [6]:
def histogram_by_block(img, n_blocks_x, n_blocks_y, bins = 8, dim = "2d1d", mask = None):
    
    M = math.ceil(img.shape[0]/n_blocks_x)
    N = math.ceil(img.shape[1]/n_blocks_y)
    tiles = [img[x:x+M, y:y+N] for x in range(0, img.shape[0], M) for y in range(0, img.shape[1], N)]
    
    if mask is not None: 
        mask_tiles = [mask[x:x+M,y:y+N] for x in range(0, img.shape[0],M) for y in range(0, img.shape[1], N)]
       
    histogram = np.array([])
    
    for i in range(len(tiles)):
        if dim == "3":
            if mask is not None: tile_hist = cv2.calcHist([tiles[i], tiles[i], tiles[i]], [0, 1, 2], mask_tiles[i], [bins, bins, bins], [0, 256, 0, 256, 0, 256])
            else: tile_hist = cv2.calcHist([tiles[i]], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
        
        elif dim == "2d1d":           
            if mask is not None: tile_hist = get_2d1d_hist(tiles[i], bins, mask = mask_tiles[i])
            else: tile_hist = get_2d1d_hist(tiles[i], bins, mask = None)
        
        if np.sum(tile_hist) != 0:
            tile_hist /= np.sum(tile_hist)
            
        histogram = np.concatenate((histogram, tile_hist.flatten()))

    return histogram

In [7]:
def get_full_hist(img, levels = 3, bins = 8, dim = "2d1d", color = "Lab", mask = None):
    
    if color == "Lab":
        image = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)
    else: 
        image = img.copy()
    
    levels_histogram = np.array([])
    
    for i in range(levels):
        block_hist = histogram_by_block(image, 2**i, 2**i, bins = bins, dim = dim, mask = mask) 
        levels_histogram = np.concatenate((levels_histogram, block_hist))
        
    return levels_histogram

In [8]:
def get_all_hists(data, levels = 3, bins = 8, dim = "2d1d", color = "Lab", masks = None):
    full_dict = dict()
    
    for k, v in data.items():
        if masks: 
            if masks[k].ndim == 3:
                full_dict[k] = get_full_hist(v, levels = levels, bins = bins, dim = dim, color = color, mask = masks[k][:, :, 0])
            else:
                full_dict[k] = get_full_hist(v, levels = levels, bins = bins, dim = dim, color = color, mask = masks[k])
        
        else:
            full_dict[k] = get_full_hist(v, levels = levels, bins = bins, dim = dim, color = color, mask = None)
        
    return full_dict

## Task 2: Benchmark and comparision with Week 1 (QSD2_W1)

We did a benchmark for the QSD2 of Week 1 using the previous week methods and the multi-resolution block-based methods. The results were improved a lot (slides).

In [9]:
def get_mask_with_percentages(img, path=None):
    # 1.
    blur_img = cv2.GaussianBlur(img[:,:,0], (25,25), 0)
            
    # 2. 
    ret2, th2 = cv2.threshold(blur_img, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    
    # 3.
    th2 = th2 / 255.0
    
    # Calculting the kernel size relative to the image size
    h, w, c = img.shape
    
    hper = 0.05
    wper = 0.05
    
    hker = int(h * hper)
    wker = int(w * wper)
    
    # 4.
    # Gradient
    kernel = np.ones((hker, wker),np.uint8)
    gradient = cv2.morphologyEx(th2, cv2.MORPH_GRADIENT, kernel, cv2.BORDER_CONSTANT, borderValue=1)
    
    # We need to invert the mask
    th2 = 1 - th2
    
    # Or
    mask = (th2 == 1) + (gradient == 1)
    mask = (255* mask).astype(np.uint8)
    
    # 2x Closing
    kernel_closing1 = np.ones((int(hker*0.5), int(wker*0.5)), np.uint8)
    closing1 = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_closing1)
    kernel_closing2 = np.ones((int(hker*1.), int(wker*1.)), np.uint8)
    closing2 = cv2.morphologyEx(closing1, cv2.MORPH_CLOSE, kernel_closing2)
    
    # Opening:
    kernel_opening = np.ones((int(hker),int(wker)),np.uint8)
    opening = cv2.morphologyEx(closing2, cv2.MORPH_OPEN, kernel_opening)
    
    # 3.rd Closing
    kernel_closing3 = np.ones((int(hker*1.25), int(wker*1.25)),np.uint8)
    closing3 = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel_closing3)
    
    # Erosion
    kernel_erode = np.ones((int(hker*1.), int(wker*1.)),np.uint8)
    erode = cv2.erode(closing3, kernel_erode)        
    
    # Preprocessing steps visualization:
    """
    plt.axis("off")
    fig, ax = plt.subplots(nrows=3, ncols=3)
    
    ax[0, 0].imshow(img)
    ax[0, 1].imshow(th2, cmap = "gray") # blur
    ax[0, 2].imshow(gradient, cmap = "gray") # gradient
    ax[1, 0].imshow(mask, cmap = "gray") # or
    ax[1, 1].imshow(closing1, cmap = "gray") # closing1
    ax[1, 2].imshow(closing2, cmap = "gray") # closing2
    ax[2, 0].imshow(opening, cmap = "gray") # opening
    ax[2, 1].imshow(closing3, cmap = "gray") # closing3
    ax[2, 1].imshow(erode, cmap = "gray") # erode
    
    """

    return erode

In [10]:
def get_masks(query_set, path = None):
    
    masks = dict()
    
    for name, img in query_set.items():
        #mask = cv2.cvtColor(mask ,cv2.COLOR_BGR2GRAY)
        predicted_mask = get_mask_with_percentages(query_set[name])
        
        # If path is True, save masks in a specified path. For saving test masks
        if path:
            if not os.path.exists(path):
                os.makedirs(path)
                
            filename = os.path.join(path, name + ".png")
            cv2.imwrite(filename, predicted_mask)
            
        masks[name] = predicted_mask
        
    return masks

In [11]:
def apply_mask(querys, masks):
    
    masked_qs = dict()
    
    for name, img in querys.items():
        img_mask = masks[name]
        
        masked_image = cv2.bitwise_and(img, img, mask = img_mask.astype(np.uint8))
        masked_qs[name] = masked_image

    return masked_qs

In [12]:
def Euclidean(h1, h2):
    return np.linalg.norm(h1 - h2)

In [13]:
def L1_distance(h1, h2):
    result = np.subtract(h1, h2)
    result = np.absolute(result)
    return np.sum(result)

In [14]:
def XSquaredDistance(h1, h2):
    result = ((h1 - h2)**2 / (h1 + h2))
    result = np.nan_to_num(result, nan = 0.0)
    return np.sum(result)

In [15]:
def HistogramIntersection(h1, h2):
    return np.sum(np.minimum(h1, h2))

In [16]:
def HellingerKernel(h1, h2):
    x = np.multiply(h1, h2)
    x = np.sqrt(x)
    return np.sum(x)

In [17]:
def compare(query, database, method):
    result = dict()
    for k, v in database.items():
        if method == "Euclidean": result[k] = Euclidean(v, query)
        elif method == "L1": result[k] = L1_distance(v, query)
        elif method == "X_Squared": result[k] = XSquaredDistance(v, query)
        elif method == "Histogram_Intersection": result[k] = HistogramIntersection(v, query)
        elif method == "Hellinger_Kernel": result[k] = HellingerKernel(v, query)
        
    return result

In [18]:
def k_neighbours(dictionary, k=10, rev=False):
    result_dict = dict(sorted(dictionary.items(), key=lambda item: item[1], reverse=rev))
    return [int(keys) for keys,v in result_dict.items()][:k]

In [19]:
def add_list_level(input_list):
    out = []
    for ll in input_list:
        tmp = []
        for q in ll:
            tmp.append([q])
        out.append(tmp)
    return (out)

list_depth = lambda L: (isinstance(L, list) or isinstance(L, tuple)) and max(map(list_depth, L))+1

def compute_mapk(gt,hypo,k_val):

    hypo = list(hypo)
    if list_depth(hypo) == 2:
        hypo = add_list_level(hypo.copy())

    apk_list = []
    for ii,query in enumerate(gt):
        for jj,sq in enumerate(query):
            apk_val = 0.0
            if len(hypo[ii]) > jj:
                apk_val = apk([sq],hypo[ii][jj], k_val)
            apk_list.append(apk_val)
            
    return np.mean(apk_list)

In [20]:
def apk(actual, predicted, k=10):
    if len(predicted)>k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i,p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i+1.0)

    if not actual:
        return 0.0

    return score / min(len(actual), k)

def mapk(actual, predicted, k=10):
    return np.mean([apk(a,p,k) for a,p in zip(actual, predicted)])

In [21]:
def calculateMAPK(histograms, histograms_bbdd, method, ground_truth = None, topK=10, rev = False):
    apk_list = []
              
    for k, v in histograms.items():
        apk_list += [k_neighbours(compare(v, histograms_bbdd, method), topK, rev = rev)]
      
    if ground_truth:
        #print(ground_truth)
        #print(apk_list)
        return mapk(actual=ground_truth, predicted=apk_list, k=topK)
    else:
        return apk_list

### Method 1: 2D+1D histograms, Lab color space, 3 levels (1, 4, 16), 256 bins, Hellinger Kernel

In [22]:
with open('./qsd2_w1/gt_corresps.pkl', 'rb') as f:
    cor2_w1 = pickle.load(f)

hists_bbdd_Lab = get_all_hists(bbdd, levels = 3, bins = 256, dim = "2d1d", color = "Lab", masks = None)

masks_qsd2_w1 = get_masks(qsd2_w1)
hists_qsd2_w1_Lab = get_all_hists(qsd2_w1, levels = 3, bins = 256, dim = "2d1d", color = "Lab", masks = masks_qsd2_w1)

MAPK_method_1_qsd2_w1_k1 = calculateMAPK(hists_qsd2_w1_Lab, hists_bbdd_Lab, "Hellinger_Kernel", ground_truth = cor2_w1, topK=1, rev = True)
MAPK_method_1_qsd2_w1_k5 = calculateMAPK(hists_qsd2_w1_Lab, hists_bbdd_Lab, "Hellinger_Kernel", ground_truth = cor2_w1, topK=5, rev = True)
MAPK_method_1_qsd2_w1_k10 = calculateMAPK(hists_qsd2_w1_Lab, hists_bbdd_Lab, "Hellinger_Kernel", ground_truth = cor2_w1, topK=10, rev = True)

print(f"Method1 mAPK using k=1: {MAPK_method_1_qsd2_w1_k1}")
print(f"Method1 mAPK using k=5: {MAPK_method_1_qsd2_w1_k5}")
print(f"Method1 mAPK using k=10: {MAPK_method_1_qsd2_w1_k10} \n\n")

Method1 mAPK using k=1: 0.6333333333333333
Method1 mAPK using k=5: 0.6511111111111111
Method1 mAPK using k=10: 0.6622222222222223 




### Method 2: 3D histograms, RGB color space, 3 levels (1, 4, 16), 8 bins, Histogram Intersection

In [23]:
with open('./qsd2_w1/gt_corresps.pkl', 'rb') as f:
    cor2_w1 = pickle.load(f)

hists_bbdd_RGB = get_all_hists(bbdd, levels = 3, bins = 8, dim = "3", color = "RGB", masks = None)

masks_qsd2_w1 = get_masks(qsd2_w1)
hists_qsd2_w1_RGB = get_all_hists(qsd2_w1, levels = 3, bins = 8, dim = "3", color = "RGB", masks = masks_qsd2_w1)

MAPK_method_2_qsd2_w1_k1 = calculateMAPK(hists_qsd2_w1_RGB, hists_bbdd_RGB, "Histogram_Intersection", ground_truth = cor2_w1, topK=1, rev = True)
MAPK_method_2_qsd2_w1_k5 = calculateMAPK(hists_qsd2_w1_RGB, hists_bbdd_RGB, "Histogram_Intersection", ground_truth = cor2_w1, topK=5, rev = True)
MAPK_method_2_qsd2_w1_k10 = calculateMAPK(hists_qsd2_w1_RGB, hists_bbdd_RGB, "Histogram_Intersection", ground_truth = cor2_w1, topK=10, rev = True)

print(f"Method2 mAPK using k=1: {MAPK_method_2_qsd2_w1_k1}")
print(f"Method2 mAPK using k=5: {MAPK_method_2_qsd2_w1_k5}")
print(f"Method2 mAPK using k=10: {MAPK_method_2_qsd2_w1_k10} \n\n")

Method2 mAPK using k=1: 0.6
Method2 mAPK using k=5: 0.6538888888888889
Method2 mAPK using k=10: 0.6627777777777778 




## Task 3: Detect and remove text from images in QSD1-W2

In [24]:
def get_name_boxs(img):
    
    h_i, w_i, c = img.shape
    
    top_limit = int(round(h_i/6))
    bottom_limit = int(h_i - top_limit)
    
    left_area = int(round(w_i/3))
    right_area = int(left_area*2)
    
    area_pixel= int(h_i*w_i)
    
    # BGR to Lab conversion, and take L channel only
    image = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)
    imageL = image[:, :, 0]
    
    # Black hat and top hat separately to the L channel image
    kernel = np.ones((2, 10),np.uint8)
    thL = cv2.morphologyEx(imageL, cv2.MORPH_TOPHAT, kernel, iterations = 5)
    bhL = cv2.morphologyEx(imageL, cv2.MORPH_BLACKHAT, kernel, iterations = 5)
    
    # Take the highest value pixels, mostly letters
    fthL_th = thL.max() - 50
    fthL = (thL[:, :] > fthL_th) * 255
    fthL = fthL.astype('uint8')    
        
    fbhL_th = bhL.max() - 50
    fbhL = (bhL[:, :] > fbhL_th) * 255
    fbhL = fbhL.astype('uint8')
    
    # OR between top hat and black hat most important pixels
    mix = fbhL + fthL
    mix = mix>1
    mix= mix.astype('uint8')
    
    # Closing
    kernel = np.ones((20, 30),np.uint8)
    closing = cv2.morphologyEx(mix, cv2.MORPH_CLOSE, kernel)
    
    
    # Connected Components
    analysis = cv2.connectedComponentsWithStats(closing,4, cv2.CV_32S) 
    (totalLabels, label_ids, values, centroid) = analysis 

    # Initialize a new image to store  
    # all the output components 
    output = np.zeros(imageL.shape, dtype="uint8") 

    # Loop through each component 
    mask = np.zeros([h_i,w_i], dtype=np.uint8)
    cont=0
    x1_prev= -10000
    for i in range(1, totalLabels): 
        
        # Area of the component 
        area = values[i, cv2.CC_STAT_AREA]  
        
        (X, Y) = centroid[i]
        
        if (area >round(area_pixel*0.0009)) and ((Y<top_limit)or(Y>bottom_limit))and ((X>left_area)and(X<right_area)): 
            x1 = values[i, cv2.CC_STAT_LEFT] 
            y1 = values[i, cv2.CC_STAT_TOP] 
            w = values[i, cv2.CC_STAT_WIDTH] 
            h = values[i, cv2.CC_STAT_HEIGHT]
            
            pt1 = (x1, y1) 
            pt2 = (x1+ w, y1+ h)
            if (int(x1 - x1_prev) < int(round(w_i/4)) and (int(x1 - x1_prev) > int(round(-w_i/4)))):
                roi_corners = np.array([(x1_prev,y1_prev),(x1+w,y1), pt2,(x1_prev,y1+h)], dtype=np.int32)
            else:
                roi_corners = np.array([pt1,(x1+w,y1), pt2,(x1,y1+h)], dtype=np.int32)
                
            cv2.fillPoly(mask, [roi_corners], 255)
            output = cv2.bitwise_or(output, mask)
            cont = cont+1
            x1_prev= x1
            y1_prev = y1                           
                        
    if cont == 0:
        output = fthL
        
        
    # Closing
    kernel2 = np.ones((int(h_i*0.001), int(w_i*0.03)),np.uint8)
    closing2 = cv2.morphologyEx(output, cv2.MORPH_CLOSE, kernel2)
    
    # Dilation
    kernel3 = np.ones((int(h_i*0.03), int(w_i*0.003)),np.uint8)
    dilate = cv2.dilate(closing2,kernel3)

    #print(thL.max())
    #print(np.count_nonzero((fthL < 350)&(fthL > 0)))
    
    # For visualization:
    """
    plt.axis("off")
    fig, ax = plt.subplots(nrows=3, ncols=3, figsize = (10, 10))
    
    ax[0, 0].imshow(dilate, cmap = "gray")
    ax[0, 1].imshow(closing, cmap = "gray")
    ax[0, 2].imshow(closing2, cmap = "gray")
    
    ax[1, 0].imshow(thL, cmap = "gray")
    ax[1, 1].imshow(fthL, cmap = "gray")        
    ax[1, 2].imshow(mix, cmap = "gray")
    
    ax[2, 0].imshow(bhL, cmap = "gray")
    ax[2, 1].imshow(fbhL, cmap = "gray")
    ax[2, 2].imshow(output, cmap = "gray")
    """
    
    return dilate

In [25]:
def get_tl_and_br(mask):
    
    xl = 100000
    xr = -1
    yu = 100000
    yd = -1
    
    for y in range(mask.shape[0]):
        for x in range(mask.shape[1]):
            if mask[y, x] == 255:
                if x < xl: xl = x
                if x > xr: xr = x
                if y < yu: yu = y
                if y > yd: yd = y
    
    return (xl, yu), (xr, yd)

In [26]:
def get_pred_cords(masks_dict):

    result = []
    for name, mask in masks_dict.items():
        tl, br = get_tl_and_br(mask)
        result.append([tl[0], tl[1], br[0], br[1]])
        
    return result

In [27]:
def get_pred_masks(qs):
    
    masks_dict = dict()
    
    for name, img in qs.items():
        masks_dict[name] = get_name_boxs(img)
    
    return masks_dict
    

In [28]:
def get_rectangular_predicted_masks(qs):
    masks_dict = dict()
    
    
    for name, img in qs.items():
        not_rect_mask = get_name_boxs(img)
        
        tl, br = get_tl_and_br(not_rect_mask)
        tr = (br[0], tl[1])
        bl = (tl[0], br[1])
        
        #print(f"tl: {tl}, tr:{tr}, bl: {bl}, br: {br}")
        
        rect_mask = np.zeros((img.shape[0], img.shape[1]), dtype = np.uint8)
        roi_corners = np.array([tl, tr, br, bl], dtype = np.int32)
        cv2.fillPoly(rect_mask, [roi_corners], 255)
        
        masks_dict[name] = rect_mask
        
    return masks_dict

In [70]:
masks_qst1 = get_rectangular_predicted_masks(qst1_w2)
text_boxes_qst1 = get_pred_cords(masks_qst1)

path = "./QST1/"
name = "text_boxes.pkl"

if not os.path.exists(path):
    os.makedirs(path)

filename = os.path.join(path, name)
with open(filename, 'wb') as f:
    pickle.dump(text_boxes_qst1, f)
    
    
with open(filename, 'rb') as f:
    saved_res = pickle.load(f)

print("Text boxes of qst1: \n{}\n".format(saved_res))

Text boxes of qst1: 
[[85, 330, 423, 380], [51, 377, 251, 418], [70, 316, 344, 361], [58, 14, 202, 120], [66, 0, 362, 62], [91, 319, 425, 367], [84, 41, 424, 108], [672, 89, 1281, 208], [22, 12, 266, 303], [49, 271, 277, 328], [388, 589, 664, 662], [590, 1607, 1167, 1729], [65, 306, 299, 349], [63, 19, 324, 67], [67, 13, 316, 48], [47, 0, 274, 42], [62, 37, 311, 91], [110, 413, 512, 461], [30, 12, 203, 107], [161, 679, 450, 778], [591, 1330, 1167, 1443], [53, 33, 440, 93], [65, 17, 315, 62], [38, 473, 224, 504], [558, 2055, 1149, 2197], [284, 483, 494, 550], [0, 0, 225, 276], [519, 1905, 1086, 2023], [77, 23, 439, 97], [86, 534, 404, 609]]



## Task 4: Evaluate text detection using bounding box in QSD1-W2

Evaluated with Mean Intersection-over-Union (IoU)

In [33]:
def get_gt_tb_dict(tb, ds):
    i = 0
    
    result = dict()
    
    for k, v in ds.items():
        name = str(i).zfill(5)
        
        inner_dict = dict()
        
        tl = tuple(tb[i][0][0][:])
        bl = tuple(tb[i][0][1][:])
        br = tuple(tb[i][0][2][:])
        tr = tuple(tb[i][0][3][:])
        size = tuple([v.shape[0], v.shape[1]])
        
        mask = np.zeros((size[0], size[1]), dtype=np.uint8)
        roi_corners = np.array([tl, tr, br, bl], dtype=np.int32)
        cv2.fillPoly(mask, [roi_corners], 255)
        
        result[name] = mask
        
        i+=1
        
    return result

In [34]:
def binaryMaskIOU(mask1, mask2):
    mask1_area = np.count_nonzero(mask1 == 255)
    mask2_area = np.count_nonzero(mask2 == 255)
    intersection = np.count_nonzero(np.logical_and(mask1 == 255, mask2 == 255))
    iou = intersection/(mask1_area+mask2_area-intersection)
    return iou

In [35]:
def MeanIOU(gt_dict, pred_dict):
    result = 0
    for name, mask in gt_dict.items():
        result += binaryMaskIOU(gt_dict[name], pred_dict[name])
    
    result /= len(gt_dict)
    return result

#### Mean IoU for QSD1_W2

In [36]:
with open('./qsd1_w2/text_boxes.pkl', 'rb') as f:
    gt_text_boxes_qsd1 = pickle.load(f)
    
gt_tb_masks_qsd1 = get_gt_tb_dict(gt_text_boxes_qsd1, qsd1_w2)
    
pred_masks_tb_qsd1 = get_rectangular_predicted_masks(qsd1_w2)

MeanIOU_qsd1 = MeanIOU(gt_tb_masks_qsd1, pred_masks_tb_qsd1)

print(f"Mean Interception Over Union in QSD1: {MeanIOU_qsd1}")

Mean Interception Over Union in QSD1: 0.6023614895982022


## Task 5: Test query system using query set QSD1-W2
We did a Benchmark on QSD1_W2 and obtained the following best methods and metrics:

* **Method 1**: CieLAB + 2D & 1D Histograms of 32 bins + Histogram Intersection
* **Method 2**: RGB + 3D Histograms of 16 bins + Histogram_Intersection

In [37]:
def InvertMasks(mask_dict_orig):
    masks_dict_invert = dict()
    for name, img in mask_dict_orig.items():
        masks_dict_invert[name] = (255-mask_dict_orig[name])
    
    return masks_dict_invert

### Method 1: CieLAB + 2D & 1D Histograms of 32 bins + Histogram Intersection

In [38]:
with open('./qsd1_w2/gt_corresps.pkl', 'rb') as f:
    cor1_w2 = pickle.load(f)
    
pred_masks_tb_qsd1 = get_rectangular_predicted_masks(qsd1_w2)    
pred_masks_tb_qsd1_inver = InvertMasks(pred_masks_tb_qsd1)
    
hists_bbdd_Lab= get_all_hists(bbdd, levels = 3, bins = 32, dim = "2d1d", color = 'Lab', masks = None)
hists_qsd1_w2_Lab = get_all_hists(qsd1_w2, levels = 3, bins = 32, dim = "2d1d", color = 'Lab', masks = pred_masks_tb_qsd1_inver)

MAPK_qsd1_w2_Lab_k1 = calculateMAPK(hists_qsd1_w2_Lab, hists_bbdd_Lab, "Histogram_Intersection", ground_truth = cor1_w2, topK=1, rev = True)
MAPK_qsd1_w2_Lab_k5 = calculateMAPK(hists_qsd1_w2_Lab, hists_bbdd_Lab, "Histogram_Intersection", ground_truth = cor1_w2, topK=5, rev = True)
MAPK_qsd1_w2_Lab_k10 = calculateMAPK(hists_qsd1_w2_Lab, hists_bbdd_Lab, "Histogram_Intersection", ground_truth = cor1_w2, topK=10, rev = True)

print(f"Method1 mAPK using k=1: {MAPK_qsd1_w2_Lab_k1}")
print(f"Method1 mAPK using k=5: {MAPK_qsd1_w2_Lab_k5}")
print(f"Method1 mAPK using k=10: {MAPK_qsd1_w2_Lab_k10} \n\n")

Method1 mAPK using k=1: 0.5333333333333333
Method1 mAPK using k=5: 0.5611111111111112
Method1 mAPK using k=10: 0.577037037037037 




### Method 2: RGB + 3D Histograms of 16 bins + Histogram Intersection

In [39]:
with open('./qsd1_w2/gt_corresps.pkl', 'rb') as f:
    cor1_w2 = pickle.load(f)
    
pred_masks_tb_qsd1 = get_rectangular_predicted_masks(qsd1_w2)    
pred_masks_tb_qsd1_inver = InvertMasks(pred_masks_tb_qsd1)
    
hists_bbdd_RGB= get_all_hists(bbdd, levels = 3, bins = 16, dim = "3", color = 'RGB', masks = None)
hists_qsd1_w2_RGB = get_all_hists(qsd1_w2, levels = 3, bins = 16, dim = "3", color = 'RGB', masks = pred_masks_tb_qsd1_inver)

MAPK_qsd1_w2_RGB_k1 = calculateMAPK(hists_qsd1_w2_RGB, hists_bbdd_RGB, "Histogram_Intersection", ground_truth = cor1_w2, topK=1, rev = True)
MAPK_qsd1_w2_RGB_k5 = calculateMAPK(hists_qsd1_w2_RGB, hists_bbdd_RGB, "Histogram_Intersection", ground_truth = cor1_w2, topK=5, rev = True)
MAPK_qsd1_w2_RGB_k10 = calculateMAPK(hists_qsd1_w2_RGB, hists_bbdd_RGB, "Histogram_Intersection", ground_truth = cor1_w2, topK=10, rev = True)

print(f"Method1 mAPK using k=1: {MAPK_qsd1_w2_RGB_k1}")
print(f"Method1 mAPK using k=5: {MAPK_qsd1_w2_RGB_k5}")
print(f"Method1 mAPK using k=10: {MAPK_qsd1_w2_RGB_k10} \n\n")

Method1 mAPK using k=1: 0.5333333333333333
Method1 mAPK using k=5: 0.55
Method1 mAPK using k=10: 0.5621693121693121 




## Task 6: Apply full processing pipeline and split paintings

In [71]:
def order_tls_and_brs(tls_and_brs, centroids): 
    if len(tls_and_brs) != 2: return tls_and_brs

    if ((centroids[0][0] - centroids[1][0])**2 < (centroids[0][1] - centroids[1][1])**2): horizontal_ordering = False
    else: horizontal_ordering = True
        
    if horizontal_ordering:
        if centroids[0][0] < centroids[1][0]: return tls_and_brs
        else: return [tls_and_brs[1], tls_and_brs[0]] 
    else:
        if centroids[0][1] < centroids[1][1]: return tls_and_brs
        else: return [tls_and_brs[1], tls_and_brs[0]]
        

In [72]:
def divide_paintings(img):
    
    h_i, w_i, c = img.shape
    
    top_limit = int(round(h_i/8))
    bottom_limit = int(h_i - top_limit)
    
    left_area = int(round(w_i/8))
    right_area = int(w_i - left_area)
    
    area_pixel= int(h_i*w_i)
    
    image = cv2.cvtColor(img, cv2.COLOR_BGR2Lab)
    imageL = image[:, :, 0]
    imagea = image[:, :, 1]
    imageb = image[:, :, 2]
    
    #kernel = np.ones((2, 10),np.uint8)
    #thL = cv2.morphologyEx(imageL, cv2.MORPH_TOPHAT, kernel, iterations = 5)
    #bhL = cv2.morphologyEx(imageL, cv2.MORPH_BLACKHAT, kernel, iterations = 5)
    
    kernel = np.ones((2, 2),np.uint8)
    gradL = cv2.morphologyEx(imageL, cv2.MORPH_GRADIENT, kernel, iterations = 20)

    ret2, th2 = cv2.threshold(gradL,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    
    kernel = np.ones((round(h_i*0.001), round(w_i*0.001)),np.uint8)
    opening1 = cv2.morphologyEx(th2, cv2.MORPH_OPEN, kernel, iterations = 3)
    
    
    # -----------------------------------------------------------------------
    analysis = cv2.connectedComponentsWithStats(opening1, 4, cv2.CV_32S) 
    (totalLabels, label_ids, values, centroid) = analysis 

    # Initialize a new image to store  
    # all the output components 
    output1 = np.zeros(imageL.shape, dtype="uint8") 

    # Loop through each component 
    
    mask = np.zeros([h_i,w_i], dtype=np.uint8)
    cont=0
    x1_prev= -10000
    for i in range(1, totalLabels): 
        
          # Area of the component 
        area = values[i, cv2.CC_STAT_AREA]  
        
        (X, Y) = centroid[i]
         
        #if True:
        if (area > round(area_pixel*0.00001)):
            x1 = values[i, cv2.CC_STAT_LEFT] 
            y1 = values[i, cv2.CC_STAT_TOP] 
            w = values[i, cv2.CC_STAT_WIDTH] 
            h = values[i, cv2.CC_STAT_HEIGHT]
            
            pt1 = (x1, y1) 
            pt2 = (x1+ w, y1+ h)
            if (int(x1 - x1_prev) < int(round(w_i/4)) and (int(x1 - x1_prev) > int(round(-w_i/4)))):
                roi_corners = np.array([(x1_prev,y1_prev),(x1+w,y1), pt2,(x1_prev,y1+h)], dtype=np.int32)
            else:
                roi_corners = np.array([pt1,(x1+w,y1), pt2,(x1,y1+h)], dtype=np.int32)
        
               
            cv2.fillPoly(mask, [roi_corners], 255)
            output1 = cv2.bitwise_or(output1, mask)
            cont = cont+1
            x1_prev= x1
            y1_prev = y1                           
            
    if cont == 0:
        output1 = th2
        
    #print(cont)
    
    # ---------------------------------------------------------------
    
    kernel = np.ones((round(h_i*0.02), round(w_i*0.01)),np.uint8)
    opening2 = cv2.morphologyEx(output1, cv2.MORPH_OPEN, kernel, iterations = 15)
    
    # -----------------------------------------------------------------------
    analysis2 = cv2.connectedComponentsWithStats(opening2, 4, cv2.CV_32S) 
    (totalLabels2, label_ids2, values2, centroid2) = analysis2 

    # Initialize a new image to store  
    # all the output components 
    output2 = np.zeros(imageL.shape, dtype="uint8") 

    # Loop through each component 
    
    mask2 = np.zeros([h_i,w_i], dtype=np.uint8)
    cont=0
    
    tls_and_brs = []
    centroids = []
    
    for i in range(1, totalLabels2): 

        x1_prev= -10000
        
          # Area of the component 
        area = values2[i, cv2.CC_STAT_AREA]  
        
        (X, Y) = centroid2[i]
        
        if area > round(area_pixel*0.05):
        #if (area > round(area_pixel*0.2)) and ((Y<top_limit)or(Y>bottom_limit))and ((X>left_area)and(X<right_area)): 
            x1 = values2[i, cv2.CC_STAT_LEFT] 
            y1 = values2[i, cv2.CC_STAT_TOP] 
            w = values2[i, cv2.CC_STAT_WIDTH] 
            h = values2[i, cv2.CC_STAT_HEIGHT]
            
            pt1 = (x1, y1) 
            pt2 = (x1+ w, y1+ h)
            if (int(x1 - x1_prev) < int(round(w_i/4)) and (int(x1 - x1_prev) > int(round(-w_i/4)))):
                roi_corners2 = np.array([(x1_prev,y1_prev),(x1+w,y1), pt2,(x1_prev,y1+h)], dtype=np.int32)
            else:
                roi_corners2 = np.array([pt1,(x1+w,y1), pt2,(x1,y1+h)], dtype=np.int32)
        
               
            cv2.fillPoly(mask2, [roi_corners2], 255)
            output2 = cv2.bitwise_or(output2, mask2)
            cont = cont+1
            x1_prev= x1
            y1_prev = y1    
            
            tls_and_brs.append([roi_corners2[0][0], roi_corners2[0][1], roi_corners2[2][0], roi_corners2[2][1]])
            centroids.append([X, Y])
            
    if cont == 0:
        output2 = output1
    
    # ---------------------------------------------------------------
    
    # for visualization:
    """"
    plt.axis("off")
    fig, ax = plt.subplots(nrows=2, ncols=3, figsize = (10, 10))
    
    ax[0, 0].imshow(imageL, cmap = "gray")
    ax[0, 1].imshow(th2, cmap = "gray")
    ax[0, 2].imshow(opening1, cmap = "gray")
    
    ax[1, 0].imshow(output1, cmap = "gray")
    ax[1, 1].imshow(opening2, cmap = "gray")
    ax[1, 2].imshow(output2, cmap = "gray")
    """
    
    
    tls_and_brs = order_tls_and_brs(tls_and_brs, centroids)
    
    return tls_and_brs, output2
    

In [73]:
def task6(qs, bbdd_hists, method = "L1", topK = 5, levels = 3, bins = 16, dim = "3", bins_1d = None, bins_2d = None, color = "Lab"):
    
    # depending on the method we want the largest or the closest score to select the topK
    if method in ["Euclidean", "L1", "X_squared"]: rev = False
    elif method in ["Histogram_Intersection", "Hellinger_Kernel"]: rev = True
    
    all_results = []
    
    for name, picture in qs.items():
        # we first have to divide the picture into paintings
        tls_and_brs = divide_paintings(picture)[0]
            
        picture_results = []
        for painting_tl_and_br in tls_and_brs:
            # we are going to crop the picture to get each painting
            painting = picture[painting_tl_and_br[1] : painting_tl_and_br[3]+1, painting_tl_and_br[0] : painting_tl_and_br[2]+1]
            
            # we have to mask the text boxes
            not_rect_mask = get_name_boxs(painting)
            tl, br = get_tl_and_br(not_rect_mask)
            tr = (br[0], tl[1])
            bl = (tl[0], br[1])
            rect_mask = np.zeros((painting.shape[0], painting.shape[1]), dtype = np.uint8)
            roi_corners = np.array([tl, tr, br, bl], dtype = np.int32)
            cv2.fillPoly(rect_mask, [roi_corners], 255)
            
            # we have to invert the text box mask
            rect_mask = (255 - rect_mask)
            
            # we have to compute the histogram masking the text box
            hist = get_full_hist(painting, levels = levels, bins = bins, dim = dim, color = color, mask = rect_mask)
            
            # compare with the bbdd histograms
            comparisons = compare(hist, bbdd_hists, method)
            
            # get the topK
            painting_topK = k_neighbours(comparisons, topK, rev = rev)
            
            # append topK to the picture results
            picture_results.append(painting_topK)
            
        # append picture results to all_results
        all_results.append(picture_results)
        
    return all_results

### Method 1: CieLAB + 2D & 1D Histograms of 32 bins + Histogram Intersection

In [74]:
with open('./qsd2_w2/gt_corresps.pkl', 'rb') as f:
    cor2_w2 = pickle.load(f)

hists_bbdd_Lab = get_all_hists(bbdd, levels = 3, bins = 32, dim = "2d1d", color = "Lab", masks = None)

result_k1 = task6(qsd2_w2, hists_bbdd_Lab, dim="2d1d", color="Lab", bins = 32, method = "Histogram_Intersection", topK = 1)
result_k5 = task6(qsd2_w2, hists_bbdd_Lab, dim="2d1d", color="Lab", bins = 32, method = "Histogram_Intersection", topK = 5)
result_k10 = task6(qsd2_w2, hists_bbdd_Lab, dim="2d1d", color="Lab", bins = 32, method = "Histogram_Intersection", topK = 10)

MAPK_qsd2_w2_LAB_k1 = compute_mapk(cor2_w2, result_k1, 1)
MAPK_qsd2_w2_LAB_k5 = compute_mapk(cor2_w2, result_k5, 5)
MAPK_qsd2_w2_LAB_k10 = compute_mapk(cor2_w2, result_k10, 10)

print(f"Method1 mAPK using k=1: {MAPK_qsd2_w2_LAB_k1}")
print(f"Method1 mAPK using k=5: {MAPK_qsd2_w2_LAB_k5}")
print(f"Method1 mAPK using k=10: {MAPK_qsd2_w2_LAB_k10} \n\n")

Method1 mAPK using k=1: 0.5178571428571429
Method1 mAPK using k=5: 0.5729166666666666
Method1 mAPK using k=10: 0.5894274376417233 




### Method 2: RGB + 3D Histograms of 16 bins + Histogram Intersection

In [75]:
with open('./qsd2_w2/gt_corresps.pkl', 'rb') as f:
    cor2_w2 = pickle.load(f)

hists_bbdd_RGB = get_all_hists(bbdd, levels = 3, bins = 16, dim = "3", color = None, masks = None)

result_k1 = task6(qsd2_w2, hists_bbdd_RGB, dim="3", color=None, bins = 16, method = "Histogram_Intersection", topK = 1)
result_k5 = task6(qsd2_w2, hists_bbdd_RGB, dim="3", color=None, bins = 16, method = "Histogram_Intersection", topK = 5)
result_k10 = task6(qsd2_w2, hists_bbdd_RGB, dim="3", color=None, bins = 16, method = "Histogram_Intersection", topK = 10)

MAPK_qsd2_w2_RGB_k1 = compute_mapk(cor2_w2, result_k1, 1)
MAPK_qsd2_w2_RGB_k5 = compute_mapk(cor2_w2, result_k5, 5)
MAPK_qsd2_w2_RGB_k10 = compute_mapk(cor2_w2, result_k10, 10)

print(f"Method1 mAPK using k=1: {MAPK_qsd2_w2_RGB_k1}")
print(f"Method1 mAPK using k=5: {MAPK_qsd2_w2_RGB_k5}")
print(f"Method1 mAPK using k=10: {MAPK_qsd2_w2_RGB_k10} \n\n")

Method1 mAPK using k=1: 0.5535714285714286
Method1 mAPK using k=5: 0.5973214285714287
Method1 mAPK using k=10: 0.6036422902494331 




## Tests and Results for QST1_W2

#### Method 1: CieLAB + 2D & 1D Histograms of 32 bins + Histogram Intersection

In [78]:
pred_masks_tb_qst1 = get_rectangular_predicted_masks(qst1_w2)    
pred_masks_tb_qst1_inver = InvertMasks(pred_masks_tb_qst1)
    
hists_bbdd_Lab= get_all_hists(bbdd, levels = 3, bins = 32, dim = "2d1d", color = 'Lab', masks = None)
hists_qst1_w2_Lab = get_all_hists(qst1_w2, levels = 3, bins = 32, dim = "2d1d", color = 'Lab', masks = pred_masks_tb_qst1_inver)

results_k10 = calculateMAPK(hists_qst1_w2_Lab, hists_bbdd_Lab, "Histogram_Intersection", ground_truth = None, topK=10, rev = True)

# Write and Load results
path = "./QST1/method1/"
name = "result.pkl"

if not os.path.exists(path):
    os.makedirs(path)

filename = os.path.join(path, name)
with open(filename, 'wb') as f:
    pickle.dump(results_k10, f)

with open(filename, 'rb') as f:
    saved_res = pickle.load(f)

print("Results for Method 1: \n{}\n".format(saved_res))

Results for Method 1: 
[[211, 16, 205, 252, 244, 200, 168, 12, 81, 203], [157, 57, 64, 133, 46, 110, 187, 218, 281, 146], [222, 40, 79, 35, 272, 236, 152, 120, 205, 226], [212, 270, 194, 252, 182, 97, 90, 148, 16, 220], [32, 34, 9, 8, 57, 162, 259, 46, 185, 110], [13, 127, 106, 120, 215, 70, 11, 162, 263, 21], [21, 46, 162, 217, 197, 146, 190, 128, 64, 213], [113, 259, 88, 57, 72, 31, 225, 130, 103, 154], [223, 110, 80, 238, 57, 115, 242, 240, 218, 31], [91, 248, 103, 51, 225, 119, 161, 163, 61, 241], [147, 58, 248, 78, 35, 237, 56, 278, 23, 140], [160, 259, 225, 244, 130, 271, 154, 70, 239, 58], [91, 8, 103, 241, 51, 88, 149, 187, 242, 225], [251, 105, 19, 25, 29, 215, 66, 239, 104, 39], [272, 212, 226, 85, 35, 232, 40, 222, 184, 79], [252, 211, 205, 264, 12, 244, 16, 218, 168, 120], [118, 187, 146, 79, 189, 125, 18, 117, 77, 75], [110, 57, 123, 9, 133, 259, 72, 241, 242, 244], [245, 57, 281, 286, 196, 279, 9, 218, 208, 110], [204, 40, 35, 236, 197, 280, 5, 190, 108, 41], [259, 88, 31

### Method 2: RGB + 3D Histograms of 16 bins + Histogram Intersection

In [79]:
pred_masks_tb_qst1 = get_rectangular_predicted_masks(qst1_w2)    
pred_masks_tb_qst1_inver = InvertMasks(pred_masks_tb_qst1)
    
hists_bbdd_RGB= get_all_hists(bbdd, levels = 3, bins = 16, dim = "3", color = 'RGB', masks = None)
hists_qst1_w2_RGB = get_all_hists(qst1_w2, levels = 3, bins = 16, dim = "3", color = 'RGB', masks = pred_masks_tb_qst1_inver)

results_k10 = calculateMAPK(hists_qst1_w2_RGB, hists_bbdd_RGB, "Histogram_Intersection", ground_truth = None, topK=10, rev = True)

# Write and Load results
path = "./QST1/method2/"
name = "result.pkl"

if not os.path.exists(path):
    os.makedirs(path)

filename = os.path.join(path, name)
with open(filename, 'wb') as f:
    pickle.dump(results_k10, f)

with open(filename, 'rb') as f:
    saved_res = pickle.load(f)

print("Results for Method 2: \n{}\n".format(saved_res))

Results for Method 2: 
[[211, 16, 205, 244, 203, 218, 133, 200, 12, 168], [157, 133, 57, 240, 187, 64, 173, 199, 218, 153], [222, 79, 194, 205, 187, 208, 137, 120, 248, 40], [212, 97, 270, 278, 220, 85, 182, 232, 194, 152], [32, 240, 46, 238, 34, 254, 128, 8, 57, 157], [13, 31, 215, 163, 113, 104, 225, 51, 193, 72], [21, 217, 146, 64, 197, 162, 46, 34, 190, 286], [113, 31, 154, 103, 104, 110, 160, 72, 259, 88], [110, 223, 115, 259, 101, 230, 31, 282, 246, 89], [91, 23, 51, 103, 197, 225, 163, 73, 248, 168], [78, 227, 265, 248, 69, 87, 140, 84, 26, 237], [160, 225, 110, 244, 271, 259, 242, 239, 51, 105], [91, 103, 51, 241, 212, 271, 85, 163, 57, 225], [251, 105, 19, 142, 154, 25, 200, 104, 95, 158], [272, 232, 212, 236, 40, 5, 226, 197, 47, 97], [205, 110, 12, 211, 187, 174, 242, 244, 218, 137], [118, 187, 64, 146, 57, 75, 217, 34, 73, 46], [139, 9, 110, 160, 37, 133, 259, 123, 88, 244], [245, 208, 57, 9, 133, 218, 281, 21, 173, 196], [204, 40, 35, 236, 5, 272, 197, 120, 108, 68], [259,

## Tests and Results for QST2_W2

#### Method 1: CieLAB + 2D & 1D Histograms of 32 bins + Histogram Intersection

In [80]:
hists_bbdd_Lab = get_all_hists(bbdd, levels = 3, bins = 32, dim = "2d1d", color = "Lab", masks = None)
result_k10 = task6(qst2_w2, hists_bbdd_Lab, dim="2d1d", color=None, bins = 32, method = "Histogram_Intersection", topK = 10)

# Write and Load results
path = "./QST2/method1/"
name = "result.pkl"

if not os.path.exists(path):
    os.makedirs(path)

filename = os.path.join(path, name)
with open(filename, 'wb') as f:
    pickle.dump(result_k10, f)

with open(filename, 'rb') as f:
    saved_res = pickle.load(f)

print("Results for Method 1: \n{}\n".format(saved_res))

Results for Method 1: 
[[[16, 24, 270, 168, 77, 261, 116, 60, 40, 248], [259, 133, 60, 43, 131, 234, 72, 232, 88, 157]], [[74, 30, 63, 103, 167, 155, 239, 88, 76, 198]], [[180, 123, 231, 121, 110, 31, 37, 223, 259, 142]], [[110, 123, 259, 180, 231, 121, 223, 133, 37, 142]], [[260, 103, 140, 163, 88, 82, 239, 225, 271, 84]], [[57, 259, 88, 157, 133, 140, 31, 110, 72, 69], [239, 140, 82, 163, 103, 215, 174, 88, 104, 105]], [[133, 157, 110, 259, 153, 230, 99, 26, 57, 218]], [[281, 22, 19, 30, 242, 241, 170, 88, 187, 39]], [[176, 212, 248, 167, 119, 249, 58, 168, 225, 237], [29, 161, 163, 239, 51, 119, 193, 140, 215, 135]], [[57, 242, 88, 46, 31, 281, 259, 103, 32, 140], [88, 57, 103, 140, 265, 259, 31, 203, 32, 130]], [[99, 58, 159, 147, 81, 168, 102, 34, 152, 156], [76, 99, 140, 113, 103, 21, 286, 81, 225, 130]], [[57, 92, 219, 144, 216, 246, 157, 88, 171, 179]], [[239, 215, 51, 39, 104, 135, 225, 163, 70, 154]], [[28, 37, 110, 218, 143, 244, 123, 261, 81, 154]], [[88, 140, 57, 103, 259,

#### Method 2: RGB + 3D Histograms of 16 bins + Histogram Intersection

In [81]:
hists_bbdd_RGB = get_all_hists(bbdd, levels = 3, bins = 16, dim = "3", color = "RGB", masks = None)
result_k10 = task6(qst2_w2, hists_bbdd_RGB, dim="3", color =" RGB", bins = 16, method = "Histogram_Intersection", topK = 10)

# Write and Load results
path = "./QST2/method2/"
name = "result.pkl"

if not os.path.exists(path):
    os.makedirs(path)

filename = os.path.join(path, name)
with open(filename, 'wb') as f:
    pickle.dump(result_k10, f)

with open(filename, 'rb') as f:
    saved_res = pickle.load(f)

print("Results for Method 2: \n{}\n".format(saved_res))

Results for Method 2: 
[[[212, 118, 278, 261, 102, 168, 271, 152, 77, 186], [27, 110, 165, 160, 242, 168, 225, 218, 187, 244]], [[130, 74, 174, 58, 286, 225, 161, 109, 46, 241]], [[121, 266, 231, 180, 73, 123, 190, 173, 153, 133]], [[110, 37, 31, 160, 154, 259, 242, 9, 139, 230]], [[260, 21, 84, 248, 281, 286, 57, 197, 126, 64]], [[215, 239, 103, 192, 88, 193, 163, 39, 161, 51], [248, 205, 168, 51, 244, 137, 119, 225, 163, 12]], [[136, 99, 76, 248, 140, 26, 0, 230, 7, 43]], [[175, 133, 227, 9, 73, 34, 99, 69, 39, 113]], [[18, 35, 5, 197, 258, 278, 146, 148, 106, 49], [93, 205, 212, 220, 40, 194, 152, 12, 97, 184]], [[103, 88, 161, 130, 193, 192, 225, 163, 132, 31], [193, 161, 103, 225, 239, 51, 29, 88, 192, 163]], [[227, 218, 57, 37, 248, 173, 99, 110, 133, 153], [218, 26, 110, 227, 37, 9, 133, 248, 99, 71]], [[219, 179, 216, 240, 238, 34, 153, 10, 126, 57]], [[224, 35, 85, 151, 226, 272, 68, 191, 47, 146]], [[28, 218, 133, 121, 160, 244, 9, 26, 37, 81]], [[12, 200, 137, 104, 105, 150,

In [49]:
def total_masks(qs, path = None, return_tls_brs = False):
    
    masks_total = dict()
    
    all_tls_and_brs = []
    
    for name, img in qs.items():
        
        tls_and_brs, full_mask = divide_paintings(img)
        
        picture_tls_and_brs = []
        
        picture_results = []
        for painting_tl_and_br in tls_and_brs:
            # we are going to crop the picture to get each painting
            painting = img[painting_tl_and_br[1] : painting_tl_and_br[3]+1, painting_tl_and_br[0] : painting_tl_and_br[2]+1]
            
            # we have to mask the text boxes
            not_rect_mask = get_name_boxs(painting)
            tl, br = get_tl_and_br(not_rect_mask)
            tr = (br[0], tl[1])
            bl = (tl[0], br[1])
            rect_mask = np.zeros((painting.shape[0], painting.shape[1]), dtype = np.uint8)
            roi_corners = np.array([tl, tr, br, bl], dtype = np.int32)
            cv2.fillPoly(rect_mask, [roi_corners], 255)
            
            if return_tls_brs:
                picture_tls_and_brs.append([tl[0] + painting_tl_and_br[0],
                                            tl[1] + painting_tl_and_br[1],
                                            br[0] + painting_tl_and_br[0],
                                            br[1] + painting_tl_and_br[1]] )
            
            # we have to invert the text box mask
            rect_mask = (255 - rect_mask)
            
            full_mask[painting_tl_and_br[1] : painting_tl_and_br[3]+1, painting_tl_and_br[0] : painting_tl_and_br[2]+1] = rect_mask
            
        if return_tls_brs:
            all_tls_and_brs.append(picture_tls_and_brs)
        
        masks_total[name] = full_mask
            
        if path:
            if not os.path.exists(path):
                os.makedirs(path)
                
            filename = os.path.join(path, name + ".png")
            cv2.imwrite(filename, full_mask)
    
    if return_tls_brs:
        return masks_total, all_tls_and_brs
    else:
        return masks_total

### Get the text boxes' tl and br for each painting and save masks

In [82]:
PATH_TO_SAVE_MASKS = "./QST2/full_masks_qst2"
PATH_TO_SAVE_TB = "./QST2/text_boxes.pkl"

full_masks_qst2, tls_and_brs_qst2 = total_masks(qst2_w2, path = PATH_TO_SAVE_MASKS, return_tls_brs = True)

with open(PATH_TO_SAVE_TB, 'wb') as f:
    pickle.dump(tls_and_brs_qst2, f)
    
with open(PATH_TO_SAVE_TB, 'rb') as f:
    saved_tls_brs_qst2 = pickle.load(f)

print("Text boxs cords: \n{}\n".format(saved_tls_brs_qst2))

Results for Method 2: 
[[[117, 191, 290, 230], [735, 94, 1234, 615]], [[628, 226, 1237, 365]], [[356, 845, 629, 898]], [[623, 1899, 1185, 2012]], [[559, 178, 1130, 339]], [[678, 1375, 1261, 1481], [2766, 149, 3370, 263]], [[181, 594, 482, 637]], [[209, 440, 550, 568]], [[118, 99, 328, 108], [562, 339, 742, 385]], [[315, 103, 1551, 2312], [2354, 252, 2945, 378]], [[792, 93, 1162, 173], [2828, 585, 3386, 702]], [[303, 123, 555, 266]], [[211, 162, 288, 169]], [[220, 184, 326, 262]], [[154, 105, 506, 177]], [[603, 85, 876, 172], [1337, 137, 1956, 679]], [[557, 213, 1147, 348]], [[132, 528, 402, 585]], [[744, 152, 910, 341]], [[125, 98, 461, 604]], [[699, 112, 1042, 256]], [[710, 156, 919, 183]], [[770, 1396, 1333, 1507]], [[124, 137, 326, 177]], [[145, 103, 417, 578]], [[158, 124, 439, 189]], [[389, 1214, 995, 1339]], [[212, 150, 505, 227]], [[166, 273, 498, 497], [1038, 199, 1065, 233]], [[176, 140, 493, 194]]]



### If we would want to compare the full masks and get the different scores

In [None]:
def compare_mask (gt_masks_dict, pred_masks_dict): 
        all_results = dict()
        for name, mask in gt_masks_dict.items():
                
                predicted_mask = pred_masks_dict[name]

                tp  = ((mask == 255) & (predicted_mask == 255)).sum()

                fn = ((mask == 255) & (predicted_mask == 0)).sum()

                fp = ((mask == 0) & (predicted_mask == 255)).sum()

                tn = ((mask == 0) & (predicted_mask == 0)).sum()

                #print(f"True Positives: {tp}  ,  False Negatives: {fn}  , False Positives: {fp}  ,  True Negatives: {tn}")
                #print(f"Total number of pixels: {tp + fn + fp + tn}")

                # Compute precision, recall and F1
                precision = tp / (tp + fp)
                recall = tp / (tp + fn)
                f1 = 2 * precision * recall / (precision + recall) 
                #print(f"Precision: {precision} , Recall: {recall} , F1: {f1}")

                #print()
                #print()


                result = {"precision": precision, "recall": recall, "f1": f1}
                all_results[name] = result

        dataframe = pd.DataFrame.from_dict(all_results)
        return all_results, dataframe

In [None]:
def get_tb_dict(tb, ds, qsd):
    i = 0
    
    result = dict()
    
    for k, v in ds.items():
        name = str(i).zfill(5)
        
        inner_dict = dict()

        size = tuple([v.shape[0], v.shape[1]])
        mask = np.zeros((size[0], size[1]), dtype=np.uint8)
        roi_corners = []
        
        for txt_box in tb[i]:  
            if qsd == 1: # QSD1
                tl = tuple(txt_box[0][:])
                bl = tuple(txt_box[1][:])
                br = tuple(txt_box[2][:])
                tr = tuple(txt_box[3][:])
                roi_corners = np.array([tl, tr, br, bl], dtype=np.int32)
                cv2.fillPoly(mask, [roi_corners], 255)
            else: # QSD2
                if txt_box[:2][-1] > txt_box[2:][-1]:
                    tl = tuple(txt_box[:2]) 
                    br = tuple(txt_box[2:])
                else:
                    tl = tuple(txt_box[2:])
                    br = tuple(txt_box[:2]) 
                cv2.rectangle(mask, tl, br, 255, thickness=cv2.FILLED)

        result[name] = mask
        
        i+=1
        
    return result