# Move test documents

In [6]:
import pandas as pd
import os
import shutil
from tqdm import tqdm


In [11]:
# Locations of the object database, the raw Tobacco800 page images and the
# folder that will receive a copy of every test-set page.
db_path = "../../Datasets/Objects_Database/"
tb_path = "../../Datasets/Tobacco 800 Dataset/tobacco800"
dst_path = os.path.join(db_path, "test_documents")

# The destination folder is not guaranteed to exist (e.g. on a fresh checkout);
# without it every shutil.copy2 call below would raise FileNotFoundError.
os.makedirs(dst_path, exist_ok=True)

# Table with one row per selected object.
object_info_file = os.path.join(db_path, "2_Selected_Objects_Information.csv")
objects_info_df = pd.read_csv(object_info_file)

# Unique page names having at least one row whose "set" column equals "test".
u_test_docs = objects_info_df.loc[objects_info_df["set"]=="test", "image_name"].unique()

# Copy each test page; pages are looked up as "<image_name>.png" in tb_path.
for image_name in tqdm(u_test_docs):
    src_file = os.path.join(tb_path, image_name+".png")
    dst_file = os.path.join(dst_path, image_name+".png")

    # copy2 also preserves file metadata (timestamps) where possible.
    shutil.copy2(src=src_file, dst=dst_file)




100%|██████████| 34/34 [00:00<00:00, 13347.65it/s]


# Two-Stage Object Detector Example

In [10]:
# Boolean mask selecting the rows whose "set" column equals "test".
is_test_row = objects_info_df["set"] == "test"

# Unique page names among those rows, then report how many there are.
u_test_docs = objects_info_df.loc[is_test_row, "image_name"].unique()
print(u_test_docs.shape)

(34,)


## Libraries

In [1]:
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from imutils import paths
from tqdm import tqdm
from skimage.feature import hog
import os
import shutil

from sklearn import svm
from joblib import dump, load

## Functions

In [2]:
def print_image(img, cmap="gray", title="img", fontsize=12):
    """Draw an OpenCV image on the current matplotlib axes, without ticks.

    The function neither creates a figure nor calls ``plt.show()``, so it can
    be used inside a subplot grid managed by the caller.

    Parameters
    ----------
    img : numpy.ndarray
        Image to display. Multi-channel images are treated as BGR and are
        converted to RGB before being shown; 2-D (single-channel) images are
        shown as they are.
    cmap : str
        Matplotlib colormap used for 2-D images only.
    title : str
        Text placed above the image.
    fontsize : int
        Font size of the title.
    """
    if img.ndim == 2:
        # Grayscale / binarized image: a BGR->RGB conversion would raise on a
        # 2-D array, so display it directly through the colormap.
        plt.imshow(img, cmap=cmap)
    else:
        # OpenCV stores channels as BGR while matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    plt.title(title, fontsize=fontsize)
    # Hide axis ticks: pixel coordinates are not useful for visual inspection.
    plt.xticks([])
    plt.yticks([])

def create_dir(path):
    """Create the directory ``path`` together with any missing parents.

    Existing directories are left untouched, so the call is idempotent.
    Absolute paths and paths using the platform's own separator are supported.

    Parameters
    ----------
    path : str
        Directory to create. An empty string is treated as the current
        directory and nothing is done.

    Raises
    ------
    OSError
        If a component of ``path`` exists but is not a directory, or the
        directory cannot be created (e.g. missing permissions).
    """
    if not path:
        # "" refers to the current directory, which necessarily exists.
        return

    # Standard-library equivalent of walking the path component by component;
    # unlike a manual split on "/", it does not try to create "" for absolute paths.
    os.makedirs(path, exist_ok=True)


def check_area(obj_contours, n_std=0):
    """Compute the area threshold used to keep only large contours.

    The threshold is ``mean(area) + n_std * std(area)`` over the areas of all
    contours, where ``std`` is the population standard deviation (numpy default).

    Parameters
    ----------
    obj_contours : sequence
        Contours accepted by ``cv2.contourArea``.
    n_std : float
        Number of standard deviations added to the mean area.

    Returns
    -------
    float
        The area threshold. ``0.0`` when ``obj_contours`` is empty, instead of
        the NaN (plus RuntimeWarning) that a mean over no values would give.
    """
    ## Area of every contour, collected in one pass (no repeated array growth) ##
    areas = np.array([cv2.contourArea(cnt) for cnt in obj_contours], dtype=float)

    ## Nothing to measure: return a neutral threshold ##
    if areas.size == 0:
        return 0.0

    ## Threshold: mean area plus n_std standard deviations ##
    return areas.mean() + n_std * areas.std()


def object_segmentation_prediction(doc_file, classifier, normalize_transfor, save_path="", kp=16, n_std=1, margin=0, lw=4):
    """Propose candidate objects in a document image and classify each of them.

    The page is converted to grayscale, blurred with a Gaussian filter,
    binarized with Otsu's threshold and its external contours are extracted.
    Every contour whose area exceeds the threshold returned by ``check_area``
    is cropped from the original page, resized, described by a HOG vector,
    standardized and classified. Crops predicted as class 2 ("other") are
    discarded; the remaining ones are boxed and labelled on a copy of the page.

    Parameters
    ----------
    doc_file : str
        Path of the document image, read with ``cv2.imread``.
    classifier : object
        Fitted model exposing ``predict``. Its predictions are mapped as
        0 -> "logo", 1 -> "signature", 2 -> "other".
    normalize_transfor : object
        Fitted transformer exposing ``transform``; it receives a one-row
        DataFrame whose columns are named ``hog_0 .. hog_{n-1}``.
    save_path : str
        When not empty, every kept crop is written to
        ``<save_path>/<label>/<doc name>_<label>_<contour index>.png``.
    kp : int
        Kernel proportion: the Gaussian kernel side is
        ``min(height, width) // kp``, made odd and never smaller than 1.
    n_std : float
        Number of standard deviations over the mean contour area used as the
        area threshold (forwarded to ``check_area``).
    margin : int
        Extra pixels added around each bounding box when cropping. The crop is
        clamped at the top/left image border.
    lw : int
        Line thickness of the rectangles drawn around kept objects.

    Returns
    -------
    img_contour : numpy.ndarray
        Copy of the page with a green box and a red label per kept object.
    objects_list : list of numpy.ndarray
        Grayscale crops of the kept objects, resized to 512x256 (width x height).
    areas_list : list of float
        Contour areas of the kept objects, aligned with ``objects_list``.
    annotations_list : list of [x, y, w, h]
        Bounding boxes of EVERY contour above the area threshold, including
        those later rejected as "other"; it is therefore not aligned with the
        two lists above.
        NOTE(review): confirm whether rejected boxes are meant to be reported.

    Raises
    ------
    FileNotFoundError
        If ``doc_file`` cannot be read as an image.
    """

    ##########################################
    ############## Load data #################
    ##########################################
    ## Document name without directory nor extension (names the saved crops) ##
    doc_name = os.path.splitext(os.path.basename(doc_file))[0]

    #### Invert images (switch to THRESH_BINARY_INV when True) ####
    inv = False

    ## Load image: cv2.imread returns None instead of raising on failure ##
    image = cv2.imread(doc_file)
    if image is None:
        raise FileNotFoundError(f"Could not read image file: {doc_file}")

    ## Image to draw contours on; the original stays untouched for cropping ##
    img_contour = image.copy()

    #################################################
    ############## Image processing #################
    #################################################
    ## Image to gray scale ##
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    ## Kernel size: proportional to the smallest side, odd and at least 1 ##
    ## (GaussianBlur rejects even or non-positive sizes) ##
    k = int(min(gray.shape) // kp)
    if k % 2 == 0:
        k -= 1
    k = max(k, 1)

    ## Filtering: sigma 0 lets OpenCV derive it from the kernel size ##
    sigmax = 0
    blurred = cv2.GaussianBlur(gray, (k, k), sigmax)

    #### Binarization by Otsu ####
    threshold_type = cv2.THRESH_BINARY_INV if inv else cv2.THRESH_BINARY
    _, binary = cv2.threshold(blurred, 0, 255, threshold_type + cv2.THRESH_OTSU)

    ##############################################
    ############## Object search #################
    ##############################################
    #### Search external contours only ####
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    #### Lists to store the results ####
    objects_list = []
    areas_list = []
    annotations_list = []

    #### Area threshold ####
    # n_std: number of standard deviations over the mean that defines the threshold.
    area_threshold = check_area(contours, n_std)

    #### HOG parameters ####
    # NOTE(review): presumably the values used to train `classifier` -- confirm before changing.
    dim = (512, 256)  # cv2.resize target size as (width, height)
    orientations = 9
    pixels_per_cell = 64
    cells_per_block = 2
    block_norm = "L2-Hys"

    #### Meaning of the classifier outputs ####
    label_map = {
        0:"logo",
        1:"signature",
        2:"other",
    }

    for i, cnt in enumerate(contours):

        #### Area of the object; small contours are ignored ####
        area = cv2.contourArea(cnt)
        if not (area > area_threshold):
            continue

        #### Rectangle enclosing the object ####
        x, y, w, h = cv2.boundingRect(cnt)
        ## Save annotations (done before classification, see docstring) ##
        annotations_list.append([x, y, w, h])

        ## Crop the object; clamp the start so a margin near the top/left ##
        ## border does not become a negative (wrap-around) index ##
        top = max(y - margin, 0)
        left = max(x - margin, 0)
        img_object = image[top : y+h+margin, left : x+w+margin, :]

        ## Crop to gray scale and to the fixed size expected by HOG ##
        img_object = cv2.cvtColor(img_object, cv2.COLOR_BGR2GRAY)
        img_object = cv2.resize(img_object, dim, interpolation=cv2.INTER_CUBIC)

        ################################
        #### HOG feature extraction ####
        ################################
        hog_vec = hog(img_object, orientations=orientations, pixels_per_cell=(pixels_per_cell, pixels_per_cell),
                      cells_per_block=(cells_per_block, cells_per_block), block_norm=block_norm, visualize=False)

        ## One-row table with named feature columns ##
        X = np.asarray(hog_vec).reshape(1, -1)
        col_name = [f"hog_{j}" for j in range(X.shape[1])]
        X = pd.DataFrame(X, columns=col_name)

        #########################
        #### Standardize data ####
        #########################
        X = normalize_transfor.transform(X)

        #########################
        #### Data prediction ####
        #########################
        y_pred = classifier.predict(X)[0]

        ## Class 2 ("other") is neither drawn, saved nor returned ##
        if y_pred == 2:
            continue

        label_name = label_map[y_pred]

        #### Draw the rectangle enclosing the object (green in BGR) ####
        cv2.rectangle(img_contour, (x, y), (x+w, y+h), (0,255,0), lw)

        ## Coordinates for the label text ##
        x_pos = x+w//4
        y_pos = y+h//2

        ## Add the label text to the image (red in BGR) ##
        img_contour = cv2.putText(img_contour, label_name, org=(x_pos, y_pos), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                  fontScale=3, color=(0, 0, 255), thickness=10)

        ## Optionally save the crop under a per-label folder ##
        if save_path != "":
            create_dir(os.path.join(save_path, label_name))
            save_file = os.path.join(save_path, label_name, doc_name+f"_{label_name}_{i}.png")
            cv2.imwrite(save_file, img_object)

        #### Store the kept object and its area ####
        objects_list.append(img_object)
        areas_list.append(area)

    #### Return the annotated page and the collected results ####
    return img_contour, objects_list, areas_list, annotations_list

## Pipeline

In [3]:
#### documents path ####
## Images path (alternative input: "../../Datasets/Objects_Database/Example_Documents") ##
doc_path = "../../Datasets/Objects_Database/test_documents"

## Folder that receives the annotated pages ##
dst_path = "../../Data/Classical_Object_Detection_Evaluation/"

create_dir(dst_path)

## Kernel proportions size ##
kp_list = [40]
## Std over mean as threshold ##
n_std = 0

## Keep image files only, in sorted order: os.listdir returns entries in ##
## arbitrary order and may contain non-image entries that cv2 cannot read ##
image_extensions = (".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp")
selected_documents = sorted(
    name for name in os.listdir(doc_path)
    if name.lower().endswith(image_extensions)
)
print(f"Number of documents:{len(selected_documents)}")

#### Model information ####
model_path = "../../Datasets/Objects_Database/Classical_Model"
standarization_file = os.path.join(model_path, "std_selected_train_data_204(other_signature_logo).joblib")
model_file = os.path.join(model_path, "Model(SVM)_K(rbf)_C(100)_G(0.001)_HOG_Feat(756)_img(256, 512)_O(9)_C(64)_B(2)_N(L2-Hys).joblib")

## NOTE: joblib.load unpickles the file, which can execute arbitrary code; ##
## only load artifacts from a trusted source ##
## Load standardizer ##
normalize_transfor = load(standarization_file)
## Load model ##
classifier = load(model_file)


for kp in kp_list:

    ## Path to save the cropped objects ("" disables saving) ##
    save_path = ""

    # NOTE(review): the output name does not depend on kp, so with several
    # values in kp_list each pass overwrites the images of the previous one.
    for doc_name in tqdm(selected_documents):

        doc_file = os.path.join(doc_path, doc_name)

        ## Searching ##
        img_contour, objects_list, areas_list, annotations_list = object_segmentation_prediction(
            doc_file, classifier, normalize_transfor, save_path, kp, n_std, margin=0, lw=4)

        ## Save the page with the detected objects drawn on it ##
        dst_file = os.path.join(dst_path, doc_name)
        cv2.imwrite(dst_file, img_contour)

Number of documents:34


100%|██████████| 34/34 [00:06<00:00,  5.08it/s]
