In [None]:
import os
import cv2
import random
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import itertools
import pickle
from sklearn.cluster import MeanShift
from scipy.spatial.distance import cdist

In [None]:
from skimage import io
from skimage.transform import rotate
from skimage.color import rgb2gray
from deskew import determine_skew
from matplotlib import pyplot as plt

In [None]:
import warnings

# Silence every warning for the rest of the notebook.
# NOTE(review): this also hides warnings that point at real problems.
warnings.filterwarnings('ignore')

# Seed the RNGs used below so Restart & Run All gives the same results.
cv2.setRNGSeed(0)
random.seed(0)  # random_quarter_crop draws its crop offsets from `random`

In [None]:
def binarize_img(image,size=(1200,1600), n_iterations=1, return_inter=False):
    '''
    Deskew, resize and binarize a document image.

    image : 3-channel image array (it is converted with cv2.COLOR_BGR2GRAY below),
    size : target size handed to cv2.resize as its dsize argument,
    n_iterations : number of 3x3 erosion passes applied to the inverted threshold,
    return_inter : if True, return the intermediates as a list instead

    returns : (eroded, resized image), or [resized image, thresh, eroded]
              when return_inter is True
    '''
    def deskew(image):
        grayscale = rgb2gray(image)
        angle = determine_skew(grayscale)
        # no angle could be estimated: keep the image untouched instead of
        # failing on the comparison below
        if angle is None:
            return image
        angle = np.round(angle, 2)
        # only skews of 10 degrees or more (either direction) are corrected
        if -10 < angle < 10:
            return image
        # the rotated result is scaled by 255 and cast back to uint8
        return (rotate(image, angle, resize=True) * 255).astype(np.uint8)

    # threshold
    image = deskew(image)
    image = cv2.resize(image,size,interpolation = cv2.INTER_LINEAR)
    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,129,4)

    # erosion on the inverted threshold.
    # The previous loop passed the loop index as `iterations`, so the first pass
    # used iterations=0 and the default n_iterations=1 never eroded at all.
    kernel = np.ones((3,3),np.uint8)
    erosion = ~thresh
    if n_iterations > 0:
        erosion = cv2.erode(erosion,kernel,iterations = n_iterations)

    if return_inter:
        return [image, thresh, erosion]
    return erosion, image

    # image = cv2.resize(image,size,interpolation = cv2.INTER_LINEAR)
    # img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # thresh = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,129,4)
    # kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
    # opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    # result = 255 - opening
    # if return_inter:
    #     return [image, opening, result]
    # return result, image

# collect the connected-component stats into a dataframe
def generate_stats(totalLabels,stats, centroids):
    '''
    Tabulate connected-component statistics, one row per component.

    totalLabels,stats, centroids : cv2 connectedComponentsWithStats outputs
    returns : dataframe whose label_id runs from 1 to totalLabels, followed by the
              five stats columns (left, top, width, height, area) and the centroid
    '''
    stat_names = ['left', 'top', 'width', 'height', 'area']
    # label ids are 1-based and follow the row order of `stats`
    table = {'label_id': list(range(1, totalLabels + 1))}
    for position, stat_name in enumerate(stat_names):
        table[stat_name] = stats[:, position]
    table['centroid_x'] = centroids[:, 0]
    table['centroid_y'] = centroids[:, 1]
    return pd.DataFrame(table, columns=['label_id'] + stat_names + ['centroid_x', 'centroid_y'])

# sort components into reading order and assign row / column ids
def filter_cv2_components(df, min_ar_thresh=1, max_ar_thresh=10,pixel_row_gap = 10):
    '''
    Sort components into reading order and tag each with a row and column id.

    df : dataframe with at least left, top, width and height columns,
    min_ar_thresh : min aspect ratio threshold (currently not applied),
    max_ar_thresh : max aspect ratio threshold (currently not applied),
    pixel_row_gap : min gap b/w consecutive `top` values to start a new row

    returns : a new dataframe sorted by (top, left) with aspect_ratio, rowID and
              columnID columns added; the input dataframe is left unmodified
    '''
    # nothing to order: return the expected columns rather than failing on row 0
    if df.empty:
        out = df.copy()
        out['aspect_ratio'] = np.array([], dtype=float)
        out['rowID'] = np.array([], dtype=int)
        out['columnID'] = np.array([], dtype=int)
        return out

    out = df.assign(aspect_ratio=(df['width'] / df['height']).round(2))
    out = out.sort_values(by=['top', 'left']).reset_index(drop=True) #sort-by occurence

    # a new row starts whenever `top` jumps by more than pixel_row_gap relative to
    # the previous component; the running count of such jumps is the row id
    starts_new_row = out['top'].diff() > pixel_row_gap
    out['rowID'] = starts_new_row.cumsum() + 1

    # rank left edges inside each row; method='first' keeps ids unique when two
    # components share the same `left` (average ranks would truncate to duplicates)
    out['columnID'] = out.groupby('rowID')['left'].rank(method='first').astype(int)
    return out


In [None]:
def build_single_lexicon(label_id, df, k=5):
    '''
    Describe the neighbourhood of one word component in coordinates relative to it.

    label_id : word component id,
    df : dataframe with label_id, left, top, width, height, centroid_x,
         centroid_y and rowID columns,
    k : #neighbors to build context

    returns : dataframe of the (up to) k nearest neighbours taken from the rows
              rowID-1 .. rowID+1, ordered by angle, with tl_corner / br_corner
              tuples; None (after printing a message) if label_id is not in df
    '''
    target = df[df['label_id'] == label_id]
    if target.empty:
        print("unknown component")
        return

    anchor = target.iloc[0]
    x, y = anchor['centroid_x'], anchor['centroid_y']
    w, h, r = anchor['width'], anchor['height'], anchor['rowID']

    #candidate neighbors: same or adjacent row, excluding the component itself.
    #copy so the new columns are written to an independent frame, not a slice of df
    in_adjacent_rows = df['rowID'].between(r - 1, r + 1)
    temp_df = df[in_adjacent_rows & (df['label_id'] != label_id)].copy()

    #rotation-invariance - TBD during preprocessing- deskew at start

    #scale-invariance: offsets from the anchor centroid in units of its width/height
    temp_df['new_centroid_x'] = (temp_df['centroid_x']-x)/w
    temp_df['new_centroid_y'] = (y-temp_df['centroid_y'])/h
    temp_df['new_left'] = (temp_df['left']-x)/w
    temp_df['new_top'] = (y-temp_df['top'])/h
    temp_df['new_width'] = temp_df['width']/w
    temp_df['new_height'] = temp_df['height']/h

    #calculate distance & angle (degrees, wrapped into [0, 360))
    temp_df['Euclidean'] = np.sqrt(temp_df['new_centroid_y']**2 + temp_df['new_centroid_x']**2)
    theta = np.degrees(np.arctan2(temp_df['new_centroid_y'], temp_df['new_centroid_x']))
    temp_df['theta'] = (theta + 360) % 360
    #heuristic: angles just either side of 0 are snapped to 0
    temp_df.loc[(temp_df['theta'] > 350) | (temp_df['theta'] < 2), 'theta'] = 0
    #fixed 45-degree sectors; an integer bin count would instead split the
    #observed min..max range and fails on an empty neighbour set
    sector_edges = np.linspace(0, 360, 9)
    temp_df['quadrant'] = pd.cut(temp_df['theta'], bins=sector_edges, labels=list(range(1, 9)), right=False)

    #sort and retreive top-K, format required coordinates.
    res_df = temp_df.sort_values(by=['Euclidean','theta'])[:k] #sort and get top-k neighbors
    res_df = res_df.sort_values(by=['theta'])
    # NOTE(review): new_top is measured with the y axis flipped, yet the bottom-right
    # corner adds new_height to it - confirm this is the intended convention.
    res_df['tl_corner'] = pd.Series(
        list(zip(res_df['new_left'], res_df['new_top'])),
        index=res_df.index, dtype=object)
    res_df['br_corner'] = pd.Series(
        list(zip(res_df['new_left'] + res_df['new_width'], res_df['new_top'] + res_df['new_height'])),
        index=res_df.index, dtype=object)
    return res_df

In [None]:
def plot_all_ouptuts(img):
    '''
    Show the preprocessing stages of an image side by side.

    img : image passed to binarize_img

    Panels: resized image, adaptive threshold, eroded inverse threshold, and the
    resized image with a green box around every connected component.
    '''
    # binarize_img(return_inter=True) returns [image, thresh, erosion] in that order
    image, thresh, eroded = binarize_img(img,return_inter=True)
    totalLabels, labels, stats, centroid = cv2.connectedComponentsWithStats(eroded,4,cv2.CV_32S) # type: ignore #4 is for connectivity
    df = generate_stats(totalLabels,stats, centroid)
    df = filter_cv2_components(df)

    output = image.copy()
    # one pass over the bounding boxes instead of a dataframe lookup per component
    for x, y, w, h in df[['left','top','width','height']].to_numpy():
        cv2.rectangle(output, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)

    # NOTE(review): colour panels are shown as stored; if the image came from
    # cv2.imread the channels are presumably BGR and colours will look swapped.
    panels = [
        (image, "image", None),
        (thresh, "thresh", 'gray'),
        (eroded, "erosion", 'gray'),
        (output, "components", None),
    ]
    fig, axs = plt.subplots(1, 4, figsize=(20, 6))
    for ax, (panel, title, cmap) in zip(axs, panels):
        ax.imshow(panel, cmap=cmap)
        ax.set_title(title)
    plt.subplots_adjust(left=0, right=1, top=1, bottom=0, wspace=0.05, hspace=0.2)
    plt.show()

In [None]:
def build_context_layout(img,k_neigh = 5):
    '''
    Extract one context layout per word component of an image.

    img : image,
    k_neigh : #neighbors to build context

    returns : (context_vectors, context_coordinates, img) where
              context_vectors is an array of shape (n, k_neigh, 4) holding
              (TL-x, TL-y, BR-x, BR-y) per neighbour, rounded to 4 decimals,
              context_coordinates is a list of n (1, 2) centroid arrays in the
              same order, and img is the preprocessed image from binarize_img
    '''
    #preprocess and transform image
    thresh, img = binarize_img(img)
    totalLabels, labels, stats, centroid = cv2.connectedComponentsWithStats(thresh,4,cv2.CV_32S)
    df = generate_stats(totalLabels,stats, centroid)
    df = filter_cv2_components(df)

    #extract context for each word component
    context_vectors,context_coordinates = [],[]
    for label, cx, cy in zip(df['label_id'].values, df['centroid_x'].values, df['centroid_y'].values):
        # best effort: a component whose context cannot be built is reported and skipped
        try:
            res = build_single_lexicon(label, df,k=k_neigh)
        except Exception as e:
            print(e)
            continue
        # keep only components that have a full set of k_neigh neighbours
        if res is None or res.shape[0] < k_neigh:
            continue
        corners = res[['tl_corner','br_corner']].values
        # flatten [(tl, br), ...] into [tl_x, tl_y, br_x, br_y, ...]
        context_vectors.append([coord for corner in corners.ravel() for coord in corner])
        context_coordinates.append(np.array([[cx, cy]]))

    # format into a set of 4/quadrapules
    context_vectors = np.array(context_vectors, dtype=float).reshape(len(context_vectors), k_neigh, 4) # 4 is for (TL-x,TL-y,BR-x,BR-y )
    context_vectors = context_vectors.round(4)
    return context_vectors,context_coordinates, img

In [None]:
class DocumentDetails():
    '''Plain record holding everything kept for one indexed document.'''
    def __init__(self, img,feature_vectors,label_coordinates, img_path):
        # the image and the path it was given with
        self.img, self.img_path = img, img_path
        # feature vectors, plus each word's coordinates indexed the same way
        self.feature_vectors, self.label_coordinates = feature_vectors, label_coordinates
        
class DocumentVectors():
    '''
    Collection of documents plus the context-layout index built from them.

    img_file_paths : paths of the .png files found in the folder, sorted by name
    docs : doc-id -> DocumentDetails, filled by extract_context_vectors
    context_index : layout tuple -> set of doc-ids, filled by build_context_index
    '''
    def __init__(self, img_folder_path):
        # sorted so that document ids do not depend on directory listing order
        img_files = sorted(file for file in os.listdir(img_folder_path) if file.endswith('.png')) #filter only image files
        img_file_paths = [os.path.join(img_folder_path, file_name) for file_name in img_files] #generate filepaths for all images

        #initialize variables
        self.img_file_paths = img_file_paths
        self.docs = {} #save all document details
        self.context_index = {} #save reduced context-vector-layouts index

    #for all images, extract context layouts.
    def extract_context_vectors(self, show_inter=0):
        '''
        Build the context layouts of every image and store them in self.docs.

        show_inter : if truthy, also plot the preprocessing stages of each image
        '''
        for doc_id,img_path in enumerate(self.img_file_paths):
            img = cv2.imread(img_path)
            # an unreadable file is reported and skipped instead of failing later
            if img is None:
                print("could not read image, skipping:", img_path)
                continue
            feature_vectors,label_coordinates, img = build_context_layout(img)
            print(img_path,doc_id, img.shape, feature_vectors.shape,len(label_coordinates))
            self.docs[doc_id] = DocumentDetails(img,feature_vectors,label_coordinates, img_path)
            if show_inter:
                plot_all_ouptuts(img)

    #build index from extracted context layouts
    def build_context_index(self, ms=True, index_file_name='reduced_context_index2.npy'):
        '''
        Build (or load) the layout -> doc-ids index into self.context_index.

        ms : if True, reduce the index with Mean shift clustering,
        index_file_name : file the index is loaded from when it exists and
                          saved to otherwise
        '''
        #if index already exists, load it. else build it and save.
        if os.path.exists(index_file_name):
            # NOTE(review): allow_pickle unpickles the file's contents; only load
            # index files you created yourself.
            index = np.load(index_file_name,allow_pickle=True)
            self.context_index = dict(index.item())
            print("loaded prebuilt index:",index_file_name,"in total:",len(self.context_index))
            return

        #build a normal index with ctxlayout->{docids} pairs. Sets are used so that
        #a layout shared by several documents keeps all of them, and so the full
        #index has the same value type as the reduced one.
        index = {}
        for doc_id, doc in self.docs.items():
            for cv in doc.feature_vectors:
                index.setdefault(tuple(cv.reshape(-1)), set()).add(doc_id)
        print("extracted all context vectors, in total: ",len(index))
        if not index:
            print("no context vectors available, index not built")
            return

        if ms: #if ms is True, then perform Mean shift clustering to reduce index size
            #reduce cl collection with Mean shift clustering
            layouts = list(index.keys())
            clusters = MeanShift().fit(layouts) # type: ignore
            cluster_centers = clusters.cluster_centers_

            #build new reduced ctxlayout-centre->[list of doc-ids] index
            reduced_index = {}
            for layout, cluster in zip(layouts, clusters.labels_):
                centroid = tuple(cluster_centers[cluster])
                reduced_index.setdefault(centroid, set()).update(index[layout])

            self.context_index = reduced_index
            print("built mean-shift reduced-index, in total: ",len(reduced_index))
        else:
            self.context_index = index
            print("built full-index, in total: ",len(index))

        np.save(index_file_name, self.context_index)
        print("saved index, as ",index_file_name)

    #display all documents in the database
    def show_docs(self):
        '''Plot the preprocessing stages of every stored document image.'''
        for doc in self.docs.values():
            plot_all_ouptuts(doc.img)

    #given a query img, retrieve the relevant document (see query() below)

In [None]:
docs_path = './data/docs/'
save_path = "./dv_class.pickle"

# Reuse the cached DocumentVectors object when present; otherwise build it from
# the images, build the index and cache the result.
if os.path.exists(save_path):
    # NOTE(review): pickle.load can execute arbitrary code; only load a cache
    # file that this notebook wrote.
    with open(save_path, "rb") as file:
        dv = pickle.load(file)
    print("loaded saved class from {}".format(save_path))
else:
    dv = DocumentVectors(docs_path)
    dv.extract_context_vectors(show_inter=1)
    dv.build_context_index()
    with open(save_path, "wb") as file:
        pickle.dump(dv, file, -1)
    print("saved class object at {}".format(save_path))

In [None]:
# Plot the preprocessing stages of every document stored in dv.
dv.show_docs()

In [None]:
def random_quarter_crop(img):
    '''
    Cut a random window covering half of each of the first two axes.

    img : array with at least two dimensions (extra axes, e.g. channels, are kept)
    returns : view of img of shape (rows // 2, cols // 2, ...)
    '''
    # shape[:2] is (rows, cols); slicing instead of 3-way unpacking also
    # accepts 2-D (single channel) images
    n_rows, n_cols = img.shape[:2]
    crop_rows, crop_cols = n_rows // 2, n_cols // 2
    # randint is inclusive, so the window always fits inside the image
    row_start = random.randint(0, n_rows - crop_rows)
    col_start = random.randint(0, n_cols - crop_cols)
    return img[row_start:row_start + crop_rows, col_start:col_start + crop_cols]

In [None]:
# Build a query by cropping a random quarter out of one of the indexed documents.
docs_path = './data/docs/'
img_id = 14
img_path = os.path.join(docs_path, str(img_id) + '.png')
print(img_path)
img = cv2.imread(img_path)
# cv2.imread signals failure by returning None rather than raising
if img is None:
    raise FileNotFoundError(img_path)
img_c = random_quarter_crop(img)
plt.imshow(img_c)

In [None]:
# Preprocessing stages and detected components of the cropped query.
plot_all_ouptuts(img_c)

In [None]:
def _match_canvas(q, d):
    '''Return (canvas, x_offset): a white uint8 canvas with q on the left and d starting at column x_offset.'''
    # sized from the larger of the two images so a query bigger than the
    # document still fits; identical to a 2x document canvas when q fits inside d
    height = max(q.shape[0], d.shape[0])
    x_offset = max(q.shape[1], d.shape[1])
    canvas = np.full((height, x_offset + d.shape[1], 3), 255, dtype=np.uint8)
    canvas[:q.shape[0], :q.shape[1], :] = q #fill top-left with query
    canvas[:d.shape[0], x_offset:, :] = d #fill top-right with document
    return canvas, x_offset


def _draw_match(canvas, match1, match2, x_offset, color):
    '''Draw both end points of one match and the line joining them onto canvas.'''
    start = (int(match1[0]), int(match1[1]))
    end = (int(match2[0] + x_offset), int(match2[1]))
    cv2.circle(canvas, start, radius=8, color=color, thickness=5)
    cv2.circle(canvas, end, radius=8, color=color, thickness=5)
    cv2.line(canvas, start, end, color, 2)


def _show_matches(canvas):
    '''Display the finished match canvas.'''
    plt.title('point-matches')
    plt.imshow(canvas)
    plt.show()


def custom_plot_matches2(q, d, m1, m2):
    '''
    Plot query and document side by side with every match drawn in one colour.

    q: query image,
    d: document image,
    m1: matched point-set of query ,
    m2: matched point-set of document,
    '''
    output_image, x_offset = _match_canvas(q, d)
    for match1, match2 in zip(m1, m2):
        _draw_match(output_image, match1, match2, x_offset, (0, 255, 0))
    _show_matches(output_image)


def custom_plot_matches(q, d, m1, m2):
    '''
    Plot query and document side by side, colouring each match by consistency.

    q: query image,
    d: document image,
    m1: matched point-set of query ,
    m2: matched point-set of document,

    A match is drawn with (0, 255, 0) when its absolute coordinate difference lies
    strictly within one standard deviation of the mean difference on both axes,
    and with (255, 0, 0) otherwise.
    '''
    output_image, x_offset = _match_canvas(q, d)

    points = abs(m2-m1) #distribution of differences between matched coordinate pairs
    mean_centroid = np.mean(points, axis=0) #mean centroid coordinate difference
    threshold = np.std(points, axis=0) # threshold - std deviation of difference

    for match1, match2 in zip(m1, m2):
        #difference between a match should be in range of (mean_centroid-threshold, mean_centroid+threshold)
        diff = np.round(abs(match2-match1))
        consistent = np.all(diff > mean_centroid-threshold) and np.all(diff < mean_centroid+threshold)
        color = (0, 255, 0) if consistent else (255, 0, 0)
        _draw_match(output_image, match1, match2, x_offset, color)
    _show_matches(output_image)

def point_match(query_img,doc_id,n_matches=30):
    '''
    Plot nearest-layout point matches between a query image and one stored document.

    query_img: query img,
    doc_id : id of the document in dv.docs to match against,
    n_matches : max number of query layouts to match (taken in extraction order)

    raises : KeyError if doc_id is not in dv.docs
    '''
    # fail early with a clear message instead of an argmin error further down
    if doc_id not in dv.docs:
        raise KeyError("unknown document id: {}".format(doc_id))

    #build context-layout for query_img
    query_vectors,query_coordinates,img = build_context_layout(query_img)

    #context-layout(actual, not reduced index) of the requested document
    doc = dv.docs[doc_id]
    doc_vectors,doc_coordinates,doc_img = doc.feature_vectors,doc.label_coordinates,doc.img
    if len(query_vectors) == 0 or len(doc_vectors) == 0:
        print("no context layouts available to match")
        return

    #for each query vector, find nearest match of context-vector,
    #and then extract corresponding coordinate pairs from query & doc
    matches1,matches2 = [],[]
    for i in range(min(len(query_vectors), max(n_matches, 1))):
        #get min-distance corresponding context layout
        dist = np.linalg.norm(doc_vectors - query_vectors[i, np.newaxis, :, :], axis=(1, 2))
        closest_index = np.argmin(dist)
        #append matches to return
        matches1.append(query_coordinates[i][0])
        matches2.append(doc_coordinates[closest_index][0])
    custom_plot_matches(img,doc_img,np.round(matches1,2),np.round(matches2,2))
            

In [None]:
#get orientation for a point set.
def get_orientation(matrix):
    '''
    Sign of the determinant of a square matrix.

    matrix : numpy 2d array
    returns : the value np.sign gives for the determinant (-1, 0 or 1 as a float)
    '''
    return np.sign(np.linalg.det(matrix))

#match/verify orientation for 2 point sets.
def verfiy_orientation(P1,P2):
    '''
    Compare the orientation of two point sets.

    P1 : numpy 2d array,
    P2 : numpy 2d array,
    returns : int product of the two orientation signs
              (1 when they agree, -1 when they differ, 0 when either is 0)
    '''
    return int(get_orientation(P1) * get_orientation(P2))

#triplet-verification
def verification(candidates, query_vectors, query_coordinates, m_matches=30, threshold_score=1000):
    '''
    Rank candidate documents by triplet-orientation agreement with the query.

    candidates: candidate doc-id's (keys of dv.docs),
    query_vectors: 2d array, one flattened context layout per query component,
    query_coordinates: geometric coordinates of word components in query img,
    m_matches : no of matched pairs to consider for  verification,
    threshold_score : minimum best score required to report a match

    returns : (doc-id, matched coordinate pairs, image path) of the best candidate,
              or (-1, 0, 0) when the best score is below threshold_score
    '''
    triplet_scores= {}
    query_coordinates = np.array(query_coordinates).reshape(len(query_coordinates),2).round(2)
    # layout width is taken from the query instead of assuming 5 neighbours * 4 values
    n_features = np.asarray(query_vectors).shape[1]
    curr_score = -1
    curr_matches = None
    res_path = None
    #for each candidate doc, build triplet score
    for cd in candidates:
        doc = dv.docs[cd]
        dc, path = doc.label_coordinates, doc.img_path
        doc_coords = np.array(dc).reshape(len(dc),2).round(2)
        doc_vectors = np.array(doc.feature_vectors).reshape(len(doc.feature_vectors),n_features).round(2)
        #extract pair-wise distances for each cooridinate pair from query, candidate
        distances = np.round(cdist(query_vectors, doc_vectors, metric='euclidean'),2)
        #build a graph of cooridinate pair from query, candidate with edge-length as distance between them
        edges_dict = {(tuple(query_coordinates[i]), tuple(doc_coords[j])): distances[i, j] for i in range(query_coordinates.shape[0]) for j in range(doc_coords.shape[0])}
        #sort the graph by min edge lengths and extract the matched coordiante pairs from query, candidate
        min_edges = sorted(edges_dict.items(), key=lambda x: x[1])

        #greedily take the closest pairs, using every query / document point at most
        #once, until m_matches pairs are collected (the earlier counter started at 1
        #and therefore stopped one pair short)
        triplets = []
        p1_set,p2_set = set(),set()
        for (P1, P2), _ in min_edges:
            if len(triplets) >= m_matches:
                break
            if P1 in p1_set or P2 in p2_set:
                continue
            p1_set.add(P1)
            p2_set.add(P2)
            triplets.append((P1,P2))

        #build triplet sets of top m_matches of pairs.
        triplet_combinations = np.array(list(itertools.combinations(triplets, 3)))
        #extract  matche scores for each triplet combination and sum up to make final score
        matches_score = 0
        #for each triplet coordinate-sets, verfiy orientation
        for elm in triplet_combinations:
            # append a column of ones: three (x, y) points -> 3x3 matrix
            ones = np.ones((elm.shape[0], 1))
            s1 = np.concatenate([elm[:,0,:], ones], axis=1)
            s2 = np.concatenate([elm[:,1,:], ones], axis=1)
            matches_score += verfiy_orientation(s1,s2)
        triplet_scores[cd] = matches_score # final triplet matches score for candidate cd
        if(matches_score > curr_score):
            curr_score = matches_score
            curr_matches = triplets
            res_path = path

    print("triplet verification scores:",sorted(triplet_scores.items(), key=lambda x:x[1], reverse=True))

    #sort by matches scores and return top-one.
    results = sorted(triplet_scores, key=triplet_scores.get, reverse=True)
    print("top triplet-score candidates:", results)
    if curr_score < threshold_score:
        return -1,0,0
    print("final retreived result document ID:", results[0])
    return results[0], curr_matches, res_path
    
    #verify 1-1 point match between a query img and a given doc-id


In [None]:
def query(query_img, max_candidates=10,min_qry_vectors=10):
    '''
    Retrieve and plot the stored document that best matches a query image.

    query_img: query img,
    max_candidates: no of candidates for triplet verification
                    (currently not applied: every scored document is verified),
    min_qry_vectors: minimum number of query layouts required to run the query

    returns : -1 when the query cannot be run or no document passes verification;
              otherwise None after plotting the matches
    '''
    #get query-img context layouts
    query_vectors,query_coordinates,img = build_context_layout(query_img)
    if len(query_vectors) < max(min_qry_vectors, 1): #return if too few query vectors found
        print("found only",len(query_vectors),"query vectors. cannot query with this img. try with a better image")
        return -1
    print("found",len(query_vectors),"query vectors")
    # one flat layout per row, whatever the number of neighbours per layout
    query_vectors = query_vectors.reshape(len(query_vectors),-1)

    if not dv.context_index:
        print("context index is empty. build the index before querying")
        return -1

    #get matchings-count score
    index = np.array(list(dv.context_index.keys()))
    docs = list(dv.context_index.values())
    coverage_scores = {}
    distances = cdist(query_vectors, index) #extract distances between each pair of index-vectors,query-vectors
    best_match_indices = np.argmin(distances, axis=1) #filter best matches of pairs
    print(best_match_indices,query_vectors.shape, index.shape)
    for bm in best_match_indices: #build coverage scores(no of matched index-vectors) for each document.
        matched = docs[bm]
        # an index entry may hold a single doc-id instead of a collection of them
        if not isinstance(matched, (set, frozenset, list, tuple)):
            matched = [matched]
        for elm in set(matched):
            # the first hit counts as 1 (it used to be recorded as 0)
            coverage_scores[elm] = coverage_scores.get(elm, 0) + 1
    print("layout coverage scores:",sorted(coverage_scores.items(), key=lambda x:x[1],reverse=True))

    #sort by matchings-count/coverage scores
    candidates = sorted(coverage_scores, key=coverage_scores.get, reverse=True)#[:max_candidates]
    print("top layout-match candidates : ", candidates)
    doc_id, matches, doc_path = verification(candidates, query_vectors, query_coordinates)
    if doc_id == -1:
        print("looks like the document is not present in the database")
        return -1
    matches = np.array(matches)
    m1 = matches[:,0,:]
    m2 = matches[:,1,:]
    print(doc_path)

    doc_img = dv.docs[doc_id].img
    custom_plot_matches2(img,doc_img,np.round(m1,2),np.round(m2,2))


In [None]:
# Retrieve the cropped query; img_id is the document it was cut from.
print("ground truth: ", img_id)
query(img_c)

In [None]:
# Point-match the cropped query against the document stored under id 5.
point_match(img_c, 5)

In [None]:
# Load and preview query image 2.
q2 = cv2.imread("./data/queries/2.jpeg")
plt.imshow(q2)

In [None]:
# Preprocessing stages and detected components of q2.
plot_all_ouptuts(q2)

In [None]:
# Retrieve q2; the printed ground truth is the expected document id.
print("ground truth: ", 1)
query(q2)

In [None]:
# Point-match q2 against the document stored under id 12.
point_match(q2, 12)

In [None]:
# Load and preview the 2_CS.jpeg query image.
q12 = cv2.imread("./data/queries/2_CS.jpeg")
plt.imshow(q12)

In [None]:
# Preprocessing stages and detected components of q12.
plot_all_ouptuts(q12)

In [None]:
# Retrieve q12; the printed ground truth is the expected document id.
print("ground truth: ", 1)
query(q12)

In [None]:
# Point-match q12 against the document stored under id 12.
point_match(q12, 12)

In [None]:
def extract_ROI(image):
    '''
    Crop an image to the bounding box of its largest content blob.

    image : 3-channel image (it is converted with cv2.COLOR_BGR2GRAY)
    returns : the cropped region of `image`, or `image` itself when no contour
              is found
    '''
    def _contours(binary):
        # cv2.findContours returns 2 or 3 values depending on the OpenCV version;
        # the contour list is the first of two or the second of three
        found = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        return found[0] if len(found) == 2 else found[1]

    gray = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Remove horizontal lines
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25,1))
    detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
    for c in _contours(detected_lines):
        cv2.drawContours(thresh, [c], -1, 0, -1)

    # Dilate to merge into a single contour
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,30))
    dilate = cv2.dilate(thresh, vertical_kernel, iterations=3)

    # Find contours, take the largest one and extract ROI.
    # (Sorting and dropping the last element left nothing to index when only
    # one contour was found.)
    cnts = _contours(dilate)
    if len(cnts) == 0:
        return image
    x,y,w,h = cv2.boundingRect(max(cnts, key=cv2.contourArea))
    ROI = image[y:y+h, x:x+w]
    return ROI
# plt.imshow(extract_ROI(q4))

def deskew(image):
    '''
    Rotate an image by its estimated skew angle.

    image : image array accepted by rgb2gray
    returns : rotated uint8 image, or the input unchanged when no skew angle
              could be estimated
    '''
    grayscale = rgb2gray(image)
    angle = determine_skew(grayscale)
    print("angle:", angle)
    # nothing to rotate by when the estimator returns no angle
    if angle is None:
        return image
    # the rotated result is scaled by 255 and cast back to uint8
    rotated = (rotate(image, angle, resize=True) * 255).astype(np.uint8)
    return rotated
# plt.imshow(deskew(extract_ROI(q4)))

def normalize_image(image, target_width=600, target_height=800):
    '''
    Resize an image to a fixed size, ignoring its aspect ratio.

    image : image array,
    target_width, target_height : output size in pixels
    returns : the image resized by cv2.resize to (target_width, target_height)
    '''
    # the scale factors computed here before were never used and have been dropped
    return cv2.resize(image, (target_width, target_height))
# plt.imshow(normalize_image(q4))

def preprocess2(img):
    '''
    Full clean-up chain for a query image.

    img : image
    returns : the image after ROI extraction, deskewing and size normalisation,
              applied in that order
    '''
    roi = extract_ROI(img)
    straightened = deskew(roi)
    return normalize_image(straightened)

In [None]:
# Load and preview the cam_cap.jpeg query image.
q3 = cv2.imread('./data/queries/cam_cap.jpeg')
plt.imshow(q3)

In [None]:
# Preprocessing stages and detected components of q3.
plot_all_ouptuts(q3)

In [None]:
# Retrieve the raw q3; the printed ground truth is the expected document id.
print("ground truth: ", 10)
query(q3)

In [None]:
# Run q3 through preprocess2 and preview the result.
q33 = preprocess2(q3)
plt.imshow(q33)

In [None]:
# Preprocessing stages and detected components of the preprocessed q33.
plot_all_ouptuts(q33)

In [None]:
# Retrieve the preprocessed q33; same expected document id as for q3.
print("ground truth: ", 10)
query(q33)

In [None]:
# Load and preview the query_31.png query image.
query_44 = cv2.imread(".//data/queries/query_31.png")
plt.imshow(query_44)

In [None]:
# Preprocessing stages and detected components of query_44.
plot_all_ouptuts(query_44)

In [None]:
# Retrieve the raw query_44; the printed ground truth is the expected document id.
print("ground truth: ", 31)
query(query_44)

In [None]:
# Run query_44 through preprocess2 and preview the result.
q44 = preprocess2(query_44)
plt.imshow(q44)

In [None]:
# Preprocessing stages and detected components of the preprocessed q44.
plot_all_ouptuts(q44)

In [None]:
# Retrieve the preprocessed q44; same expected document id as for query_44.
print("ground truth: ", 31)
query(q44)

In [None]:
# Load and preview the d1.jpg query image.
query_5 = cv2.imread("./data/queries/d1.jpg")
plt.imshow(query_5)

In [None]:
# Preprocessing stages and detected components of query_5.
plot_all_ouptuts(query_5)

In [None]:
# Retrieve the raw query_5; the ground truth is printed as None.
# NOTE(review): presumably this image is not in the database - confirm.
print("ground truth: ", None)
query(query_5)

In [None]:
# Run query_5 through preprocess2 and preview the result.
q55 = preprocess2(query_5)
plt.imshow(q55)

In [None]:
# Preprocessing stages and detected components of the preprocessed q55.
plot_all_ouptuts(q55)

In [None]:
# Retrieve the preprocessed q55; the ground truth is printed as None.
print("ground truth: ", None)
query(q55)