In [1]:
# Own Libraries
from utils.plotting import plot_image_and_similar
from utils.data import DataManager
from utils.metrics import prec_recall, iou_score, f1_dice
from utils.similarity import Similarity
from utils.image_processing import image_to_windows, get_3d_norm_histogram, calculate_histograms,text_removal,text_reading

# 3rd Party Libraries
from skimage.io import imread
from skimage.color import rgb2gray
from typing import Tuple, List
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import pickle
import ml_metrics as metrics
import pytesseract
import re
import textdistance

In [2]:
data_manager = DataManager()
db, db_files = data_manager.load_data(folder = "./data/BBDD/", extension = ".jpg", desc = "Loading BBDD Data...")
qsd1_w2, qsd1_w2_files = data_manager.load_data(folder = "./data/qsd1_w2/", extension = ".jpg", desc = "Loading qsd1_w2 Data...")
qsd1_w3, qsd1_w3_files = data_manager.load_data(folder = "./data/qsd1_w3/non_augmented/", extension = ".jpg", desc = "Loading qsd1_w3 Data...")

Loading BBDD Data...: 100%|██████████████████████████████████████████████████████████| 287/287 [00:12<00:00, 23.55it/s]
Loading qsd1_w2 Data...: 100%|████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 168.96it/s]

./data/BBDD/ read: 287 images



Loading qsd1_w3 Data...: 100%|████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 345.75it/s]

./data/qsd1_w2/ read: 30 images





./data/qsd1_w3/non_augmented/ read: 30 images


In [3]:
BBDD_texts = data_manager.load_text(folder = "./data/BBDD/", extension = ".txt", desc = "Loading BBDD Text...")
print(BBDD_texts)

Loading BBDD Text...: 100%|████████████████████████████████████████████████████████| 287/287 [00:00<00:00, 2546.66it/s]


[('./data/BBDD/bbdd_00000.txt', "('Victor Perez-Porros', 'Des-li-zan-tes')\n"), ('./data/BBDD/bbdd_00001.txt', "('Hugo Demarco', 'Reflecting Room')\n"), ('./data/BBDD/bbdd_00002.txt', '\n'), ('./data/BBDD/bbdd_00003.txt', "('Edvard Munch', 'Youth')\n"), ('./data/BBDD/bbdd_00004.txt', "('Martin Carral', 'Ciclo espacial XXIV')\n"), ('./data/BBDD/bbdd_00005.txt', "('Mario Pasqualotto', 'Els 72 noms de Deu')\n"), ('./data/BBDD/bbdd_00006.txt', "('Gerard Sala', 'S/t')\n"), ('./data/BBDD/bbdd_00007.txt', "('Yago Hortal', 'Kl 45')\n"), ('./data/BBDD/bbdd_00008.txt', "('Josep Guinovart', 'S/t')\n"), ('./data/BBDD/bbdd_00009.txt', '\n'), ('./data/BBDD/bbdd_00010.txt', "('Agusti Puig', 'Sota la lleona')\n"), ('./data/BBDD/bbdd_00011.txt', "('Joan Hernandez Pijuan', 'Serie violetes i verds')\n"), ('./data/BBDD/bbdd_00012.txt', "('Regina Gimenez', 'Navegants')\n"), ('./data/BBDD/bbdd_00013.txt', "('Pere Santilari', 'Berenar de pastisseria')\n"), ('./data/BBDD/bbdd_00014.txt', '\n'), ('./data/BBDD/

In [4]:
BBDD_titles = data_manager.extract_title(data=BBDD_texts,index=1,desc='Extracting titles from DDBB...')
print(BBDD_titles)

Extracting titles from DDBB...: 100%|█████████████████████████████████████████████| 287/287 [00:00<00:00, 47945.40it/s]

[' Des-li-zan-tes', ' Reflecting Room', '', ' Youth', ' Ciclo espacial XXIV', ' Els 72 noms de Deu', ' St', ' Kl 45', ' St', '', ' Sota la lleona', ' Serie violetes i verds', ' Navegants', ' Berenar de pastisseria', '', ' Guitarra vermella ', ' Spring in the Desert', '', '', '', ' St', ' Passeig de Gracia', ' Pierfont Mauler', ' The Absinthe Drinker', ' Tracant lhorari', ' Composicio', ' Lungo-Mare III', ' Existo luego existo', ' Xoc-roig', '', ' Rosae Cranium', ' Vallauris', ' Estrella apagada', '', ' Peleas domesticas', ' Mentre esclataven llampecs i retrunys de trons', '', ' Contemplacio de lobra dona nua al museu', '', ' Retrat dAlba', ' Branches in Autumn', ' Cadira florida', ' Tot allo que ens separa i ens fara desapareixer', ' Esbos per a un retaule', ' Barcelona XVII', ' Pintura sobre', ' To get lost I', ' Quatre figues verdes', ' Lalmirall Byron', '', ' St', ' Estocolm', ' Bodego I pastisseria', ' 12XI2009', ' Yellow Earth and Silver Sky', ' 4IV2011', '', ' Untitled', ' Les ro




In [5]:
# Mention the installed location of Tesseract-OCR in your system
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
detected_titles = text_reading(data=qsd1_w2,num_images=1,desc='Reading titles from images')
    
     

Reading titles from images: 100%|██████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.02it/s]


In [6]:
print(detected_titles)

['sBodego IV', 'Paisatge', 'Noia al mirallj', 'Les roses', '2 segons abans del traspas', 'Lialmirall Byron', 'iSt', 'Port de mar Eivissa', ' Pintura sobre Ll ', '7 Unknown a', ' ', 'i Unknown ', 'Suite diabolica n 5 ', ' ', ' Mercat a la plaa de Manresa a', 'Rastres i transparencies ', 'Bodego  pastisseria M', ' ', '', ' Bodego ', ' ', 'The Stroll', 'Cadira florida', 'Tol al re mmoxe aimoxe Kel amt ge e', 'Te de nit', 'eLa meva mare', 's Carrer de la Plateriall', 'rie violetes i verds', 'Suite Mediterrania ', '']


In [7]:
sim = Similarity()

qsd1_w2_similarities = sim.compute_string_similarities(detected_titles,BBDD_titles,desc='Computing string similarity', similarity='levenshtein')

Computing string similarity: 100%|█████████████████████████████████████████████████████| 30/30 [00:05<00:00,  5.88it/s]


In [8]:
print(qsd1_w2_similarities)

[[0.06666667 0.125      0.         ... 0.         0.17391304 0.08333333]
 [0.2        0.125      0.         ... 0.         0.08695652 0.16666667]
 [0.         0.         0.         ... 0.         0.2173913  0.13333333]
 ...
 [0.2        0.2        0.         ... 0.         0.17391304 0.25      ]
 [0.15789474 0.15789474 0.         ... 0.         0.08695652 0.15789474]
 [0.         0.         1.         ... 1.         0.         0.        ]]


In [9]:
top_k_qsd1_w2 = sim.get_top_k(qsd1_w2_similarities, db_files, 1, desc='Extracting top 10')

Extracting top 10: 100%|██████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 546.94it/s]

[77]
[0.9]
[0.9]
[77]
[225]
[0.8888888888888888]
[0.8888888888888888]
[225]
[130]
[0.8666666666666667]
[0.8666666666666667]
[130]
[58]
[0.9]
[0.9]
[58]
[219]
[0.962962962962963]
[0.962962962962963]
[219]
[48]
[0.875]
[0.875]
[48]
[280]
[0.6666666666666667]
[0.6666666666666667]
[280]
[248]
[0.5263157894736843]
[0.5263157894736843]
[248]
[45]
[0.7777777777777778]
[0.7777777777777778]
[45]
[270]
[0.7272727272727273]
[0.7272727272727273]
[270]
[280]
[0.33333333333333337]
[0.33333333333333337]
[280]
[270]
[0.8]
[0.8]
[270]
[91]
[0.9]
[0.9]
[91]
[280]
[0.33333333333333337]
[0.33333333333333337]
[280]
[259]
[0.9333333333333333]
[0.9333333333333333]
[259]
[221]
[0.92]
[0.92]
[221]
[52]
[0.8095238095238095]
[0.8095238095238095]
[52]
[280]
[0.33333333333333337]
[0.33333333333333337]
[280]
[284]
[1.0]
[1.0]
[284]
[272]
[0.875]
[0.875]
[272]
[280]
[0.33333333333333337]
[0.33333333333333337]
[280]
[260]
[0.9090909090909091]
[0.9090909090909091]
[260]
[41]
[0.9333333333333333]
[0.9333333333333333]
[




In [10]:
print(top_k_qsd1_w2)

[['./data/BBDD/bbdd_00077.jpg'], ['./data/BBDD/bbdd_00225.jpg'], ['./data/BBDD/bbdd_00130.jpg'], ['./data/BBDD/bbdd_00058.jpg'], ['./data/BBDD/bbdd_00219.jpg'], ['./data/BBDD/bbdd_00048.jpg'], ['./data/BBDD/bbdd_00280.jpg'], ['./data/BBDD/bbdd_00248.jpg'], ['./data/BBDD/bbdd_00045.jpg'], ['./data/BBDD/bbdd_00270.jpg'], ['./data/BBDD/bbdd_00280.jpg'], ['./data/BBDD/bbdd_00270.jpg'], ['./data/BBDD/bbdd_00091.jpg'], ['./data/BBDD/bbdd_00280.jpg'], ['./data/BBDD/bbdd_00259.jpg'], ['./data/BBDD/bbdd_00221.jpg'], ['./data/BBDD/bbdd_00052.jpg'], ['./data/BBDD/bbdd_00280.jpg'], ['./data/BBDD/bbdd_00284.jpg'], ['./data/BBDD/bbdd_00272.jpg'], ['./data/BBDD/bbdd_00280.jpg'], ['./data/BBDD/bbdd_00260.jpg'], ['./data/BBDD/bbdd_00041.jpg'], ['./data/BBDD/bbdd_00242.jpg'], ['./data/BBDD/bbdd_00274.jpg'], ['./data/BBDD/bbdd_00160.jpg'], ['./data/BBDD/bbdd_00104.jpg'], ['./data/BBDD/bbdd_00011.jpg'], ['./data/BBDD/bbdd_00223.jpg'], ['./data/BBDD/bbdd_00284.jpg']]


In [11]:
SAVE_RESULTS =False
K=1
predicted_results = data_manager.save_results_1(results = top_k_qsd1_w2, path = "./week3/QSD1W2/method1", save = SAVE_RESULTS) # FOR QS WITH 1 PAINTING
expected_results = pickle.load(file = open('./data/qsd1_w2/gt_corresps.pkl', "rb"))

metric = metrics.mapk(actual = expected_results, predicted = predicted_results, k=K)
print("MAP@{} Score: {:.4f}% ({}/{})".format(K, metric*100,int(len(predicted_results)*metric),len(predicted_results)))

MAP@1 Score: 66.6667% (20/30)
