In [2]:
%load_ext autoreload
%autoreload 2

import os
import exercise3_config as config
from readers import ExtractImagePaths, ExtractTranscriptionsAsDictionary, ExtractTranscriptionsAsList, ExtractKeywords


# Load the transcriptions (both as a dictionary and as a list) and the keywords to spot.
transcriptions_as_dict = ExtractTranscriptionsAsDictionary()
transcriptions_as_list = ExtractTranscriptionsAsList()
keywords_to_search = ExtractKeywords()

# Resolve image numbers and jpg/svg paths for both splits.
# The train split is listed in task/train.txt; the test split is read from task/valid.txt.
(train_images_numbers, train_jpg_paths, train_svg_paths) = ExtractImagePaths(
  os.path.join(config.DATA_ROOT_DIR, 'task/train.txt'))
(test_images_numbers, test_jpg_paths, test_svg_paths) = ExtractImagePaths(
  os.path.join(config.DATA_ROOT_DIR, 'task/valid.txt'))

In [3]:
import cv2 as cv
import numpy as np
from PIL import Image
from image_preprocessing import CropAllWordImages


# TODO: remove this, apply OTSU. This method is here now just for the sake of testing the whole pipeline.
# Applies KMeans clustering with the given k on the image at the given path
# and returns the output as a PIL.Image (channels in RGB order).
# Input:
#  - jpg_image_filename: path of the image file to read
#  - k: number of color clusters; every pixel is replaced by the center of its cluster
# Output:
#  - PIL.Image with the same size as the input, containing at most k distinct colors
# Raises:
#  - IOError if the image file cannot be read
def ApplyKMeansClusteringToImageFile(jpg_image_filename, k=2):
  original_image = cv.imread(jpg_image_filename)
  # cv.imread reports a missing/unreadable file by returning None instead of raising.
  if original_image is None:
    raise IOError("Could not read image file: %s" % jpg_image_filename)

  # KMeans expects one float32 sample per row: flatten to (number_of_pixels, 3).
  pixels = np.float32(original_image.reshape((-1, 3)))

  # Stop after 10 iterations or once the centers move by less than 1.0;
  # run 10 attempts with random initial centers.
  criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)
  _, label, center = cv.kmeans(pixels, k, None, criteria, 10, cv.KMEANS_RANDOM_CENTERS)

  # Convert back into uint8 and paint every pixel with the center of its cluster.
  center = np.uint8(center)
  final_image_pixels = center[label.flatten()].reshape(original_image.shape)

  # OpenCV stores channels as BGR while PIL interprets a 3-channel array as RGB,
  # so swap the channels before handing the array over to PIL.
  return Image.fromarray(cv.cvtColor(final_image_pixels, cv.COLOR_BGR2RGB))


# Binarizes the image stored at original_image_path (currently through KMeans clustering)
# and hands the result, together with the mask at mask_path, to CropAllWordImages.
# Returns whatever CropAllWordImages produces for that image/mask pair.
def ExtractWordImagesFromOriginalImage(original_image_path, mask_path):
  return CropAllWordImages(
    ApplyKMeansClusteringToImageFile(original_image_path),
    mask_path)


# Crops the words of the first `limit` pages (of all pages when limit is None).
# Returns a dict like dict[page_number] = list_of_words.
def _ExtractWordsPerImage(images_numbers, jpg_paths, svg_paths, limit):
  available = len(images_numbers)
  # Never index past the end of the lists: an empty split yields an empty dict.
  count = available if limit is None else min(limit, available)
  words_per_image = {}
  for index in range(count):
    words_per_image[images_numbers[index]] = ExtractWordImagesFromOriginalImage(jpg_paths[index], svg_paths[index])
  return words_per_image


# Extracts all words needed for train and test
# Input:
#  - *_images_numbers, *_jpg_paths, *_svg_paths: parallel lists describing the pages of each split
#  - max_images_per_set: how many pages of each split to process; None processes all pages.
#    Defaults to 1, which keeps the pipeline fast while it is still being tested.
# Output:
#  - train_words_per_image: dict containing all train words
#  - test_words_per_image: dict containing all test words
# Entries in the output dictionaries are like dict[page_number] = list_of_words
def ExtractTrainAndTestWords(train_images_numbers, train_jpg_paths, train_svg_paths,
                             test_images_numbers, test_jpg_paths, test_svg_paths,
                             max_images_per_set=1):
  # TODO: call with max_images_per_set=None to iterate over all images.
  train_words_per_image = _ExtractWordsPerImage(
    train_images_numbers, train_jpg_paths, train_svg_paths, max_images_per_set)
  test_words_per_image = _ExtractWordsPerImage(
    test_images_numbers, test_jpg_paths, test_svg_paths, max_images_per_set)

  return train_words_per_image, test_words_per_image

# Crop the word images of the train and test pages; both results map page_number -> list_of_words.
(train_words_per_image, test_words_per_image) = ExtractTrainAndTestWords(
  train_images_numbers, train_jpg_paths, train_svg_paths,
  test_images_numbers, test_jpg_paths, test_svg_paths)


In [4]:
from word import Word, TestWord


# Builds one word object per cropped word image and labels it with its transcription.
# Input:
#  - word_class: class to instantiate for every word (Word or TestWord)
#  - words_per_image: dict like dict[page_number] = list_of_word_images
#  - transcriptions: list of (id, transcription) entries; the part of the id before the
#    first "-" is compared with the page number, and the entries of one page are assumed
#    to be contiguous and in the same order as the cropped word images of that page
# Output:
#  - list of word_class objects with image, id and transcription set
# Raises:
#  - ValueError if a page with word images has no transcription entry at all
def _BuildLabelledWords(word_class, words_per_image, transcriptions):
  # Remember where the transcriptions of every page start, so each page is aligned on
  # its own instead of relying on a shared running index.
  first_entry_of_page = {}
  for position, entry in enumerate(transcriptions):
    first_entry_of_page.setdefault(entry[0].split("-")[0], position)

  labelled_words = []
  for image_number, word_images in words_per_image.items():
    if not word_images:
      continue
    page = str(image_number)
    if page not in first_entry_of_page:
      raise ValueError("No transcription found for image number %r" % (image_number,))
    position = first_entry_of_page[page]
    for word_image in word_images:
      word = word_class()
      word.image = word_image
      word.id = transcriptions[position][0]
      word.transcription = transcriptions[position][1]
      position += 1
      labelled_words.append(word)
      # TODO: set features for each Word object
  return labelled_words


train_words = _BuildLabelledWords(Word, train_words_per_image, transcriptions_as_list)
test_words = _BuildLabelledWords(TestWord, test_words_per_image, transcriptions_as_list)

In [5]:
import numpy as np


# TODO: for each Word object, set features
# Builds the table of distances between every train word and every test word.
# Output:
#  - distances: nested dict indexed as distances[train_word.id][test_word.id], i.e. an
#    n x m matrix where n = len(train_words) and m = len(test_words)
def ComputeDistances(train_words, test_words):
  # TODO: Compute real distances; every pair gets the placeholder distance 0 for now.
  return {
    train_word.id: {test_word.id: 0 for test_word in test_words}
    for train_word in train_words
  }


# Build the train-by-test distance table that the closest-word lookup below consumes.
# NOTE: ComputeDistances is still a placeholder, so every entry is currently 0.
distances = ComputeDistances(train_words, test_words)

In [20]:
from keyword_spotting import FindClosestKnownWord


# For each test_word, find the train_word that is closest to it and store the result
# on the test word itself as closest_train_word.
# NOTE(review): every distance is currently the placeholder 0, so the chosen word
# presumably depends only on how FindClosestKnownWord breaks ties — confirm.
for test_word in test_words:
  test_word.closest_train_word = FindClosestKnownWord(test_word, train_words, distances)

In [22]:
from keyword_spotting import SpotKeyword


# Spot every keyword among the test words and print the keyword followed,
# on the next line, by whatever SpotKeyword returned for it.
for keyword_to_search in keywords_to_search:
  spotted_test_keywords = SpotKeyword(keyword_to_search, test_words)
  print(keyword_to_search, spotted_test_keywords, sep="\n")

A-l-e-x-a-n-d-r-i-a
[]
C-a-p-t-a-i-n
[]
C-l-o-t-h-e-s
[]
C-l-o-t-h-i-n-g
[]
C-o-l-o-n-e-l
[]
C-o-m-m-i-s_s-s-a-r-y
[]
C-o-u-r-t
[]
C-u-m-b-e-r-l-a-n-d
[]
D-i-c-k
[]
D-o-c-t-o-r
[]
E-n-s-i-g-n
[]
F-o-r-t
[]
F-o-r-t-s_pt
[]
F-r-e-d-e-r-i-c-k-s-b-u-r-g-h-s_cm
[]
G-e-o-r-g-e
[]
G-u-a-r-d
[]
I-n-s-t-r-u-c-t-i-o-n-s-s_pt
[]
J-o-h-n
[]
L-e-t-t-e-r-s
[]
L-i-e-u-t-e-n-a-n-t
[]
M-a-j-o-r
[]
M-r-s_pt
[]
O-f-f-i-c-e-r-s
[]
O-r-d-e-r
[]
O-r-d-e-r-s
[]
O-r-d-e-r-s-s_pt
[]
P-a-r-o-l-e
[]
R-e-c-r-u-i-t-s
[]
R-e-g-i-m-e-n-t
[]
R-e-g-i-m-e-n-t-s_pt
[]
R-e-n-d-e-z-v-o-u-s
[]
R-e-t-u-r-n
[]
R-o-b-e-r-t
[]
S-a-l-t
[]
S-e-r-g-e-a-n-t
[]
S-e-r-g-e-a-n-t-s_cm
[]
S-h-i-r-t-s-s_cm
[]
S-o-l-d-i-e-r-s
[]
S-t-e-w-a-r-t-s_cm
[]
S-t-o-r-e-s
[]
S-u-i-t-s
[]
V-i-r-g-i-n-i-a
[]
W-a-g-g-o-n-s
[]
W-a-s-h-i-n-g-t-o-n-s_cm
[]
W-i-n-c-h-e-s-t-e-r
[]
W-i-n-c-h-e-s-t-e-r-s_cm
[]
W-i-n-c-h-e-s-t-e-r-s_qo
[]
a-b-s-o-l-u-t-e-l-y
[]
a-r-r-i-v-e
[]
a-r-r-i-v-e-s_cm
[]
a-t-e-l-y
[]
c-a-m-p-s_pt
[]
c-a-r-e
[]
c-a-r-e-f-u-l
[]
c-a-r-