In [2]:
%load_ext autoreload
%autoreload 2

import os
import exercise3_config as config
from readers import ExtractImagePaths, ExtractTranscriptionsAsDictionary, ExtractTranscriptionsAsList, ExtractKeywords


transcriptions_as_dict = ExtractTranscriptionsAsDictionary()
transcriptions_as_list = ExtractTranscriptionsAsList()
keywords_to_search = ExtractKeywords()

# Extract paths for train and test files
train_images_numbers, train_jpg_paths, train_svg_paths = \
  ExtractImagePaths(os.path.join(config.DATA_ROOT_DIR, 'task/train.txt'))
test_images_numbers, test_jpg_paths, test_svg_paths = \
  ExtractImagePaths(os.path.join(config.DATA_ROOT_DIR, 'task/valid.txt'))

In [3]:
import cv2 as cv
import numpy as np
from PIL import Image
from image_preprocessing import CropAllWordImages


# TODO: remove this, apply OTSU. This method is here now just for the sake of testing the whole pipeline.
# Applies KMeans clustering with the given k on the image at the given path 
# and returns the output as a a PIL.Image.
def ApplyKMeansClusteringToImageFile(jpg_image_filename, k=2):
  original_image = cv.imread(jpg_image_filename)
  pixels = original_image.reshape((-1,3))
  pixels = np.float32(pixels)

  # Define criteria, number of clusters and apply KMeans.
  criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)
  _, label, center = cv.kmeans(pixels, k, None, criteria, 10, cv.KMEANS_RANDOM_CENTERS)

  # Convert back into uint8.
  center = np.uint8(center)
  final_image_pixels = center[label.flatten()]
  final_image_pixels = final_image_pixels.reshape((original_image.shape))

  return Image.fromarray(final_image_pixels)


# Applies binarization on the original image, then crops the words using the given mask.
def ExtractWordImagesFromOriginalImage(original_image_path, mask_path):
  image_after_binarization = ApplyKMeansClusteringToImageFile(original_image_path)
  return CropAllWordImages(image_after_binarization, mask_path)


# Extracts all words needed for train and test
# Output:
#  - train_words_per_image: dict containing all train words
#  - testt_words_per_image: dict containing all test words
# Entries in the output dictionaries are like dict[page_number] = list_of_words
def ExtractTrainAndTestWords(train_images_numbers, train_jpg_paths, train_svg_paths,
                             test_images_numbers, test_jpg_paths, test_svg_paths):
  train_words_per_image = {}
  # for index in range(len(train_images_numbers)):
  # TODO: iterate over all images. we can leave it like this for now for testing
  for index in range(1):
    train_words_per_image[train_images_numbers[index]] = ExtractWordImagesFromOriginalImage(train_jpg_paths[index], train_svg_paths[index])

  test_words_per_image = {}
  # for index in range(len(test_images_numbers)):
  # TODO: iterate over all images. we can leave it like this for now for testing
  for index in range(1):
    test_words_per_image[test_images_numbers[index]] = ExtractWordImagesFromOriginalImage(test_jpg_paths[index], test_svg_paths[index])

  return train_words_per_image, test_words_per_image

train_words_per_image, test_words_per_image = \
  ExtractTrainAndTestWords(train_images_numbers, train_jpg_paths, train_svg_paths,
                           test_images_numbers, test_jpg_paths, test_svg_paths)


In [28]:
from word import Word, TestWord
from feature_extraction import ExtractFeatures, Method


feature_extraction_methods = [Method.BLACK_PIXEL_RATIO]

# The following code is not nice but it works because transcriptions and train_words_per_image
# are in the same order.
train_words = []
index = 0
for _, train_words_per_current_image in train_words_per_image.items():
  for train_word in train_words_per_current_image:
    word = Word()
    word.image = train_word
    word.features = [e for v in ExtractFeatures(train_word, feature_extraction_methods) for e in v]
    word.id = transcriptions_as_list[index][0]
    word.transcription = transcriptions_as_list[index][1]
    index += 1
    train_words.append(word)


test_words = []
for image_number, test_words_per_current_image in test_words_per_image.items():
  # TODO: remove once we use all files
  while transcriptions_as_list[index][0].split("-")[0] != image_number:
    index += 1
  for test_word in test_words_per_current_image:
    word = TestWord()
    word.image = test_word
    word.features = [e for v in ExtractFeatures(test_word, feature_extraction_methods) for e in v]
    word.id = transcriptions_as_list[index][0]
    word.transcription = transcriptions_as_list[index][1]
    index += 1
    test_words.append(word)

In [31]:
# all_train_words = train_words
# all_test_words = test_words

train_words = train_words[:7]
test_words = test_words[:7]

221


In [32]:
import numpy as np
from dtaidistance import dtw


# Compute distances between all train and test words.
# Output:
#  - distances: matrix stored as dict of size n x m, where n = len(train_words) 
#    and m = len(test_words)
def ComputeDistances(train_words, test_words):
  distances = {}
  for train_word in train_words:
    distances_from_train_word = {}
    for test_word in test_words:
      distances_from_train_word[test_word.id] = dtw.distance(train_word.features, test_word.features)
    distances[train_word.id] = distances_from_train_word
  return distances


distances = ComputeDistances(train_words, test_words)

In [37]:
from keyword_spotting import FindClosestKnownWord


# For each test_word, find the train_word that is closest to it.
for test_word in test_words:
  test_word.closest_train_word, test_word.distance_to_closest_train_word = \
    FindClosestKnownWord(test_word, train_words, distances)

In [36]:
from keyword_spotting import SpotKeyword


# Search keywords.
for keyword_to_search in keywords_to_search:
  spotted_test_keywords = SpotKeyword(keyword_to_search, test_words)
  if len(spotted_test_keywords):
    print(keyword_to_search)
    print(spotted_test_keywords)
    for keyword in spotted_test_keywords:
      print(keyword.distance_to_closest_train_word)
      keyword.image.show()
    input()

I-n-s-t-r-u-c-t-i-o-n-s-s_pt
[<word.TestWord object at 0x7fc6dec9d880>]
O-r-d-e-r-s
[<word.TestWord object at 0x7fc6dec9d0a0>, <word.TestWord object at 0x7fc6dec9d820>, <word.TestWord object at 0x7fc6dec9d940>]


In [None]:
# TODO: write output file kws.csv (slide 24)