In [14]:
import os
import exercise3_config as config


def ExtractImagePaths(text_file_path):
  """Builds the image numbers and file paths listed in a split file.

  Every non-blank line of the file is parsed as an integer image number.
  Blank (whitespace-only) lines, such as a trailing empty line, are skipped.

  Args:
    text_file_path: path to a text file with one integer image number per line.

  Returns:
    A tuple of three lists of the same size n:
      - image numbers as strings, normalized through int() ('007' -> '7')
      - paths to the original jpg images under config.DATA_ROOT_DIR
      - paths to the svg masks under config.DATA_ROOT_DIR

  Raises:
    ValueError: if a non-blank line cannot be parsed as an integer.
  """
  images_numbers = []
  jpg_paths = []
  svg_paths = []

  with open(text_file_path) as file:
    for line in file:
      # int() raises on whitespace-only input, so skip such lines explicitly.
      if not line.strip():
        continue

      # Parse once; int() tolerates the newline and surrounding whitespace.
      image_number = str(int(line))
      images_numbers.append(image_number)
      jpg_paths.append(
          os.path.join(config.DATA_ROOT_DIR, 'images/' + image_number + '.jpg'))
      svg_paths.append(
          os.path.join(config.DATA_ROOT_DIR, 'ground-truth/locations/' + image_number + '.svg'))

  return images_numbers, jpg_paths, svg_paths


# Resolve image numbers, jpg paths and svg mask paths for both splits.
# Note: the "test" variables are populated from task/valid.txt.
(train_images_numbers,
 train_jpg_paths,
 train_svg_paths) = ExtractImagePaths(
     os.path.join(config.DATA_ROOT_DIR, 'task/train.txt'))
(test_images_numbers,
 test_jpg_paths,
 test_svg_paths) = ExtractImagePaths(
     os.path.join(config.DATA_ROOT_DIR, 'task/valid.txt'))

In [None]:
import cv2 as cv
import numpy as np
from PIL import Image
from image_preprocessing import CropAllWordImages


# TODO: remove this, apply OTSU. This method is here now just for the sake of testing the whole pipeline.
def ApplyKMeansClusteringToImageFile(jpg_image_filename, k=2):
  """Applies KMeans color clustering to an image file.

  Every pixel of the loaded image is replaced by the center of the cluster it
  was assigned to, so the result contains at most k distinct colors.

  Note: the centers are initialized with cv.KMEANS_RANDOM_CENTERS, so results
  may differ between runs.

  Args:
    jpg_image_filename: path of the image to load with OpenCV.
    k: number of clusters (defaults to 2).

  Returns:
    A PIL.Image in RGB channel order with the same shape as the loaded image.

  Raises:
    IOError: if OpenCV could not read the image file.
  """
  original_image = cv.imread(jpg_image_filename)
  # cv.imread reports failure by returning None rather than raising.
  if original_image is None:
    raise IOError('Could not read image file: ' + str(jpg_image_filename))

  # One row per pixel, one column per channel; cv.kmeans requires float32.
  pixels = np.float32(original_image.reshape((-1, 3)))

  # Define criteria, number of clusters and apply KMeans.
  # Stop after 10 iterations or once the centers move by less than 1.0;
  # the second 10 is the number of attempts with different initial centers.
  criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 10, 1.0)
  _, label, center = cv.kmeans(pixels, k, None, criteria, 10, cv.KMEANS_RANDOM_CENTERS)

  # Convert back into uint8 and map every pixel to its cluster center.
  center = np.uint8(center)
  final_image_pixels = center[label.flatten()].reshape(original_image.shape)

  # OpenCV loads images as BGR while PIL interprets 3-channel arrays as RGB,
  # so swap the channels before handing the array to PIL.
  return Image.fromarray(cv.cvtColor(final_image_pixels, cv.COLOR_BGR2RGB))


def ExtractWordImagesFromOriginalImage(original_image_path, mask_path):
  """Clusters the image at `original_image_path` and crops its word images.

  The image is first passed through ApplyKMeansClusteringToImageFile (default
  k), then handed to CropAllWordImages together with `mask_path`.

  Returns:
    Whatever CropAllWordImages returns for the clustered image and the mask.
  """
  return CropAllWordImages(
      ApplyKMeansClusteringToImageFile(original_image_path), mask_path)


# Crop the word images of every training page, keyed by image number.
train_words_per_image = {}
for image_number, jpg_path, svg_path in zip(
    train_images_numbers, train_jpg_paths, train_svg_paths):
  train_words_per_image[image_number] = ExtractWordImagesFromOriginalImage(jpg_path, svg_path)
print(train_words_per_image)

# Placeholder: the test words are not extracted here yet.
test_words_per_image = []

In [None]:
# Ground truth
# TODO: read from file (the TODO is in the transcription_reader.py file)

In [17]:
# Read the keywords file: one keyword per line, surrounding whitespace removed.
keywords_path = os.path.join(config.DATA_ROOT_DIR, 'task/keywords.txt')

keywords = []
with open(keywords_path) as keywords_file:
  keywords.extend(entry.strip() for entry in keywords_file)

['A-l-e-x-a-n-d-r-i-a', 'C-a-p-t-a-i-n', 'C-l-o-t-h-e-s', 'C-l-o-t-h-i-n-g', 'C-o-l-o-n-e-l', 'C-o-m-m-i-s_s-s-a-r-y', 'C-o-u-r-t', 'C-u-m-b-e-r-l-a-n-d', 'D-i-c-k', 'D-o-c-t-o-r', 'E-n-s-i-g-n', 'F-o-r-t', 'F-o-r-t-s_pt', 'F-r-e-d-e-r-i-c-k-s-b-u-r-g-h-s_cm', 'G-e-o-r-g-e', 'G-u-a-r-d', 'I-n-s-t-r-u-c-t-i-o-n-s-s_pt', 'J-o-h-n', 'L-e-t-t-e-r-s', 'L-i-e-u-t-e-n-a-n-t', 'M-a-j-o-r', 'M-r-s_pt', 'O-f-f-i-c-e-r-s', 'O-r-d-e-r', 'O-r-d-e-r-s', 'O-r-d-e-r-s-s_pt', 'P-a-r-o-l-e', 'R-e-c-r-u-i-t-s', 'R-e-g-i-m-e-n-t', 'R-e-g-i-m-e-n-t-s_pt', 'R-e-n-d-e-z-v-o-u-s', 'R-e-t-u-r-n', 'R-o-b-e-r-t', 'S-a-l-t', 'S-e-r-g-e-a-n-t', 'S-e-r-g-e-a-n-t-s_cm', 'S-h-i-r-t-s-s_cm', 'S-o-l-d-i-e-r-s', 'S-t-e-w-a-r-t-s_cm', 'S-t-o-r-e-s', 'S-u-i-t-s', 'V-i-r-g-i-n-i-a', 'W-a-g-g-o-n-s', 'W-a-s-h-i-n-g-t-o-n-s_cm', 'W-i-n-c-h-e-s-t-e-r', 'W-i-n-c-h-e-s-t-e-r-s_cm', 'W-i-n-c-h-e-s-t-e-r-s_qo', 'a-b-s-o-l-u-t-e-l-y', 'a-r-r-i-v-e', 'a-r-r-i-v-e-s_cm', 'a-t-e-l-y', 'c-a-m-p-s_pt', 'c-a-r-e', 'c-a-r-e-f-u-l', 'c-a-