In [1]:
import bisect
import collections
import cv2
import itertools as it
import numpy as np
import operator as op
import os
import pickle
import random

def ilen(iterable) -> int:
    """Return the number of items produced by ``iterable``.

    The iterable is consumed in the process, so a one-shot iterator (such as
    a generator) is exhausted once this function returns.
    """
    return sum(1 for _ in iterable)

def show_and_wait(image):
    """Display ``image`` in an OpenCV window and wait for a key press.

    The window is not closed here; the calling cells follow up with
    ``cv2.destroyAllWindows()``.
    """
    window_name = 'tesst'
    cv2.imshow(window_name, image)
    cv2.waitKey()

In [2]:
# The hero names.  Each one is also the prefix of the file names of that hero's pictures.
file_titles = [
    'Ancient Apparition',
    'Anti-Mage',
    'Broodmother',
    'Centaur Warrunner',
    'Clinkz',
    'Io',
    'Juggernaut',
    'Keeper of the Light',
    "Nature's Prophet",
    'Nyx Assassin',
    'Outworld Devourer',
    'Queen of Pain',
]

# The distinct characters, ignoring case, that occur in the hero names.
s = set(''.join(file_titles).lower())
print(len(file_titles), ''.join(sorted(s)), len(s))

12  '-abcdefghijklmnopqrstuvwxyz 29


Here is a process for determining the letters in the images.

1. Create a mean image for each hero.  A mean image is the mean of all available images for a hero.
1. Find the vertical extents of the text in the images.  These are the extents of the line of text as a whole, not of the individual letters.  Adjust for the "Q" descender in "Queen of Pain".
1. Crop the images to those vertical extents for the rest of the process.
1. Find the horizontal extents of the text in the images.  These are not the horizontal extents of the individual letters; they are the left edge of the left-most letter and the right edge of the right-most letter.
1. Crop the images to those horizontal extents for the rest of the process.
1. Find the horizontal extents of the letters.  Note that there are three instances of letters having overlapping bright pixels due to kerning:  "TA" and "WA" in "CENTAUR WARRUNNER" and "AT" in "NATURE'S PROPHET".
1. Extract the letter images for each Hero using the uncropped mean images.
1. Create a mask for each letter.
1. Save a dictionary that maps each letter to a tuple of (letter image, mask) 2-tuples.

In [3]:
# Create a mean image for each hero.  A mean image is the mean of all available images for a hero.
directory_path = r'F:\Dota 2\Heroes\Pictures'
def fn(file_title):
    """Return the mean grey image of all pictures whose file name starts with ``file_title``.

    Every matching picture in ``directory_path`` is reduced to a single color
    channel, the pictures are averaged pixel by pixel, and the mean is scaled
    by 1/255.

    Raises:
        FileNotFoundError: if ``directory_path`` cannot be listed or holds no
            file whose name starts with ``file_title``.
        OSError: if OpenCV cannot read one of the matching files.
    """
    def read_grey(file_name: str):
        # Read the image.  cv2.imread signals failure by returning None instead of raising.
        file_path = os.path.join(directory_path, file_name)
        image = cv2.imread(file_path)
        if image is None:
            raise OSError(f'Could not read image: {file_path}')

        # Color data adds no value to this methodology.
        # Take the color channel with the lowest total value.  This drops the channel axis,
        # changing the shape of the image from (rows, columns, 3) to (rows, columns).
        return image[:, :, np.argmin(np.sum(image.astype(np.float32), axis=(0, 1)))]

    # List the files in the Pictures directory.  os.walk yields nothing for a directory it
    # cannot list, so supply a default rather than leaking StopIteration.
    walk_entry = next(os.walk(directory_path), None)
    if walk_entry is None:
        raise FileNotFoundError(f'Cannot list directory: {directory_path}')
    _, _, file_names = walk_entry

    # Read and grey all files for the given file title.
    images = [read_grey(file_name) for file_name in file_names if file_name.startswith(file_title)]
    if not images:
        raise FileNotFoundError(f'No pictures starting with {file_title!r} in {directory_path}')

    # Combine the list of two-dimensional tensors into a single three-dimensional tensor.
    images = np.stack(images, axis=0)

    # Compute the mean image.
    mean = np.mean(images.astype(np.float32), axis=0) / 255

    # Clean up the bottom edge: replace the last row with the average of the second-to-last row
    # and the first row.
    mean[-1, :] = (mean[-2, :] + mean[0, :]) / 2
    return mean
# Build the mean image of every hero, keyed by hero name.
hero_mean_images = dict(zip(file_titles, map(fn, file_titles)))

# Show the hero mean images stacked on top of each other.
show_and_wait(np.vstack(list(hero_mean_images.values())))
cv2.destroyAllWindows()

# Summarize each mean image: minimum, mean, maximum, dtype and shape.
[
    (mean_image.min(), mean_image.mean(), mean_image.max(), mean_image.dtype, mean_image.shape)
    for mean_image in hero_mean_images.values()
]

[(0.060271494, 0.2981687, 0.99004525, dtype('float32'), (16, 132)),
 (0.06426943, 0.3001968, 0.98988986, dtype('float32'), (16, 132)),
 (0.049870513, 0.29900852, 0.98373413, dtype('float32'), (16, 132)),
 (0.05625996, 0.28853938, 0.9888727, dtype('float32'), (16, 132)),
 (0.077493325, 0.295497, 0.98817015, dtype('float32'), (16, 132)),
 (0.06768066, 0.24298523, 0.9718038, dtype('float32'), (16, 132)),
 (0.04792157, 0.28921297, 0.9782614, dtype('float32'), (16, 132)),
 (0.06731576, 0.323225, 0.9854226, dtype('float32'), (16, 132)),
 (0.06050024, 0.30146062, 0.98660016, dtype('float32'), (16, 132)),
 (0.0571634, 0.30412787, 0.9762876, dtype('float32'), (16, 132)),
 (0.039521433, 0.29492047, 0.9854836, dtype('float32'), (16, 132)),
 (0.058372617, 0.3034439, 0.98471, dtype('float32'), (16, 132))]

In [4]:
# Find the vertical extents of the text in the images.  These are the extents of the line of
# text as a whole, not of the individual letters.  Adjust for the "Q" descender in
# "Queen of Pain".
d = {}
for name in file_titles:
    # The maximum value of each row of the hero's mean image.
    row_maxima = np.max(hero_mean_images[name], axis=1)

    # Rows whose maximum falls below the mean of the row maxima are not in a letter.  I need
    # this for the mean image since it's not black outside of the letters.
    is_background = row_maxima < np.mean(row_maxima)

    # The rows at which the image switches between background and text.  There must be exactly
    # two of them: the top and the bottom of the text.
    a, b = (np.flatnonzero(is_background[:-1] ^ is_background[1:]) + 1).tolist()

    # Adjust the vertical extents to account for the dark borders around the letters.  I want
    # to include those as part of the letters.  The "Q" of "Queen of Pain" needs special
    # handling because it has a descender.  For now, I'll adjust as if to ignore the "Q".
    d[name] = (a - 1, b + (1 if name == 'Queen of Pain' else 2))
hero_vertical_extents = d
print(hero_vertical_extents)

# Note that considering the standard deviations of the values in each row as a different means of
# obtaining the vertical extents results in the same extents as those obtained here.

# Show those values as white pixels to the right of the letters.  Offset the extents to ease
# inspecting the shown image.
def fn(image, t):
    """Return a copy of ``image`` with the right half whitened outside of the rows ``t``.

    ``t`` is a ``(top, bottom)`` pair.  The rows above ``top`` and the rows
    from ``bottom`` downwards are set to 1.0 in the right half of the copy;
    ``image`` itself is left untouched.
    """
    top, bottom = t
    marked = image.copy()
    half_width = marked.shape[1] // 2
    for rows in (slice(None, top), slice(bottom, None)):
        marked[rows, half_width:] = 1.0
    return marked
# Annotate every mean image with its extents, then stack and show them.
image = np.vstack([
    fn(mean_image, extents)
    for mean_image, extents in zip(hero_mean_images.values(), d.values())
])
show_and_wait(image)
cv2.destroyAllWindows()

# Report the extents as rows of the stacked image (the offset of 16 per hero presumably is the
# height of one mean image) to ease locating them in the shown image.
{
    name: (16 * index + extents[0], 16 * index + extents[1], '' if len(extents) == 2 else 'not 2')
    for index, (name, extents) in enumerate(d.items())
}

{'Ancient Apparition': (6, 14), 'Anti-Mage': (5, 14), 'Broodmother': (5, 14), 'Centaur Warrunner': (6, 14), 'Clinkz': (5, 14), 'Io': (5, 14), 'Juggernaut': (5, 14), 'Keeper of the Light': (6, 14), "Nature's Prophet": (5, 14), 'Nyx Assassin': (5, 14), 'Outworld Devourer': (6, 14), 'Queen of Pain': (5, 14)}


{'Ancient Apparition': (6, 14, ''),
 'Anti-Mage': (21, 30, ''),
 'Broodmother': (37, 46, ''),
 'Centaur Warrunner': (54, 62, ''),
 'Clinkz': (69, 78, ''),
 'Io': (85, 94, ''),
 'Juggernaut': (101, 110, ''),
 'Keeper of the Light': (118, 126, ''),
 "Nature's Prophet": (133, 142, ''),
 'Nyx Assassin': (149, 158, ''),
 'Outworld Devourer': (166, 174, ''),
 'Queen of Pain': (181, 190, '')}

In [5]:
# Crop the images to those vertical extents for the rest of the process.  This does not adversely
# affect "Queen of Pain" since the descender of its "Q" is still visible.
vertically_cropped_hero_mean_images = {}
for name, mean_image in hero_mean_images.items():
    top, bottom = hero_vertical_extents[name]
    vertically_cropped_hero_mean_images[name] = mean_image[top:bottom, :]

# Show the cropped images stacked on top of each other.
image = np.vstack(list(vertically_cropped_hero_mean_images.values()))
show_and_wait(image)
cv2.destroyAllWindows()

In [6]:
# Find the horizontal extents of the text in the images.  This is not the horizontal extents of
# the letters.  This is the left edge of the left-most letter and the right edge of the
# right-most letter.

# Flag the columns whose values vary significantly along the vertical axis.  With the current
# limit, this also captures the boundaries of the words.
d = {
    name: np.std(cropped, axis=0) > 0.011
    for name, cropped in vertically_cropped_hero_mean_images.items()
}

def fn(name, bools):
    """Return a copy of the hero's vertically-cropped mean image with ``bools`` drawn in its last row.

    A true entry becomes a white pixel (1.0) and a false entry a black pixel
    (0.0) in the corresponding column of the bottom row.
    """
    image = vertically_cropped_hero_mean_images[name].copy()
    image[-1, :len(bools)] = np.where(bools, 1.0, 0.0)
    return image

show_and_wait(np.vstack([fn(name, flags) for name, flags in d.items()]))
cv2.destroyAllWindows()

# Take the first and last change to constitute the left and right edge of each image.
def fn(bools):
    """Return ``(left, right)`` so that ``bools[left:right]`` spans the first through the last true entry.

    ``left`` is the index of the first true entry and ``right`` is one past
    the index of the last true entry.  Both are plain Python ints.

    Raises:
        ValueError: if ``bools`` has no true entry.  (Previously this surfaced
            as a bare ``StopIteration`` from ``next()``.)
    """
    true_indices = np.flatnonzero(np.asarray(bools, dtype=bool))
    if true_indices.size == 0:
        raise ValueError('no true entry found, so there are no edges to report')
    return int(true_indices[0]), int(true_indices[-1]) + 1
# Map each hero to the (left, right) columns that bound its text; the right column is exclusive.
hero_horizontal_extents = d = {name: fn(flags) for name, flags in d.items()}
hero_horizontal_extents

{'Ancient Apparition': (17, 117),
 'Anti-Mage': (36, 98),
 'Broodmother': (25, 109),
 'Centaur Warrunner': (16, 118),
 'Clinkz': (47, 87),
 'Io': (61, 74),
 'Juggernaut': (30, 104),
 'Keeper of the Light': (17, 118),
 "Nature's Prophet": (15, 119),
 'Nyx Assassin': (28, 106),
 'Outworld Devourer': (17, 117),
 'Queen of Pain': (24, 110)}

In [7]:
# Crop the images to those horizontal extents for the rest of the process.
fully_cropped_hero_mean_images = {}
for name, cropped in vertically_cropped_hero_mean_images.items():
    left, right = hero_horizontal_extents[name]
    fully_cropped_hero_mean_images[name] = cropped[:, left:right]
fully_cropped_max_width = max(cropped.shape[1] for cropped in fully_cropped_hero_mean_images.values())

# The cropped images differ in width, so pad each one with black on the right before stacking.
rows = []
for cropped in fully_cropped_hero_mean_images.values():
    padding = np.zeros((cropped.shape[0], fully_cropped_max_width - cropped.shape[1]))
    rows.append(np.hstack([cropped, padding]))
image = np.vstack(rows)
show_and_wait(image)
cv2.destroyAllWindows()

In [8]:
# Find the horizontal extents of the letters.  Note that there are three instances of letters
# having overlapping bright pixels due to kerning:  "TA" and "WA" in "CENTAUR WARRUNNER" and "AT"
# in "NATURE'S PROPHET".

# Find the maximum of each column for each hero's fully-cropped mean image.  Each value of the
# dictionary is a list with one maximum per column.
hero_maxima = d = {
    name: list(np.max(cropped, axis=0))
    for name, cropped in fully_cropped_hero_mean_images.items()
}

# Assume maximum values below a threshold represent the spaces between the letters.  Each value
# is a list of flags, one per column, that is true where the column is taken to be a space.
hero_spaces = {
    name: [maximum < .312 for maximum in maxima]
    for name, maxima in hero_maxima.items()
}

# Show those values as white pixels in the bottom row of the images.
def fn(image, l):
    """Return a copy of ``image`` whose last row is white in every column flagged in ``l``.

    ``l`` holds one flag per column.  Columns with a false flag keep their
    original values and ``image`` itself is left untouched.
    """
    marked = image.copy()
    flagged_columns = np.asarray([column for column, flag in enumerate(l) if flag], dtype=np.intp)
    marked[-1, flagged_columns] = 1.0
    return marked
# Mark the spaces on every image lazily, then pad each image to the common width and show them.
g = map(fn, fully_cropped_hero_mean_images.values(), hero_spaces.values())
show_and_wait(np.vstack([
    np.hstack([marked, np.zeros((marked.shape[0], fully_cropped_max_width - marked.shape[1]))])
    for marked in g
]))
cv2.destroyAllWindows()

# Determine the horizontal letter extents.
def fn(name, l):
    """Return the horizontal letter extents of a hero as a list of ``[left, right]`` lists.

    ``l`` is the hero's list of per-column space flags (true where the column
    is taken to be a space).  NOTE: the leading and trailing runs of true
    flags are cleared in the caller's list, i.e. ``l`` is mutated in place.

    The columns refer to the hero's fully-cropped mean image, whose width is
    looked up through ``name``.

    Raises:
        AssertionError: if the number of boundaries found is odd, so they
            cannot be paired up into (left, right) extents.
    """
    # Clear the space flags at the beginning of the image so that the first letter starts at
    # column 0.  The list is modified while takewhile is reading it; that is safe here because
    # only the element that was already consumed is changed.
    for i, _ in enumerate(it.takewhile(lambda v: v, l)):
        l[i] = False

    # Clear the space flags at the end of the image.  ~i indexes from the end: ~0 is -1, etc.
    for i, _ in enumerate(it.takewhile(lambda v: v, reversed(l))):
        l[~i] = False

    # Find the columns at which the flags change (letter -> space or space -> letter), and
    # bracket them with the left and right edges of the image.  The generators are lazy; they
    # run when list(g) is evaluated.  it.pairwise requires Python 3.10 or later.
    g = (a ^ b for a, b in it.pairwise(l))
    g = (i + 1 for i, b in enumerate(g) if b)
    g = it.chain([0], g, [fully_cropped_hero_mean_images[name].shape[1]])
    l = list(g)
    if len(l) % 2:
        raise AssertionError()

    # l now alternates letter starts (even indices) and letter ends (odd indices).  For each
    # gap between the end of one letter, l[i], and the start of the next, l[j], widen the
    # letters into the gap: the end moves right by 1 (by 2 when the gap is exactly 3 columns)
    # and the start moves left by 1 when the gap is wider than 1 column.  Gaps of 1 to 3 columns
    # are thereby closed completely; wider gaps shrink by 2 columns.
    # NOTE(review): presumably this is to include the dark borders of the letters -- confirm.
    for i in range(1, len(l) - 1, 2):
        j = i + 1
        diff = l[j] - l[i]
        if diff:
            l[i] += 2 if 2 < diff < 4 else 1
            l[j] -= 1 if diff > 1 else 0

    # Pair up the boundaries: zipping an iterator with itself yields (l[0], l[1]), (l[2], l[3]), ...
    g = iter(l)
    g = zip(g, g)

    # Convert the elements from tuples to lists since I need to apply fix-ups.
    return [list(t) for t in g]
# Compute the letter extents of every hero.  Note that fn also clears the leading and trailing
# space flags in the lists of hero_spaces.
hero_horizontal_letter_extents = {k: fn(k, v) for k, v in hero_spaces.items()}

# Add extents for the kerned pairs.  The letters of a kerned pair have no space between them, so
# they were found as a single extent; split each such extent at a hand-picked column.
# "CENTAUR WARRUNNER": extent 3 is "TA" (split at column 23) and extent 6 is "WA" (split at
# column 52).  The indices refer to the list before any split.
l = hero_horizontal_letter_extents['Centaur Warrunner']
l = l[:3] + [[l[3][0], 23], [23, l[3][1]]] + l[4:6] + [[l[6][0], 52], [52, l[6][1]]] + l[7:]
hero_horizontal_letter_extents['Centaur Warrunner'] = l
# "NATURE'S PROPHET": extent 1 is "AT" (split at column 15).
l = hero_horizontal_letter_extents["Nature's Prophet"]
l = l[:1] + [[l[1][0], 15], [15, l[1][1]]] + l[2:]
hero_horizontal_letter_extents["Nature's Prophet"] = l

# Add other fix-ups.  These are hand-tuned one-column adjustments; the indices refer to the
# lists after the kerned pairs were split, i.e. one extent per non-space character.
# The "W" of "CENTAUR WARRUNNER"; its extent then overlaps that of the following "A" by a column.
hero_horizontal_letter_extents['Centaur Warrunner'][7][1] += 1 # This will require an image fix-up.
# The "F" of "KEEPER OF THE LIGHT".
hero_horizontal_letter_extents['Keeper of the Light'][7][1] += 1
# The first "N", the first "A", the "S" and the "O" of "NATURE'S PROPHET".
hero_horizontal_letter_extents["Nature's Prophet"][0][1] += 1
hero_horizontal_letter_extents["Nature's Prophet"][1][0] += 1
hero_horizontal_letter_extents["Nature's Prophet"][7][1] += 1
hero_horizontal_letter_extents["Nature's Prophet"][10][1] += 1

# Convert the elements and their sub-elements to tuples.
hero_horizontal_letter_extents = {k: tuple(tuple(l) for l in v) for k, v in hero_horizontal_letter_extents.items()}

def fn(name, extents):
    """Return the hero's fully-cropped mean image with a two-row ruler of its letter extents below.

    Consecutive extents are drawn alternately in the first and in the second
    row of the ruler, so that adjoining letters can be told apart.
    """
    cropped = fully_cropped_hero_mean_images[name]
    ruler = np.zeros((2, cropped.shape[1]))
    for index, (left, right) in enumerate(extents):
        ruler[index & 1, left:right] = 1.0
    return np.vstack([cropped, ruler])

# Annotate every hero lazily, then pad each image to the common width and show them.
g = it.starmap(fn, hero_horizontal_letter_extents.items())
show_and_wait(np.vstack([
    np.hstack([annotated, np.zeros((annotated.shape[0], fully_cropped_max_width - annotated.shape[1]))])
    for annotated in g
]))
cv2.destroyAllWindows()
hero_horizontal_letter_extents

{'Ancient Apparition': ((0, 7),
  (7, 13),
  (13, 19),
  (19, 23),
  (23, 28),
  (28, 35),
  (35, 41),
  (43, 50),
  (50, 56),
  (56, 61),
  (61, 67),
  (67, 73),
  (73, 77),
  (77, 83),
  (83, 86),
  (86, 93),
  (93, 100)),
 'Anti-Mage': ((0, 8),
  (8, 15),
  (15, 23),
  (23, 27),
  (27, 32),
  (32, 40),
  (40, 48),
  (48, 55),
  (55, 62)),
 'Broodmother': ((0, 6),
  (6, 14),
  (14, 22),
  (22, 30),
  (30, 38),
  (38, 47),
  (47, 56),
  (56, 63),
  (63, 70),
  (70, 77),
  (77, 84)),
 'Centaur Warrunner': ((0, 6),
  (6, 11),
  (11, 18),
  (18, 23),
  (23, 30),
  (30, 35),
  (35, 42),
  (44, 53),
  (52, 59),
  (59, 65),
  (65, 71),
  (71, 77),
  (77, 84),
  (84, 90),
  (90, 95),
  (95, 102)),
 'Clinkz': ((0, 7), (7, 13), (13, 17), (17, 25), (25, 33), (33, 40)),
 'Io': ((0, 4), (4, 13)),
 'Juggernaut': ((0, 6),
  (6, 14),
  (14, 21),
  (21, 29),
  (29, 35),
  (35, 43),
  (43, 51),
  (51, 59),
  (59, 66),
  (66, 74)),
 'Keeper of the Light': ((0, 6),
  (6, 11),
  (11, 17),
  (17, 23),
  (

In [9]:
# Extract the letter images for each Hero using the uncropped mean images.

def fn(name, text_left):
    """Return the list of letter images of a hero, cut from its uncropped mean image.

    ``text_left`` is the column of the uncropped mean image at which the text
    starts; the hero's letter extents are relative to that column.  Each
    letter image spans all rows of the mean image.

    The letter images are independent copies.  A plain slice would be a view
    of the mean image, so the "W" fix-up below would also write into
    ``hero_mean_images`` and into the "A" that follows the "W", whose extent
    shares a column with it.
    """
    image = hero_mean_images[name]
    letter_images = [
        image[:, text_left + left:text_left + right].copy()
        for left, right in hero_horizontal_letter_extents[name]
    ]

    # Fix up the "W" in "CENTAUR WARRUNNER": in its last column, overwrite the pixel three rows
    # above the bottom extent with the pixel in the row below it.
    if name == 'Centaur Warrunner':
        row = hero_vertical_extents[name][1] - 3
        letter_images[7][row, -1] = letter_images[7][row + 1, -1]

    return letter_images
# Extract the letter images of every hero; the text of a hero starts at its left extent.
d = {name: fn(name, extents[0]) for name, extents in hero_horizontal_extents.items()}

# Verify the count of the letter images: one per character of the name, not counting spaces.
g = ((len(name.replace(' ', '')), len(letter_images)) for name, letter_images in d.items())
if not all(expected == actual for expected, actual in g):
    raise AssertionError()

# Organize the letter images by letter, segregating short and tall letters by using lower- and
# upper-case letters, respectively, as the key.
hero_letter_images = d
letters_by_key = collections.defaultdict(list)
for hero, letter_images in hero_letter_images.items():
    # Heroes whose text starts at row 6 are keyed by lower case, all others by upper case.
    recase = str.lower if hero_vertical_extents[hero][0] == 6 else str.upper
    characters = (recase(c) for c in hero if c != ' ')
    for character, letter_image in zip(characters, letter_images):
        letters_by_key[character].append(letter_image)

# Order the dictionary by key.  Within a key, the images stay in hero order.
d = {key: letters_by_key[key] for key in sorted(letters_by_key)}

# Show one row of images per key, padded with black on the right to the width of the widest row.
row_widths = [sum(letter_image.shape[1] for letter_image in images) for images in d.values()]
max_width = max(row_widths)
l = [
    np.hstack(images + [np.zeros((images[0].shape[0], max_width - row_width))])
    for images, row_width in zip(d.values(), row_widths)
]
show_and_wait(np.vstack(l))
cv2.destroyAllWindows()
hero_letter_images = d

# Report the number of images and their widths for each key.
{
    key: (len(images), ' '.join(str(letter_image.shape[1]) for letter_image in images))
    for key, images in hero_letter_images.items()
}

{"'": (1, '3'),
 '-': (1, '5'),
 'A': (7, '8 8 8 7 9 7 7'),
 'B': (1, '6'),
 'C': (1, '7'),
 'D': (1, '8'),
 'E': (7, '7 7 6 6 6 6 7'),
 'F': (1, '6'),
 'G': (3, '7 7 8'),
 'H': (2, '7 8'),
 'I': (5, '4 4 4 4 4'),
 'J': (1, '6'),
 'K': (1, '8'),
 'L': (1, '6'),
 'M': (2, '8 9'),
 'N': (8, '7 8 8 8 8 9 8 9'),
 'O': (6, '8 8 9 9 9 9'),
 'P': (3, '6 7 7'),
 'Q': (1, '10'),
 'R': (5, '8 7 8 7 7'),
 'S': (5, '7 6 6 6 6'),
 'T': (5, '8 7 8 7 7'),
 'U': (4, '8 7 7 7'),
 'X': (1, '8'),
 'Y': (1, '7'),
 'Z': (1, '7'),
 'a': (5, '7 7 6 7 7'),
 'c': (2, '6 6'),
 'd': (2, '6 7'),
 'e': (9, '5 5 5 5 6 5 5 4 5'),
 'f': (1, '6'),
 'g': (1, '6'),
 'h': (2, '7 7'),
 'i': (4, '4 4 3 4'),
 'k': (1, '6'),
 'l': (2, '5 5'),
 'n': (6, '6 7 7 7 7 6'),
 'o': (5, '7 8 7 7 6'),
 'p': (3, '6 5 6'),
 'r': (9, '6 7 6 6 7 6 6 5 6'),
 't': (6, '6 6 5 7 6 5'),
 'u': (4, '5 6 7 7'),
 'v': (1, '7'),
 'w': (2, '9 8')}

In [10]:
# Create a mask for each letter.

# The shifted cube root looks good.
def linear(v):
    """Identity transfer function: return ``v`` unchanged."""
    return v
def sqrt(v):
    """Square-root transfer function."""
    root = np.sqrt(v)
    return root
def curt(v):
    """Cube-root transfer function, computed as ``v`` raised to the power 1/3."""
    return v ** (1 / 3)
def sigmoid(v):
    """Logistic transfer function, ``1 / (1 + exp(-v))``."""
    denominator = 1 + np.exp(-v)
    return 1 / denominator
def adjusted_sigmoid(v):
    """Sigmoid of ``v - 0.125``, rescaled linearly so that the result spans 0 to 1."""
    squashed = sigmoid(v - 0.125)
    lowest = np.min(squashed)
    highest = np.max(squashed)
    return (squashed - lowest) / (highest - lowest)
def shifted_sqrt(v):
    """Square root of ``v`` after shifting it down by 0.2 and clamping at 0.

    The clamped values are divided by 0.8 before the root is taken, so an
    input of 1.0 maps to 1.0.  ``v`` itself is not modified.
    """
    shifted = v.copy()
    shifted -= 0.2
    np.maximum(shifted, 0, out=shifted)
    return np.sqrt(shifted / 0.8)
def shifted_curt(v):
    """Cube root of ``v`` after shifting it down by 0.1 and clamping at 0.

    The clamped values are divided by 0.9 before the root is taken, so an
    input of 1.0 maps to 1.0.  ``v`` itself is not modified.
    """
    shifted = v.copy()
    shifted -= 0.1
    np.maximum(shifted, 0, out=shifted)
    return (shifted / 0.9) ** (1 / 3)

# Create a mask from the distance of each pixel from the grey background, which is estimated
# from the pixels in rows 2 and 3 (zero-based).  Do this for each image since their grey
# backgrounds are different from each other.
def fn(image):
    """Return the float32 mask of a letter image.

    The absolute distance of each pixel from the background mean is divided
    by the background standard deviation, normalized by its maximum, and
    passed through ``shifted_curt``.
    """
    background = image[2:4, :]
    # NOTE(review): the division by the standard deviation cancels in the normalization by the
    # maximum that follows, up to rounding, so the mask depends only on the distance from the
    # background mean.
    deviation = np.abs(image - np.mean(background)) / np.std(background)
    normalized = deviation / np.max(deviation)
    return shifted_curt(normalized).astype(np.float32)
# Create the masks of all letter images, keyed like the letter images.
d = {key: [fn(letter_image) for letter_image in images] for key, images in hero_letter_images.items()}

# Show one row of masks per key, padded with black on the right to the common width.
l = []
for masks in d.values():
    row = np.hstack(masks)
    filler = np.zeros((row.shape[0], max_width - row.shape[1]))
    l.append(np.hstack([row, filler]))
show_and_wait(np.vstack(l))
cv2.destroyAllWindows()
hero_letter_masks = d

# Report the number of masks and their widths for each key.
{
    key: (len(masks), ' '.join(str(mask.shape[1]) for mask in masks))
    for key, masks in hero_letter_masks.items()
}

{"'": (1, '3'),
 '-': (1, '5'),
 'A': (7, '8 8 8 7 9 7 7'),
 'B': (1, '6'),
 'C': (1, '7'),
 'D': (1, '8'),
 'E': (7, '7 7 6 6 6 6 7'),
 'F': (1, '6'),
 'G': (3, '7 7 8'),
 'H': (2, '7 8'),
 'I': (5, '4 4 4 4 4'),
 'J': (1, '6'),
 'K': (1, '8'),
 'L': (1, '6'),
 'M': (2, '8 9'),
 'N': (8, '7 8 8 8 8 9 8 9'),
 'O': (6, '8 8 9 9 9 9'),
 'P': (3, '6 7 7'),
 'Q': (1, '10'),
 'R': (5, '8 7 8 7 7'),
 'S': (5, '7 6 6 6 6'),
 'T': (5, '8 7 8 7 7'),
 'U': (4, '8 7 7 7'),
 'X': (1, '8'),
 'Y': (1, '7'),
 'Z': (1, '7'),
 'a': (5, '7 7 6 7 7'),
 'c': (2, '6 6'),
 'd': (2, '6 7'),
 'e': (9, '5 5 5 5 6 5 5 4 5'),
 'f': (1, '6'),
 'g': (1, '6'),
 'h': (2, '7 7'),
 'i': (4, '4 4 3 4'),
 'k': (1, '6'),
 'l': (2, '5 5'),
 'n': (6, '6 7 7 7 7 6'),
 'o': (5, '7 8 7 7 6'),
 'p': (3, '6 5 6'),
 'r': (9, '6 7 6 6 7 6 6 5 6'),
 't': (6, '6 6 5 7 6 5'),
 'u': (4, '5 6 7 7'),
 'v': (1, '7'),
 'w': (2, '9 8')}

In [11]:
# Save a dictionary of letters to tuples of image-mask 2-tuples.
if hero_letter_images.keys() != hero_letter_masks.keys():
    raise AssertionError('The letter images and the letter masks do not cover the same letters.')

# Pair each letter image with its mask.  strict=True (Python 3.10+) raises ValueError when a
# letter has a different number of images and masks; a plain zip would silently drop the excess.
d = {k: tuple(zip(hero_letter_images[k], v, strict=True)) for k, v in hero_letter_masks.items()}
with open(r"F:\Dota 2\Heroes\letters.pickle", 'bw') as fout:
    pickle.dump(d, fout)

# Report the number of pairs and the type and shape of the members of each pair.
{k: (len(v), [(type(a), a.shape, type(b), b.shape) for a, b in v]) for k, v in d.items()}

{"'": (1, [(numpy.ndarray, (16, 3), numpy.ndarray, (16, 3))]),
 '-': (1, [(numpy.ndarray, (16, 5), numpy.ndarray, (16, 5))]),
 'A': (7,
  [(numpy.ndarray, (16, 8), numpy.ndarray, (16, 8)),
   (numpy.ndarray, (16, 8), numpy.ndarray, (16, 8)),
   (numpy.ndarray, (16, 8), numpy.ndarray, (16, 8)),
   (numpy.ndarray, (16, 7), numpy.ndarray, (16, 7)),
   (numpy.ndarray, (16, 9), numpy.ndarray, (16, 9)),
   (numpy.ndarray, (16, 7), numpy.ndarray, (16, 7)),
   (numpy.ndarray, (16, 7), numpy.ndarray, (16, 7))]),
 'B': (1, [(numpy.ndarray, (16, 6), numpy.ndarray, (16, 6))]),
 'C': (1, [(numpy.ndarray, (16, 7), numpy.ndarray, (16, 7))]),
 'D': (1, [(numpy.ndarray, (16, 8), numpy.ndarray, (16, 8))]),
 'E': (7,
  [(numpy.ndarray, (16, 7), numpy.ndarray, (16, 7)),
   (numpy.ndarray, (16, 7), numpy.ndarray, (16, 7)),
   (numpy.ndarray, (16, 6), numpy.ndarray, (16, 6)),
   (numpy.ndarray, (16, 6), numpy.ndarray, (16, 6)),
   (numpy.ndarray, (16, 6), numpy.ndarray, (16, 6)),
   (numpy.ndarray, (16, 6),