# import

In [0]:
#! pip freeze
#! pip install keras==2.2.4

In [0]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [0]:
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from copy import deepcopy
import string
from PIL import Image, ImageDraw, ImageFont

# parameters

In [0]:
CHAR_VECTOR = string.ascii_letters + string.digits + '.'
letters = [letter for letter in CHAR_VECTOR]

num_classes = len(letters) + 1

img_w, img_h = 128, 32

# Network parameters
batch_size = 64
val_batch_size = 10

downsample_factor = 4
max_text_len = 16

# model

In [7]:
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda, BatchNormalization
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import LSTM
K.set_learning_phase(0) # keras test mode


Using TensorFlow backend.
W0905 05:48:52.591287 140364199167872 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.



In [0]:
# labels (samples, max_string_length)
# y_pred (samples, time_steps, num_categories)
# input_length (samples, 1) - y_pred sequences lens
# label_length (samples, 1) - y_true sequences lens

# # Loss and train functions, network architecture
def ctc_lambda_func(args):
    """Compute the per-sample CTC loss inside a Keras Lambda layer.

    Parameters
    ----------
    args: sequence of 4 tensors
    (y_pred, labels, input_length, label_length), in that order.

    Returns
    -------
    The result of K.ctc_batch_cost for the batch.
    """
    predictions, true_labels, pred_lengths, true_lengths = args
    # Drop the first two time steps: the first couple of RNN outputs tend to
    # be garbage, so they are excluded from the loss.
    trimmed_predictions = predictions[:, 2:, :]
    return K.ctc_batch_cost(true_labels, trimmed_predictions, pred_lengths, true_lengths)

  
  
# auto strides 2x2 in max_pool
# last padding same  [was 0]
# different input_size  [was img_w x 32]
# more batch_norm layers  [was 2]
# different CNN to RNN connector  [was Map-To-Sequence]
# auto flatten in dense
# auto batch dim in the layers
def get_Model(training):
    """Build the CRNN (CNN -> dense bottleneck -> 2 bidirectional LSTM stages -> softmax).

    Parameters
    ----------
    training: bool
    If truthy, return the training model: inputs are the image, the labels and
    both length tensors, and the single output is the CTC loss ('ctc' layer).
    Otherwise return the inference model: image in, per-step softmax out.

    Returns
    -------
    keras.models.Model
    """
    input_shape = (img_w, img_h, 1)     # (128, 32, 1) grayscale image, width-first

    # Make Network
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')  # (None, 128, 32, 1)

    # Convolution layer (VGG)
    inner = Conv2D(64, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(inputs)  # (None, 128, 32, 64)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)  # (None, 64, 16, 64)

    inner = Conv2D(128, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)  # (None, 64, 16, 128)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)  # (None, 32, 8, 128)

    inner = Conv2D(256, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)  # (None, 32, 8, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3), padding='same', name='conv4', kernel_initializer='he_normal')(inner)  # (None, 32, 8, 256)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    # From here on pooling is (1, 2): the first (time) axis stays at 32 steps,
    # only the second axis keeps shrinking.
    inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)  # (None, 32, 4, 256)

    inner = Conv2D(512, (3, 3), padding='same', name='conv5', kernel_initializer='he_normal')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    # NOTE(review): conv6 is the only conv layer without kernel_initializer='he_normal' - confirm intended.
    inner = Conv2D(512, (3, 3), padding='same', name='conv6')(inner)  # (None, 32, 4, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max4')(inner)  # (None, 32, 2, 512)

    # NOTE(review): layer is named 'con7' (not 'conv7'); left as-is.
    inner = Conv2D(512, (2, 2), padding='same', kernel_initializer='he_normal', name='con7')(inner)  # (None, 32, 2, 512)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    
    # CNN to RNN: fold the remaining 2 x 512 feature values into one vector per time step
    inner = Reshape(target_shape=((32, 1024)), name='reshape')(inner)  # (None, 32, 1024)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 32, 64)

    
    # RNN layer
    lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner)  # (None, 32, 256)
    lstm_1b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
    # The backward LSTM is reversed along the time axis before merging with the forward one.
    reversed_lstm_1b = Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1)) (lstm_1b)

    lstm1_merged = add([lstm_1, reversed_lstm_1b])  # (None, 32, 256) - element-wise sum
    lstm1_merged = BatchNormalization()(lstm1_merged)
    
    lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    reversed_lstm_2b= Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1)) (lstm_2b)

    lstm2_merged = concatenate([lstm_2, reversed_lstm_2b])  # (None, 32, 512) - 256 + 256 features
    lstm2_merged = BatchNormalization()(lstm2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal',name='dense2')(lstm2_merged) # (None, 32, num_classes)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32') # (None, max_text_len)
    input_length = Input(name='input_length', shape=[1], dtype='int64')     # (None, 1)
    label_length = Input(name='label_length', shape=[1], dtype='int64')     # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) #(None, 1)

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
    else:
        return Model(inputs=[inputs], outputs=y_pred)



# input data

## font paths

In [9]:
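# Font name -> font file path on the mounted Drive (calibri has since been added).
# NOTE: 'open_sans' is listed twice with the same path, and 'armino' / 'arimo' point to the same file.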
font_paths = {
    'roboto_mono': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/roboto-mono/RobotoMono-Regular.ttf',
    'arial': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/Arial/ArialRegular/ArialRegular.ttf',
    'armino': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/arimo/Arimo-Regular.ttf',
    'helvetica': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/helvetica/HelveticaRegular/HelveticaRegular.ttf',
    'open_sans': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/open-sans/OpenSans-Regular.ttf',
    'roboto2014': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/roboto-2014/Roboto-Regular.ttf',
    'times_new_roman': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/times-new-roman/times-new-roman.ttf',
    'calibri': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/calibri font sv/Calibri Regular/Calibri Regular.ttf',
    'courier': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/COURIER/COURIER.TTF',
    'lato': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/Lato2OFL/Lato2OFL/Lato-Regular.ttf',
    'shadows_into_light': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/Shadows-into-light_Typeface_1813_(Fontmirror)/Shadows Into Light 400.ttf',
    'abel': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/abel/abel-regular.ttf',
    'abril_fatface': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/abril-fatface/AbrilFatface-Regular.otf',
    'acme': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/acme/Acme-Regular.ttf',
    'amatic': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/amatic/Amatic-Bold.ttf',
    'anton': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/anton/Anton.ttf',
    'archivo': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/archivo/Archivo-Regular.ttf',
    'arimo': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/arimo/Arimo-Regular.ttf',
    'asap': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/asap_regular/asap.regular.ttf',
    'assistant': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/assistant/Assistant-Regular.otf',
    'barlow': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/barlow_regular/barlow.regular.ttf',
    'bitter': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/bitter/Bitter-Regular.ttf',
    'bree_serif': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/bree_serif/bree-serif.regular.ttf',
    'cabin': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/cabin/cabin.regular.ttf',
    'cairo': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/cairo/Cairo Medium.ttf',
    'catamaran': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/catamaran/Catamaran-Regular.ttf',
    'caveat': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/caveat/caveat-regular.ttf',
    'comfortaa': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/comfortaa/Comfortaa-Regular.ttf',
    'comic_sans': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/comic-sans-ms/COMIC.TTF',
    'crimson_text': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/crimson-text/CrimsonText-Roman.ttf',
    'dancing': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/dancing/dancing-script.regular.ttf',
    'dosis': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/dosis/dosis.book.ttf',
    'exo': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/exo/Exo-Regular.ttf',
    'firasans': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/firasans/FiraSans-Regular.otf',
    'fjalla_one': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/fjalla-one/FjallaOne-Regular.ttf',
    'garamond': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/garamond/Garamond.ttf',
    'georgia': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/georgia-2-cufonfonts/georgia.ttf',
    'hind': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/hind/Hind-Regular.ttf',
    'impact': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/impact/impact.ttf',
    'inconsolata': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/inconsolata/Inconsolata-Regular.ttf',
    'indie_flower': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/indie_flower_16818/IndieFlower.ttf',
    'josefin_sans': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/josefin-sans/JosefinSans-Regular.ttf',
    'kanit': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/kanit/Kanit-Regular.ttf',
    'karla': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/karla/Karla-Regular.ttf',
    'lobster': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/lobster/The-Lobster-Font/fonts/TTF/Lobster.ttf',
    'open_sans': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/open-sans/OpenSans-Regular.ttf',
    'oswald': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/oswald/Oswald-Regular.ttf',
    'pacifico': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/pacifico/Pacifico.ttf',
    'quicksand': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/quicksand/Quicksand-Regular.ttf',
    'raleway': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/raleway/Raleway-Regular.ttf',
    'righteous': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/righteous/Righteous-Regular.ttf',
    'roboto': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/roboto/Roboto-Regular.ttf',
    'ubuntu': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/ubuntu/ubuntu-font-family-0.80/Ubuntu-R.ttf',
    'yanone': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/fonts/yanone-kaffeesatz/YanoneKaffeesatz-Regular.ttf'
    
}


# Root of the training set; each sub-directory is listed below and the names of the
# empty ones are collected in late_paths.
tmp_path = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/train/x'
late_paths = []
for path in os.listdir(tmp_path):
  # A sub-directory with no entries is recorded so it can be excluded below.
  if not len(os.listdir(os.path.join(tmp_path,path))):
    late_paths.append(path)
 
print(late_paths)
print(len(late_paths))

# Keep only the fonts from font_paths whose name is not among the empty directories.
font_names = [i for i in font_paths.keys() if i not in late_paths]
print(len(font_names))

[]
0
53


## dataset functions

In [0]:
def get_random_word_paths(data_dir_path, font_names, current_batch_size, font_p=None):
  """Pick random word-image paths from randomly chosen font/image directories.

  Parameters
  ----------
  data_dir_path: str
  Directory of the data set; it holds one sub-directory per font.
  font_names: list
  Names of the fonts to draw from.
  current_batch_size: int
  Number of word paths to return.
  font_p: list, optional
  Probability of each font in font_names being chosen.

  Returns
  -------
  list
  current_batch_size paths of word images (sampled with replacement).
  """
  # One random font, then one random source image of that font.
  font_dir = os.path.join(data_dir_path, np.random.choice(font_names, p=font_p))
  image_dir = os.path.join(font_dir, np.random.choice(os.listdir(font_dir)))

  available_words = os.listdir(image_dir)
  shortfall = current_batch_size - len(available_words)

  if shortfall <= 0:
    # This image has enough words to serve the whole request.
    chosen = np.random.choice(available_words, current_batch_size)
    return [os.path.join(image_dir, word_file) for word_file in chosen]

  # Not enough words here: draw as many as this image holds and top the
  # batch up from another random font/image.
  chosen = np.random.choice(available_words, len(available_words))
  word_paths = [os.path.join(image_dir, word_file) for word_file in chosen]
  word_paths += get_random_word_paths(data_dir_path, font_names, shortfall, font_p)
  return word_paths


def get_batch(data_dir_path, font_names, batch_size, max_word_len, font_p=None, downsample_factor=4):
  """Load one random batch of word images together with their CTC targets.

  The label of a word is encoded in its file name as underscore-separated
  character indexes (e.g. ``12_0_33.jpg``).

  Parameters
  ----------
  data_dir_path: str
  Directory of the data set (one sub-directory per font).
  font_names: list
  Names of the fonts to draw from.
  batch_size: int
  Number of word images in the batch.
  max_word_len: int
  Maximum label length; longer labels are truncated.
  font_p: list, optional
  Probability of each font being chosen.
  downsample_factor: int
  How much the pooling layers shrink the image width.

  Returns
  -------
  batch_x: np.array, (batch_size, 128, 32, 1)
  Min-max normalised, transposed word images.
  batch_y: np.array, (batch_size, max_word_len)
  Character indexes, zero padded after the word length.
  input_lens: np.array, (batch_size, 1)
  Number of RNN time steps handed to CTC (the first two are dropped).
  word_lens: np.array, (batch_size, 1)
  Label length of every word.

  Raises
  ------
  IOError
  If a word image cannot be read.
  """
  img_width, img_height = 128, 32
  batch_x = np.zeros((batch_size, img_width, img_height))
  batch_y = np.zeros((batch_size, max_word_len))
  word_lens = np.zeros((batch_size, 1))
  input_lens = np.ones((batch_size, 1)) * (img_width // downsample_factor - 2)
  word_paths = get_random_word_paths(data_dir_path, font_names, batch_size, font_p)

  for i, word_path in enumerate(word_paths):
    image = cv2.imread(word_path, 0)  # flag 0: load as grayscale
    if image is None:
      # cv2.imread signals failure by returning None instead of raising.
      raise IOError('Could not read word image: {}'.format(word_path))
    batch_x[i] = minmax_image(image).T

    # Strip directory and extension portably rather than assuming '/' and a
    # three-character extension.
    word_name = os.path.splitext(os.path.basename(word_path))[0]
    char_indexes = [int(part) for part in word_name.split('_')]
    char_indexes = char_indexes[:max_word_len]  # cut words that are too long

    ch_len = len(char_indexes)
    word_lens[i][0] = ch_len
    batch_y[i][:ch_len] = char_indexes

  batch_x = np.reshape(batch_x, [-1, img_width, img_height, 1])
  return batch_x, batch_y, input_lens, word_lens


def minmax_image(image):
  """Min-max normalise an image.

  Parameters
  ----------
  image: np.array, (h, w)

  Returns
  -------
  np.array, (h, w)
  Image shifted so its minimum sits at 0.001, divided by the shifted maximum,
  then lowered by 0.0001 (so the maximum becomes 0.9999).
  """
  shifted = (image - image.min()) + 0.001
  return shifted / shifted.max() - 0.0001


def standartize_images(images):
  """Standardise a batch of images with the batch-wide mean and std.

  Parameters
  ----------
  images: np.array, (batch, h, w)

  Returns
  -------
  np.array, (batch, h, w)
  Images with the global mean subtracted, divided by the global standard deviation.
  """
  centered = images - np.mean(images)
  return centered / np.std(centered)

    
# Generates the input and output data for the model.
def data_gen2(font_names, batch_size, max_word_len, font_p, data_dir_path, train=1):
  """Endless generator of (inputs, outputs) batches for the CTC training model.

  Parameters
  ----------
  font_names, batch_size, max_word_len, font_p, data_dir_path
  Forwarded to get_batch.
  train
  Not used by the generator.

  Yields
  ------
  (dict, dict)
  Model inputs keyed by input-layer name, and a dummy 'ctc' target.
  """
  while True:
    images, labels, pred_lens, label_lens = get_batch(data_dir_path, font_names, batch_size, max_word_len, font_p)
    model_inputs = dict(
        the_input=images,
        the_labels=labels,
        input_length=pred_lens,
        label_length=label_lens,
    )
    # The target is never used: the loss is computed inside the model from
    # the labels passed as an input.
    dummy_targets = dict(ctc=np.zeros([batch_size]))
    yield (model_inputs, dummy_targets)

In [0]:
# Preview a few generated batches.
# The original cell called `data_gen()` and printed `l`, neither of which is defined in
# this notebook, and wrapped the endless generator in list(), which would never return.
# NOTE(review): data_gen2 with the training directory is assumed to be the intended
# generator - confirm.
sample_gen = data_gen2(font_names, batch_size, max_text_len, None, tmp_path)
for sample in range(10):
  x, y = next(sample_gen)  # next(): take exactly one batch from the infinite generator
  print('{}\n\n{}'.format(x, y))
  

In [0]:

# Manual smoke test of get_batch on five fonts with explicit sampling probabilities.
# NOTE: this rebinds the module-level names font_names and batch_size that were set
# in earlier cells, so later cells see these values if this cell is run.
data_dir_path = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/train/x'
font_names = ['roboto_mono', 'helvetica', 'comic_sans', 'arial', 'times_new_roman']
batch_size = 10
max_word_len = 16
font_p = [0.3, 0.2, 0.1, 0.2, 0.2] # one probability per entry of font_names

#rnd_w_pths = get_random_word_paths(data_dir_path, font_names, batch_size, font_p)
batch_x, batch_y, input_lens, word_lens = get_batch(data_dir_path, font_names, batch_size, max_word_len, font_p)

In [0]:
# len([pth for pth in rnd_w_pths if 'roboto_mono' in pth])    # font words num
# Shapes of the batch loaded in the previous cell, one per line; with batch_size=10 and
# max_word_len=16 get_batch returns (10, 128, 32, 1), (10, 16), (10, 1), (10, 1).
print(batch_x.shape, batch_y.shape, input_lens.shape, word_lens.shape, sep='\n')


'''
print(len(batch_x), batch_x[1], sep='\n')
print('',len(batch_y), batch_y[1], sep='\n')
print('\n',word_lens[1])
'''

## random char batches

In [0]:
def data_gen_rand(batch_size, font_name, max_word_len, alphabet_arr, font_paths, downsample_factor=4):
  """Endless generator of batches of rendered random character strings.

  Parameters
  ----------
  batch_size: int
  Number of word images per batch.
  font_name: str
  Key into font_paths selecting the font to render with.
  max_word_len: int
  Width of the label array; generated words have 1 .. max_word_len-1 characters.
  alphabet_arr: list
  The alphabet, one character per element.
  font_paths: dict
  {font name: font file path}.
  downsample_factor: int
  How much the pooling layers shrink the image width.

  Yields
  ------
  (dict, dict)
  Model inputs keyed by input-layer name, and a dummy 'ctc' target.
  """
  # Loop-invariant setup: load the font, create the blank canvas and build the
  # alphabet string once instead of once per batch / per word.
  blank_img = Image.new('L', (128, 32), color = 255)
  font = ImageFont.truetype(font_paths[font_name], 14)
  alphabet_str = ''.join(alphabet_arr)

  while True:
    batch_x = np.zeros((batch_size, 128, 32))
    batch_y = np.zeros((batch_size, max_word_len))
    word_lens = np.zeros((batch_size, 1))
    input_lens = np.ones((batch_size, 1)) * (128 // downsample_factor - 2)

    for i in range(batch_size):
      rand_len = np.random.choice(max_word_len-1)+1
      # np.random.choice already samples uniformly with replacement, so the
      # alphabet does not need to be copied and shuffled first.
      word = ''.join(np.random.choice(alphabet_arr, rand_len))
      word_indexes = indexes_from_word(word, alphabet_str)

      batch_y[i, :rand_len] = word_indexes
      word_lens[i, 0] = rand_len

      # Render the word in black on a fresh white canvas.
      word_img = blank_img.copy()
      ImageDraw.Draw(word_img).text((2, 12), word, font=font, fill=0)
      open_cv_image = np.array(word_img).T  # transpose so the width axis comes first
      open_cv_image = cv2.bilateralFilter(open_cv_image, 9, 100, 100)  # d, sigmaColor, sigmaSpace
      batch_x[i] = minmax_image(open_cv_image)

    batch_x = np.reshape(batch_x, [-1,128,32,1])
    inputs = {
      'the_input': batch_x,
      'the_labels': batch_y,
      'input_length': input_lens,
      'label_length': word_lens
    }
    # The target is never used: the loss is computed inside the model from
    # the labels passed as an input.
    outputs = {'ctc': np.zeros([batch_size])}
    yield inputs, outputs
     
      
def indexes_from_word(word, alphabet_str):
  """Replace every character of a word with its index in the alphabet.

  Parameters
  ----------
  word: str
  alphabet_str: str
  The alphabet as one string.

  Returns
  -------
  list
  One index per character; -1 for a character that is not in alphabet_str.
  """
  return [alphabet_str.find(char) for char in word]

In [0]:
# Demo: np.random.shuffle works in place, so shuffling a deepcopy leaves the
# original list untouched (s2 is printed shuffled, s1 in its original order).
s1 = '1 2 3 4 5'.split(' ')
s2 = deepcopy(s1)
np.random.shuffle(s2)
print(s2)
print(s1)

['3', '5', '2', '4', '1']
['1', '2', '3', '4', '5']


# training

In [0]:
from keras.optimizers import Adadelta, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
# NOTE(review): learning phase 0 is the Keras test mode (see the model import cell), yet
# this cell trains the model - confirm this is intended, since layers such as
# BatchNormalization behave differently between the train and test phases.
K.set_learning_phase(0)


model = get_Model(training=True)
weights_path = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/fonts_53_images_40/LSTM+BN5--100--1.284.hdf5'
weights_path2 = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_mono_images_40_new/LSTM+BN5--100--0.181.hdf5'

# Warm-start from an earlier checkpoint when it can be loaded, otherwise train from scratch.
try:
    model.load_weights(weights_path2)
    print("...Previous weight data...")
except Exception as load_error:
    # Still best-effort, but report the reason (missing file, shape mismatch, ...)
    # instead of silently starting from random weights; KeyboardInterrupt is no longer swallowed.
    print("...New weight data...")
    print("Could not load {}: {!r}".format(weights_path2, load_error))


train_dir_path = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/train/x'
train_dir_path2 = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/train/x_new'
train_dir_path3 = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/train/x_new'

test_dir_path = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/test/x'
test_dir_path3 = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/test/x_new'


# Keyword arguments for data_gen2: training and validation differ only in the data directory.
data_gen_kwargs_train = {
  'font_names' : ['roboto_mono'], #[i for i in font_paths.keys() if i not in late_paths],
  'batch_size' : 64,
  'max_word_len' : 16,
  'font_p' : None,
  'data_dir_path' : train_dir_path3
}
data_gen_kwargs_test = {
  'font_names' : ['roboto_mono'], #[i for i in font_paths.keys() if i not in late_paths],
  'batch_size' : 64,
  'max_word_len' : 16,
  'font_p' : None,
  'data_dir_path' : test_dir_path3
}



#adam = Adam()
ada = Adadelta()

check_path = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_mono_images_40_new'
# NOTE: early_stop is created but not passed to fit_generator below.
early_stop = EarlyStopping(monitor='loss', min_delta=0.001, patience=4, mode='min', verbose=1)
# The file name embeds val_loss, so validation data must be supplied to fit_generator.
checkpoint = ModelCheckpoint(filepath=os.path.join(check_path, 'LSTM+BN5--{epoch:02d}--{val_loss:.3f}.hdf5'), monitor='loss', verbose=1, mode='min', period=1)
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=ada)

# captures output of softmax so we can decode the output during visualization
model.fit_generator(generator = data_gen2(**data_gen_kwargs_train),
                    steps_per_epoch = 100,
                    epochs = 100,
                    callbacks = [checkpoint],
                    validation_data = data_gen2(**data_gen_kwargs_test),
                    validation_steps = 5
                    )


# training rand

In [0]:
from keras.optimizers import Adadelta, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
# NOTE(review): learning phase 0 is the Keras test mode (see the model import cell), yet
# this cell trains the model - confirm this is intended, since layers such as
# BatchNormalization behave differently between the train and test phases.
K.set_learning_phase(0)


model = get_Model(training=True)
weights_path = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/fonts_53_images_40/LSTM+BN5--100--1.284.hdf5'
weights_path2 = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_mono_images_40_new/LSTM+BN5--100--0.181.hdf5'
weights_path3 = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_mono_images_40_new/LSTM+BN5--38--0.471.hdf5'

# Warm-start from an earlier checkpoint when it can be loaded, otherwise train from scratch.
try:
    model.load_weights(weights_path3)
    print("...Previous weight data...")
except Exception as load_error:
    # Still best-effort, but report the reason (missing file, shape mismatch, ...)
    # instead of silently starting from random weights; KeyboardInterrupt is no longer swallowed.
    print("...New weight data...")
    print("Could not load {}: {!r}".format(weights_path3, load_error))

# Keyword arguments for data_gen_rand (random strings rendered with one font).
data_gen_kwargs = {
   'batch_size' : 80,
  'font_name' : 'roboto_mono',
  'max_word_len' : 16,
  'alphabet_arr': list(string.ascii_letters + string.digits + '.'),
  'font_paths': font_paths
}

#adam = Adam()
ada = Adadelta()

check_path = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_mono_images_40_new'
# NOTE: early_stop is created but not passed to fit_generator below.
early_stop = EarlyStopping(monitor='loss', min_delta=0.001, patience=4, mode='min', verbose=1)
# The file name embeds val_loss, so validation data must be supplied to fit_generator.
checkpoint = ModelCheckpoint(filepath=os.path.join(check_path, 'LSTM+BN5--{epoch:02d}--{val_loss:.3f}.hdf5'), monitor='loss', verbose=1, mode='min', period=1)
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=ada)

# captures output of softmax so we can decode the output during visualization
# NOTE: validation uses a second generator built from the same arguments as the training one.
model.fit_generator(generator = data_gen_rand(**data_gen_kwargs),
                    steps_per_epoch = 200,
                    epochs = 100,
                    callbacks = [checkpoint],
                    validation_data = data_gen_rand(**data_gen_kwargs),
                    validation_steps = 5
                    )

# predict

## load model and weights

In [122]:
K.set_learning_phase(0)  # keras test mode

# Inference model: image in, per-step softmax out.
model = get_Model(training=False)
weights_path = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_images40/LSTM+BN5--100--0.001.hdf5'
weights_path2 = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_mono_images_40_new/LSTM+BN5--100--0.181.hdf5'
weights_path3 = r'/content/drive/My Drive/Colab Notebooks/course_project/crnn_checkpoints/roboto_mono_images_40_new/LSTM+BN5--38--0.471.hdf5'

try:
    model.load_weights(weights_path3)
    print("...Previous weight data...")
except Exception as load_error:
    # Predicting without trained weights is pointless, so fail loudly; the original
    # error is chained so the real cause stays visible in the traceback.
    raise Exception("No weight file!") from load_error
    


...Previous weight data...


## decode funcs

In [0]:
import itertools

'''
# y_pred [32, num_classes] - Розподіл ймовірностей для 32 ділянок фото
# highest_prob_chars [word_len] - передбачене слово
def decode_label(y_pred):
    # out : (1, 32, 64)
    highest_prob_classes = list(np.argmax(y_pred[2:], axis=1)) # індекс класу з макс ймовірністю
    highest_prob_classes = [k for k, g in itertools.groupby(highest_prob_classes)]  # забирає суміжні повторні значення
    highest_prob_chars = get_chars_from_indexes(highest_prob_classes, letters) # видаляє пропуски, декодує індекси
    return highest_prob_chars
'''
def decode_label(y_pred, alphabet_str):
    """Greedy CTC decoding of one prediction.

    Parameters
    ----------
    y_pred: np.array, (32, num_classes)
    Class probability distribution for each of the 32 image slices.
    alphabet_str: str
    The alphabet; class index i decodes to alphabet_str[i].

    Returns
    -------
    str
    The predicted word.
    """
    # Most probable class per time step, skipping the first two steps.
    best_classes = np.argmax(y_pred[2:], axis=1).tolist()

    # Collapse runs of adjacent identical classes into one.
    collapsed = []
    for class_index in best_classes:
        if not collapsed or collapsed[-1] != class_index:
            collapsed.append(class_index)

    # Indexes outside the alphabet (the blank) are dropped while decoding.
    return get_chars_from_indexes(collapsed, alphabet_str)
  
  
def get_chars_from_indexes(indexes, alphabet_str):
  """Decode class indexes into a string, skipping indexes outside the alphabet.

  Parameters
  ----------
  indexes: iterable of numbers
  Class indexes (ints or integer-valued floats).
  alphabet_str: str
  The alphabet; index i decodes to alphabet_str[i].

  Returns
  -------
  str
  The decoded characters. Indexes >= len(alphabet_str) (e.g. the CTC blank) and
  negative indexes (e.g. the -1 that str.find returns for an unknown character)
  are skipped.
  """
  alphabet_len = len(alphabet_str)
  # The lower bound matters: without it a negative index would silently be
  # decoded as a character counted from the end of the alphabet.
  return ''.join(alphabet_str[int(i)] for i in indexes if 0 <= i < alphabet_len)
  

## predict, accuracy funcs

In [0]:
def get_words_from_y(y_pred, y_true, word_lens, alphabet_str):
  """Decode a batch of predictions and a batch of true labels into words.

  Parameters
  ----------
  y_pred: np.array, (batch, 32, num_classes)
  Batch of probability distributions.
  y_true: np.array, (batch, max_word_len)
  Batch of true character indexes.
  word_lens: np.array, (batch, 1)
  True length of every word.
  alphabet_str: str
  The alphabet (lower + upper + digits + '.').

  Returns
  -------
  pred_words: list of str
  true_words: list of str
  """
  decoded_pairs = [
      get_word_form_y(y_pred[row], y_true[row, :int(word_lens[row, 0])], alphabet_str)
      for row in range(len(y_true))
  ]
  pred_words = [predicted for predicted, _ in decoded_pairs]
  true_words = [expected for _, expected in decoded_pairs]
  return pred_words, true_words


def get_word_form_y(pred_probs, true_indexes, alphabet_str):
  """Decode one prediction and one true label into words.

  Parameters
  ----------
  pred_probs: np.array, (32, num_classes)
  Probability distribution for each of the 32 image slices.
  true_indexes: np.array
  True character indexes of the word.
  alphabet_str: str
  The alphabet.

  Returns
  -------
  (str, str)
  The predicted word and the true word.
  """
  return (
      decode_label(pred_probs, alphabet_str),
      get_chars_from_indexes(true_indexes, alphabet_str),
  )


def get_word_accuraccy(all_pred_words, all_true_words):
  """Word-level accuracy: the share of predictions that equal the true word exactly.

  Parameters
  ----------
  all_pred_words: list of str
  Predicted words.
  all_true_words: list of str
  True words, aligned with all_pred_words.

  Returns
  -------
  accuracy: float
  Correct predictions divided by len(all_true_words); 0.0 when there are no true words.
  bad_preds: list of str
  The predicted words that did not match.
  """
  all_preds = len(all_true_words)
  if all_preds == 0:
    # Nothing to score; avoid dividing by zero.
    return 0.0, []

  word_pairs = list(zip(all_pred_words, all_true_words))
  bad_preds = [pred_word for pred_word, true_word in word_pairs if not pred_word == true_word]
  good_preds = len(word_pairs) - len(bad_preds)
  return (good_preds/all_preds), bad_preds
  

def get_char_good_preds(pred_word, true_word):
  """Compare two words position by position.

  Only the overlapping prefix is compared: characters beyond the length of the
  shorter word are ignored.

  Parameters
  ----------
  pred_word: str
  true_word: str

  Returns
  -------
  good_chars_num: int
  Number of positions where the characters match.
  bad_chars: list
  Predicted characters at the positions that do not match.
  """
  char_pairs = list(zip(pred_word, true_word))
  bad_chars = [pred_ch for pred_ch, true_ch in char_pairs if not pred_ch == true_ch]
  return len(char_pairs) - len(bad_chars), bad_chars
 

def get_char_accuraccy(all_pred_words, all_true_words):
  """Character-level accuracy over all samples.

  Characters are compared position by position with get_char_good_preds, so a
  single missing or extra character shifts every later position of that word.

  Parameters
  ----------
  all_pred_words: list of str
  Predicted words.
  all_true_words: list of str
  True words, aligned with all_pred_words.

  Returns
  -------
  accuracy: float
  Matching characters divided by the total number of true characters;
  0.0 when there are no true characters.
  bad_preds: list
  All wrongly predicted characters.
  """
  good_preds_num = 0
  all_preds_num = 0
  bad_preds = []
  for pred_word, true_word in zip(all_pred_words, all_true_words):
    good_chars_num, bad_chars = get_char_good_preds(pred_word, true_word)
    good_preds_num += good_chars_num
    all_preds_num += len(true_word)
    bad_preds += bad_chars

  if all_preds_num == 0:
    # No true characters at all (empty input or only empty words): avoid dividing by zero.
    return 0.0, bad_preds
  return (good_preds_num / all_preds_num), bad_preds


def get_accuraccy(all_pred_words, all_true_words, word_mode=0):
  """Accuracy of the predictions, by words or by characters.

  Parameters
  ----------
  all_pred_words: list of str
  Predicted words.
  all_true_words: list of str
  True words.
  word_mode: int
  If truthy the accuracy is measured per word, otherwise per character.

  Returns
  -------
  accuraccy: float
  bad_preds: list
  Wrongly predicted words (word mode) or characters (character mode).
  """
  scorer = get_word_accuraccy if word_mode else get_char_accuraccy
  accuraccy, bad_preds = scorer(all_pred_words, all_true_words)
  return accuraccy, bad_preds
  

def get_all_predictions2(data_gen, num_samples, alphabet_str, **data_gen_kwargs):
  """Run the global `model` over generated batches and decode the results.

  Parameters
  ----------
  data_gen: callable
  Data generator function; called with **data_gen_kwargs.
  num_samples: int
  Number of words to evaluate (a multiple of batch_size).
  alphabet_str: str
  The alphabet (lower + upper + digits + '.').
  **data_gen_kwargs
  Arguments of the generator; must contain 'batch_size'.

  Returns
  -------
  all_pred_words: list of str
  all_true_words: list of str
  """
  predicted_words = []
  expected_words = []
  batch_index = 0
  for model_inputs, _targets in data_gen(**data_gen_kwargs):
    images = model_inputs['the_input']
    labels = model_inputs['the_labels']
    label_lens = model_inputs['label_length']

    probabilities = model.predict_on_batch(images)
    batch_pred, batch_true = get_words_from_y(probabilities, labels, label_lens, alphabet_str)
    predicted_words.extend(batch_pred)
    expected_words.extend(batch_true)

    # Stop once enough whole batches were processed (at least one always is).
    if batch_index >= int(num_samples/data_gen_kwargs['batch_size'])-1:
      break
    batch_index += 1
  return predicted_words, expected_words

# перевірити чи не переплутав // і %
# додати паддінг до вирізаних із зображення слів
# видалити старі розміром 96 на 24
# зробити хелв неуе new додавши z q j 0-9 ...
# затрейнити на ній робото
# зробити норм тест з цифрами і буквами
# затестити на зображенні
# підсвітка
# 

## get preds and acc



 > **all predictions, accuracy**



In [125]:
%%time
train_dir_path3 = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/train/x_new'

test_dir_path = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/test/x' # chars(-z,-j,-q) + .
test_dir_path3 = r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/test/x_new'

# Arguments for data_gen2 (word images read from disk) - used for the evaluation below.
data_gen_kwargs1 = {
  'font_names' : ['roboto_mono'], #[i for i in font_paths.keys() if i not in late_paths],
  'batch_size' : 64,
  'max_word_len' : 16,
  'font_p' : None,
  'data_dir_path' : test_dir_path
}

# Arguments for data_gen_rand (rendered random strings) - defined here but not used in this cell.
data_gen_kwargs = {
   'batch_size' : 80,
  'font_name' : 'roboto_mono',
  'max_word_len' : 16,
  'alphabet_arr': list(string.ascii_letters + string.digits + '.'),
  'font_paths': font_paths
}


num_samples = 128 # two batches of 64
alphabet_str = string.ascii_letters + string.digits + '.'

all_pred_words, all_true_words = get_all_predictions2(data_gen2, num_samples, alphabet_str, **data_gen_kwargs1)
# word_mode=0: character-level accuracy; bad_preds then holds the wrongly predicted characters.
accuraccy, bad_preds = get_accuraccy(all_pred_words, all_true_words, word_mode=0)

print('p: {}'.format(all_pred_words))
print('t: {}'.format(all_true_words))
print('acc: ', accuraccy)
print('bad_p: {}'.format(bad_preds))

p: ['style', 'sell', 'Aelenshalo', 'Taz4', 'salisitude', 'sakKreaAed', 'SiXAing', 'talKeS', 'famm', 'ouIger', 'ta', 'nar', 'nee6', 'rekk', 'skug', 'fakd', 'drew', 'Cz', 'na', 'hukteS', 'Taz4', 'severel', 'travelling', 'Wzvde', 'tJekSo', 'Cz', 'sao', 'Brep', 'skug', 'ikquimo', 'dispavered', 'hukteS', 'Wzvde', 'Wzvde', 'sao', 'enzugh', 'zne', 'Made', 'AetL', 'prsCesAing', 'led', 'SentimenAe', 'nee6', 'famm', 'eetw', 'severel', 'ell', 'tzreL', 'sakSreaAedw', 'Brep', 'liAiSe', 'Yaq', 'led', 'bed', 'hexrAed', 'nar', 'Xindnese', 'ye', 'SAristlo', 'Taz4', 'zn', 'lengAh', 'rekk', 'travelling', 'If', 'far', 'rekev4', 'rekev4', 'Brep', 'entreaAiee', 'veL', 'entire', 'suferL', 'suferL', 'ExpVessisn', 'insVesing', 'hukteS', 'in', 'here', 'far', 'Zaumee', 'saw', 'far', 'ds', 'Zaumee', 'zkeL', 'ds', 'saw', 'beL', 'his', 'es', 'suferL', 'rekS', 'beL', 'eetimaAing', 'es', 'ds', 'vavSRo', 'pesple', 'gone', 'Zaumee', 'beL', 'Aekw', 'salisitudeL', 'elAeretisn', 'far', 'Brep', 'Able', 'Aekw', 'apepAence',

In [0]:
from collections import Counter

# Count how often each entry of bad_preds (returned by get_accuraccy in the previous cell)
# occurs and print the tally.
cnt = Counter(bad_preds)
print(cnt)

# autocorrect

In [0]:
! pip install textblob

In [0]:
from textblob import TextBlob


def autocorrect(all_pred_words):
  """Spell-correct a list of predicted words with TextBlob.

  The words are joined with single spaces into one text, that text is run
  through ``TextBlob.correct()``, and the corrected text is split on single
  spaces again.

  Parameters
  ----------
  all_pred_words: list of str
  Predicted words.

  Returns
  -------
  The pieces of the corrected text obtained by splitting it on ' '.
  """
  joined_text = ' '.join(all_pred_words)
  corrected_blob = TextBlob(joined_text).correct()
  return corrected_blob.split(' ')


In [0]:

# Compare accuracy before and after running the predictions through autocorrect.
accuraccy, bad_preds = get_accuraccy(all_pred_words, all_true_words, word_mode=0)
all_pred_words_corrected =  autocorrect(all_pred_words)

# p - raw predictions, c - corrected predictions, t - ground truth
print('p: {}'.format(all_pred_words))
print('c: {}'.format(all_pred_words_corrected))
print('t: {}'.format(all_true_words))

# NOTE: bad_preds is overwritten here with the bad predictions of the corrected words.
after_accuraccy, bad_preds = get_accuraccy(all_pred_words_corrected, all_true_words, word_mode=0)

print('accuraccy:', accuraccy, sep =' ')
print('after correct accuraccy:', after_accuraccy, sep =' ')
# Relative reduction of the error (1 - accuracy) achieved by autocorrect.
# NOTE(review): divides by (1 - accuraccy), so this breaks when the raw accuracy is exactly 1.
loss_fall = 1 - (1-after_accuraccy) / (1-accuraccy) # one minus the ratio of the after-correct error to the plain error
print('loss fall: {} %'.format(loss_fall*100))

# highlight

## get word coords

In [0]:
def get_words(img):
    """
    Function returns bboxes for each word and for chars in it.

    Text lines are located from the row-wise maxima of the image and the
    characters inside every line from the column-wise maxima of that line.
    Characters are then grouped into words by get_words_cordinates.

    Parameters
    ----------
    img: np.array
    Thresholded grayscale image (2-D); a row/column is treated as containing
    text when its maximum differs from that of an empty row/column.

    Returns
    -------
    words: list
    Array of words bboxes in the form (x, y, w, h) where (x, y) is the corner
    with the smallest column/row index (top-left in array coordinates),
    w - width, h - height of the bbox.

    chars: list
    Character bboxes (same (x, y, w, h) form) as returned by
    get_words_cordinates for every line, concatenated.
    """

    # Maximum of every image row: the value changes exactly where a text line
    # starts or ends.
    maxs = np.max(img, axis=1)
    indxs_y = np.where(maxs[:-1] != maxs[1:])[0] # row indexes where a transition happens
    words = [] # list for storing words bboxes
    chars = [] # list for storing characters bboxes

    # Transitions are consumed in (start, end) pairs, one pair per text line.
    # For every line the same trick is repeated column-wise to find where
    # each character starts and ends.
    for indx_y in range(0, len(indxs_y)-1, 2):

        distances = [] # horizontal gap in front of every character
        cordinates = [] # (x, y, w, h) of every character in the line
        row = img[indxs_y[indx_y]:indxs_y[indx_y+1]] # take row of the text
        maxs1 = np.max(row, axis=0)
        indxs_x = np.where(maxs1[:-1] != maxs1[1:])[0]

        # A line without a complete (start, end) pair of column transitions
        # (e.g. ink touching both image borders) has no character boxes;
        # skip it instead of failing on indxs_x[0].
        if len(indxs_x) < 2:
            continue

        last_x = indxs_x[0] # makes the gap in front of the first character 0
        for indx_x in range(0, len(indxs_x)-1, 2):
            cordinates.append((
                indxs_x[indx_x],
                indxs_y[indx_y],
                indxs_x[indx_x+1] - indxs_x[indx_x],
                indxs_y[indx_y+1] - indxs_y[indx_y]
            ))
            distances.append(indxs_x[indx_x] - last_x)
            last_x = indxs_x[indx_x+1]

        temp = get_words_cordinates(cordinates, distances)
        if temp == -1: # line rejected as an anomaly
            continue

        temp_words, temp_chars = temp

        words += temp_words
        chars += temp_chars

    return words, chars


def get_words_cordinates(cordinates, distances):
    """
    Group the character bboxes of one text line into word bboxes.

    Parameters
    ----------
    cordinates: tuple or list which contains bboxes for each character in form - (x, y, w, h),
    ordered left to right.

    distances: list where distances[i] is the horizontal gap in front of
    character i (get_words passes 0 for the first character).

    Returns
    -------
    -1 when there are no characters or when the first character bbox has an
    area below 25 (treated as an anomaly).

    Otherwise a tuple (words, chars):
    words: list of word bboxes (x, y, w, h)
    chars: list of lists, chars[k] holds the character bboxes of words[k]
    """
    if len(cordinates) == 0: # nothing to group
        return -1

    x, y, w, h = cordinates[0]

    if w*h < 25: # weeding out anomalies
        return -1

    words = []
    chars = []

    if check_for_one_word_in_line(distances): # only one word in the row: merge every character
        n = len(cordinates)
        y = min(c[1] for c in cordinates)
        h = max(c[3] for c in cordinates)
        # total width = all character widths + all gaps in front of them
        w = sum(c[2] for c in cordinates) + sum(distances[:n])

        words.append((x, y, w, h))
        # one word -> one list of characters, same layout as the multi-word branch
        chars.append(list(cordinates))
    else:
        hist, bins = np.histogram(distances) # get histogram of distances between characters
        divider = find_divider(hist, bins) # gaps below this value stay inside the current word

        start = 0 # index of the first character of the current word
        for i, gap in enumerate(distances[1:], start=1):
            x_t, y_t, w_t, h_t = cordinates[i]
            if gap < divider:
                # character i extends the current word
                y = min(y, y_t)
                w += w_t + gap
                h = max(h, h_t)
            else:
                # character i starts a new word: close the current one
                words.append((x, y, w, h))
                chars.append(cordinates[start:i])
                x, y, w, h = x_t, y_t, w_t, h_t
                start = i

        chars.append(cordinates[start:])
        words.append((x, y, w, h))

    return words, chars

def find_divider(hist, bins):
    """
    Function finds the gap threshold used to split characters into words.

    The threshold is the midpoint between the left edge of the first
    non-empty histogram bin and the left edge of the last non-empty bin.
    The caller keeps a character in the current word when the gap in front of
    it is lower than this value and starts a new word otherwise.

    Parameters
    ----------
    hist: counts per bin (as returned by np.histogram)
    bins: bin edges (as returned by np.histogram)

    Raises IndexError when every bin is empty.
    """
    occupied = np.flatnonzero(hist) # indexes of the non-empty bins
    lowest = occupied[0]
    highest = occupied[-1]

    return bins[lowest]*0.5 + bins[highest]*0.5
        
def check_for_one_word_in_line(distances):
    """
    Function checks whether there is only one word in the line.

    Heuristic: the line is considered a single word when the mean of the
    gaps between characters is greater than their variance.

    Returns True or False.
    """
    return bool(np.mean(distances) > np.var(distances))

## get words images by coords

In [0]:
# image_address - path of the text image
# returns the word images cut out of that single text image, their bboxes and the image itself
def get_words_images_and_cords(image_address, save_words_bboxes=0, save_dir_path=None):
  """Read a text image and cut it into preprocessed word images.

  Parameters
  ----------
  image_address: path of the image; it is read as grayscale.
  save_words_bboxes: when truthy, the word bboxes are drawn with plot_bboxes
    on a copy of the image.
  save_dir_path: forwarded to plot_bboxes as dir_path.

  Returns
  -------
  (word_images, word_coords, image): the preprocessed word crops, the word
  bboxes (x, y, w, h) found by get_words, and the grayscale image.

  Raises
  ------
  FileNotFoundError when the image cannot be read.
  """
  image = cv2.imread(image_address, 0)
  if image is None:
    # cv2.imread signals a missing/unreadable file by returning None;
    # fail here with a clear message instead of deep inside cv2.threshold.
    raise FileNotFoundError('Could not read image: {}'.format(image_address))

  # Inverted binarization: dark text becomes 255 on a 0 background,
  # which is what get_words expects.
  _, thresh = cv2.threshold(image, 180, 255, cv2.THRESH_BINARY_INV)
  word_coords, _ = get_words(thresh)
  if save_words_bboxes:
    plot_bboxes(word_coords, deepcopy(image), dir_path=save_dir_path)
  # Crops are taken from the original grayscale image, not from the thresholded one.
  word_images = get_images_by_coords_and_preprocess(word_coords, image)
  return word_images, word_coords, image
  

# images_dir_path - path of the directory with the text images
# returns, per text image: its word images, its word bboxes and the image itself
def get_all_words_images_and_coords(images_dir_path, write=0, save_dir_path=None):
  """Run get_words_images_and_cords on every JPG image of a directory.

  Only files whose extension is 'jpg' (case-insensitive) are processed; they
  are visited in the order given by sort_int_filenames.

  Parameters
  ----------
  images_dir_path: directory with the text images.
  write: forwarded as save_words_bboxes.
  save_dir_path: forwarded unchanged.

  Returns
  -------
  (all_words_images, all_words_coords, all_text_images): three lists with
  one entry per processed image.
  """
  # The extension has to be compared against both spellings; filtering
  # happens before sorting so that unrelated files never reach the sorter.
  jpg_names = [name for name in os.listdir(images_dir_path)
               if name.split('.')[-1].lower() == 'jpg']

  all_words_images = []
  all_words_coords = []
  all_text_images = []
  for address in sort_int_filenames(jpg_names):
    word_images, word_coords, text_image = get_words_images_and_cords(os.path.join(images_dir_path, address), write, save_dir_path)
    all_words_images.append(word_images)
    all_words_coords.append(word_coords)
    all_text_images.append(text_image)
  return all_words_images, all_words_coords, all_text_images


# filenames - list of file names: ['2.jpg', '1.jpg']
# returns the sorted list of file names: ['1.jpg', '2.jpg']
def sort_int_filenames(filenames):
  """Sort file names numerically by the integer in front of the first dot.

  The names themselves are returned unchanged (extension and its letter case
  are preserved), so they can still be used to open the files.

  Raises ValueError when the part before the first dot is not an integer.
  """
  return sorted(filenames, key=lambda name: int(name.split('.')[0]))


# cuts the regions given by the bboxes out of the image and preprocesses them
def get_images_by_coords_and_preprocess(coords, image):
  """Return one preprocessed crop of `image` per (x, y, w, h) bbox in `coords`.

  Every crop is copied, passed through pad_image and then smoothed with a
  bilateral filter.
  """
  def _crop_and_clean(bbox):
    x, y, w, h = bbox
    crop = pad_image(deepcopy(image[y:y+h, x:x+w]))
    return cv2.bilateralFilter(crop, 9, 100, 100) # diameter, sigmaColor, sigmaSpace

  return [_crop_and_clean(bbox) for bbox in coords]


# shows the image with the given colormap
def plot_image(image, colorspace='gray'):
  """Display `image` with matplotlib.

  Parameters
  ----------
  image: image array accepted by plt.imshow.
  colorspace: colormap name passed to plt.imshow; when falsy (None, '') the
    image is shown with matplotlib's default colormap instead of not being
    drawn at all.
  """
  if colorspace:
    plt.imshow(image, cmap=colorspace)
  else:
    plt.imshow(image)
  plt.show()
  

# image [h, w] - grayscale image
#
# returns [32, 128] - the resized and padded image
def pad_image(image, target_h=32, target_w=128):
  """Fit a 2-D image into a target_h x target_w canvas.

  The image is shrunk (keeping the aspect ratio) until it is neither wider
  than target_w nor taller than target_h, then padded with the value 255 at
  the top and on the right up to exactly (target_h, target_w). Images that
  already fit are only padded.

  Parameters
  ----------
  image: 2-D np.array.
  target_h, target_w: size of the result; default 32 x 128.

  Returns
  -------
  np.array of shape (target_h, target_w).
  """
  h, w = image.shape

  # Work out the final size first so the image is resampled only once.
  new_h, new_w = h, w
  if new_w > target_w:
    new_h = max(1, int((target_w/new_w)*new_h)) # never collapse to 0 px
    new_w = target_w
  # Checked after the width step as well: a wide image can still be too
  # tall once its width has been reduced.
  if new_h > target_h:
    new_w = max(1, int((target_h/new_h)*new_w))
    new_h = target_h

  if (new_h, new_w) != (h, w):
    image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

  pad_h = target_h - new_h
  pad_w = target_w - new_w
  # pad above and to the right with 255
  return np.pad(image, ((pad_h, 0), (0, pad_w)), 'constant', constant_values=(255))
  

## highlight

In [0]:
from collections import Counter
# y_pred [batch, 32, num_classes] - batch of probability distributions
# alphabet_str [num_classes] - alphabet, the characters of all classes (lower+upper+digits+.)
#
# returns [batch_size, ?] - batch of predicted words
def get_words_arr_from_y(y_pred, alphabet_str):
  """Decode every element of `y_pred` with decode_label and return the results as a list."""
  return [decode_label(sample_pred, alphabet_str) for sample_pred in y_pred]


# image - grayscale text image
# words [words_num] - list of predicted words
# words_coords [words_num] - list of the bboxes of those words
def highlight_most_frequent(image, words, words_coords):
  """Colour every occurrence of the most frequent word in a text image.

  Words are compared case-insensitively. For every occurrence, the pixels
  of its bbox that are 255 after an inverted threshold at 127 are set to
  [255, 0, 0] in a 3-channel copy of the image.

  Parameters
  ----------
  image: 2-D grayscale image.
  words: predicted words, one per bbox.
  words_coords: bboxes (x, y, w, h), words_coords[i] belongs to words[i].

  Returns
  -------
  The 3-channel image. When `words` is empty the converted image is
  returned without any highlighting, so the result can always be saved.
  """
  highlighted_img = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
  words = [word.lower() for word in words]
  if not words:
    return highlighted_img

  most_frequent_word = Counter(words).most_common(1)[0][0]

  # The text mask does not depend on the word, compute it once.
  _, thresh = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)

  for i, word in enumerate(words):
    if word == most_frequent_word:
      (x, y, w, h) = words_coords[i]
      mask = thresh[y:y+h, x:x+w]
      # The slice is a view, so the assignment recolours highlighted_img in place.
      highlighted_img[y:y+h, x:x+w][mask == 255] = [255, 0, 0]
  return highlighted_img


# images_dir_path - path of the directory with the text images
# labels_path - path to y.txt (currently unused)
# batch_size - number of word images per prediction batch
# alphabet_str - string with the alphabet
# save_dir_path - path of the directory for the resulting images
def save_highlights_from_images(model, images_dir_path, labels_path, batch_size, alphabet_str, save_dir_path):
  """Recognize the words of every text image and save a highlighted copy.

  For every image of `images_dir_path` the words are cut out, recognized in
  batches with `model.predict_on_batch`, and the most frequent word is
  highlighted by highlight_most_frequent. The result of the i-th image (in
  processing order) is written to `<save_dir_path>/<i>.jpg`.

  Parameters
  ----------
  model: object with predict_on_batch; it receives arrays of shape
    [n, 128, 32, 1] (every 32x128 word image transposed).
  images_dir_path: directory with the text images.
  labels_path: not used by the current implementation.
  batch_size: maximum number of word images per batch.
  alphabet_str: alphabet forwarded to get_words_arr_from_y.
  save_dir_path: output directory; created when missing.
  """
  print('get_words')
  all_words_images, all_words_coords, all_text_images = get_all_words_images_and_coords(images_dir_path, write=0, save_dir_path=None)

  print('get_labels and indexes')

  print('start get preds batches')
  # cv2.imwrite does not create directories, make sure the target exists.
  os.makedirs(save_dir_path, exist_ok=True)

  for i, word_images in enumerate(all_words_images):
    img_pred_words = []

    # Walk over the words in consecutive windows of batch_size items;
    # the last window may be shorter.
    for offset in range(0, len(word_images), batch_size):
      batch = word_images[offset:offset+batch_size]

      # Transpose every image on its own ([32, 128] -> [128, 32]) before
      # stacking, so the pixels of different images are never mixed.
      batch_x = np.asarray([np.asarray(word_image).T for word_image in batch])
      batch_x = np.reshape(batch_x, [-1,128,32,1])
      y_pred = model.predict_on_batch(batch_x)

      img_pred_words += get_words_arr_from_y(y_pred, alphabet_str)

    print('highlight')
    highlighted_image = highlight_most_frequent(all_text_images[i], img_pred_words, all_words_coords[i])
    if highlighted_image is None:
      # nothing to highlight: save the text image unchanged
      highlighted_image = all_text_images[i]
    print('save')
    cv2.imwrite(os.path.join(save_dir_path, str(i)+'.jpg'), highlighted_image)
      

## test

In [0]:
%%time
# Run the whole highlighting pipeline on the text images of the test directory.
# `model` is expected to be defined by an earlier cell.
kwargs = {
    'model': model,
    'images_dir_path': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/test_on_text/text_imgs_20',
    # labels_path is not used by save_highlights_from_images, hence the empty string
    'labels_path': r'',
    'batch_size': 5,
    'alphabet_str': string.ascii_letters + string.digits + '.',
    'save_dir_path': r'/content/drive/My Drive/Colab Notebooks/course_project/data_crnn/test_on_text/result'
}

save_highlights_from_images(**kwargs)