In [11]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Embedding, LSTM
import tensorflow as tf
import pickle

In [12]:
if not os.path.exists("handwritten_text_generator.h5"):
    def load_image_paths(directory):
        """Recursively collect the paths of all image files under ``directory``.

        Files are matched by extension (.jpg, .jpeg, .png) ignoring case, so a
        file such as ``SCAN.JPG`` is not silently skipped.

        The result is sorted: ``os.walk`` yields entries in an arbitrary,
        filesystem-dependent order, and everything built from this list
        (sample order, the slice taken by ``validation_split``) inherits it.

        Returns:
            A sorted list of paths; empty when ``directory`` does not exist
            or contains no matching files.
        """
        image_extensions = ('.jpg', '.jpeg', '.png')
        image_paths = [
            os.path.join(root, file)
            for root, _, files in os.walk(directory)
            for file in files
            if file.lower().endswith(image_extensions)
        ]
        return sorted(image_paths)

    # Image listings are only needed while the model still has to be trained.
    train_image_paths = load_image_paths('train_v2')
    test_image_paths = load_image_paths('test_v2')
    validation_image_paths = load_image_paths('validation_v2')


In [13]:
if not os.path.exists("handwritten_text_generator.h5"):
    def load_data(file_path):
        """Read a label CSV and map each FILENAME value to its IDENTITY value.

        When a FILENAME occurs more than once, the last row wins.
        """
        frame = pd.read_csv(file_path)
        filenames = frame['FILENAME']
        identities = frame['IDENTITY']
        return {name: identity for name, identity in zip(filenames, identities)}

    # One filename -> label lookup per dataset split.
    train_texts = load_data('written_name_train_v2.csv')
    test_texts = load_data('written_name_test_v2.csv')
    validation_texts = load_data('written_name_validation_v2.csv')


In [14]:
if not os.path.exists("handwritten_text_generator.h5"):
    def create_image_text_pairs(image_paths, texts):
        """Pair each image path with the label stored under its base filename.

        Paths whose base filename is not a key of ``texts`` are left out.
        Returns a list of ``(path, label)`` tuples in the order of ``image_paths``.
        """
        named_paths = ((os.path.basename(path), path) for path in image_paths)
        return [(path, texts[name]) for name, path in named_paths if name in texts]

    # Join the image listings with the CSV labels, split by split.
    train_pairs = create_image_text_pairs(train_image_paths, train_texts)
    test_pairs = create_image_text_pairs(test_image_paths, test_texts)
    validation_pairs = create_image_text_pairs(validation_image_paths, validation_texts)

In [15]:
def preprocess_image(image_path, target_size=(128, 32)):
    """Load an image file as a grayscale array with values divided by 255.

    ``target_size`` is passed unchanged to ``load_img``, which resizes the
    image to it (Keras documents the tuple as ``(height, width)``).
    """
    grayscale = load_img(image_path, color_mode='grayscale', target_size=target_size)
    pixels = img_to_array(grayscale)
    return pixels / 255.0

if not os.path.exists("handwritten_text_generator.h5"):

    def _labelled_pairs(pairs):
        """Keep only the (path, text) pairs whose text is an actual ``str``."""
        return [(path, text) for path, text in pairs if isinstance(text, str)]

    def _load_images(pairs):
        """Preprocess every image referenced by ``pairs`` into one NumPy array."""
        return np.array([preprocess_image(path) for path, _ in pairs])

    # Drop pairs with a non-string label (e.g. NaN read from an empty CSV
    # field) BEFORE loading the images. create_dataset later pairs
    # sequences[i] with images[i]; a label removed from the text list without
    # removing its image would shift every following sample onto the wrong
    # image. Filtering here also avoids decoding images that can never be used.
    train_pairs_labelled = _labelled_pairs(train_pairs)
    test_pairs_labelled = _labelled_pairs(test_pairs)
    validation_pairs_labelled = _labelled_pairs(validation_pairs)

    train_images = _load_images(train_pairs_labelled)
    test_images = _load_images(test_pairs_labelled)
    validation_images = _load_images(validation_pairs_labelled)

    # NOTE: these names are rebound from filename->label dicts to plain lists
    # of labels. Re-running the pairing cell afterwards requires re-running
    # the CSV-loading cell first.
    train_texts = [text for _, text in train_pairs_labelled]
    test_texts = [text for _, text in test_pairs_labelled]
    validation_texts = [text for _, text in validation_pairs_labelled]

In [16]:
if not os.path.exists("handwritten_text_generator.h5") or not os.path.exists("tokenizer.pickle"):
    # NOTE(review): the cells that define train_texts / train_images are
    # guarded on the model file only. If the model file exists but
    # tokenizer.pickle does not, this branch runs on a fresh kernel with those
    # names undefined. Confirm the intended recovery path for a lost tokenizer.

    def filter_non_string_texts(texts):
        """Return only the entries of ``texts`` that are ``str`` instances."""
        return [text for text in texts if isinstance(text, str)]

    def _drop_unlabelled(images, texts):
        """Remove rows whose text is not a ``str`` from ``images`` and ``texts`` together.

        Filtering the texts alone would leave ``images`` longer than the text
        list, so ``images[i]`` would no longer belong to ``texts[i]``.

        Raises:
            ValueError: if ``images`` and ``texts`` differ in length, i.e.
                they were already out of step before this cell ran.
        """
        if len(images) != len(texts):
            raise ValueError(
                f"images ({len(images)}) and texts ({len(texts)}) are not aligned"
            )
        keep = np.array([isinstance(text, str) for text in texts], dtype=bool)
        if keep.all():
            # Nothing to drop: avoid copying a potentially very large array.
            return images, list(texts)
        return np.asarray(images)[keep], filter_non_string_texts(texts)

    train_images, train_texts = _drop_unlabelled(train_images, train_texts)
    test_images, test_texts = _drop_unlabelled(test_images, test_texts)
    validation_images, validation_texts = _drop_unlabelled(validation_images, validation_texts)

    # Character-level vocabulary built from the training labels only.
    tokenizer = Tokenizer(char_level=True)
    tokenizer.fit_on_texts(train_texts)
    # +1 leaves room for index 0, which word_index never assigns.
    total_chars = len(tokenizer.word_index) + 1

    # Persist the fitted tokenizer so inference can reuse the same mapping.
    with open('tokenizer.pickle', 'wb') as handle:
        pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

    train_sequences = tokenizer.texts_to_sequences(train_texts)

    def create_dataset(sequences, images, step=1):
        """Slice token sequences into (window, image, next-token) training samples.

        For every sequence, windows of ``step`` tokens are taken at offsets
        0, step, 2*step, ... as long as a token follows the window; that
        following token is the target. Each sample carries ``images[i]`` for
        sequence number ``i``.

        Returns:
            Three NumPy arrays: the token windows, the matching images and
            the target tokens.
        """
        samples = [
            (tokens[start:start + step], images[idx], tokens[start + step])
            for idx, tokens in enumerate(sequences)
            for start in range(0, len(tokens) - step, step)
        ]
        windows = [window for window, _, _ in samples]
        pictures = [picture for _, picture, _ in samples]
        targets = [target for _, _, target in samples]
        return np.array(windows), np.array(pictures), np.array(targets)

    # Window length (and stride) used by create_dataset; generate_text's
    # default ``step`` is also 5, and the text input below has this length.
    step = 5
    X_text, X_image, y = create_dataset(train_sequences, train_images, step)

    # Two inputs: a window of ``step`` token ids and one image.
    # The image shape mirrors preprocess_image's default target_size (128, 32);
    # the trailing 1 is presumably the single grayscale channel - TODO confirm.
    input_text = tf.keras.layers.Input(shape=(step,))
    input_image = tf.keras.layers.Input(shape=(128, 32, 1))

    # Text branch: 50-dimensional embedding followed by two stacked LSTMs;
    # the second LSTM returns only its final output.
    x_text = Embedding(total_chars, 50)(input_text)
    x_text = LSTM(128, return_sequences=True)(x_text)
    x_text = LSTM(128)(x_text)

    # Image branch: two Conv2D + MaxPooling2D stages, flattened into a
    # 128-unit dense layer.
    x_image = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(input_image)
    x_image = tf.keras.layers.MaxPooling2D((2, 2))(x_image)
    x_image = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(x_image)
    x_image = tf.keras.layers.MaxPooling2D((2, 2))(x_image)
    x_image = tf.keras.layers.Flatten()(x_image)
    x_image = tf.keras.layers.Dense(128, activation='relu')(x_image)

    # Merge both branches and predict a distribution over the token indices.
    x = tf.keras.layers.Concatenate()([x_text, x_image])
    output = tf.keras.layers.Dense(total_chars, activation='softmax')(x)

    model = tf.keras.models.Model(inputs=[input_text, input_image], outputs=output)
    # Sparse loss: the targets ``y`` are integer token ids, not one-hot vectors.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')

    model.summary()

In [17]:
# Train only while no saved model exists on disk; a fraction of 0.2 of the
# samples is held out for validation via ``validation_split``.
if not os.path.exists("handwritten_text_generator.h5"):
    model.fit(
        x=[X_text, X_image],
        y=y,
        batch_size=64,
        epochs=20,
        validation_split=0.2,
    )

In [18]:
# Write the trained model to disk once; later runs find the file and skip
# every cell guarded on its absence.
model_file = "handwritten_text_generator.h5"
if not os.path.exists(model_file):
    model.save(model_file)

In [19]:
# Reload the trained model from disk so inference does not depend on the
# training cells having run in this kernel.
model = load_model('handwritten_text_generator.h5')

# SECURITY: pickle.load can execute arbitrary code from the file. Only load a
# tokenizer.pickle that this notebook wrote itself, never one from an
# untrusted source.
with open('tokenizer.pickle', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

def generate_text(model, tokenizer, seed_text, num_chars, image, step=5):
    """Extend ``seed_text`` one character at a time using the model's predictions.

    Args:
        model: object whose ``predict([tokens, images], verbose=0)`` returns
            one score per token index for the single sample passed in.
        tokenizer: object providing ``texts_to_sequences`` and ``index_word``.
        seed_text: starting text; it is also the prefix of the returned string.
        num_chars: maximum number of characters to append.
        image: a single image array without batch axis; the same image is fed
            to the model at every step.
        step: ``maxlen`` handed to ``pad_sequences`` (shorter token sequences
            are padded at the front). Should match the window length the
            model was trained with.

    Returns:
        ``seed_text`` followed by the generated characters. Generation stops
        early as soon as the predicted index has no entry in
        ``tokenizer.index_word``.
    """
    # The image is identical for every step, so add the batch axis only once
    # instead of on each loop iteration.
    image_batch = np.expand_dims(image, axis=0)

    result = seed_text
    for _ in range(num_chars):
        # Re-tokenize the text generated so far and cut/pad it to ``step`` ids.
        sequence = tokenizer.texts_to_sequences([result])[-1]
        sequence = pad_sequences([sequence], maxlen=step, padding='pre')
        predicted = model.predict([sequence, image_batch], verbose=0)
        # Plain int so the dictionary lookup does not depend on NumPy scalars.
        predicted_char_index = int(np.argmax(predicted))
        predicted_char = tokenizer.index_word.get(predicted_char_index, '')
        if not predicted_char:
            break
        result += predicted_char
    return result

# Demo: let the model continue a seed string for one validation image.
seed_text = "example"
num_chars = 100
# Build the path with os.path.join: backslash separators only resolve on
# Windows, while this form yields the same path there and also works on
# Linux/macOS.
image_path = os.path.join('validation_v2', 'validation', 'VALIDATION_0022.jpg')
image = preprocess_image(image_path)
generated_text = generate_text(model, tokenizer, seed_text, num_chars, image)
print(generated_text)


examplet
