In [1]:
import os

import numpy as np
import pandas as pd
from PIL import Image

In [2]:
# Load the label table (columns FILENAME and IDENTITY).
# Rows without an IDENTITY are dropped here, before images and labels are derived
# from `df`, so both stay aligned and a missing label can never be stringified
# into the literal text "nan" during text preprocessing.
df = (
    pd.read_csv("written_name_train_v2.csv")
    .dropna(subset=["IDENTITY"])
    .reset_index(drop=True)
)

In [3]:
# Preview the first rows of the label table to check its columns.
df.head()

Unnamed: 0,FILENAME,IDENTITY
0,TRAIN_00001.jpg,BALTHAZAR
1,TRAIN_00002.jpg,SIMON
2,TRAIN_00003.jpg,BENES
3,TRAIN_00004.jpg,LA LOVE
4,TRAIN_00005.jpg,DAPHNE


In [4]:
# (rows, columns) of the label table.
df.shape

(29999, 2)

In [5]:
def load_images(image_paths, target_size=(128, 128), image_dir='train_v2/train/', dtype=np.float64):
    """Load images as grayscale arrays with pixel values scaled by 1/255.

    Parameters
    ----------
    image_paths : iterable of str
        File names, resolved relative to ``image_dir``.
    target_size : tuple of int
        ``(width, height)`` handed to ``PIL.Image.resize``.
    image_dir : str
        Directory that contains the image files.
    dtype : numpy dtype
        dtype of the returned array. The float64 default reproduces the
        previous behaviour; ``np.float32`` halves the memory footprint.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, height, width)``; an empty input yields
        shape ``(0, height, width)``.
    """
    paths = list(image_paths)  # materialise so generators are accepted and len() works
    width, height = target_size
    # Pre-allocate the result instead of collecting a list and copying it at the
    # end, which would briefly hold every image in memory twice.
    images = np.empty((len(paths), height, width), dtype=dtype)
    for i, path in enumerate(paths):
        # The context manager closes the underlying file handle deterministically.
        with Image.open(os.path.join(image_dir, path)) as raw:
            gray = raw.convert('L').resize(target_size)
        images[i] = np.asarray(gray) / 255.0
    return images

In [6]:
# Read every file listed in the FILENAME column into one array.
# With the default float64 output, the whole dataset is held in memory at once.
image_paths = df['FILENAME'].to_numpy()
images = load_images(image_paths)

In [12]:
def preprocess_text(texts, start_index=0):
    """Encode each text as a list of integer character indices.

    Parameters
    ----------
    texts : iterable
        Labels to encode. Non-string values are converted with ``str``;
        missing values (``None`` / NaN) are treated as the empty string.
    start_index : int
        Index assigned to the first character of the sorted vocabulary.
        The default 0 keeps the previous numbering. Pass 1 to keep index 0
        free, e.g. when sequences are later padded with zeros.

    Returns
    -------
    sequences : list of list of int
        One index list per input text, in input order.
    char_to_idx : dict
        Character -> index, built from the sorted set of characters seen.
    idx_to_char : dict
        Inverse mapping of ``char_to_idx``.
    """
    # A missing label must not be encoded as the characters of 'nan' / 'None':
    # that would add spurious symbols to the vocabulary and to the targets.
    texts = ['' if pd.isna(text) else str(text) for text in texts]
    unique_chars = sorted(set(''.join(texts)))
    char_to_idx = {char: idx for idx, char in enumerate(unique_chars, start=start_index)}
    idx_to_char = {idx: char for char, idx in char_to_idx.items()}
    sequences = [[char_to_idx[char] for char in text] for text in texts]
    return sequences, char_to_idx, idx_to_char

In [13]:
# Turn the IDENTITY labels into per-character index sequences and keep both
# lookup tables for encoding and decoding later on.
ids = df['IDENTITY'].to_numpy()
sequences, char_to_idx, idx_to_char = preprocess_text(ids)

In [10]:
# Inspect the raw label array taken from the IDENTITY column.
ids

array(['BALTHAZAR', 'SIMON', 'BENES', ..., 'LEPERS', 'LUCIE', 'MARIE'],
      dtype=object)

In [14]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [15]:
# Pad every encoded label at the end ('post') up to the length of the longest one,
# so all sequences share a single fixed length for the model input.
# NOTE(review): pad_sequences fills with 0 by default, while char_to_idx also
# assigns 0 to the first character of the vocabulary; confirm whether padding
# should get its own reserved index.
max_seq_length = max(len(seq) for seq in sequences)
sequences = pad_sequences(sequences, maxlen=max_seq_length, padding='post')

In [7]:
# Model building

In [16]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, TimeDistributed, Conv2DTranspose, Reshape, Flatten, RepeatVector
from tensorflow.keras.models import Model

In [19]:
# Shape of the loaded image array; the model dimensions below are read from it.
images.shape

(29999, 128, 128)

In [20]:
# Model hyperparameters.
vocab_size = len(char_to_idx)  # number of distinct characters found in the labels
embedding_dim = 256            # size of each character embedding vector
rnn_units = 512                # hidden size shared by the encoder and decoder LSTMs
# Axes 1 and 2 of `images` are the per-image height and width.
image_height, image_width = images.shape[1:3]

In [21]:
# Encoder: embed the padded character indices and run them through an LSTM.
# The LSTM returns the full output sequence plus its final hidden and cell states;
# in the cells visible here only `state_h` is consumed downstream.
input_text = Input(shape=(max_seq_length,))
embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim)(input_text)
encoder_lstm = LSTM(units=rnn_units, return_sequences=True, return_state=True)
encoder_output, state_h, state_c = encoder_lstm(embedding)

In [22]:
# Decoder: repeat the encoder's final hidden state once per output pixel, run the
# repeated sequence through a second LSTM, then fold the per-step outputs back
# into an (image_height, image_width, rnn_units) feature map.
# NOTE(review): the sequence length is image_height * image_width, so the LSTM
# unrolls over one step per pixel. This is likely very slow and memory hungry
# at the current image size; consider a Dense + Reshape + strided
# Conv2DTranspose decoder instead.
num_pixels = image_height * image_width
decoder_input = RepeatVector(num_pixels)(state_h)
decoder_lstm = LSTM(units=rnn_units, return_sequences=True, return_state=False)
decoder_output = decoder_lstm(decoder_input)
decoder_output = Reshape(target_shape=(image_height, image_width, rnn_units))(decoder_output)

In [25]:
# Project the decoder feature map down to a single channel; the sigmoid keeps
# every predicted pixel in (0, 1), and 'same' padding preserves height and width.
output_image = Conv2DTranspose(
    filters=1,
    kernel_size=(3, 3),
    padding='same',
    activation='sigmoid',
)(decoder_output)

In [26]:
# Assemble the text -> image model and train it with a per-pixel binary
# cross-entropy against the 1/255-scaled grayscale targets.
model = Model(inputs=input_text, outputs=output_image)
model.compile(loss='binary_crossentropy', optimizer='adam')
model.summary()

In [27]:
# Model training

In [28]:
# Give the targets a trailing channel axis so they match the model's single-channel
# output. Reshaping from the first three dimensions keeps this cell idempotent:
# re-running it leaves an already 4-D array unchanged, whereas repeated
# np.expand_dims calls would keep appending singleton axes.
images = images.reshape(images.shape[:3] + (1,))

In [None]:
# Train on (encoded label -> image) pairs, holding out 20% of the samples
# for validation.
model.fit(
    x=sequences,
    y=images,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)