In [None]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

import os
import time
import json
import glob

import matplotlib.pyplot as plt
from PIL import Image

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns

tf.__version__

In [None]:
import os
from PIL import Image

# Download and extract the zip file
!curl -LO https://github.com/MrTimonM/cap-dataset/releases/download/v1.10/train.zip
!unzip -qq train.zip

# Define the directory where the extracted images are located
extracted_dir = 'train'

# Loop through all the images in the extracted directory
for filename in os.listdir(extracted_dir):
    filepath = os.path.join(extracted_dir, filename)
    # Open the image
    with Image.open(filepath) as img:
        # Resize the image to the desired dimensions
        img_resized = img.resize((270, 130))
        # Save the resized image, overwriting the original
        img_resized.save(filepath)

print("All images resized successfully.")


In [3]:
# Directory that holds the training images.
DATA_DIR = 'train'

# Image geometry fed to the network: height, width, colour channels (RGB).
H = 130
W = 270
C = 3

# Characters the labels are drawn from.
label_choices = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Width of each one-hot vector. The data generator uses ord(character) as the
# class index, so this is 256 rather than len(label_choices).
N_LABELS = 256

# num_per_image: number of label positions the model predicts per image.
D = 4

In [4]:
def parse_filepath(filepath):
    """Extract the label from a path shaped like ``<dir>/<label>_<suffix>.<ext>``.

    The directory and extension are stripped, and the remaining file name must
    contain exactly one underscore; the part before it is the label.

    Args:
        filepath: Path to an image file.

    Returns:
        The label string, or ``None`` when the path cannot be parsed. A single
        value is returned in both cases so callers can test the result directly
        (previously the failure path returned a truthy ``(None, None)`` tuple).
    """
    try:
        filename = os.path.basename(filepath)
        stem, _ext = os.path.splitext(filename)
        # Unpacking raises ValueError unless there is exactly one underscore.
        label, _ = stem.split("_")
        return label
    except Exception as e:
        # Best effort: report the offending path and keep going.
        print('error to parse %s. %s' % (filepath, e))
        return None

In [None]:
import os
import glob2
import pandas as pd

# Function to parse file path and extract attributes
def parse_filepath(filepath):
    """Return ``(label, filepath)`` for an image path.

    The label is the file name (directory removed) up to, but not including,
    its first ``.``; a name without any dot is used whole. The path itself is
    passed through unchanged as the second element.
    """
    label, _, _ = os.path.basename(filepath).partition('.')
    return label, filepath

# Build a pandas DataFrame with one row per image: its label and its file path.
DATA_DIR = 'train'  # Update this with your data directory path

# Sorted so the row order is the same on every run; glob order depends on the filesystem.
files = sorted(glob2.glob(os.path.join(DATA_DIR, "*.png")))

if not files:
    raise ValueError("No image files found in the specified directory.")

attributes = [parse_filepath(file) for file in files]

# Every parsed record must have the same number of fields before it can be
# loaded into fixed columns. `attributes` is non-empty here because `files` is.
num_attributes = len(attributes[0])
if any(len(attr) != num_attributes for attr in attributes):
    raise ValueError("Attributes have inconsistent length.")

# Create DataFrame
df = pd.DataFrame(attributes, columns=['label', 'file'])

# Drop any rows with NaN values
df = df.dropna()

# Display the first rows of the DataFrame
print(df.head())


In [12]:
import numpy as np  # Add this line to import NumPy

# Fixed seed so the same rows land in train/valid/test on every kernel restart;
# without it the test set is re-drawn each run and results are not comparable.
SPLIT_SEED = 42
p = np.random.default_rng(SPLIT_SEED).permutation(len(df))

# First cut: 70% of the rows for train+valid, the remaining 30% for test.
train_up_to = int(len(df) * 0.7)
train_idx = p[:train_up_to]
test_idx = p[train_up_to:]

# split train_idx further into training and validation set (again 70/30,
# i.e. roughly 49% / 21% of all rows)
train_up_to = int(train_up_to * 0.7)
train_idx, valid_idx = train_idx[:train_up_to], train_idx[train_up_to:]

print('train count: %s, valid count: %s, test count: %s' % (
    len(train_idx), len(valid_idx), len(test_idx)))


train count: 970, valid count: 417, test count: 595


In [13]:
from tensorflow.keras.utils import to_categorical
from PIL import Image


def get_data_generator(df, indices, for_training, batch_size=16):
    """Yield ``(images, labels)`` batches built from the rows of ``df`` at ``indices``.

    Each image is read from the row's ``file`` column, converted to RGB and
    divided by 255. Each label string from the ``label`` column becomes an
    array with one one-hot row of width ``N_LABELS`` per character, using the
    character's ``ord()`` value as the class index.

    Args:
        df: DataFrame with ``file`` and ``label`` columns.
        indices: Positional row indices (looked up with ``df.iloc``).
        for_training: If true, cycle over ``indices`` indefinitely; samples left
            over at the end of a pass are carried into the next one. If false,
            make a single pass and also yield the final, possibly shorter, batch.
        batch_size: Number of samples per yielded batch.

    Yields:
        Tuple ``(images, labels)`` of NumPy arrays, stacked along a new leading
        batch axis. Stacking assumes all images share one size and all labels
        one length - TODO confirm for the dataset in use.
    """
    # Nothing to iterate over: stop instead of spinning forever in training mode.
    if len(indices) == 0:
        return

    images, labels = [], []
    while True:
        for idx in indices:
            row = df.iloc[idx]
            file, label = row['file'], row['label']
            # The context manager releases the file handle once the pixels are copied out.
            with Image.open(file) as im:
                # Force three channels so palette / greyscale / RGBA files still
                # match the 3-channel input the model is built with.
                pixels = np.array(im.convert('RGB')) / 255.0
            images.append(pixels)
            labels.append(np.array([to_categorical(ord(ch), N_LABELS) for ch in label]))
            if len(images) >= batch_size:
                yield np.array(images), np.array(labels)
                images, labels = [], []
        if not for_training:
            # Single pass: flush the short tail instead of silently dropping it.
            if images:
                yield np.array(images), np.array(labels)
            break

In [None]:
import tensorflow as tf  # Add this line to import TensorFlow
from tensorflow.keras import layers, models  # Add this line to import layers and models from TensorFlow.keras

# Convolutional feature extractor: three 3x3 Conv2D + 2x2 MaxPooling2D stages
# (32, 64, 64 filters) over an (H, W, C) input image.
input_layer = tf.keras.Input(shape=(H, W, C))
x = layers.Conv2D(32, 3, activation='relu')(input_layer)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)

x = layers.Flatten()(x)
x = layers.Dense(1024, activation='relu')(x)

# Classification head: emit D * N_LABELS raw scores, split them into D rows of
# N_LABELS, then apply softmax along the last axis so that each character
# position gets its own probability distribution. Applying softmax on the Dense
# layer before the reshape would normalise across all D * N_LABELS units at
# once, leaving each row summing to less than 1.
x = layers.Dense(D * N_LABELS)(x)
x = layers.Reshape((D, N_LABELS))(x)
x = layers.Softmax(axis=-1)(x)

model = models.Model(inputs=input_layer, outputs=x)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

batch_size = 64
valid_batch_size = 64

# Both generators are created with for_training=True, so they keep cycling
# over their indices instead of stopping after one pass.
train_gen = get_data_generator(df, train_idx, for_training=True, batch_size=batch_size)
valid_gen = get_data_generator(df, valid_idx, for_training=True, batch_size=valid_batch_size)

# Checkpoint callback is constructed here but is not passed to fit() below.
callbacks = [ModelCheckpoint("./model_checkpoint", monitor='val_loss')]

# Whole batches only: the remainder of each split is not counted in a step.
train_steps = len(train_idx) // batch_size
valid_steps = len(valid_idx) // valid_batch_size

history = model.fit(
    train_gen,
    steps_per_epoch=train_steps,
    epochs=3,
    validation_data=valid_gen,
    validation_steps=valid_steps,
)

In [None]:
import matplotlib.pyplot as plt
def plot_train_history(history):
    """Draw training/validation accuracy and loss curves side by side.

    Reads the ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss`` series
    from ``history.history``; accuracy goes on the left axes, loss on the right.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20, 5))

    # (axes, ((history key, legend label), ...)) for each panel, left to right.
    panels = (
        (axes[0], (('accuracy', 'Train accuracy'), ('val_accuracy', 'Val accuracy'))),
        (axes[1], (('loss', 'Training loss'), ('val_loss', 'Validation loss'))),
    )
    for ax, curves in panels:
        for key, legend_text in curves:
            ax.plot(history.history[key], label=legend_text)
        ax.set_xlabel('Epochs')
        ax.legend()

plot_train_history(history)
plt.show()

In [None]:
# Score the model on the test split: a single pass, counted in whole batches
# of 128, reported as a {metric name: value} mapping.
test_gen = get_data_generator(df, test_idx, for_training=False, batch_size=128)
metric_names = model.metrics_names
metric_values = model.evaluate(test_gen, steps=len(test_idx)//128)
dict(zip(metric_names, metric_values))

In [20]:
# Make sure the output folder exists, then write the trained model into it.
os.makedirs('./saved_model', exist_ok=True)
model.save('saved_model/alexnet-4char-with-upper-letters')

In [21]:
# Take the first batch from a fresh single-pass generator over the test split.
test_gen = get_data_generator(df, test_idx, for_training=False, batch_size=128)
x_test, y_test = next(test_gen)

# Run the model on that batch, then reduce both the one-hot targets and the
# model outputs to class indices along the last axis.
y_scores = model.predict_on_batch(x_test)
y_true = tf.math.argmax(y_test, axis=-1)
y_pred = tf.math.argmax(y_scores, axis=-1)

In [22]:
def format_y(y):
    """Turn an iterable of character codes into the string they spell.

    Each element is cast to ``int`` and mapped through ``chr``.
    """
    return ''.join(chr(int(code)) for code in y)

In [None]:
import math
# Show up to 30 images from the test batch, in shuffled order, each titled with
# the predicted string and labelled underneath with the true string.
# Clamp to the batch size so a short batch cannot be indexed out of range.
n = min(30, len(x_test))
random_indices = np.random.permutation(n)
n_cols = 5
# At least one row, so subplots() still gets a valid grid when n is 0.
n_rows = max(1, math.ceil(n / n_cols))
# squeeze=False keeps `axes` two-dimensional whatever the grid size.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 20), squeeze=False)
grid = list(axes.flat)
for ax, img_idx in zip(grid, random_indices):
    ax.imshow(x_test[img_idx])
    ax.set_title('pred: %s' % format_y(y_pred[img_idx]))
    ax.set_xlabel('true: %s' % format_y(y_true[img_idx]))
    ax.set_xticks([])
    ax.set_yticks([])

# Hide grid cells that have no image (only happens when n is not a multiple of n_cols).
for ax in grid[n:]:
    ax.axis('off')

plt.show()