In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib tk

# Setup

In [2]:
# Label vocabulary: digits first, then lowercase, then uppercase letters.
# A character's position in CHARS_SET is its class index in the one-hot encoding.
number = list("0123456789")
alphabet = list(map(chr, range(ord("a"), ord("z") + 1)))
ALPHABET = [letter.upper() for letter in alphabet]
CHARS_SET = [*number, *alphabet, *ALPHABET]
CHARS_SET_LEN = len(CHARS_SET)

# Image height/width used when reshaping decoded records.
HEIGHT, WIDTH = 60, 160
# Presumably the number of characters in each label -- confirm against the data.
CHARS_NUM = 4

In [3]:
def text2vec(text):
    """One-hot encode a string against CHARS_SET.

    Returns a float array of shape (len(text), CHARS_SET_LEN) where row k has
    a single 1 at the CHARS_SET index of text[k]. A character missing from
    CHARS_SET raises ValueError (propagated from list.index).
    """
    onehot = np.zeros((len(text), CHARS_SET_LEN))
    positions = [CHARS_SET.index(ch) for ch in text]
    # Each `row` is a view into `onehot`, so writing to it fills the matrix.
    for row, col in zip(onehot, positions):
        row[col] = 1
    return onehot

def vec2text(vec):
    """Decode a sequence of score rows into a string.

    Every row of `vec` contributes the CHARS_SET entry at that row's argmax,
    so one-hot rows and rows of class scores are both accepted.
    """
    return "".join(CHARS_SET[np.argmax(row)] for row in vec)

In [4]:
def mklabel(label):
    """Convert a batch of byte-string labels into a one-hot tensor.

    Args:
        label: array-like of `bytes` exposing `.shape`, e.g. the `.numpy()`
            value of a batched string tensor. Each entry is decoded and
            encoded with text2vec, and is expected to be CHARS_NUM
            characters long.

    Returns:
        A tensor created with tf.constant, of shape
        (label.shape[0], CHARS_NUM, CHARS_SET_LEN).
    """
    # Use the shared CHARS_NUM constant instead of a literal 4 so the label
    # length is configured in a single place.
    labels = np.zeros([label.shape[0], CHARS_NUM, CHARS_SET_LEN])
    for i, raw in enumerate(label):
        labels[i] = text2vec(raw.decode())
    return tf.constant(labels)

In [5]:
# Schema of one serialized example: two scalar string features, the text
# label and the raw image bytes.
image_feature_description = dict(
    Label=tf.io.FixedLenFeature(shape=[], dtype=tf.string),
    Image_raw=tf.io.FixedLenFeature(shape=[], dtype=tf.string),
)

def parse_tf_example(example):
    """Decode one serialized example into a (label, image) pair.

    The "Image_raw" bytes are read as uint8 and reshaped to
    (HEIGHT, WIDTH, 3); the "Label" string is passed through unchanged.
    Note the return order: label first, image second.
    """
    features = tf.io.parse_single_example(example, image_feature_description)
    pixels = tf.io.decode_raw(features["Image_raw"], tf.uint8)
    pixels = tf.reshape(pixels, (HEIGHT, WIDTH, 3))
    return features["Label"], pixels

# Check TFRecord

In [6]:
# Small shuffled batches of parsed (label, image) pairs for a quick look at the records.
raw_data = tf.data.TFRecordDataset("train.tfrecord")
dataset = raw_data.map(map_func=parse_tf_example)
data_ds = dataset.shuffle(1000).batch(batch_size=10)

In [7]:
# Pull a single batch to sanity-check the pipeline. parse_tf_example returns
# (label, image), so here x holds the raw labels and y holds the images.
for batch,(x,y) in enumerate(data_ds):
    labels = mklabel(x.numpy())
    print(y.shape)
    break  # only the first batch is needed; x and labels are inspected in the following cells

(10, 60, 160, 3)


In [8]:
# Shape of the one-hot labels built by mklabel for the batch above.
labels.shape

TensorShape([10, 4, 62])

In [9]:
# Raw byte-string labels of the same batch, before one-hot encoding.
x

<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'9bvf', b'P805', b'5xrp', b'yOYM', b'XsO4', b'dWqT', b'7iuW',
       b'0jnA', b'QtoS', b'rIyQ'], dtype=object)>

In [10]:
# Round-trip check: decode the one-hot labels back to text so they can be
# compared with the raw labels displayed in the previous cell.
lst = [vec2text(onehot) for onehot in labels]
np.asarray(lst)

array(['9bvf', 'P805', '5xrp', 'yOYM', 'XsO4', 'dWqT', '7iuW', '0jnA',
       'QtoS', 'rIyQ'], dtype='<U4')

# Make Model

In [11]:
# Model construction, kept commented out for reference: three Conv2D/PReLU/
# MaxPool stages followed by a dense layer reshaped to one softmax per
# character position. Presumably run once to create the saved "model" directory.
# NOTE(review): the build call below declares a 1-channel input, while
# parse_tf_example reshapes images to 3 channels -- confirm which input shape
# the saved model was actually built with.
# model = tf.keras.Sequential()

# model.add(tf.keras.layers.Conv2D(32, (3, 3)))
# model.add(tf.keras.layers.PReLU())
# model.add(tf.keras.layers.MaxPool2D((2, 2), strides=2))

# model.add(tf.keras.layers.Conv2D(64, (5, 5)))
# model.add(tf.keras.layers.PReLU())
# model.add(tf.keras.layers.MaxPool2D((2, 2), strides=2))

# model.add(tf.keras.layers.Conv2D(128, (5, 5)))
# model.add(tf.keras.layers.PReLU())
# model.add(tf.keras.layers.MaxPool2D((2, 2), strides=2))

# model.add(tf.keras.layers.Flatten())
# model.add(tf.keras.layers.Dense(4 * CHARS_SET_LEN))
# model.add(tf.keras.layers.Reshape([4, CHARS_SET_LEN]))
# model.add(tf.keras.layers.Softmax())

# model.build(input_shape=[None,60,160,1])
# model.compile(optimizer='Adam',
#                   metrics=['accuracy'],
#                   loss='categorical_crossentropy')

# model.save("model")

# Load Model

In [12]:
# Load the saved Keras model from the ./model path; it must already exist on disk.
model = tf.keras.models.load_model("./model")

# Train

In [13]:
# Rebuild the training pipeline, this time with 500 examples per batch.
raw_data = tf.data.TFRecordDataset("train.tfrecord")
dataset = raw_data.map(parse_tf_example)
data_ds = dataset.shuffle(buffer_size=1000).batch(500)

# NOTE(review): fit() is called once per 500-example batch with epochs=5, so
# each chunk is revisited five times before the next chunk is seen -- confirm
# this is intended rather than fitting over the whole dataset per epoch.
# Naming caveat: x holds the raw labels and y holds the images (the order
# returned by parse_tf_example).
for batch,(x,y) in enumerate(data_ds):
    label = mklabel(x.numpy())
    model.fit(y,label,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
# model.save("model")

# Test

In [15]:
# Per-character accuracy on the test records.
raw_data = tf.data.TFRecordDataset("test.tfrecord")
dataset = raw_data.map(parse_tf_example)
data_ds = dataset.shuffle(buffer_size=1000).batch(100)

lst = []           # per-batch accuracy, kept for inspection
total_correct = 0  # correctly predicted characters over all batches
total_chars = 0    # characters evaluated over all batches

for label, image in data_ds:
    pre = model.predict(image)
    pre_label = list(map(vec2text, pre))
    true_label = [raw.decode() for raw in label.numpy()]

    # Count position-wise character matches across the batch.
    p = sum(
        pred_ch == true_ch
        for pred_text, true_text in zip(pre_label, true_label)
        for pred_ch, true_ch in zip(pred_text, true_text)
    )
    n_chars = CHARS_NUM * len(pre_label)
    lst.append(p / n_chars)
    total_correct += p
    total_chars += n_chars

# Divide total matches by total characters instead of averaging the per-batch
# ratios: the last batch may hold fewer than 100 examples, and an unweighted
# mean of batch accuracies would over-weight it.
final_accuracy = total_correct / total_chars if total_chars else float("nan")
print("final:{}".format(final_accuracy))

final:0.775


In [16]:
# Predicted vs. true labels; both lists are overwritten on every loop
# iteration of the evaluation cell, so they hold the last evaluated batch.
np.asarray(pre_label),np.asarray(true_label)

(array(['BPTK', '2oce', 'h7A9', '6im2', 'ABFt', 'rntW', 'bLVo', 'zh73',
        '09yW', 'wWp7', 'kaTJ', '0HfZ', 'D8q2', 'jm9W', 'uXXQ', 'XqNY',
        'N0y7', 'jnXC', 'Z0Ki', 'EfBz', '1all', 'CpS6', 'lUNK', 'dY6f',
        '37uF', 'L1IY', '8AIP', 'Yhvd', 'Wowd', '11zL', 'K64t', 'DJ4Y',
        'OXLp', 'fsU9', 'Wh24', 'EfDW', 'Ssuv', 'Lcs3', 'pA7I', 'Dq7P',
        'v8MN', 'CBfx', 'tXVu', 'ITLw', 'y4t1', 'Gb6y', 'BVO5', 'Gk4V',
        '9zhP', '4mvl', 'fDs0', '6ZU3', 'cmFe', 'DIfg', 'XMMS', 'vgJ9',
        '5FBj', 'M4qs', 'Qnh8', '0CmN', 'H4pp', 'Y5cL', '9F3S', 'wlNb',
        '02ez', 'Vxb0', 'WCRR', '6eez', 'gka3', 'Jpps', 'G3CK', 'kUx0',
        '5lXx', 'r2GV', '9Sjw', 'v6oS', 'JSS3', 'ZTVb', '699F', 'nwwR',
        'JyiQ', 'mYn1', 'ATQ0', '5PJl', 'G33T', 'SkkO', 'mI0e', 'ZtCk',
        'Lovd', 'NIZa', 'Cdds', 'UnIJ', 'LnIB', '4Eus', '8rbf', '9bnh',
        'qkDW', 'EJyT', 'LzzZ', 'haZs'], dtype='<U4'),
 array(['BPTK', '3Cae', 'h7A9', '6im2', 'ABkt', 'nVtW', 'bLvo', 'zh73',
        '

In [17]:
# Plot the leading samples of the last evaluated batch on a grid: the title
# carries the true label, the y-label carries the prediction (blue when the
# whole string matches, red otherwise). Only channel 0 of each image is drawn.
n_rows, n_cols = 10, 5
fig, ax = plt.subplots(n_rows, n_cols)
for i in range(n_rows):
    for j in range(n_cols):
        # Row-major position in the batch; the row stride has to equal the
        # column count so that consecutive samples are shown without gaps.
        tar = i * n_cols + j
        if tar >= len(pre_label):
            # The batch is smaller than the grid: leave this cell empty.
            ax[i][j].axis("off")
            continue
        ax[i][j].imshow(image[tar, ..., 0].numpy())
        color = "blue" if pre_label[tar] == true_label[tar] else "red"
        ax[i][j].set_title(f"True: {true_label[tar]}")
        ax[i][j].set_ylabel(pre_label[tar], color=color)