In [68]:
import tensorflow as tf
import PIL
import random
import os
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
from tensorflow import keras
import json
import matplotlib.pyplot as plt 
import matplotlib.pyplot as plt

In [69]:
def _load_split(directory, filenames):
    """Load the given image files as scaled grayscale arrays plus their labels.

    Returns a pair ``(images, labels)`` of equal-length lists. Each label is
    the file name with its extension removed.
    """
    images = []
    labels = []
    for filename in filenames:
        pil = tf.keras.utils.load_img(os.path.join(directory, filename), color_mode='grayscale')
        # Dividing by 255 maps pixel values to [0, 1] (assumes 8-bit images).
        images.append(tf.keras.utils.img_to_array(pil) / 255.)
        # splitext handles extensions of any length, unlike a fixed [:-4] slice.
        labels.append(os.path.splitext(filename)[0])
    return images, labels


def makeDataSet(directory, tokenizer, train_ratio):
    """Load every image in ``directory`` and split it into train/validation sets.

    The directory listing is shuffled with the global ``random`` state, so the
    split differs between runs unless the caller seeds ``random`` first.

    Args:
        directory: Folder whose entries are all image files named
            ``<label>.<extension>``.
        tokenizer: Not used by this function; kept so existing calls still work.
        train_ratio: Fraction of the files that goes into the training split;
            the remainder becomes the validation split.

    Returns:
        ``(x_train, y_train, x_validation, y_validation)`` as NumPy arrays.
        The ``x`` arrays hold the scaled images and the ``y`` arrays hold the
        label strings. Stacking into a regular array presumably requires all
        images to share one size -- TODO confirm for the data set in use.
    """
    files = os.listdir(directory)
    random.shuffle(files)
    # Computed once so both splits are guaranteed to meet at the same index.
    split_index = int(train_ratio * len(files))
    x_train, y_train = _load_split(directory, files[:split_index])
    x_validation, y_validation = _load_split(directory, files[split_index:])
    return np.array(x_train), np.array(y_train), np.array(x_validation), np.array(y_validation)

In [74]:

# Gather the CAPTCHA image files in a random order (global `random` state).
directory = "CAPTCHA"
files = os.listdir(directory)
random.shuffle(files)

# A label is the file name without its last four characters
# (i.e. a dot plus a three-letter extension).
labels = [file_name[:-4] for file_name in files]

# Character-level tokenizer fitted on every label; "NA" is the token used
# for characters that were not seen during fitting.
tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token="NA", char_level=True)
tokenizer.fit_on_texts(labels)

In [75]:
split_ratio = .9
batch_size = 32

# Index of the first validation file; computed once so the training and
# validation slices are guaranteed to be complementary.
split_index = int(len(files) * split_ratio)

# xt / xv: image paths, yt / yv: labels encoded as flat lists of token ids.
# texts_to_sequences expects a *list* of texts. Given a bare string it treats
# every character as its own text and returns one single-element list per
# character, so the label is wrapped in a list and the one result unpacked.
# The label is the file name minus its last four characters, matching how the
# tokenizer was fitted.
xt = [os.path.join(directory, file_name) for file_name in files[:split_index]]
yt = [tokenizer.texts_to_sequences([file_name[:-4]])[0] for file_name in files[:split_index]]
xv = [os.path.join(directory, file_name) for file_name in files[split_index:]]
yv = [tokenizer.texts_to_sequences([file_name[:-4]])[0] for file_name in files[split_index:]]

def encode_sample(img_path, label):
    """Turn an image path and its label into the input dict the model consumes.

    The file is read, decoded as a single-channel PNG, converted to float32,
    has its first two axes swapped, and is resized to 150 x 40.

    Args:
        img_path: Path of the PNG file to load.
        label: Encoded label, passed through unchanged.

    Returns:
        A dict with the processed image under "image" and the label under
        "label".
    """
    raw_bytes = tf.io.read_file(img_path)
    pixels = tf.io.decode_png(raw_bytes, channels=1)
    as_float = tf.image.convert_image_dtype(pixels, tf.float32)
    # Swap the first two axes so the first dimension of the resized image
    # (150) runs along the original second axis.
    swapped = tf.transpose(as_float, perm=[1, 0, 2])
    resized = tf.image.resize(swapped, [150, 40])
    return {"image": resized, "label": label}

# Training pipeline: (path, label) pairs -> decoded samples -> batches,
# with prefetching so input preparation can overlap with training.
dataset_train = (
    tf.data.Dataset.from_tensor_slices((xt, yt))
    .map(encode_sample, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Validation pipeline: identical processing applied to the held-out files.
dataset_validation = (
    tf.data.Dataset.from_tensor_slices((xv, yv))
    .map(encode_sample, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

class CTCLayer(tf.keras.layers.Layer):
    """Pass-through layer that attaches the CTC loss to the model.

    The layer returns ``y_pred`` unchanged; its only effect is registering the
    loss computed by ``tf.keras.backend.ctc_batch_cost`` via ``add_loss``.
    """

    def __init__(self, name = None, **kwargs):
        # **kwargs forwards the standard layer options (e.g. `trainable`,
        # `dtype`) that Keras passes when re-creating a layer from its config;
        # without it, reloading a saved model fails in this constructor.
        super().__init__(name = name, **kwargs)
        self.loss_fn = tf.keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        """Register the CTC loss for the batch and return ``y_pred`` unchanged.

        Args:
            y_true: Batch of encoded labels; axis 0 is the batch, axis 1 the
                label positions.
            y_pred: Batch of per-time-step class probabilities; axis 1 is the
                time axis.
        """
        batch_len = tf.cast(tf.shape(y_true)[0], dtype = "int64")
        time_steps = tf.cast(tf.shape(y_pred)[1], dtype = "int64")
        label_width = tf.cast(tf.shape(y_true)[1], dtype = "int64")

        # Every sample is given the full time axis and the full label width as
        # its length, i.e. all labels in a batch are treated as equally long.
        per_sample_ones = tf.ones(shape = (batch_len, 1), dtype = "int64")
        input_length = time_steps * per_sample_ones
        label_length = label_width * per_sample_ones

        loss = self.loss_fn(y_true, y_pred,
                            input_length, label_length)
        self.add_loss(loss)
        return y_pred
###

In [76]:

# Model inputs: the pre-processed image and the encoded label (the label is
# only needed to compute the CTC loss during training).
input_img = tf.keras.layers.Input(shape = (150, 40, 1),
                               name = "image", dtype = "float32")
# NOTE: this rebinds `labels`, which earlier held the list of label strings.
labels = tf.keras.layers.Input(name = "label",
               shape = (None,), dtype = "float32")

# Convolutional feature extractor. Spatial size per stage:
# 150x40 -> conv 148x38 -> pool 74x19 -> conv 72x17 -> pool 36x8 (64 channels).
x = tf.keras.layers.Conv2D(32, (3,3), activation ='relu')(input_img)
x = tf.keras.layers.MaxPooling2D(2,2)(x)
x = tf.keras.layers.Conv2D(64, (3,3), activation = 'relu')(x)
x = tf.keras.layers.MaxPooling2D(2,2)(x)

# Flatten the last two axes so the first axis (36 steps) becomes the sequence
# axis for the recurrent layers: (36, 8, 64) -> (36, 512).
x = tf.keras.layers.Reshape(target_shape = ((36), 8*64))(x)
x = tf.keras.layers.Dense(512, activation ='relu')(x)
x = tf.keras.layers.Dropout(.5)(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences = True))(x)
x = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences = True))(x)
x = tf.keras.layers.Dense(512, activation ='relu')(x)

# Number of output classes. Tokenizer ids run from 1 to len(word_index)
# (0 is never assigned) and CTC reserves the *last* class for the blank, so
# len(word_index) + 2 classes are needed to keep the blank separate from every
# real token. `tokenizer.get_config()['word_index']` must not be used here:
# it is a JSON string, so its len() is a character count, not a vocabulary size.
num_classes = len(tokenizer.word_index) + 2
x = tf.keras.layers.Dense(num_classes, activation ='softmax', name = 'output')(x)

# The CTC layer adds the loss and passes the softmax output through.
output = CTCLayer(name = "ctc_loss")(labels, x)
model = tf.keras.models.Model(
        inputs=[input_img, labels], outputs=output
    )
# No `loss` argument: the loss is supplied by CTCLayer.add_loss.
model.compile(optimizer = 'adam')
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience =10)

In [None]:
# Train for at most 100 epochs, validating after each one; the early-stopping
# callback may end the run sooner.
training_options = {
    "validation_data": dataset_validation,
    "epochs": 100,
    "callbacks": [callback],
}
history = model.fit(dataset_train, **training_options)

Epoch 1/100
 79/563 [===>..........................] - ETA: 1:11 - loss: 31.2658

In [None]:

def decodeResult(code):
    """Decode a batch of per-time-step class scores by taking the best class.

    For each sample the highest-scoring class is picked at every time step and
    the resulting index sequence is converted to text with the global
    ``tokenizer``. This is a plain argmax: repeated classes are not merged and
    no blank class is removed.

    Args:
        code: Iterable of per-sample score arrays whose last axis is the
            class axis (e.g. an array of shape ``(batch, steps, classes)``).

    Returns:
        A list with one entry per sample; each entry is the one-element list
        returned by ``tokenizer.sequences_to_texts`` for that sample.
    """
    results = []
    for label in code:
        # Reduce over the class axis of *this* sample (not over the batch).
        best_indices = np.atleast_1d(np.argmax(label, axis=-1)).tolist()
        results.append(tokenizer.sequences_to_texts([best_indices]))
    return results

In [None]:
# Inference model: image input -> softmax "output" layer, without the label
# input and the CTC loss layer.
predModel = tf.keras.models.Model(
    inputs=model.input[0],
    outputs=model.get_layer(name='output').output,
)


In [None]:
# Model used for prediction: maps the image input directly to the softmax
# layer named "output", skipping the label input and the CTC loss layer.
softmax_layer = model.get_layer(name="output")
prediction_model = keras.models.Model(inputs=model.input[0], outputs=softmax_layer.output)
  

In [None]:
def ctc_decode(y_pred):
    """Greedy CTC decoding of a batch of softmax outputs.

    Args:
        y_pred: Predictions with axes (sample, time step, class).

    Returns:
        A dense integer tensor with one row per sample and one column per
        time step; positions after the decoded sequence are filled with -1.
    """
    original_shape = tf.shape(y_pred)

    # Every sample is decoded over the full time axis.
    num_samples, num_steps = y_pred.shape[0], y_pred.shape[1]
    sequence_lengths = tf.cast(np.ones(num_samples) * num_steps, tf.int32)

    # The decoder expects time-major log-probabilities; epsilon avoids log(0).
    time_major = tf.transpose(y_pred, perm=[1, 0, 2])
    log_probs = tf.math.log(time_major + keras.backend.epsilon())

    decoded, log_prob = tf.nn.ctc_greedy_decoder(
        inputs=log_probs,
        sequence_length=sequence_lengths,
    )

    # Re-wrap the single decoded path with a (samples, steps) shape so it can
    # be densified into a rectangular tensor.
    best_path = decoded[0]
    sparse_path = tf.SparseTensor(
        best_path.indices,
        best_path.values,
        (original_shape[0], original_shape[1]),
    )
    return tf.sparse.to_dense(sp_input=sparse_path, default_value=-1)


def decode_batch_predictions(pred, max_length=5):
    """Convert a batch of softmax outputs into predicted label strings.

    Args:
        pred: Model predictions, passed to ``ctc_decode``.
        max_length: Maximum number of decoded positions kept per sample.
            Defaults to 5, the length previously hard-coded here.

    Returns:
        A list with one decoded string per sample, produced by the global
        ``tokenizer``.
    """
    decoded = ctc_decode(pred)[:, :max_length]
    output_text = []
    for row in decoded.numpy().tolist():
        # ctc_decode fills unused positions with -1. That is not a tokenizer
        # id and would be rendered as the out-of-vocabulary token, so padding
        # is dropped before converting back to text.
        token_ids = [token_id for token_id in row if token_id >= 0]
        output_text.append(tokenizer.sequences_to_texts([token_ids])[0])
    return output_text


#  Check the predictions on one batch of validation samples.
for batch in dataset_validation.take(1):
    batch_images = batch["image"]
    batch_labels = batch["label"]
    preds = prediction_model.predict(batch_images)
    # Already decoded strings, one per image; they must not be run through
    # the tokenizer a second time.
    pred_texts = decode_batch_predictions(preds)

    for image_num, pred_text in enumerate(pred_texts):
        # Ground truth for comparison; flattening copes with labels stored
        # either as a flat sequence or as one-element rows.
        true_ids = batch_labels[image_num].numpy().reshape(-1).tolist()
        true_text = tokenizer.sequences_to_texts([true_ids])[0]

        # Images were transposed during pre-processing; swap the axes back
        # so they are displayed upright.
        upright = tf.transpose(batch_images[image_num], perm=[1, 0, 2])
        plt.imshow(tf.keras.utils.array_to_img(upright), cmap='gray')
        plt.title(f"predicted: {pred_text} | actual: {true_text}")
        plt.show()
        print(pred_text)
        
        