# Import Package

In [None]:
import os
import tensorflow as tf
import keras.src.legacy.backend
from tensorflow.keras.layers import *
from CaptchaDataset import Dataset

BATCH_SIZE = 512
IMG_HEIGHT = 60
IMG_WIDTH = 160
CHANNEL = 3
CAPTCHA_LENGTH = 4
N_LABELS = 62
ds = Dataset(height=IMG_HEIGHT, width=IMG_WIDTH)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True) # distribute GPU memory dynamic.
        print(f"{len(gpus)} GPU(s) available. Memory growth enabled.")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU detected.")

# Create Dataset

In [None]:
# ds.build_dataset()

# Preprocessing Data

In [33]:
# Load captcha positions and labels
train_name, train_labels = ds.load_labels('datasets/train/labels.csv')
val_name, val_labels = ds.load_labels('datasets/val/labels.csv')

# Create train dataset
train_dataset = ds.captcha_image_generator('datasets/train', train_name, train_labels)
# batch()：Divide dataset according to batch size；prefetch()：Read data previously
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(buffer_size=tf.data.AUTOTUNE)

# Create validation dataset
val_dataset = ds.captcha_image_generator('datasets/val', val_name, val_labels)
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Model Define

In [34]:
def RNN_Model():
    model = keras.Sequential([
        # 第一组卷积
        Conv2D(32, (3, 3), activation='relu', padding='same'), # 每个卷积核生成一张特征图，所以输出通道数为卷积核数量
        BatchNormalization(),
        MaxPooling2D((2, 2)),  # 高度和宽度都减半：60->30, 160->80
        Dropout(0.25),
        
        # 第二组卷积
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),  # 30->15, 80->40
        Dropout(0.25),
        
        # 第三组卷积
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((2, 2)),  # 15->7, 40->20
        Dropout(0.25),
        
        # 第四组卷积 - 将宽度减少到接近4
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D((1, 5)),  # 高度不变，宽度：20->4
        Dropout(0.25),
        
        # 当前shape - (None, 7, 4, 128)
        # 将高度和通道展平为特征
        # 每个字符是一个时间步，4个时间步，每个时间步有896个特征
        Reshape((4, 7 * 128)),  # (时间步，特征数)
        
        # LSTM - 一种RNN架构，可以让模型像人一样从左往右看，并记住重要的信息进行判断
        # Bidirectional - 让模型也可以从右往左看
        Bidirectional(LSTM(128, return_sequences=True)),
        BatchNormalization(),
        Dropout(0.3),
        
        # 对每一个时间步输出的字符都运用Dense()
        TimeDistributed(Dense(64, activation='relu')),
        BatchNormalization(),
        Dropout(0.3),
        
        # 输出层 - 每个时间步对应一个字符
        TimeDistributed(Dense(N_LABELS, activation='softmax')),
    ])
    return model

In [39]:
model = RNN_Model()
model.build(input_shape=[None, IMG_HEIGHT, IMG_WIDTH, CHANNEL]) # 批次大小设置为None模型可以接收批次任意大小的数据
optimizer = keras.optimizers.Adam(
    learning_rate=0.0005,  # RNN通常需要稍大的学习率
    clipnorm=1.0
)


model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# Callbacks

In [43]:
root = os.getcwd()
ckpt_path = os.path.join(root, 'checkpoints')
if not os.path.exists(ckpt_path):
    os.mkdir(ckpt_path)
weight_file = os.path.join(ckpt_path, 'CNN_RNN_{epoch:04d}.keras')

ck_callbacks = keras.callbacks.ModelCheckpoint(
    filepath=weight_file,
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=False,
    verbose = 1
)
tb_callbacks = keras.callbacks.TensorBoard(
    log_dir='./logs',
    histogram_freq=1,
    update_freq=1
)
# 动态调整学习率
lr_scheduler = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,     # 降低因子
    patience=5,
    min_lr=1e-7,    # 最小学习率
    verbose=1
)
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True,
    min_delta=0.001
)

# Model Training

In [44]:
with tf.device('/GPU:0'):
    history = model.fit(x=train_dataset, batch_size=BATCH_SIZE, validation_data=val_dataset, epochs=400, callbacks=[ck_callbacks, tb_callbacks, lr_scheduler, early_stop])

Epoch 1/400
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 522ms/step - accuracy: 0.3694 - loss: 2.1619
Epoch 1: val_accuracy improved from -inf to 0.08341, saving model to /kaggle/working/checkpoints/CNN_RNN_0001.keras
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 571ms/step - accuracy: 0.3696 - loss: 2.1610 - val_accuracy: 0.0834 - val_loss: 5.7672 - learning_rate: 5.0000e-04
Epoch 2/400
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 516ms/step - accuracy: 0.4983 - loss: 1.6853
Epoch 2: val_accuracy improved from 0.08341 to 0.12061, saving model to /kaggle/working/checkpoints/CNN_RNN_0002.keras
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 566ms/step - accuracy: 0.4984 - loss: 1.6847 - val_accuracy: 0.1206 - val_loss: 5.7824 - learning_rate: 5.0000e-04
Epoch 3/400
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 518ms/step - accuracy: 0.5879 - loss: 1.3562
Epoch 3: val_accuracy improved f