In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import numpy as np
import PIL
from functools import reduce

In [None]:
# Side lengths (in pixels) that every captcha image is resized to before
# it is handed to the network; the input is square.
IMG_HEIGHT = IMG_WIDTH = 128
def path2img(path):
    """Open the image file at ``path`` with PIL and return the image object."""
    return PIL.Image.open(path)
def path2imgarray(path):
    """Load an image file as a normalized single-channel array.

    The image is resized to ``IMG_WIDTH`` x ``IMG_HEIGHT``, converted to
    grayscale when it has a channel axis, and scaled to the range [0, 1].

    Args:
        path: Location of the image file to read.

    Returns:
        A ``float32`` numpy array of shape ``(IMG_HEIGHT, IMG_WIDTH, 1)``.
    """
    import cv2
    # Import the submodule explicitly: a bare `import PIL` does not guarantee
    # that `PIL.Image` has been loaded.
    from PIL import Image

    # The context manager releases the file handle once the pixels are copied.
    with Image.open(path) as handle:
        pixels = np.array(handle.resize((IMG_WIDTH, IMG_HEIGHT)))

    # A third axis means the image still carries colour channels.
    if pixels.ndim == 3:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

    # Scale colour and grayscale sources alike, so the network always sees
    # inputs in the same [0, 1] range.
    pixels = pixels.astype(np.float32) / 255.0
    return pixels.reshape(IMG_HEIGHT, IMG_WIDTH, 1)



In [None]:
# 识别验证码模型
import functools
class CaptchaIdentifier(keras.Model):
    """CNN that reads a fixed-length captcha made of upper-case letters.

    The network maps a ``(IMG_HEIGHT, IMG_WIDTH, 1)`` image to a
    ``(labelLen, charSetLen)`` matrix in which every row is a softmax
    distribution over ``charSet`` for one character position.
    """

    # Number of characters in every captcha label.
    labelLen = 4

    # Alphabet the classifier chooses from: 'A' .. 'Z'.
    charSet = [chr(ord('A') + i) for i in range(26)]
    charSetLen = len(charSet)

    def text2vector(self, text):
        """One-hot encode a label string.

        Args:
            text: Label made of characters from ``charSet``, at most
                ``labelLen`` characters long.

        Returns:
            A ``float32`` array of shape ``(labelLen, charSetLen)``. Rows for
            positions beyond ``len(text)`` stay all-zero.

        Raises:
            ValueError: If a character is not in ``charSet``.
            IndexError: If ``text`` is longer than ``labelLen``.
        """
        vectors = np.zeros([self.labelLen, self.charSetLen], dtype=np.float32)
        for i, c in enumerate(text):
            vectors[i, self.charSet.index(c)] = 1.0
        return vectors

    def vector2text(self, vectors):
        """Decode per-position score rows back into a label string.

        Args:
            vectors: Iterable of score rows (lists or arrays), one row per
                character position, each with one score per ``charSet`` entry.

        Returns:
            The string built from the highest-scoring character of each row
            (the first one wins on ties).
        """
        # np.argmax accepts lists as well as arrays, and indexing charSet
        # keeps the decoder consistent with text2vector.
        return ''.join(self.charSet[int(np.argmax(vector))] for vector in vectors)

    def __init__(self):
        """Build the convolutional network that is wrapped by this model."""
        super().__init__()
        layers = keras.layers
        # Shape comments assume Keras' default 'valid' padding for Conv2D.
        self.model = tf.keras.Sequential([
            layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1)),  # (128, 128, 1)

            layers.Conv2D(16, (7, 7), activation='relu'),    # (122, 122, 16)
            layers.MaxPooling2D((2, 2)),                     # (61, 61, 16)
            layers.BatchNormalization(),

            layers.Conv2D(128, (3, 3), activation='relu'),   # (59, 59, 128)
            layers.MaxPooling2D((2, 2),),                    # (29, 29, 128)
            layers.BatchNormalization(),

            layers.Flatten(),                                # (107648)
            layers.Dense(1024, activation='relu', ),         # (1024)
            layers.Dropout(0.2),

            # One logit per (position, character) pair: 4 * 26 = 104.
            layers.Dense(CaptchaIdentifier.labelLen * CaptchaIdentifier.charSetLen),  # (104)
            layers.Reshape([CaptchaIdentifier.labelLen, CaptchaIdentifier.charSetLen]),  # 104 -> (4, 26)

            # Softmax over the last axis: one distribution per position.
            layers.Softmax()                                 # (4, 26) -> (4, 26)
        ])

    def call(self, x, training=None):
        """Run the wrapped network on a batch of images.

        Args:
            x: Batch of images shaped ``(batch, IMG_HEIGHT, IMG_WIDTH, 1)``.
            training: Forwarded to the inner model so that Dropout and
                BatchNormalization can switch between training and inference
                behaviour. ``None`` leaves the decision to Keras.

        Returns:
            Tensor of shape ``(batch, labelLen, charSetLen)``.
        """
        return self.model(x, training=training)


In [None]:
# Instantiate the captcha model.
identifier = CaptchaIdentifier()

# Loss shared by the training and validation steps.
calLoss = tf.keras.losses.CategoricalCrossentropy()

# Running means of the loss, one per data split.
trainLoss = tf.keras.metrics.Mean(name='trainLoss')
valLoss = tf.keras.metrics.Mean(name='valLoss')

# Running accuracy, one per data split.
trainAccuracy = tf.keras.metrics.CategoricalAccuracy(name='trainAccuracy')
valAccuracy = tf.keras.metrics.CategoricalAccuracy(name='valAccuracy')

# Create the weights for the expected input shape so the summary can be shown.
identifier.build(input_shape=(None, IMG_HEIGHT, IMG_WIDTH, 1))
identifier.summary()

# List every layer of the wrapped Sequential model with its output shape.
print('model')
for layer in identifier.model.layers:
    print(layer.name, layer.output_shape)

In [None]:
def getTrainStap(model):
    """Create a compiled single-batch training step for ``model``.

    A fresh Adam optimizer is created per call and captured by the returned
    function. The step relies on the module-level ``calLoss`` loss and the
    ``trainLoss`` / ``trainAccuracy`` metrics, which it updates in place.

    Args:
        model: Keras model to optimize.

    Returns:
        A ``tf.function`` taking ``(images, labels)`` that performs one
        gradient update and returns nothing.
    """
    optimizer = tf.keras.optimizers.Adam()

    @tf.function
    def trainStep(images, labels):
        with tf.GradientTape() as tape:
            # training=True is required in a custom loop; without it Dropout
            # and BatchNormalization run in inference mode while training.
            predictions = model(images, training=True)
            loss = calLoss(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        trainLoss(loss)
        trainAccuracy(labels, predictions)

    return trainStep

def getcValStep(model):
    """Create a compiled single-batch validation step for ``model``.

    The returned function runs the model in inference mode and feeds the
    module-level ``valLoss`` and ``valAccuracy`` metrics; it returns nothing.
    """
    @tf.function
    def valStep(images, labels):
        outputs = model(images, training=False)
        valLoss(calLoss(labels, outputs))
        valAccuracy(labels, outputs)

    return valStep

In [None]:
import os
def basenameWithoutExt(path):
    """Return the file name of ``path`` up to (not including) its first '.'."""
    fileName = os.path.basename(path)
    stem, _, _ = fileName.partition('.')
    return stem


# Data

In [None]:
BATCH_SIZE = 50

trainDir = './dataset1/train/'
testDir = './dataset1/test/'


def _buildCaptchaDataset(directory, batchSize=BATCH_SIZE, shuffleBuffer=1000):
    """Build a shuffled, batched dataset from the ``*.jpg`` files in a directory.

    The label of each image is taken from its file name, with any
    '_resample' marker removed, and one-hot encoded by the model.

    Args:
        directory: Directory prefix (with trailing slash) holding the images.
        batchSize: Number of samples per batch.
        shuffleBuffer: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches.
    """
    # Glob once so that image i and label i always come from the same file.
    paths = tf.io.gfile.glob(directory + '*.jpg')
    images = [path2imgarray(path) for path in paths]
    labels = [
        identifier.text2vector(basenameWithoutExt(path).replace('_resample', ''))
        for path in paths
    ]
    return (
        tf.data.Dataset.from_tensor_slices((images, labels))
        .shuffle(buffer_size=shuffleBuffer)
        .batch(batchSize)
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    )


dataset = _buildCaptchaDataset(trainDir)
valDataset = _buildCaptchaDataset(testDir)

# Number of training batches per epoch.
print(len(dataset))

In [None]:
# Pull a single batch from the training set to sanity-check the pipeline.
text = list(dataset.take(1))[0]
sampleImages, sampleLabels = text

# Label batch shape, followed by the decoded label of the first sample.
print(sampleLabels.shape)
print(identifier.vector2text(np.array(sampleLabels[0]).tolist()))

# Image batch shape, followed by a preview of the first image.
print(sampleImages.shape)
plt.imshow(sampleImages[0])

In [None]:
import time

# Build the compiled train / validation step functions for the model.
trainStep = getTrainStap(identifier)
valStep = getcValStep(identifier)
EPOCHS = 500

# Per-epoch history, stored as plain Python floats so it can be plotted and
# reduced with numpy without going through tensors.
trainLossHistory = []
valLossHistory = []
trainAccHistory = []
valAccHistory = []
epochsRange = []

start = time.time()
for epoch in range(EPOCHS):
    # Metrics accumulate across calls, so clear them at the start of each epoch.
    for metric in (trainLoss, valLoss, trainAccuracy, valAccuracy):
        metric.reset_states()

    for images, labels in dataset:
        trainStep(images, labels)

    # Validate on a single batch only; valDataset is shuffled, so a different
    # batch is drawn every epoch.
    for images, labels in valDataset.take(1):
        valStep(images, labels)

    epochTrainLoss = float(trainLoss.result())
    epochTrainAcc = float(trainAccuracy.result())
    epochValLoss = float(valLoss.result())
    epochValAcc = float(valAccuracy.result())

    # Estimate the remaining time from the mean epoch duration so far; only
    # the epochs that have not run yet are counted.
    finished = epoch + 1
    secondsPerEpoch = (time.time() - start) / finished
    minutesLeft = secondsPerEpoch * (EPOCHS - finished) / 60

    print(
        f'Epoch {finished}, '
        f'\nTrain      Loss: {epochTrainLoss}, '
        f'Train      Acc: {epochTrainAcc} '
        f'\nValidation Loss: {epochValLoss}, '
        f'Validation  Acc: {epochValAcc}'
        f' - {minutesLeft:.1f} minutes left,\n '
    )
    trainLossHistory.append(epochTrainLoss)
    valLossHistory.append(epochValLoss)
    trainAccHistory.append(epochTrainAcc)
    valAccHistory.append(epochValAcc)
    epochsRange.append(epoch)



In [None]:
# Best validation values reached during training and the number of epochs run.
minloss = np.min(valLossHistory)
maxacc = np.max(valAccHistory)
epochln = len(epochsRange)


def _plotHistoryFrom(offset, reference, trainSeries, valSeries, trainLabel, valLabel):
    """Plot a train/validation curve pair starting at epoch ``offset``.

    A dashed horizontal line, annotated with its value, marks ``reference``.
    """
    plt.figure(figsize=(5, 5), dpi=80)
    # Dashed reference line y = reference across the full epoch range.
    plt.plot([0, epochln], [reference, reference], linestyle='--', color='black')
    plt.text(0, reference, '%.5f' % reference, fontsize=10)
    plt.plot(epochsRange[offset:], trainSeries[offset:], label=trainLabel)
    plt.plot(epochsRange[offset:], valSeries[offset:], label=valLabel)
    # Legend
    plt.legend(loc='upper right')
    plt.show()


# Each curve pair is drawn four times, skipping the first 0, 5, 10 and 15 epochs.
for offset in (0, 5, 10, 15):
    _plotHistoryFrom(offset, minloss, trainLossHistory, valLossHistory,
                     'Training Loss', 'Validation Loss')

for offset in (0, 5, 10, 15):
    _plotHistoryFrom(offset, maxacc, trainAccHistory, valAccHistory,
                     'TrainingAcc', 'ValidationAcc')

In [None]:
# Take one validation batch and compare the first sample's label with the
# model's prediction for it.
text = list(valDataset.take(1))[0]
valImages, valLabels = text
firstImage = valImages[:1]  # keep the batch axis for the model call

print(valLabels[0])
print(identifier(firstImage))
print(identifier.vector2text(np.array(valLabels[0]).tolist()))
predictedRows = identifier(firstImage, training=False)[0]
print(identifier.vector2text(np.array(predictedRows).tolist()))

# Small random-augmentation pipeline, previewed on the first validation image.
change = tf.keras.Sequential([
    # keras.layers.RandomFlip("horizontal"),
    keras.layers.RandomRotation(0.01),
    keras.layers.RandomZoom(0.1),
])
plt.imshow(change(valImages[0]))
