In [4]:
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization,GRU,Reshape,Bidirectional
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.models import load_model
from PIL import Image
import numpy as np
import csv
from captcha_gen import generate


# Alphabet of the captcha: digits, upper-case letters without I and O, and '*'.
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ*"


def toonehot(text):
    """Turn every character of ``text`` into a 35-element one-hot list.

    The hot position is the character's index in ``LETTERSTR``.  A character
    that is not in ``LETTERSTR`` makes ``str.find`` return -1, so its hot
    position is the last slot (the one that also represents ``'*'``).

    Args:
        text: String (or any iterable of single characters) to encode.

    Returns:
        A list with one 35-element list of 0/1 integers per character.
    """
    def encode(char):
        # Fresh vector per character so the rows never share storage.
        vector = [0] * 35
        vector[LETTERSTR.find(char)] = 1
        return vector

    return [encode(char) for char in text]

# NN model 模型

In [5]:
#Create new NN Model
def build_new_model():
    """Build, compile and return a fresh captcha-recognition network.

    Layer sequence:
      * input of shape (60, 200, 3);
      * four convolutional stages with 32, 64, 128 and 256 filters, each made of
        Conv2D(padding='same') -> Conv2D -> BatchNormalization -> MaxPooling2D,
        followed by Dropout(0.1) for the first three stages only;
      * Flatten -> Reshape((2560, 1)) -> Dense(256) -> Dropout(0.1)
        -> Bidirectional(GRU(84));
      * six parallel 35-way softmax heads named ``digit1`` .. ``digit6``.

    The model is compiled with categorical cross-entropy, the Adam optimizer
    and the accuracy metric, and its summary is printed.

    Returns:
        The compiled ``Model``.  (Previously nothing was returned, so callers
        had no way to obtain the network that was just built.)
    """
    print("Creating CNN model...")
    in_type = Input((60, 200, 3))
    out = in_type

    # (filters, dropout afterwards?) for each convolutional stage.  The layers
    # are created in the same order as before; the last stage has no dropout.
    for filters, use_dropout in ((32, True), (64, True), (128, True), (256, False)):
        out = Conv2D(filters=filters, kernel_size=(3, 3), padding='same', activation='relu')(out)
        out = Conv2D(filters=filters, kernel_size=(3, 3), activation='relu')(out)
        out = BatchNormalization()(out)
        out = MaxPooling2D(pool_size=(2, 2))(out)
        if use_dropout:
            out = Dropout(0.1)(out)

    out = Flatten()(out)
    # The flattened feature vector must hold exactly 2560 values for this
    # reshape; it is presented to the recurrent layer as 2560 steps.
    out = Reshape((2560, 1))(out)
    out = Dense(256, activation='relu')(out)
    out = Dropout(0.1)(out)
    out = Bidirectional(GRU(84))(out)

    # One softmax classifier per character position of the captcha.
    heads = [
        Dense(35, name='digit%d' % position, activation='softmax')(out)
        for position in range(1, 7)
    ]

    model = Model(inputs=in_type, outputs=heads)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model

# 1.讀取檔案

In [6]:
# Maximum number of CSV rows (images + labels) used by laod_traindata().
READING_SIZE=5000


def _load_captcha_set(csv_path, image_dir, limit=None):
    """Read one captcha data set from disk.

    Args:
        csv_path: CSV file; column 0 is the image file stem and column 1 is the
            captcha text.
        image_dir: Directory prefix (including the trailing slash) that holds
            the ``<stem>.jpg`` files.
        limit: When not None, only the first ``limit`` rows of the CSV are used.

    Returns:
        ``(data, labels)``.  ``data`` is the stack of all images, each divided
        by 255.0.  ``labels`` is a list of six arrays, one per character
        position, whose rows are the one-hot vectors produced by ``toonehot``.

    Raises:
        IndexError: if a captcha text has fewer than six characters.
        ValueError: from ``np.stack`` if no rows were read.
    """
    # Parse the CSV once and close it deterministically; newline='' is what
    # the csv module expects.  Blank lines yield empty rows and are skipped.
    with open(csv_path, 'r', encoding='utf8', newline='') as csv_file:
        rows = [row for row in csv.reader(csv_file) if row]
    if limit is not None:
        # Truncate before decoding images / encoding labels to avoid wasted work.
        rows = rows[:limit]

    images = []
    for row in rows:
        # The context manager releases the image's file handle right away.
        with Image.open(image_dir + str(row[0]) + ".jpg") as picture:
            images.append(np.array(picture) / 255.0)
    data = np.stack(images)

    # Regroup the labels from "per sample" to "per character position",
    # matching the six outputs of the model.
    per_position = [[] for _ in range(6)]
    for row in rows:
        encoded = toonehot(row[1])
        for index in range(6):
            per_position[index].append(encoded[index])
    labels = [np.asarray(position) for position in per_position]
    return data, labels


def laod_traindata():
    """Load at most ``READING_SIZE`` training samples.

    Returns:
        ``(train_data, train_label)`` as described in ``_load_captcha_set``.
    """
    print("Reading training data...")
    return _load_captcha_set(
        './data/56_imitate_train_set/captcha_train.csv',
        "./data/56_imitate_train_set/",
        limit=READING_SIZE,
    )


def laod_validata():
    """Load the complete validation set.

    Returns:
        ``(vali_data, vali_label)`` as described in ``_load_captcha_set``.
    """
    print("Reading validation data...")
    return _load_captcha_set(
        './data/56_imitate_vali_set/captcha_vali.csv',
        "./data/56_imitate_vali_set/",
    )


# 2.1 Training model (逐次訓練都記錄model)

In [7]:
# Resume from the saved model when it can be loaded; otherwise start from a
# freshly built network.  The same path is used for loading and checkpointing.
filepath="./data/model/imitate_56_model.h5"
try:
    print("loading model")
    model=load_model(filepath)
except Exception as load_error:
    # Not a bare ``except``: KeyboardInterrupt / SystemExit must still stop the cell.
    print("building new model")
    print("could not load saved model:", load_error)
    model = build_new_model()
    if model is None:
        # Without a model object the fit() call below cannot work; fail here
        # with a clear message instead of a NameError/AttributeError later.
        raise RuntimeError("build_new_model() did not return a model")

train_data,train_label = laod_traindata()
vali_data,vali_label = laod_validata()

# Keep the weights with the best validation accuracy on the third character,
# stop early when that metric stalls, and log to TensorBoard.
checkpoint = ModelCheckpoint(filepath, monitor='val_digit3_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit3_acc', patience=3, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir = "./logs", histogram_freq = 1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=150, epochs=3, verbose=1, validation_data=(vali_data, vali_label), callbacks=callbacks_list)

loading model
Reading training data...
Reading validation data...
Train on 5000 samples, validate on 5000 samples
Epoch 1/3

Epoch 00001: val_digit3_acc improved from -inf to 0.83820, saving model to ./data/model/imitate_56_model_1.h5
Epoch 2/3

Epoch 00002: val_digit3_acc improved from 0.83820 to 0.85720, saving model to ./data/model/imitate_56_model_1.h5
Epoch 3/3

Epoch 00003: val_digit3_acc improved from 0.85720 to 0.87060, saving model to ./data/model/imitate_56_model_1.h5


<keras.callbacks.History at 0x7f12e40a5390>

# 2.2 Training model (每訓練10輪記錄一次model,每訓練完一輪更新training data)

In [None]:
# Resume from the saved model when it can be loaded; otherwise start from a
# freshly built network.  The same path is used for loading and checkpointing.
filepath="./data/model/imitate_56_model.h5"
try:
    print("loading model")
    model=load_model(filepath)
except Exception as load_error:
    # Not a bare ``except``: KeyboardInterrupt / SystemExit must still stop the cell.
    print("building new model")
    print("could not load saved model:", load_error)
    model = build_new_model()
    if model is None:
        # Without a model object the fit() calls below cannot work; fail here
        # with a clear message instead of a NameError/AttributeError later.
        raise RuntimeError("build_new_model() did not return a model")


for now_times in range(100):
    print("#############################")
    print("######    Round:",now_times,"    ######")
    print("#############################")
    print("loading train data")
    train_data,train_label = laod_traindata()

    if now_times%10!=0 :
        # Ordinary round: train without validation and without saving the model.
        model.fit(train_data, train_label, batch_size=280, epochs=15, verbose=2)
        # Presumably replaces the training set on disk with 5000 new captchas
        # for the next round -- confirm against captcha_gen.generate.
        # NOTE(review): this only runs on non-checkpoint rounds, so a checkpoint
        # round and the round after it read the same files -- confirm intended.
        print("Creating data")
        generate(5000, "./data/56_imitate_train_set/",  ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="train")
    else:
        # Every 10th round (including round 0): short validated run that
        # checkpoints the model to ``filepath``.
        vali_data,vali_label = laod_validata()
        checkpoint = ModelCheckpoint(filepath, monitor='val_digit3_acc', verbose=1, save_best_only=True, mode='max')
        earlystop = EarlyStopping(monitor='val_digit3_acc', patience=3, verbose=1, mode='auto')
        tensorBoard = TensorBoard(log_dir = "./logs", histogram_freq = 1)
        callbacks_list = [checkpoint, earlystop, tensorBoard]
        model.fit(train_data, train_label, batch_size=150, epochs=3, verbose=1, validation_data=(vali_data, vali_label), callbacks=callbacks_list)