In [1]:
import numpy as np
import pandas as pd
import string
import os

import tensorflow as tf
from tensorflow.keras import utils, datasets, layers, models, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, Callback

In [2]:
# List the physical GPUs TensorFlow can see and, if any exist, configure them
# before any other TensorFlow work is done in this notebook.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict TensorFlow to only use the first listed GPU (index 0)
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')

        # Currently, memory growth needs to be the same across GPUs,
        # so it is enabled on every physical GPU, not only the visible one
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized;
        # the error is only printed, so the notebook keeps running
        print(e)

2 Physical GPUs, 1 Logical GPUs


In [3]:
def create_simpleCnnRnn(image_shape, max_caption_len, vocab_size):
    """Build and compile a CNN encoder followed by a bidirectional GRU decoder.

    The convolutional stack turns one image into a 128-dimensional feature
    vector. That vector is repeated once per output position and fed through
    a bidirectional GRU, and a per-position softmax over ``vocab_size``
    classes is produced.

    Args:
        image_shape: Shape tuple passed as ``input_shape`` to the first
            ``Conv2D`` layer (this notebook calls it with ``(80, 150, 1)``).
        max_caption_len: Number of output positions (time steps) per image.
        vocab_size: Number of classes predicted at every position.

    Returns:
        A compiled ``Sequential`` model whose per-sample output has shape
        ``(max_caption_len, vocab_size)``, trained with categorical
        cross-entropy and reporting accuracy.
    """
    image_model = models.Sequential()

    # --- Convolution block 1: two 3x3 convolutions with 32 filters each ---
    # NOTE(review): this Conv2D already applies ReLU through `activation=`, and a
    # second ReLU follows the BatchNormalization below, unlike the next Conv2D
    # in the block. Kept as-is to preserve the architecture; confirm it is intended.
    image_model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=image_shape))
    image_model.add(layers.BatchNormalization())
    image_model.add(layers.Activation('relu'))
    image_model.add(layers.Conv2D(32, (3, 3)))
    image_model.add(layers.BatchNormalization())
    image_model.add(layers.Activation('relu'))
    image_model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    image_model.add(layers.Dropout(0.25))

    # --- Convolution block 2: same layout with 64 filters ---
    # NOTE(review): same double-ReLU pattern as in block 1.
    image_model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    image_model.add(layers.BatchNormalization())
    image_model.add(layers.Activation('relu'))
    image_model.add(layers.Conv2D(64, (3, 3)))
    image_model.add(layers.BatchNormalization())
    image_model.add(layers.Activation('relu'))
    image_model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    image_model.add(layers.Dropout(0.25))

    # --- Image feature vector ---
    image_model.add(layers.Flatten())
    # Note: Keras does automatic shape inference.
    image_model.add(layers.Dense(128))

    # --- Sequence decoder ---
    # Repeat the image feature vector once for every output position.
    image_model.add(layers.RepeatVector(max_caption_len))
    image_model.add(layers.Bidirectional(layers.GRU(units=128, return_sequences=True)))
    image_model.add(layers.TimeDistributed(layers.Dense(vocab_size)))
    image_model.add(layers.Activation('softmax'))

    # `learning_rate` replaces the deprecated `lr` alias.
    # NOTE(review): `decay` is an older-style argument; confirm the installed
    # Keras version still accepts it.
    sgd = optimizers.SGD(learning_rate=0.002, decay=1e-6, momentum=0.9, nesterov=True)
    image_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return image_model

In [4]:
# Slurp the train/validation spec file and break it into lines.
with open('./words_captcha/spec_train_val.txt', 'r') as file:
    spec_text = file.read()

# NOTE: from here on `datasets` refers to this list of lines and no longer to
# the `datasets` module imported from tensorflow.keras in the first cell.
datasets = spec_text.split('\n')

In [5]:
# Split every non-empty spec line on single spaces: field 0 is the file name,
# field 1 is the word shown in the image. Track the longest word seen.
X_fName, X_text = [], []
maxCharNum = 0

for data in datasets:
    if data != '':
        pair = data.split(' ')
        X_fName.append(pair[0])
        X_text.append(pair[1])
        maxCharNum = max(maxCharNum, len(pair[1]))

print('maxCharNum: ' + str(maxCharNum))
len(X_fName), len(X_text)

maxCharNum: 5


(120000, 120000)

In [6]:
# Start with two parallel columns (file names, words) ...
X_df = [X_fName, X_text]
print(len(X_df))

# ... and transpose them into one [filename, text] row per sample.
X_df = [list(row) for row in zip(*X_df)]
print(len(X_df))

2
120000


In [7]:
# Wrap the [filename, text] rows in a DataFrame, then show its shape and a preview.
df_ALL = pd.DataFrame(X_df, columns=['filename', 'text'])
display(df_ALL.shape)
df_ALL.head(3)

(120000, 2)

Unnamed: 0,filename,text
0,a0,thus
1,a1,www
2,a2,tied


In [8]:
# Map each lowercase letter to a length-27 one-hot vector. Slots 0-25 follow
# alphabetical order; the last slot is never set for a letter.
charList = list(string.ascii_lowercase)
dict_Char = dict()

for idx, char in enumerate(charList):
    alpha = np.array([int(slot == idx) for slot in range(27)])
    dict_Char[char] = alpha

In [9]:
# Encode every word as 5 one-hot vectors of length 27. Positions beyond the
# end of the word get a vector with only the last slot set.
X_text = df_ALL['text'].values.copy()
X_label = []
X_label_matr = []

for i, word in enumerate(X_text):
    label = []

    for j in range(5):
        if j >= len(word):
            # Padding position: one-hot on the final slot.
            alphaEnd = [0]*27
            alphaEnd[-1] = 1
            label.append(alphaEnd)
        else:
            label.append(dict_Char[word[j]])

    # The string form goes into the DataFrame; the raw vectors are kept for training.
    X_label.append(str(label))
    X_label_matr.append(label)

df_ALL['label'] = pd.DataFrame(X_label)

In [10]:
# Preview the first rows of df_ALL after the 'label' column was added.
df_ALL.head(3)

Unnamed: 0,filename,text,label
0,a0,thus,"[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1,a1,www,"[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
2,a2,tied,"[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


In [11]:
# Load every listed image as a single-channel tensor resized to 80x150.
_dir = './words_captcha/'
X_fName = df_ALL['filename'].values
X_grayScale = []

for fn in X_fName:
    # NOTE(review): files are read with a '.png' suffix but decoded with
    # decode_jpeg — confirm this decoder is the intended one for these files.
    img = tf.image.decode_jpeg(tf.io.read_file(_dir + fn + '.png'), channels=1)
    X_grayScale.append(tf.image.resize(img, (80, 150)))
    del img

In [12]:
# Folder that receives one weights file per epoch.
model_output = "./checkpoints/"

# Create the folder on first use.
if not os.path.isdir('checkpoints'):
    os.mkdir('checkpoints')

# Callback whose file name embeds the two-digit epoch number.
check_pointer = ModelCheckpoint(filepath=model_output + "weights.{epoch:02d}.hdf5")

In [13]:
# Inspect the shape of the first loaded image tensor.
X_grayScale[0].shape

TensorShape([80, 150, 1])

In [None]:
# Convert the list of per-image tensors into a single NumPy array
# (rebinds X_grayScale, so the original list is no longer referenced by this name).
X_grayScale = np.array(X_grayScale)

In [None]:
# Convert the nested list of per-position one-hot vectors into a NumPy array
# (rebinds X_label_matr).
X_label_matr = np.array(X_label_matr)

In [None]:
# Row boundaries of the splits: [0, train_end) is train, [train_end, valid_end)
# is validation, and everything from valid_end onward is test.
# NOTE(review): the spec file parsed earlier produced 120000 rows, so the slices
# starting at 120000 look empty — confirm where the test samples come from.
train_end, valid_end = 100000, 120000

X_train, y_train = X_grayScale[:train_end], X_label_matr[:train_end]
X_valid, y_valid = X_grayScale[train_end:valid_end], X_label_matr[train_end:valid_end]
X_test, y_truth = X_grayScale[valid_end:], X_label_matr[valid_end:]

In [None]:
# Sanity check: show the container type of the training images.
type(X_train)

In [None]:
# Build the model for 80x150 single-channel images, 5 output positions and
# 27 classes per position (26 letters plus the end/padding class).
image_model = create_simpleCnnRnn((80, 150, 1), 5, 27)

# `epochs` is the tf.keras name of the argument (the old `nb_epoch` spelling is
# not a parameter of `fit`). The checkpoint callback created earlier is passed
# in so that weights are actually saved after each epoch.
image_model.fit(X_train, y_train, batch_size=16, epochs=100, callbacks=[check_pointer])

In [None]:
# Ground-truth words as plain strings for every row of df_ALL.
# NOTE: this rebinds y_truth, which an earlier cell assigned from a slice of
# X_label_matr; from here on it holds the raw text column instead.
y_truth = df_ALL['text'].values

In [None]:
def getKey(DICT, sval):
    """Return the first key of ``DICT`` whose value equals ``sval``.

    Values are compared with ``np.array_equal`` so that array-like values
    (such as one-hot vectors) are matched element-wise. A plain ``==`` on
    NumPy arrays yields an array, which cannot be used as an ``if`` condition.

    Args:
        DICT: Mapping to search, scanned in its iteration order.
        sval: Value to look for (scalar or array-like).

    Returns:
        The first matching key, or ``None`` when no value matches.
    """
    for key, val in DICT.items():
        if np.array_equal(val, sval):
            return key
    return None

In [None]:
# Predict on the validation images; each prediction is handled below as a
# sequence of per-position class-score vectors.
y_pred_valid = image_model.predict(X_valid)
y_pred_valid_label = []

# One-hot marker of the end/padding class (last slot) and its class index.
alphaEnd = [0]*27
alphaEnd[-1] = 1
end_idx = alphaEnd.index(1)

for pred in y_pred_valid:
    char = ''
    # The model emits softmax scores, which never equal a one-hot vector
    # exactly (and comparing arrays inside `if` raises). Decode each position
    # by its highest-scoring class index instead.
    for cls in np.argmax(pred, axis=-1):
        if cls == end_idx:
            break
        # Class indices 0-25 follow alphabetical order, matching dict_Char.
        char += string.ascii_lowercase[cls]

    y_pred_valid_label.append(char)

y_pred_valid_label = np.array(y_pred_valid_label)

In [None]:
from sklearn.metrics import accuracy_score

# Exact-match accuracy between the true words and the decoded predictions
# for the validation rows.
valid_accuracy = accuracy_score(y_truth[100000:120000], y_pred_valid_label)
print('Validation Accuracy: %.2f' % valid_accuracy)

In [None]:
# Predict on the test images; each prediction is handled below as a sequence
# of per-position class-score vectors.
# NOTE(review): X_test is sliced from row 120000 onward and the spec file
# yielded 120000 rows, so X_test may be empty here — confirm.
y_pred = image_model.predict(X_test)
y_pred_label = []

# One-hot marker of the end/padding class (last slot) and its class index.
alphaEnd = [0]*27
alphaEnd[-1] = 1
end_idx = alphaEnd.index(1)

for pred in y_pred:
    char = ''
    # The model emits softmax scores, which never equal a one-hot vector
    # exactly (and comparing arrays inside `if` raises). Decode each position
    # by its highest-scoring class index instead.
    for cls in np.argmax(pred, axis=-1):
        if cls == end_idx:
            break
        # Class indices 0-25 follow alphabetical order, matching dict_Char.
        char += string.ascii_lowercase[cls]

    y_pred_label.append(char)

y_pred_label = np.array(y_pred_label)

In [None]:
# Exact-match accuracy on the test split. The message now says "Test" because
# y_pred_label was decoded from X_test, not from the validation data.
print('Test Accuracy: %.2f' % accuracy_score(y_truth[120000:], y_pred_label))

In [None]:
# File names of all samples, and the slice that lines up with the test split
# (rows from 120000 onward).
X_fileNames = df_ALL['filename'].values
X_test_fileNames = X_fileNames[120000:]

# Both lengths should match so that names and predictions can be paired.
len(X_test_fileNames), len(y_pred_label)

In [None]:
# Make sure the output folder exists.
if not os.path.isdir('./output'):
    os.mkdir('./output')

# Opening in 'w' mode truncates any previous result file, so a separate
# open/close followed by an append-mode reopen is unnecessary.
with open('./output/res.txt', 'w') as file:
    # One "<filename> \t <predicted word>" line per test sample.
    for i, test_name in enumerate(X_test_fileNames):
        file.write(str(test_name) + ' \t ' + str(y_pred_label[i]) + '\n')