In [2]:
class Config:
    def __init__(self):
        self.image_shape = (64, 64, 3)
        # self.class_map = ['Image', 'Icon', 'Button', 'Input']     # ele-4
        self.class_map = ['Button', 'CheckBox', 'Chronometer', 'EditText', 'ImageButton', 'ImageView',
                          'ProgressBar', 'RadioButton', 'RatingBar', 'SeekBar', 'Spinner', 'Switch',
                          'ToggleButton', 'VideoView', 'TextView']            # ele-14
        self.DATA_PATH = "E:\Temp\clipping"
        self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn-rico-1.h5'
        self.class_id = {'Button':0, 'CheckBox':1, 'Chronometer':2, 'EditText':3, 'ImageButton':4, 'ImageView':5,
                          'ProgressBar':6, 'RadioButton':7, 'RatingBar':8, 'SeekBar':9, 'Spinner':10, 'Switch':11,
                          'ToggleButton':12, 'VideoView':13, 'TextView':14}

        # self.DATA_PATH = "E:/Mulong/Datasets/rico/elements-14"
        # self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn2-ele14.h5'

        # self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        # self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn1-text.h5'

        # TEXT AND NON-TEXT
        # self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        # self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn2-textview.h5'
        # self.class_map = ['Text', 'Non-Text']

        # NOISE RECOGNITION
        # self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        # self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn1-noise.h5'
        # self.class_map = ['Noise', 'Non-Noise']

        # IMAGE RECOGNITION
        # self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        # self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn-image-1.h5'
        # self.class_map = ['Image', 'Non-Image']

        self.class_number = len(self.class_map)

cfg = Config()

In [18]:
import cv2
import numpy as np
from os.path import join as pjoin
import glob
from tqdm import tqdm


class Data:

    def __init__(self):
        self.data_num = 0
        self.images = []
        self.labels = []
        self.X_train, self.Y_train = None, None
        self.X_test, self.Y_test = None, None

        self.image_shape = cfg.image_shape
        self.class_number = cfg.class_number
        self.class_map = cfg.class_map
        self.DATA_PATH = cfg.DATA_PATH

    def load_data(self, resize=True, shape=None, max_number=1000000):
        # if customize shape
        if shape is not None:
            self.image_shape = shape
        else:
            shape = self.image_shape
        # load data
        for p in glob.glob(pjoin(self.DATA_PATH, '*')):
            print("*** Loading components of %s: %d ***" %(p.split('\\')[-1], int(len(glob.glob(pjoin(p, '*.png'))))))
            label = cfg.class_id[p.split('\\')[-1]]

            for i, image_path in enumerate(tqdm(glob.glob(pjoin(p, '*.png'))[:max_number])):
                image = cv2.imread(image_path)
                if resize:
                    image = cv2.resize(image, shape[:2])
                self.images.append(image)
                self.labels.append(label)
        assert len(self.images) == len(self.labels)
        self.data_num = len(self.images)
        print('%d Data Loaded' % self.data_num)

    def generate_training_data(self, train_data_ratio=0.8):
        # transfer int into c dimensions one-hot array
        def expand(label, class_number):
            # return y : (num_class, num_samples)
            y = np.eye(class_number)[label]
            y = np.squeeze(y)
            return y
        # reshuffle
        np.random.seed(0)
        self.images = np.random.permutation(self.images)
        np.random.seed(0)
        self.labels = np.random.permutation(self.labels)
        Y = expand(self.labels, self.class_number)
        # separate dataset
        cut = int(train_data_ratio * self.data_num)
        self.X_train = (self.images[:cut] / 255).astype('float32')
        self.X_test = (self.images[cut:] / 255).astype('float32')
        self.Y_train = Y[:cut]
        self.Y_test = Y[cut:]
        print('X_train:%d, Y_train:%d' % (len(self.X_train), len(self.Y_train)))
        print('X_test:%d, Y_test:%d' % (len(self.X_test), len(self.Y_test)))


In [19]:
data = Data()

In [25]:
data.load_data(max_number=30000)

  3%|██▌                                                                           | 101/3082 [00:00<00:04, 603.09it/s]

*** Loading components of Button: 3082 ***


100%|██████████████████████████████████████████████████████████████████████████████| 3082/3082 [01:21<00:00, 37.88it/s]
 67%|████████████████████████████████████████████████████▍                         | 330/491 [00:00<00:00, 3026.11it/s]

*** Loading components of CheckBox: 491 ***


100%|██████████████████████████████████████████████████████████████████████████████| 491/491 [00:00<00:00, 2329.60it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 362.08it/s]
 13%|█████████▉                                                                    | 110/862 [00:00<00:00, 1038.59it/s]

*** Loading components of Chronometer: 1 ***
*** Loading components of EditText: 862 ***


100%|██████████████████████████████████████████████████████████████████████████████| 862/862 [00:00<00:00, 1067.77it/s]
  4%|██▉                                                                          | 205/5344 [00:00<00:02, 2017.12it/s]

*** Loading components of ImageButton: 5344 ***


100%|████████████████████████████████████████████████████████████████████████████| 5344/5344 [00:02<00:00, 2193.57it/s]
  1%|▌                                                                             | 91/11820 [00:00<00:13, 884.45it/s]

*** Loading components of ImageView: 11820 ***


100%|████████████████████████████████████████████████████████████████████████████| 11820/11820 [02:29<00:00, 79.24it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 41/41 [00:00<00:00, 1374.53it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 98/98 [00:00<00:00, 2671.84it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 3375.12it/s]
  0%|                                                                                           | 0/86 [00:00<?, ?it/s]

*** Loading components of ProgressBar: 41 ***
*** Loading components of RadioButton: 98 ***
*** Loading components of RatingBar: 14 ***
*** Loading components of SeekBar: 86 ***


100%|████████████████████████████████████████████████████████████████████████████████| 86/86 [00:00<00:00, 1168.64it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 939.06it/s]
 51%|███████████████████████████████████████▊                                      | 106/208 [00:00<00:00, 1025.95it/s]

*** Loading components of Spinner: 6 ***
*** Loading components of Switch: 208 ***


100%|███████████████████████████████████████████████████████████████████████████████| 208/208 [00:01<00:00, 125.53it/s]
  0%|▎                                                                            | 102/22043 [00:00<00:23, 924.32it/s]

*** Loading components of TextView: 22043 ***


100%|████████████████████████████████████████████████████████████████████████████| 22043/22043 [12:16<00:00, 29.93it/s]
  5%|████                                                                              | 7/142 [00:00<00:01, 69.47it/s]

*** Loading components of ToggleButton: 142 ***


100%|████████████████████████████████████████████████████████████████████████████████| 142/142 [00:03<00:00, 37.82it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 285.31it/s]

*** Loading components of VideoView: 27 ***
44417 Data Loaded





In [26]:
data.generate_training_data(train_data_ratio=1.0)

X_train:44417, Y_train:44417
X_test:0, Y_test:0


In [39]:
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from sklearn.metrics import confusion_matrix
import cv2
import numpy as np

class CNN:

    def __init__(self):
        self.data = None
        self.model = None

        self.image_shape = cfg.image_shape
        self.class_number = cfg.class_number
        self.class_map = cfg.class_map
        self.MODEL_PATH = cfg.MODEL_PATH

    def network(self, epoch_num):
        # block 1
        self.model.add(Conv2D(64, (3, 3), activation='relu', input_shape=self.image_shape, padding='same'))
        self.model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
        self.model.add(MaxPool2D(pool_size=(2, 2)))
        self.model.add(Dropout(0.25))
        # block 2
        self.model.add(Flatten())
        self.model.add(Dense(128, activation='relu'))
        self.model.add(Dropout(0.5))
        # block 3
        self.model.add(Dense(self.class_number, activation='softmax'))

        self.model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
        self.model.fit(self.data.X_train, self.data.Y_train, batch_size=64, epochs=epoch_num, verbose=1, validation_data=(self.data.X_test, self.data.Y_test))

    def train(self, data, epoch_num=30):
        print("Trained model is saved to", self.MODEL_PATH)
        self.data = data
        self.model = Sequential()
        self.network(epoch_num)
        self.model.save(self.MODEL_PATH)

    def evaluate(self, data, load=True):
        # calculate TP, FN, FP, TN
        def calculate_n_p(matrix):
            TP, FN, FP, TN = 0, 0, 0, 0
            for i in range(len(matrix)):
                TP += matrix[i][i] / np.sum(matrix[i])
                FN += (np.sum(matrix[:, i]) - matrix[i][i]) / np.sum(matrix[:, i])
                FP += (np.sum(matrix[i]) - matrix[i][i]) / np.sum(matrix[i])
                TN += (np.trace(matrix) - matrix[i][i]) / np.trace(matrix)
            TP = TP / len(matrix)
            FN = FN / len(matrix)
            FP = FP / len(matrix)
            TN = TN / len(matrix)
            return TP, FN, FP, TN

        if load:
            self.load()
        X_test = data.X_test
        Y_test = [np.argmax(y) for y in data.Y_test]
        Y_pre = []
        for X in X_test:
            X = np.array([X])
            Y_pre.append(np.argmax(self.model.predict(X)))

        matrix = confusion_matrix(Y_test, Y_pre)
        TP, FN, FP, TN = calculate_n_p(matrix)
        recall = TP / (TP + FN)
        precision = TP / (TP + FP)
        accuracy = (TP + TN) / (TP + FN + FP + TN)
        balanced_accuracy = TP
        print(matrix)
        print('\nTP:%.3f \t FN:%.3f \nFP:%.3f \t TN:%.3f\n' % (TP, FN, FP, TN))
        print('recall:%.3f \t precision:%.3f \t accuracy:%.3f \t balanced accuracy:%.3f' % (recall, precision, accuracy, balanced_accuracy))
        return matrix
        
    def predict(self, img_path, load=True, show=False):
        """
        :type img_path: list of img path
        """
        if load:
            self.load()
        for path in img_path:
            img = cv2.imread(path)
            X = cv2.resize(img, self.image_shape[:2])
            X = (X / 255).astype('float32')
            X = np.array([X])  # from (64, 64, 3) to (1, 64, 64, 3)
            Y = self.class_map[np.argmax(self.model.predict(X))]
            print(Y)
            if show:
                cv2.imshow('img', img)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

    def load(self):
        self.model = load_model(self.MODEL_PATH)
        print('Model Loaded From', self.MODEL_PATH)


In [40]:
cnn = CNN()

In [31]:
cnn.train(data, 10)

Trained model is saved to E:/Mulong/Model/rico_compos/cnn-rico-1.h5
Train on 44417 samples, validate on 0 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [36]:
data.X_test = data.X_train
data.Y_test = data.Y_train

In [41]:
matrix = cnn.evaluate(data)

Model Loaded From E:/Mulong/Model/rico_compos/cnn-rico-1.h5
[[ 2197     0     0     1    27   167     0     0     0     0     0     0
      0     0   701]
 [    1   448     0     0     5    43     0     0     0     0     0     0
      0     0     5]
 [    0     0     0     0     0     0     0     0     0     0     0     0
      0     0     3]
 [   10     0     0   390     6    43     0     0     0     0     0     0
      0     0   424]
 [   24     1     0     0  4869   367     0     0     0     0     0     0
      0     0    94]
 [   22     7     0     3   252 10938     0     0     0     0     0     1
      0     0   608]
 [    3     0     0     0     3    18    17     0     0     0     0     0
      0     0    11]
 [    3     0     0     0     5    52     0    37     0     0     0     0
      0     0    12]
 [    0     0     0     0     2    11     0     0     0     0     0     0
      0     0    12]
 [    0     0     0     0     2    15     0     0     0    70     0     1
      0    



In [42]:
matrix

array([[ 2197,     0,     0,     1,    27,   167,     0,     0,     0,
            0,     0,     0,     0,     0,   701],
       [    1,   448,     0,     0,     5,    43,     0,     0,     0,
            0,     0,     0,     0,     0,     5],
       [    0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     3],
       [   10,     0,     0,   390,     6,    43,     0,     0,     0,
            0,     0,     0,     0,     0,   424],
       [   24,     1,     0,     0,  4869,   367,     0,     0,     0,
            0,     0,     0,     0,     0,    94],
       [   22,     7,     0,     3,   252, 10938,     0,     0,     0,
            0,     0,     1,     0,     0,   608],
       [    3,     0,     0,     0,     3,    18,    17,     0,     0,
            0,     0,     0,     0,     0,    11],
       [    3,     0,     0,     0,     5,    52,     0,    37,     0,
            0,     0,     0,     0,     0,    12],
       [    0,     0,   

In [48]:
def calculate_n_p(matrix):
    TP, FN, FP, TN = 0, 0, 0, 0
    for i in range(len(matrix)):
        TP += matrix[i][i]
        FN += (np.sum(matrix[:, i]) - matrix[i][i])
        FP += (np.sum(matrix[i]) - matrix[i][i])
    return TP, FN, FP

In [49]:
TP, FN, FP = calculate_n_p(matrix)

In [50]:
print(TP, FN, FP)

40627 3790 3790


In [51]:
TP / (TP + FN)

0.9146723101515185

In [52]:
TP / (TP + FP)

0.9146723101515185

In [54]:
open('matrix.txt', 'w').write(str(matrix))

1425