In [4]:
class Config:
    """Settings read by the data loader and the CNN classifier.

    Attributes:
        image_shape: shape tuple used when resizing images and as the
            network's ``input_shape``.
        class_map: ordered list of class names; a class's position in the
            list is its integer label.
        class_id: dict mapping each class name to that integer label.
        class_number: number of entries in ``class_map``.
        DATA_PATH: directory containing one sub-folder per class.
        MODEL_PATH: file the trained model is saved to / loaded from.
    """

    def __init__(self):
        self.image_shape = (64, 64, 3)

        # --- Active preset: Rico elements ("ele-14"; the list holds 15 names) ---
        self.class_map = [
            'Button', 'CheckBox', 'Chronometer', 'EditText', 'ImageButton',
            'ImageView', 'ProgressBar', 'RadioButton', 'RatingBar', 'SeekBar',
            'Spinner', 'Switch', 'ToggleButton', 'VideoView', 'TextView',
        ]
        # Name -> label lookup, derived from class_map so the two cannot drift.
        self.class_id = {name: index for index, name in enumerate(self.class_map)}
        self.class_number = len(self.class_map)

        self.DATA_PATH = "E:/Mulong/Datasets/rico/elements-14"
        self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn-rico-1.h5'

        # --- Alternative presets, kept for reference (all disabled) ---
        # ele-4:
        #   self.class_map = ['Image', 'Icon', 'Button', 'Input']
        # scratch data:
        #   self.DATA_PATH = "E:\Temp\clipping"
        # ele-14, second model:
        #   self.DATA_PATH = "E:/Mulong/Datasets/rico/elements-14"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn2-ele14.h5'
        # web-page components:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn1-text.h5'
        # TEXT AND NON-TEXT:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn2-textview.h5'
        #   self.class_map = ['Text', 'Non-Text']
        # NOISE RECOGNITION:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn1-noise.h5'
        #   self.class_map = ['Noise', 'Non-Noise']
        # IMAGE RECOGNITION:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn-image-1.h5'
        #   self.class_map = ['Image', 'Non-Image']

# Module-level settings object; Data and CNN read their defaults from it.
cfg = Config()

In [5]:
import cv2
import numpy as np
from os.path import join as pjoin
import glob
from tqdm import tqdm


class Data:
    """Loads labelled component images from disk and builds train/test arrays.

    The dataset directory (``cfg.DATA_PATH``) is expected to contain one
    sub-folder per class, named exactly like a key of ``cfg.class_id``, each
    holding ``*.png`` images.

    Attributes:
        images, labels: parallel collections of loaded images and their
            integer labels (lists after ``load_data``, numpy arrays after
            ``generate_training_data``).
        data_num: number of loaded samples.
        X_train, Y_train, X_test, Y_test: float32 images scaled to [0, 1]
            and one-hot labels, filled by ``generate_training_data``.
        image_shape: (height, width, channels) the images are resized to.
    """

    def __init__(self):
        self.data_num = 0
        self.images = []
        self.labels = []
        self.X_train, self.Y_train = None, None
        self.X_test, self.Y_test = None, None

        self.image_shape = cfg.image_shape
        self.class_number = cfg.class_number
        self.class_map = cfg.class_map
        self.DATA_PATH = cfg.DATA_PATH

    def load_data(self, resize=True, shape=None, max_number=1000000):
        """Read up to ``max_number`` PNG images from every class folder.

        Any previously loaded samples are discarded first, so the method can
        be re-run (e.g. by re-executing a notebook cell) without duplicating
        data.

        :param resize: resize every image to ``shape`` when True.
        :param shape: optional (height, width, channels) override; it also
            replaces ``self.image_shape``.
        :param max_number: maximum number of images taken from each class.
        :raises KeyError: if a sub-folder name is not a key of ``cfg.class_id``.
        """
        from os.path import basename, isdir

        # if customize shape
        if shape is not None:
            self.image_shape = shape
        else:
            shape = self.image_shape
        # cv2.resize takes dsize as (width, height), the reverse of the
        # (height, width, ...) layout of the image array.
        target_size = (shape[1], shape[0])

        # Start from a clean slate; after generate_training_data() these are
        # numpy arrays, which have no append().
        self.images = []
        self.labels = []
        unreadable = 0

        # glob order is unspecified; sort so that the subset selected by
        # max_number is the same on every run and platform.
        for class_dir in sorted(glob.glob(pjoin(self.DATA_PATH, '*'))):
            if not isdir(class_dir):
                continue  # stray file next to the class folders
            # basename handles whichever separator the OS/glob produced.
            class_name = basename(class_dir)
            image_paths = sorted(glob.glob(pjoin(class_dir, '*.png')))
            print("*** Loading components of %s: %d ***" % (class_name, len(image_paths)))
            label = cfg.class_id[class_name]

            for image_path in tqdm(image_paths[:max_number]):
                image = cv2.imread(image_path)
                if image is None:
                    # imread signals failure by returning None instead of raising.
                    unreadable += 1
                    continue
                if resize:
                    image = cv2.resize(image, target_size)
                self.images.append(image)
                self.labels.append(label)

        assert len(self.images) == len(self.labels)
        self.data_num = len(self.images)
        if unreadable:
            print('%d unreadable image(s) skipped' % unreadable)
        print('%d Data Loaded' % self.data_num)

    def generate_training_data(self, train_data_ratio=0.8):
        """Shuffle the loaded samples and split them into train and test sets.

        The shuffle is seeded with 0 (via the global numpy RNG), so the split
        is reproducible. Images are scaled to [0, 1] as float32; labels become
        one-hot rows of width ``self.class_number``.

        :param train_data_ratio: fraction of samples placed in the training
            set; the remainder forms the test set.
        """
        images = np.asarray(self.images)
        labels = np.asarray(self.labels, dtype=int)
        assert len(images) == len(labels)
        self.data_num = len(images)  # keep in sync with what is actually split

        # reshuffle: one permutation applied to both arrays keeps each image
        # paired with its label.
        np.random.seed(0)
        order = np.random.permutation(self.data_num)
        self.images = images[order]
        self.labels = labels[order]

        # one-hot encode -> (num_samples, num_classes); no squeeze, so a
        # single-sample dataset keeps its sample axis.
        Y = np.eye(self.class_number)[self.labels]

        # separate dataset
        cut = int(train_data_ratio * self.data_num)
        # Cast before dividing so no float64 copy of the dataset is created.
        self.X_train = self.images[:cut].astype('float32') / 255
        self.X_test = self.images[cut:].astype('float32') / 255
        self.Y_train = Y[:cut]
        self.Y_test = Y[cut:]
        print('X_train:%d, Y_train:%d' % (len(self.X_train), len(self.Y_train)))
        print('X_test:%d, Y_test:%d' % (len(self.X_test), len(self.Y_test)))


In [6]:
# Empty dataset holder; it copies image shape, class list and data path from cfg.
data = Data()

In [7]:
# Load at most 10000 PNGs from each class folder (images are resized by default).
data.load_data(max_number=10000)

  0%|                                                                                        | 0/10000 [00:00<?, ?it/s]

*** Loading components of Button: 33477 ***


100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [01:15<00:00, 131.74it/s]
  0%|▏                                                                              | 13/7118 [00:00<00:55, 128.79it/s]

*** Loading components of CheckBox: 7118 ***


100%|█████████████████████████████████████████████████████████████████████████████| 7118/7118 [00:53<00:00, 133.13it/s]
 15%|████████████                                                                       | 8/55 [00:00<00:00, 76.41it/s]

*** Loading components of Chronometer: 55 ***


100%|█████████████████████████████████████████████████████████████████████████████████| 55/55 [00:00<00:00, 118.09it/s]
  0%|                                                                                | 9/10000 [00:00<01:54, 87.62it/s]

*** Loading components of EditText: 11365 ***


100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [01:34<00:00, 105.31it/s]


*** Loading components of ImageButton: 71742 ***


100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [01:15<00:00, 133.07it/s]


*** Loading components of ImageView: 222864 ***


100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [01:08<00:00, 145.61it/s]
  0%|▏                                                                                | 4/1462 [00:00<00:46, 31.58it/s]

*** Loading components of ProgressBar: 1462 ***


100%|██████████████████████████████████████████████████████████████████████████████| 1462/1462 [00:24<00:00, 60.57it/s]
  0%|                                                                                 | 4/4854 [00:00<02:43, 29.64it/s]

*** Loading components of RadioButton: 4854 ***


100%|█████████████████████████████████████████████████████████████████████████████| 4854/4854 [00:43<00:00, 111.92it/s]
  1%|▍                                                                                 | 5/848 [00:00<00:17, 48.67it/s]

*** Loading components of RatingBar: 848 ***


100%|████████████████████████████████████████████████████████████████████████████████| 848/848 [00:11<00:00, 72.55it/s]
  0%|▎                                                                                | 5/1553 [00:00<00:37, 41.42it/s]

*** Loading components of SeekBar: 1553 ***


100%|██████████████████████████████████████████████████████████████████████████████| 1553/1553 [00:25<00:00, 61.13it/s]
  4%|███▏                                                                              | 4/104 [00:00<00:02, 39.27it/s]

*** Loading components of Spinner: 104 ***


100%|████████████████████████████████████████████████████████████████████████████████| 104/104 [00:02<00:00, 46.63it/s]
  0%|                                                                                         | 0/2736 [00:00<?, ?it/s]

*** Loading components of Switch: 2736 ***


100%|██████████████████████████████████████████████████████████████████████████████| 2736/2736 [00:36<00:00, 75.95it/s]


*** Loading components of TextView: 392887 ***


100%|███████████████████████████████████████████████████████████████████████████| 10000/10000 [00:43<00:00, 227.91it/s]
  0%|▏                                                                                | 4/2444 [00:00<01:16, 32.01it/s]

*** Loading components of ToggleButton: 2444 ***


100%|██████████████████████████████████████████████████████████████████████████████| 2444/2444 [00:27<00:00, 90.34it/s]
  1%|█▏                                                                                | 4/278 [00:00<00:07, 34.57it/s]

*** Loading components of VideoView: 278 ***


100%|████████████████████████████████████████████████████████████████████████████████| 278/278 [00:09<00:00, 30.71it/s]

71452 Data Loaded





In [8]:
# Ratio 1 puts every sample in the training split; the test split is empty.
data.generate_training_data(1)

X_train:71452, Y_train:71452
X_test:0, Y_test:0


In [15]:
# The test split is empty (ratio 1 above), so point it at the training arrays.
# NOTE: anything evaluated on data.X_test / data.Y_test is therefore scored on
# the very same samples held in X_train / Y_train, not on held-out data.
data.X_test = data.X_train
data.Y_test = data.Y_train

In [11]:
class Config:
    """Settings read by the data loader and the CNN classifier.

    NOTE: this cell re-declares the ``Config`` class already defined in an
    earlier cell with the same values; the later definition shadows the
    earlier one.

    Attributes:
        image_shape: shape tuple used when resizing images and as the
            network's ``input_shape``.
        class_map: ordered list of class names; a class's position in the
            list is its integer label.
        class_id: dict mapping each class name to that integer label.
        class_number: number of entries in ``class_map``.
        DATA_PATH: directory containing one sub-folder per class.
        MODEL_PATH: file the trained model is saved to / loaded from.
    """

    def __init__(self):
        self.image_shape = (64, 64, 3)

        # --- Active preset: Rico elements ("ele-14"; the list holds 15 names) ---
        self.class_map = [
            'Button', 'CheckBox', 'Chronometer', 'EditText', 'ImageButton',
            'ImageView', 'ProgressBar', 'RadioButton', 'RatingBar', 'SeekBar',
            'Spinner', 'Switch', 'ToggleButton', 'VideoView', 'TextView',
        ]
        # Name -> label lookup, derived from class_map so the two cannot drift.
        self.class_id = {name: index for index, name in enumerate(self.class_map)}
        self.class_number = len(self.class_map)

        self.DATA_PATH = "E:/Mulong/Datasets/rico/elements-14"
        self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn-rico-1.h5'

        # --- Alternative presets, kept for reference (all disabled) ---
        # ele-4:
        #   self.class_map = ['Image', 'Icon', 'Button', 'Input']
        # scratch data:
        #   self.DATA_PATH = "E:\Temp\clipping"
        # ele-14, second model:
        #   self.DATA_PATH = "E:/Mulong/Datasets/rico/elements-14"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn2-ele14.h5'
        # web-page components:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn1-text.h5'
        # TEXT AND NON-TEXT:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn2-textview.h5'
        #   self.class_map = ['Text', 'Non-Text']
        # NOISE RECOGNITION:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn1-noise.h5'
        #   self.class_map = ['Noise', 'Non-Noise']
        # IMAGE RECOGNITION:
        #   self.DATA_PATH = "E:\Mulong\Datasets\dataset_webpage\Components3"
        #   self.MODEL_PATH = 'E:/Mulong/Model/rico_compos/cnn-image-1.h5'
        #   self.class_map = ['Image', 'Non-Image']

# Rebind the module-level cfg; objects created afterwards (e.g. CNN) read from it.
cfg = Config()

In [17]:
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from sklearn.metrics import confusion_matrix
import cv2
import numpy as np

class CNN:
    """Small convolutional classifier for UI-component images.

    Attributes:
        data: dataset object exposing ``X_train``, ``Y_train``, ``X_test``
            and ``Y_test`` (set by ``train``).
        model: the Keras model (built by ``train`` or restored by ``load``).
        image_shape: (height, width, channels) of the network input.
        class_number, class_map: number of classes and their names, indexed
            by label.
        MODEL_PATH: file the model is saved to / loaded from.
    """

    def __init__(self):
        self.data = None
        self.model = None

        self.image_shape = cfg.image_shape
        self.class_number = cfg.class_number
        self.class_map = cfg.class_map
        self.MODEL_PATH = cfg.MODEL_PATH

    def network(self, epoch_num):
        """Add the layers to ``self.model``, compile it and fit it.

        ``self.model`` must already be an empty ``Sequential`` and
        ``self.data`` must hold the train/test arrays. The test split is
        passed to ``fit`` as validation data.

        :param epoch_num: number of training epochs.
        """
        # block 1
        self.model.add(Conv2D(64, (3, 3), activation='relu', input_shape=self.image_shape, padding='same'))
        self.model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
        self.model.add(MaxPool2D(pool_size=(2, 2)))
        self.model.add(Dropout(0.25))
        # block 2
        self.model.add(Flatten())
        self.model.add(Dense(128, activation='relu'))
        self.model.add(Dropout(0.5))
        # block 3
        self.model.add(Dense(self.class_number, activation='softmax'))

        self.model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
        self.model.fit(self.data.X_train, self.data.Y_train, batch_size=64, epochs=epoch_num, verbose=1, validation_data=(self.data.X_test, self.data.Y_test))

    def train(self, data, epoch_num=30):
        """Build a fresh model, train it on ``data`` and save it to MODEL_PATH.

        :param data: dataset object with X_train / Y_train / X_test / Y_test.
        :param epoch_num: number of training epochs.
        """
        # Announces the destination up front; the file is written after fitting.
        print("Trained model is saved to", self.MODEL_PATH)
        self.data = data
        self.model = Sequential()
        self.network(epoch_num)
        self.model.save(self.MODEL_PATH)

    @staticmethod
    def _confusion_totals(matrix):
        """Sum one-vs-rest TP, FN, FP, TN over every class.

        ``matrix[i][j]`` counts samples of true class ``i`` predicted as
        class ``j`` (the layout returned by sklearn's ``confusion_matrix``).
        """
        matrix = np.asarray(matrix)
        diagonal = np.diag(matrix)
        true_counts = matrix.sum(axis=1)   # samples that really are class i
        pred_counts = matrix.sum(axis=0)   # samples predicted as class i
        TP = diagonal.sum()
        FN = (true_counts - diagonal).sum()
        FP = (pred_counts - diagonal).sum()
        # Everything outside row i and column i; the diagonal cell is
        # subtracted twice, so add it back once.
        TN = (matrix.sum() - true_counts - pred_counts + diagonal).sum()
        return TP, FN, FP, TN

    @staticmethod
    def _balanced_accuracy(matrix):
        """Mean per-class recall over the classes that have true samples."""
        matrix = np.asarray(matrix)
        true_counts = matrix.sum(axis=1)
        present = true_counts > 0
        if not present.any():
            return 0.0
        return float(np.mean(np.diag(matrix)[present] / true_counts[present]))

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is 0."""
        return float(numerator) / float(denominator) if denominator else 0.0

    def evaluate(self, data, load=True):
        """Score the model on ``data.X_test`` / ``data.Y_test`` and print metrics.

        :param data: dataset object; ``Y_test`` holds one-hot rows.
        :param load: reload the model from MODEL_PATH first when True.
        :return: the confusion matrix (rows = true class, columns = predicted
            class), always covering every class index.
        :raises ValueError: if the test split is empty.
        """
        if load:
            self.load()
        X_test = np.asarray(data.X_test)
        Y_onehot = np.asarray(data.Y_test)
        if len(X_test) == 0:
            raise ValueError('data.X_test is empty; nothing to evaluate')

        Y_test = np.argmax(Y_onehot, axis=1)
        # One batched call instead of one predict() per sample.
        probabilities = self.model.predict(X_test)
        Y_pre = np.argmax(probabilities, axis=1)

        # Fix the label set so the matrix keeps one row/column per class even
        # when some class never occurs in Y_test or Y_pre.
        n_classes = max(Y_onehot.shape[1], probabilities.shape[1])
        matrix = confusion_matrix(Y_test, Y_pre, labels=list(range(n_classes)))

        TP, FN, FP, TN = self._confusion_totals(matrix)
        recall = self._ratio(TP, TP + FN)
        precision = self._ratio(TP, TP + FP)
        accuracy = self._ratio(TP + TN, TP + FN + FP + TN)
        balanced_accuracy = self._balanced_accuracy(matrix)
        print(matrix)
        print('\nTP:%.3f \t FN:%.3f \nFP:%.3f \t TN:%.3f\n' % (TP, FN, FP, TN))
        print('recall:%.3f \t precision:%.3f \t accuracy:%.3f \t balanced accuracy:%.3f' % (recall, precision, accuracy, balanced_accuracy))
        return matrix

    def predict(self, img_path, load=True, show=False):
        """Print the predicted class name for each image file.

        :param img_path: list of image paths (a single path string is also
            accepted).
        :param load: reload the model from MODEL_PATH first when True.
        :param show: display each image in an OpenCV window and wait for a
            key press when True.
        """
        if load:
            self.load()
        if isinstance(img_path, str):
            # Iterating a bare string would yield single characters.
            img_path = [img_path]
        # cv2.resize takes dsize as (width, height).
        target_size = (self.image_shape[1], self.image_shape[0])
        for path in img_path:
            img = cv2.imread(path)
            if img is None:
                # imread returns None (no exception) for missing/unreadable files.
                print('Cannot read image:', path)
                continue
            X = cv2.resize(img, target_size)
            X = (X / 255).astype('float32')
            X = np.array([X])  # add the batch axis: (h, w, c) -> (1, h, w, c)
            Y = self.class_map[np.argmax(self.model.predict(X))]
            print(Y)
            if show:
                cv2.imshow('img', img)
                cv2.waitKey(0)
                cv2.destroyAllWindows()

    def load(self):
        """Restore ``self.model`` from MODEL_PATH."""
        self.model = load_model(self.MODEL_PATH)
        print('Model Loaded From', self.MODEL_PATH)

In [13]:
# Classifier wrapper; no model is built or loaded until train/evaluate/predict/load.
cnn = CNN()

In [19]:
# Loads the saved model (load=True by default), scores it on data.X_test /
# data.Y_test and returns the confusion matrix.
matrix = cnn.evaluate(data)

Model Loaded From E:/Mulong/Model/rico_compos/cnn-rico-1.h5
[[1767    0    0   60  160 1777    0    0    0    5    0    0    0    0
  6231]
 [  64  288    0   10  537 4626    2    0    0    2    0    8    0    0
  1581]
 [   2    0    0    1    0   11    0    0    0    0    0    0    0    0
    41]
 [  81    0    0 2768   28 1985    0    0    0    7    0    0    0    0
  5131]
 [ 112   11    0   13 4163 4630    1    0    0    7    0    0    0    0
  1063]
 [  61    1    0   16  788 7926    0    0    0    2    0    0    0    0
  1206]
 [  19    2    0    5   66 1118    1    0    0    9    0    0    0    0
   242]
 [  51    1    0    2  231 3672    0    0    0    1    0    0    0    0
   896]
 [   0    0    0    1    6  695    0    0    0    1    0    0    0    0
   145]
 [  24    0    0   11   52  970    0    0    0  225    0    1    0    0
   270]
 [   5    0    0    2    2   71    0    0    0    0    0    0    0    0
    24]
 [  27    0    0   22  132 1694    0    0    0    2    0   3



In [20]:
# Display the confusion matrix returned by cnn.evaluate.
matrix

array([[1767,    0,    0,   60,  160, 1777,    0,    0,    0,    5,    0,
           0,    0,    0, 6231],
       [  64,  288,    0,   10,  537, 4626,    2,    0,    0,    2,    0,
           8,    0,    0, 1581],
       [   2,    0,    0,    1,    0,   11,    0,    0,    0,    0,    0,
           0,    0,    0,   41],
       [  81,    0,    0, 2768,   28, 1985,    0,    0,    0,    7,    0,
           0,    0,    0, 5131],
       [ 112,   11,    0,   13, 4163, 4630,    1,    0,    0,    7,    0,
           0,    0,    0, 1063],
       [  61,    1,    0,   16,  788, 7926,    0,    0,    0,    2,    0,
           0,    0,    0, 1206],
       [  19,    2,    0,    5,   66, 1118,    1,    0,    0,    9,    0,
           0,    0,    0,  242],
       [  51,    1,    0,    2,  231, 3672,    0,    0,    0,    1,    0,
           0,    0,    0,  896],
       [   0,    0,    0,    1,    6,  695,    0,    0,    0,    1,    0,
           0,    0,    0,  145],
       [  24,    0,    0,   11,   52,

In [21]:
def calculate_n_p(matrix):
    """Sum one-vs-rest TP, FN, FP and TN over every class of a confusion matrix.

    :param matrix: square confusion matrix where ``matrix[i][j]`` counts
        samples of true class ``i`` predicted as class ``j`` (the layout
        returned by sklearn's ``confusion_matrix``).
    :return: tuple ``(TP, FN, FP, TN)`` of totals across all classes.
    """
    matrix = np.asarray(matrix)
    diagonal = np.diag(matrix)
    true_counts = matrix.sum(axis=1)   # samples that really are class i
    pred_counts = matrix.sum(axis=0)   # samples predicted as class i

    TP = diagonal.sum()
    FN = (true_counts - diagonal).sum()   # true class i, predicted as another
    FP = (pred_counts - diagonal).sum()   # predicted i, really another class
    # True negatives of class i are all cells outside row i and column i. The
    # diagonal cell is removed twice by the two subtractions, so add it back.
    # ("trace - diagonal" only equals this for a 2x2 matrix.)
    TN = (matrix.sum() - true_counts - pred_counts + diagonal).sum()
    return TP, FN, FP, TN

In [22]:
# score is the tuple (TP, FN, FP, TN) summed over all classes of `matrix`.
score = calculate_n_p(matrix)