### Imports and configuration

#### Imports

In [None]:
import os,cv2,keras
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET
import random
from collections import defaultdict
from tqdm.notebook import trange, tqdm

from keras.layers import Dense
from keras import Model
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer
from sklearn.model_selection import train_test_split

#### Paths

In [None]:
# Where are we going to work?
# Root directory that is walked recursively to collect the image/annotation files.
data_path = '../data/gen4/'
# Directory receiving the generated artifacts (annotation cache, model checkpoints).
work_path = '../data/work/'
# Cached annotation dataframe, written/read by the data preparation cell.
pickle_path = os.path.join(work_path, 'out.pickle')
# File name template handed to ModelCheckpoint; the `{epoch:02d}` placeholder is
# left unformatted here (presumably filled in by Keras at save time).
checkpoint_path = os.path.join(work_path, 'checkpoint_{epoch:02d}.h5')

#### Parameters

In [None]:
# Random
# Seed shared by numpy, the stdlib `random` module and the pandas sampling call.
SEED = 1337
np.random.seed(SEED)
random.seed(SEED)

# Force data preparation
# When True the annotation dataframe is rebuilt even if the pickle already exists.
force_conversion = True

# Only use a few examples
# `subsamples` switches the limit on; `subsamples_num` is (roughly) the number
# of annotated files that are read while building the dataframe.
subsamples = False
subsamples_num = 10

# Recognition
# N: maximum number of selective-search proposals examined per image.
# IOU_THRESHOLD: a proposal is kept as a class example when its IoU with a
# ground-truth box is strictly above this value.
N = 2000
IOU_THRESHOLD = 0.8
MAX_SAMPLES = 30  # We need to balance the numbers of examples for each class (the counter is reset for every image)

# Training
# Both values are forwarded as-is to the Keras training call.
STEPS_PER_EPOCH=10
EPOCHS=100

### Data preparation/retrieving

In [None]:
if not os.path.exists(pickle_path) or force_conversion:  # If we need to convert data
    # Collect the extension-less path of every file below data_path.
    # - os.path.splitext only strips the last extension, so a name such as
    #   "scan.v2.jpg" keeps its full stem ("scan.v2").
    # - The set discards the .jpg/.xml duality; sorting makes the order (and
    #   therefore the optional subsample) reproducible between runs.
    names = sorted({
        os.path.join(dir_path, os.path.splitext(file_name)[0])
        for dir_path, _, file_names in os.walk(data_path)
        for file_name in file_names
    })

    # Columns
    columns = ['file_name', 'class_name', 'xmin', 'ymin', 'xmax', 'ymax']
    bbox_fields = columns[2:]

    # Data: one row per annotated object
    data = []
    parsed_files = 0  # Local counter: the subsamples_num setting itself is left untouched
    for name in names:
        if subsamples and parsed_files >= subsamples_num:
            break  # Exactly subsamples_num annotated files have been read

        try:
            tree = ET.parse(name + '.xml')
        except (OSError, ET.ParseError):  # The annotation file is missing or unreadable.
            continue
        parsed_files += 1

        for obj in tree.getroot().findall('object'):
            bndbox = obj.find('bndbox')
            # Look every coordinate up by tag name so that the column values
            # do not depend on the order of the children inside <bndbox>.
            coordinates = [bndbox.find(field).text for field in bbox_fields]
            data.append([name + '.jpg', obj.find('name').text] + coordinates)

    # Create a new pandas dataframe (coordinates are kept as text, as read from the XML)
    df = pd.DataFrame(data, columns=columns)

    # Save pickle (make sure the target directory exists first)
    os.makedirs(os.path.dirname(pickle_path) or '.', exist_ok=True)
    df.to_pickle(pickle_path)
else:  # The data is already available
    df = pd.read_pickle(pickle_path)

In [None]:
# See the df: preview the first rows of the annotation dataframe
# (one row per annotated object: file, class and bounding-box coordinates).
df.head()

In [None]:
# Number of annotated objects per image file (one row per file).
files = (
    df.groupby('file_name')
    .size()
    .reset_index(name='counts')
)
files

In [None]:
# Number of annotated objects per class, most frequent class first.
classes = (
    df.groupby('class_name')
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
)
classes

In [None]:
# Headline figures: the row count of each frame is its first shape entry.
# - classes: one row per class
# - files:   one row per image file
# - df:      one row per annotated object ("point of data")
num_classes = classes.shape[0]
num_files = files.shape[0]
num_pod = df.shape[0]
print("We have {} classes in {} files for {} points of data.".format(num_classes, num_files, num_pod))

#### Example

In [None]:
# Draw one file reproducibly (seeded) and gather every annotation it owns.
sample_row = files.sample(n=1, random_state=SEED)
sample_file = sample_row['file_name'].iloc[0]
sample_data = df[df['file_name'] == sample_file]
sample_data

In [None]:
# Show the sample image twice: untouched, then with its annotated boxes.
img = cv2.imread(sample_file)
if img is None:  # cv2.imread signals a failed read by returning None
    raise FileNotFoundError("Could not read image: {}".format(sample_file))

# OpenCV decodes to BGR whereas matplotlib displays RGB: convert once so the
# colours are rendered faithfully.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)

# Draw on a copy so the first figure keeps showing the raw image.
annotated = img.copy()
for _, row in sample_data.iterrows():
    top_left = (int(row['xmin']), int(row['ymin']))
    bottom_right = (int(row['xmax']), int(row['ymax']))
    # (255, 0, 0) is red now that the image is RGB.
    cv2.rectangle(annotated, top_left, bottom_right, (255, 0, 0), 2)
    cv2.putText(annotated, row['class_name'], top_left, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)
plt.figure()
plt.imshow(annotated)

### Recognition and data expansion

In [None]:
# Ask OpenCV to use its optimized code paths for the calls that follow.
cv2.setUseOptimized(True)  # Enables the optimized code.

#### Example

In [None]:
# We are using selective search to find potential areas of interest.
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()  # Uses optimized selective search (https://www.learnopencv.com/selective-search-for-object-detection-cpp-python/)
img = cv2.imread(sample_file)
if img is None:  # cv2.imread signals a failed read by returning None
    raise FileNotFoundError("Could not read image: {}".format(sample_file))
ss.setBaseImage(img)
print("Processing Selective Search...")
ss.switchToSelectiveSearchFast()
rects = ss.process()

# The proposals are drawn on an RGB copy: OpenCV decodes to BGR while
# matplotlib displays RGB. `img` itself is left untouched.
img_out = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
for x, y, w, h in rects:  # Each proposal is an (x, y, width, height) rectangle
    cv2.rectangle(img_out, (x, y), (x + w, y + h), (0, 255, 0), 1, cv2.LINE_AA)
plt.figure()
plt.imshow(img_out)

#### All

In [None]:
# Accumulators for the extracted samples: one image crop and one label per entry.
train_images, train_labels = [], []

In [None]:
# Uses optimized selective search (https://www.learnopencv.com/selective-search-for-object-detection-cpp-python/)
# This single segmentation object is the one the extraction loop feeds every image to.
ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

In [None]:
def get_iou(bb1, bb2):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    IoU is the area shared by both boxes divided by the area covered by
    either of them
    (https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/).

    Args:
        bb1: mapping with the keys 'xmin', 'ymin', 'xmax', 'ymax'.
        bb2: mapping with the same keys as bb1.

    Returns:
        A float in [0.0, 1.0]; 0.0 when the boxes do not overlap.

    Raises:
        AssertionError: if a box has no positive width/height, or if the
            computed ratio falls outside [0.0, 1.0].
    """
    # Both boxes must be well formed: min strictly below max on each axis.
    for box in (bb1, bb2):
        assert box['xmin'] < box['xmax']
        assert box['ymin'] < box['ymax']

    # Extent of the overlap along each axis (negative means "apart").
    overlap_width = min(bb1['xmax'], bb2['xmax']) - max(bb1['xmin'], bb2['xmin'])
    overlap_height = min(bb1['ymax'], bb2['ymax']) - max(bb1['ymin'], bb2['ymin'])
    if overlap_width < 0 or overlap_height < 0:
        return 0.0

    overlap_area = overlap_width * overlap_height
    first_area = (bb1['xmax'] - bb1['xmin']) * (bb1['ymax'] - bb1['ymin'])
    second_area = (bb2['xmax'] - bb2['xmin']) * (bb2['ymax'] - bb2['ymin'])

    # Union = sum of both areas minus the part counted twice.
    union_area = first_area + second_area - overlap_area
    iou = overlap_area / float(union_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [None]:
def _extract_sample(image, x, y, w, h):
    """Crop the (x, y, w, h) rectangle out of `image` and resize it to 224x224."""
    return cv2.resize(image[y:y + h, x:x + w], (224, 224), interpolation=cv2.INTER_AREA)


# Loop over each image
for file in tqdm(files['file_name'], total=num_files, desc="Iterating through files..."):
    img = cv2.imread(file)
    if img is None:  # Missing/unreadable image: cv2.imread returns None, nothing to sample
        continue

    # Parse the ground-truth boxes of this image once, instead of once per proposal.
    ground_truths = [
        (annotation['class_name'],
         {'xmin': int(annotation['xmin']), 'xmax': int(annotation['xmax']),
          'ymin': int(annotation['ymin']), 'ymax': int(annotation['ymax'])})
        for _, annotation in df.loc[df['file_name'] == file].iterrows()
    ]

    # Set image as the base for selective search
    ss.setBaseImage(img)

    # Initialising fast selective search and getting proposed regions
    ss.switchToSelectiveSearchFast()
    rects = ss.process()

    classes_counter = defaultdict(int)  # As stated, we need an uniform sample between classes

    # Only the first N proposals are examined: we don't want to waste
    # resources on too many possibilities.
    for rect in tqdm(rects[:N], desc="Iterating through rectangles...", leave=False):
        x, y, w, h = rect
        rect_bbox = {'xmin': x, 'xmax': x + w, 'ymin': y, 'ymax': y + h}

        # Does THIS proposal cover an annotated object? The flag is reset for
        # every proposal, otherwise the first positive match of an image would
        # block all later background samples of that image.
        overlaps_object = False

        # Compare the proposal with each annotated box of the image
        for ground_truth_class_name, ground_truth_bbox in ground_truths:
            if get_iou(ground_truth_bbox, rect_bbox) <= IOU_THRESHOLD:
                continue

            # The proposal matches an object even when that class already has
            # enough samples: it must never be reused as a background example.
            overlaps_object = True
            if classes_counter[ground_truth_class_name] < MAX_SAMPLES:
                train_images.append(_extract_sample(img, x, y, w, h))
                train_labels.append(ground_truth_class_name)
                classes_counter[ground_truth_class_name] += 1

        # We can use that bbox as a background example!
        if not overlaps_object and classes_counter['background'] < MAX_SAMPLES:
            train_images.append(_extract_sample(img, x, y, w, h))
            train_labels.append('background')  # Background
            classes_counter['background'] += 1

In [None]:
# Turn the accumulated Python lists into numpy arrays for the training steps.
train_images, train_labels = np.array(train_images), np.array(train_labels)

### Model
We are going to use transfer learning: since we do not have the time to train a whole new model from scratch, we take an existing pretrained model and specialize it for our own classes. See https://medium.com/@1297rohit/transfer-learning-from-scratch-using-keras-339834b153b9.

In [None]:
# Import pretrained original VGG16 model with ImageNet weights
# include_top=True keeps the original classifier layers; the new output layer
# is later attached to the output of the second-to-last layer of this model.
vggmodel = VGG16(weights='imagenet', include_top=True)  # https://keras.io/applications/
vggmodel.summary()  # Pretty sure I can optimize that thing...

In [None]:
# Lock the first 15 layers of VGG16 so training leaves their weights alone,
# reporting each one with its 1-based position.
for position, layer in enumerate(vggmodel.layers[:15], start=1):
    layer.trainable = False
    layer_name = layer.get_config()['name']
    print("- Layer {} ({}) is not trainable anymore.".format(layer_name, position))

In [None]:
# Add a {number of classes} unit softmax dense layer
# The new layer is plugged on the output of VGG16's second-to-last layer, i.e.
# it takes the place of the original ImageNet output layer.
predictions = Dense(len(set(train_labels)), activation="softmax")(vggmodel.layers[-2].output)  # Maybe not all labels
# `inputs`/`outputs` are the documented keyword names of Model; the singular
# `input`/`output` spellings are legacy aliases that not every Keras release accepts.
model = Model(inputs=vggmodel.input, outputs=predictions)

In [None]:
# Compile the model using Adam optimizer with learning rate of 0.0001
# We are using categorical_crossentropy as loss since the output of the model is categorical
# (the labels are fed one-hot encoded, one column per class).
opt = Adam(lr=0.0001)
model.compile(loss = keras.losses.categorical_crossentropy, optimizer = opt, metrics=["accuracy"])
model.summary()

In [None]:
# One-hot encoding: Basically "unique-fy-ish" each class.
# https://hackernoon.com/what-is-one-hot-encoding-why-and-when-do-you-have-to-use-it-e3c6186d008f
# from sklearn.preprocessing import LabelBinarizer
class MyLabelBinarizer(LabelBinarizer):
    """LabelBinarizer that always produces one column per class.

    The stock LabelBinarizer encodes a two-class problem as a single 0/1
    column (1 meaning `classes_[1]`), which does not fit a softmax output with
    one unit per class. This subclass expands that case to two columns.

    Column `i` of the encoded matrix always stands for `classes_[i]`, so
    `classes_[row.argmax()]` decodes a row in the binary case as well as in
    the multiclass one.
    """

    def transform(self, y):
        """Encode `y`; two-class targets are expanded to two columns."""
        Y = super().transform(y)
        if self.y_type_ == 'binary':
            # Y flags classes_[1]; its complement flags classes_[0]. Putting
            # the complement first keeps the columns in classes_ order.
            return np.hstack((1 - Y, Y))
        return Y

    def inverse_transform(self, Y, threshold=None):
        """Decode a matrix produced by `transform` back to labels."""
        if self.y_type_ == 'binary':
            # Column 1 is the single column the parent class produced.
            return super().inverse_transform(Y[:, 1], threshold)
        return super().inverse_transform(Y, threshold)


lenc = MyLabelBinarizer()

In [None]:
# Alternative encoder; it only comes into play if `chosen_binarizer` is pointed at it.
mlb = MultiLabelBinarizer()

In [None]:
# Select the encoder, learn the set of classes, then one-hot encode the labels.
chosen_binarizer = lenc
chosen_binarizer.fit(train_labels)
train_labels_fit = chosen_binarizer.transform(train_labels)
# train_labels_fit = chosen_binarizer.fit_transform([train_labels])  # For mlb
chosen_binarizer.classes_

In [None]:
# Test and train set, yay.
# 10% of the samples are held out. The split is pinned to SEED so that
# re-running this cell always yields the same partition: otherwise samples
# already used for training could land in the test set on a later run.
X_train, X_test, y_train, y_test = train_test_split(
    train_images, train_labels_fit, test_size=0.10, random_state=SEED)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Dataset augmentation
# This may not be needed following some magazines, as we do not often have rotated texts...
# ... Or do we? Anyway it applies for the pictures so there's that.
# Only the training samples are randomly flipped/rotated.
imgdatagen = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, rotation_range=90)
train_data = imgdatagen.flow(x=X_train, y=y_train)

# The held-out samples are served untouched: random transformations would add
# noise to val_loss, which drives both the checkpointing and the early stopping.
val_datagen = ImageDataGenerator()
test_data = val_datagen.flow(x=X_test, y=y_test)

In [None]:
# We want checkpoints because losing training suckz lolz. https://keras.io/callbacks/#modelcheckpoint
# Whole models (not only weights) are saved, and only when val_loss improves.
# NOTE(review): the directory part of checkpoint_path presumably has to exist beforehand - confirm.
checkpoint = ModelCheckpoint(checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True,
                             save_weights_only=False, mode='auto', period=1)

# If we are not doing any progress, stops the whole thing. https://keras.io/callbacks/#earlystopping
# NOTE(review): patience (100) is not smaller than EPOCHS (100), so this callback
# looks unable to stop a run early - confirm whether a lower patience was intended.
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=100, verbose=1, mode='auto')

In [None]:
# FINALLY train the model. https://keras.io/models/sequential/#fit_generator
# Each epoch draws STEPS_PER_EPOCH batches from the training generator and
# 2 batches from the validation generator; the returned History object feeds
# the plots of the visualization section.
hist = model.fit_generator(generator=train_data, steps_per_epoch=STEPS_PER_EPOCH, epochs=EPOCHS,
                           verbose=1, validation_data=test_data,
                           validation_steps=2, callbacks=[checkpoint, early])

### Moment of (ground) truth

#### Visualization

In [None]:
# Loss: training curve first, validation curve second (same order as the legend).
for curve in ('loss', 'val_loss'):
    plt.plot(hist.history[curve])
plt.title("Model loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(["Loss", "Validation Loss"])
plt.show()

In [None]:
# Accuracy
# Depending on the Keras release the metric is recorded as 'acc' or as
# 'accuracy' (with the matching 'val_' prefixed key): use whichever exists.
acc_key = 'acc' if 'acc' in hist.history else 'accuracy'
plt.plot(hist.history[acc_key])
plt.plot(hist.history['val_' + acc_key])
plt.title("Model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.legend(["Accuracy", "Validation Accuracy"])
plt.show()

#### Testing

In [None]:
# Show the prediction of the model for three random held-out samples.
for _ in range(3):
    # Pick a valid index of X_test: the number of image files is unrelated to
    # the number of test samples and could point past the end of the array.
    test_img = X_test[random.randrange(len(X_test))]

    # The model expects a batch, hence the extra leading axis.
    test_out = model.predict(np.expand_dims(test_img, axis=0))
    guess = chosen_binarizer.classes_[test_out.argmax()]
    guess_probability = test_out.max()

    plt.figure()
    plt.title("{} with probability {}.".format(guess, guess_probability))
    # The samples were cropped from images decoded by OpenCV (BGR order):
    # convert to RGB for matplotlib so the colours are displayed faithfully.
    plt.imshow(cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB))