In [2]:
import os, sys, json
import cv2
import numpy as np
import matplotlib.pyplot as plt


In [3]:
# ---- inputs ----
# Root folder; image names and the JSON configs below are joined onto this path.
data_path = '//home//mayer//LocalRepository//JupyterProjects//MADE_2019_cv//02_CarPlatesOCR//data//'
# Folder that is walked when the extended recognition dataset is assembled.
data_path_generatedPlates = "//home//mayer//LocalRepository//JupyterProjects//MADE_2019_cv//02_CarPlatesOCR//data//train_generated_60k//"
train_file = os.path.join(data_path, 'train.json')

# ---- outputs (all written next to train.json) ----
train_seg_file, train_rec_file, train_rec_file_ext = (
    os.path.join(data_path, json_name)
    for json_name in (
        'train_segmentation.json',
        'train_recognition.json',
        'train_recognitionExt.json',
    )
)

#### Generate segmentation dataset

In [None]:
# Let's convert the annotated plate polygons to segmentation masks on disk.
# (You can do it on the fly instead, if you've got enough CPU and a high
# number of workers.)
#
# For every entry of train.json a mask image "<name>.mask<ext>" is written next
# to the source image, unless that file already exists, and its name (relative
# to data_path) is stored in elem['mask'].  `data` is updated in place.
with open(train_file) as rf:
    data = json.load(rf)

unreadable = []  # names of images that cv2 could not load
for i, elem in enumerate(data):
    if i % 100 == 0:
        print (i, len(data))
    fname = elem['file']
    path = os.path.join(data_path, fname)
    base, ext = os.path.splitext(fname)
    mask_fname = base + '.mask' + ext
    mask_path = os.path.join(data_path, mask_fname)
    nums = elem['nums']

    if not os.path.exists(mask_path):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None.
            # The entry stays in `data` WITHOUT a 'mask' key, so remember it
            # and report it below instead of skipping silently.
            unreadable.append(fname)
            continue
        # Same shape as the image: plate pixels are 255 in every channel.
        mask = np.zeros(shape=img.shape, dtype=np.uint8)
        for num in nums:
            # Polygon vertices are pixel coordinates; make the integer dtype
            # explicit rather than relying on an implicit conversion.
            bbox = np.array(num['box'], dtype=np.int32)
            cv2.fillConvexPoly(mask, bbox, (255,255,255))
        cv2.imwrite(mask_path, mask)

    elem['mask'] = mask_fname

if unreadable:
    print('WARNING: {} image(s) could not be read and got no mask, e.g. {}'.format(
        len(unreadable), unreadable[:10]))

In [None]:
# Serialise `data` as JSON into train_seg_file.
with open(train_seg_file, 'w') as seg_out:
    seg_out.write(json.dumps(data))

In [None]:
# Sanity check: build the detection dataset from the segmentation config and
# inspect its first sample.
# NOTE(review): part=0.001 presumably selects a small fraction of the data —
# confirm against DetectionDataset.
from detection.dataset import DetectionDataset

dataset = DetectionDataset(data_path, config_file=train_seg_file, part=0.001)
print(len(dataset))

# First sample unpacks into an (image, mask) pair; show both shapes and the
# largest value found in the mask, one per line.
image, mask = dataset[0]
print(image.shape, mask.shape, np.max(mask), sep='\n')


#### Generate recognition dataset

In [None]:
# Generate the OCR (recognition) dataset.
#
# Each annotated plate is cut out of its source image by the axis-aligned
# bounding rectangle of its polygon and saved as "<name>.box<j><ext>" next to
# the source image.  train_rec_file receives one {file, text} record per saved
# crop.  The whole step is skipped when train_rec_file already exists.
if not os.path.exists(train_rec_file):
    with open(train_file) as rf:
        data = json.load(rf)

    new_data = []
    for elem in data:
        img_path = os.path.join(data_path, elem['file'])
        img = cv2.imread(img_path)
        if img is None:
            # Missing/corrupt image: nothing to crop from.
            continue
        img_h, img_w = img.shape[:2]
        base, ext = os.path.splitext(elem['file'])
        for j, true_box in enumerate(elem['nums']):
            addon = base + '.box' + str(j) + ext
            crop_name = os.path.join(data_path, addon)

            box = true_box['box']
            x_box = [w[0] for w in box]
            y_box = [w[1] for w in box]
            # Clamp BOTH ends to the image: a negative upper bound would be
            # read by NumPy as "count from the end" and cut the wrong region.
            x1, x2 = max(0, min(x_box)), min(img_w, max(x_box))
            y1, y2 = max(0, min(y_box)), min(img_h, max(y_box))
            if x2 <= x1 or y2 <= y1:
                # Degenerate box or box entirely outside the image: the crop
                # would be empty and cv2.imwrite would fail on it.
                continue
            crop = img[y1: y2, x1: x2, :] #  TODO: you can normalize bounding box to make the OCR task easier
            cv2.imwrite(crop_name, crop)

            new_data.append(dict(file=addon, text=true_box['text']))

    with open(train_rec_file, 'w') as wf:
        json.dump(new_data, wf)

In [None]:
# Sanity check: load the recognition dataset and look at its first sample.
# `import recognition` is only referenced by the disabled sys.path tweak below.
import recognition
#sys.path.insert(0, recognition.__path__[0])
from recognition.dataset import RecognitionDataset

train_dataset = RecognitionDataset(data_path, train_rec_file)
train_size = len(train_dataset)
print('Train dataset: {}'.format(train_size))

# Samples are indexed by key: 'image' (has a .shape) and 'text'.
first_shape = train_dataset[0]['image'].shape
first_text = train_dataset[0]['text']
print(first_shape, first_text)

In [None]:
# Sanity check: run a freshly constructed RecognitionModel in eval mode on
# seeded Gaussian noise and print the decoded output.
import torch
from recognition.model import RecognitionModel

model = RecognitionModel()
model.eval()

np.random.seed(0)  # fixed seed so the random input is reproducible
# NOTE(review): (11, 3, 32, 320) is presumably (batch, channels, height, width) — confirm.
noise = np.random.normal(size=(11, 3, 32, 320))
inputs = torch.Tensor(noise)
outputs = model.forward(inputs, decode=True)
print(outputs)

#### Generate extended recognition dataset 

In [29]:
# Load the recognition dataset records from train_rec_file into `data`.
with open(train_rec_file) as rec_in:
    data = json.loads(rec_in.read())

In [30]:
# Extend `data` with every file found under data_path_generatedPlates.
#
# Each record is {file, text}:
#   * file - path relative to data_path (also correct for files in nested
#            sub-folders, and independent of how the path literal is spelled);
#   * text - the file name up to its first dot, upper-cased.
#
# Re-running this cell is safe: files already present in `data` are not added
# a second time.
known_files = {item['file'] for item in data}
new_data = []
for root, dirs, names in os.walk(data_path_generatedPlates):
    dirs.sort()  # deterministic traversal order
    for name in sorted(names):
        text = name.split(".")[0].upper()
        if not text:
            # Dot-files such as ".DS_Store" carry no label.
            continue
        rel_file = os.path.relpath(os.path.join(root, name), data_path)
        if rel_file in known_files:
            continue
        known_files.add(rel_file)
        new_data.append(dict(file=rel_file, text=text))
data.extend(new_data)

In [31]:
# Serialise the extended record list `data` as JSON into train_rec_file_ext.
with open(train_rec_file_ext, 'w') as ext_out:
    ext_out.write(json.dumps(data))