Scene-Text Detection

In [169]:
# Loading all necessary libraries and modules
import os
import cv2
import csv
import math
import pickle
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pprint import pprint
from matplotlib import gridspec

import xml.etree.ElementTree as ET

In [13]:
def get_data_dir(dataset_name: str):
    main_dir = os.getcwd().strip()
    dataset_dir = os.path.join(main_dir, 'dataset', dataset_name).replace("\\", "/")
    return dataset_dir

In [40]:
#dataset annotation csv preparation
def load_map_data(dataset: str, dataset_dir: str, image_format: str, annot_format: str):
    file_map_dict = {}
    annot_map_dict = {}

    data_dirs = ['train', 'test']

    for fname in os.listdir(dataset_dir):
        path = os.path.join(dataset_dir, fname).strip().replace("\\", "/")
        all_files = os.listdir(path)
        if fname in data_dirs:
            img_files = [os.path.join(path, f).strip().replace("\\", "/") for f in all_files if f[-len(image_format):] == image_format]
            file_map_dict[fname] = [os.path.relpath(f).strip().replace("\\", "/") for f in img_files]
        if fname == 'Annotations':
            annot_files = [os.path.join(path, f).strip().replace("\\", "/") for f in all_files if f[-len(annot_format):] == annot_format]
            annot_map_dict[fname] = [os.path.relpath(f).strip().replace("\\", "/") for f in annot_files]

    return file_map_dict, annot_map_dict


In [49]:
def extract_annot_paths(dataset_dir: str, file_map_dict: dict, annot_map_dict: dict, annot_format: str, prefix: str):
    train_annots = list()
    test_annots = list()

    orig_annot_path = path = os.path.join(dataset_dir, 'Annotations').replace("\\", "/")

    for ind, each in enumerate([fs.split("/")[-1][:-4] for fs in file_map_dict['train']] ):
        if prefix+each in [annot.split("/")[-1][:-4] for annot in annot_map_dict['Annotations']]:
            train_annots.append(os.path.join(orig_annot_path, prefix+each+'.'+annot_format).replace("\\", "/"))

    for ind, each in enumerate([fs.split("/")[-1][:-4] for fs in file_map_dict['test']] ):
        if prefix+each in [annot.split("/")[-1][:-4] for annot in annot_map_dict['Annotations']]:
            test_annots.append(os.path.join(orig_annot_path, prefix+each+'.'+annot_format).replace("\\", "/"))

    return train_annots, test_annots

In [121]:
def parse_annot(file_path, dataset_name: str, prefix:str, suffix:str):
    if 'voc' in dataset_name:
        class_n = 'Non-Text'
        tree = ET.parse(file_path)
        root = tree.getroot()

        size_elem = root.find('size')
        file_w = size_elem.find('width').text
        file_h = size_elem.find('height').text

        name_elem = root.find('filename')
        filename = name_elem.text

        obj_elem = root.findall('object')
        bbox_elem = []
        for obj in obj_elem:
            bbox = obj.find('bndbox')
            xmin = bbox.find('xmin').text
            ymin = bbox.find('ymin').text
            xmax = bbox.find('xmax').text
            ymax = bbox.find('ymax').text
            bbox_elem.append([filename, file_w, file_h, xmin,ymin, xmax, ymax, class_n])

    elif 'icdar' in dataset_name:
        class_n = 'Text'
        with open(file_path, 'r', encoding='utf8') as f:
            filename = file_path.split("/")[-1].split(prefix)[-1].split(".")[0]+suffix
            annot_data = []
            for dat in f.readlines():
                try:
                    temp = dat.split(" ")
                    int(temp[0])
                except ValueError:
                    temp = dat.split(", ")

                annot_data.append(temp)

            text_name = [each[-1].split('"')[1] for each in annot_data]
            bbox_elem = [[filename]+each[:-1]+[class_n] for each in annot_data]

    return bbox_elem

In [183]:
def extract_annotations(train_annots:list, test_annots:list, dataset_name:str, filename:list, show_annot=False):
    annotation_voc = []
    for annots in train_annots:
        annotation_voc.extend(parse_annot(annots, dataset_name, prefix='gt_', suffix='.jpg'))
    for annots in test_annots:
        annotation_voc.extend(parse_annot(annots, dataset_name, prefix='gt_', suffix='.jpg'))

    if show_annot:
        assert filename, "a list of filename must be provided."
        for f_name in filename:
            print("Annotations found for image", f_name, "==>")
            pprint([name for name in annotation_voc if name[0] == f_name])
            print("")
    
    return annotation_voc

In [162]:
def display_annotations(dataset_name:str, dataset: str, file_map_dict: dict, annotation_voc: list, resize_attrib = (900, 600), n_images = 5):
    for ind, fpaths in enumerate(file_map_dict[dataset]):
        img = cv2.imread(fpaths)
        for annots in annotation_voc:
            if annots[0] == fpaths.split("/")[-1]:
                fname = annots[0]
                if 'voc' in dataset_name:
                    cv2.rectangle(img, (int(annots[3]), int(annots[4])), 
                                        (int(annots[5]), int(annots[6])), (255,0,0), 2)
                if 'icdar' in dataset_name:
                    cv2.rectangle(img, (int(annots[1]), int(annots[2])), 
                                        (int(annots[3]), int(annots[4])), (255,0,0), 2)
        print("Displaying",fname, "from", dataset+"ing dataset: ")
        print("Info: ")
        print("size=(h, w)", img.shape[:2], end="\n\n")

        img = cv2.resize(img, resize_attrib)
        cv2.imshow("image", img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        if n_images == "all":
            pass
        else:
            if ind == n_images:
                break

In [193]:
def get_annot_csv(dataset_dir: str,
                dataset: str,
                file_map_dict: dict,
                csv_data: list,
                filename: str,
                header = ['filename', 'xmin', 'ymin', 'xmax', 'ymax', 'class']):
    to_csv = csv_data.copy()
    to_csv = [header]+to_csv

    if dataset:
        all_filenames = [fnames.split("/")[-1] for fnames in file_map_dict[dataset]]

    with open(os.path.join(dataset_dir, filename).replace("\\", "/"), 'w', encoding='utf8', newline='') as f:
        for row in to_csv:
            if not dataset:
                for x in row:
                    f.write(x + ',')
                f.write('\n')
            else:
                if row[0] in all_filenames:
                    for x in row:
                        f.write(x + ',')
                    f.write('\n')           

In [56]:
dataset_name = "icdar-2013"
dataset_dir = get_data_dir(dataset_name)

print("Directory to extract data ==> ", dataset_dir, end="\n\n")

file_map_dict, annot_map_dict = load_map_data(dataset_name, dataset_dir, "jpg", "txt")

print("Total annotations found: ", len(annot_map_dict['Annotations']))
print("Total training images found: ", len(file_map_dict['train']))
print("Total testimg images found: ", len(file_map_dict['test']))

Directory to extract data ==>  e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013

Total annotations found:  390
Total training images found:  300
Total testimg images found:  90


In [172]:
train_annots, test_annots = extract_annot_paths(dataset_dir, file_map_dict, annot_map_dict, "txt", prefix='gt_')

print("Top 5 training annotation files found: ", train_annots[:5], end="\n\n")
print("Top 5 testimg annotation files found: ", test_annots[:5])

Top 5 training annotation files found:  ['e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013/Annotations/gt_100.txt', 'e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013/Annotations/gt_101.txt', 'e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013/Annotations/gt_102.txt', 'e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013/Annotations/gt_103.txt', 'e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013/Annotations/gt_104.txt']

Top 5 testimg annotation files found:  ['e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013/Annotations/gt_img_100.txt', 'e:/Gustovalley/PhD Projects/Text segmentation/Git-Repo/Code/text_classification/dataset/icdar-2013/Annotations/gt_img_101.txt', 'e:/Gustovalley/PhD Projects/Text s

In [182]:
annotation_voc = extract_annotations(train_annots, test_annots, dataset_name, ["img_98.jpg", "img_99.jpg"], True)

Annotations found for image img_98.jpg ==>
[['img_98.jpg', '1012', '350', '1145', '370', 'Text'],
 ['img_98.jpg', '553', '568', '700', '611', 'Text'],
 ['img_98.jpg', '213', '651', '1037', '749', 'Text'],
 ['img_98.jpg', '328', '773', '490', '825', 'Text'],
 ['img_98.jpg', '516', '767', '610', '823', 'Text'],
 ['img_98.jpg', '633', '767', '731', '838', 'Text'],
 ['img_98.jpg', '757', '767', '926', '822', 'Text']]

Annotations found for image img_99.jpg ==>
[['img_99.jpg', '423', '742', '446', '758', 'Text'],
 ['img_99.jpg', '462', '743', '513', '761', 'Text'],
 ['img_99.jpg', '531', '744', '651', '762', 'Text'],
 ['img_99.jpg', '667', '745', '696', '762', 'Text'],
 ['img_99.jpg', '713', '745', '819', '763', 'Text'],
 ['img_99.jpg', '835', '744', '894', '763', 'Text'],
 ['img_99.jpg', '912', '743', '1043', '767', 'Text'],
 ['img_99.jpg', '363', '769', '516', '791', 'Text'],
 ['img_99.jpg', '537', '772', '594', '793', 'Text'],
 ['img_99.jpg', '615', '773', '695', '794', 'Text'],
 ['img_9

In [184]:
display_annotations(dataset_name, 'train', file_map_dict, annotation_voc, n_images=5)

Displaying 100.jpg from training dataset: 
Info: 
size=(h, w) (480, 640)

Displaying 101.jpg from training dataset: 
Info: 
size=(h, w) (480, 640)

Displaying 102.jpg from training dataset: 
Info: 
size=(h, w) (480, 640)

Displaying 103.jpg from training dataset: 
Info: 
size=(h, w) (480, 640)

Displaying 104.jpg from training dataset: 
Info: 
size=(h, w) (640, 480)

Displaying 105.jpg from training dataset: 
Info: 
size=(h, w) (1280, 960)



In [196]:
get_annot_csv(dataset_dir, 'train', file_map_dict, csv_data = annotation_voc, filename= '../annotation_train.csv')
get_annot_csv(dataset_dir, 'test', file_map_dict, csv_data = annotation_voc, filename= '../annotation_test.csv')
get_annot_csv(dataset_dir, False, file_map_dict, csv_data = annotation_voc, filename= '../annotation_icdar.csv')