In [1]:
import pandas as pd
import os
import cv2 as cv
import tensorflow as tf
from tensorflow.train import Example , Int64List , BytesList , FeatureList , Features 
import xml.etree.cElementTree as ET
from PIL import Image

# -----------------------------------
# Create jpg images from ppm images :

In [2]:
data_dir = '../data/TrainIJCNN2013/'
data_info_dir = '../data/TrainIJCNN2013/data_info.csv'
data_info_df = pd.read_csv(data_info_dir)
final_df = pd.DataFrame(columns=data_info_df.columns)

In [3]:
for i in range(len(data_info_df)):
    data_info_df['path'][i] = data_info_df['path'][i][:-4]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_info_df['path'][i] = data_info_df['path'][i][:-4]


In [4]:
for current_dir , dirs ,files in os.walk(data_dir):
    for f in files:
        if f.endswith('.ppm'):
            image_name = f[:-4]
            image = cv.imread(data_dir + f)
            single_data_line = data_info_df.loc[data_info_df['path'] == f[:-4]].copy()
            if single_data_line.isnull().values.all():
                os.remove(data_dir + f)
            else:
                final_df = final_df.append(single_data_line)
                save_path = "../data/Images/" + image_name + '.jpg'
                if not os.path.isfile(save_path):
                    cv.imwrite(save_path,image)

In [5]:
final_df = final_df[~final_df.index.duplicated(keep='first')]
final_df.sort_index(inplace=True)

In [6]:
%%writefile ../data/obj.names
prohibitory
danger
mandatory
other

Writing ../data/obj.names


# -----------------------------------
# Creating annotations files :

In [7]:
ANNOTATIONS_DIR_PREFIX = "../data/Images"
DESTINATION_DIR = "../data/XML"
CLASS_MAPPING = {
    '0' : 'prohibitory',
    '1' : 'danger' ,
    '2' : 'mandatory' ,
    '3' : 'other' ,
}

In [8]:
def create_root(filename, width, height):
    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = "Images"
    ET.SubElement(root, "filename").text = (filename)
    ET.SubElement(root, "path").text = "../data/Images/{}".format(filename)
    source = ET.SubElement(root, "source")
    ET.SubElement(source, "database").text = "Unknown"
    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)
    ET.SubElement(size, "depth").text = "3"
    ET.SubElement(root, "segmented").text = "0"
    return root
# ---------------------------------------------------
def create_object_annotation(root, voc):
    for ind,voc_label in voc.iterrows():
        obj = ET.SubElement(root, "object")
        ET.SubElement(obj, "name").text=str(CLASS_MAPPING.get(str(voc_label["id"])))
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = str(0)
        ET.SubElement(obj, "difficult").text = str(0)
        bbox = ET.SubElement(obj, "bndbox")
        ET.SubElement(bbox, "xmin").text = str(voc_label["left"])
        ET.SubElement(bbox, "ymin").text = str(voc_label["top"])
        ET.SubElement(bbox, "xmax").text = str(voc_label["right"])
        ET.SubElement(bbox, "ymax").text = str(voc_label["bottom"])
    if(len(voc)==0):
        print(voc)
        print('no')
    return root
# ---------------------------------------------------
def create_file(filename, width, height, voc):
    root = create_root(filename, width, height)
    root = create_object_annotation(root, voc)
    tree = ET.ElementTree(root)
    tree.write("{}/{}.xml".format(DESTINATION_DIR, filename[:-4]))
# ---------------------------------------------------
def read_file(filename):
    img = cv.imread("{}/{}".format("../data/Images/", filename))
    w,h=img.shape[:2]
    voc = final_df.loc[final_df.path == filename[:-4]].copy()
    voc["name"] = CLASS_MAPPING.get(str((voc["id"])))
    create_file(filename, w, h, voc)
# ---------------------------------------------------
def start():
    if not os.path.exists(DESTINATION_DIR):
        os.makedirs(DESTINATION_DIR)
    for filename in os.listdir(ANNOTATIONS_DIR_PREFIX):
          if filename.endswith(".jpg"):
            read_file(filename)

In [9]:
start()