# Import Libraries

In [48]:
import os
from typing import Optional

import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image

from utils import generate_tfrecord

In [13]:
# Root directory of each dataset split.
TRAIN_DATASET_PATH, VAL_DATASET_PATH, TEST_DATASET_PATH = (
    "./dataset/train",
    "./dataset/valid",
    "./dataset/test",
)

# Sub-folder suffixes appended to a split root. The leading slash is
# intentional: paths in this notebook are built by plain string concatenation.
LABELS_FOLDER, IMAGES_FOLDER = "/labels", "/images"

# File-name suffixes of the per-split outputs written under a split root.
CSV_LABELS_NAME, RECORD_FILE_NAME = "/labels.csv", "/dataset.record"

# Image size in pixels used to scale the label coordinates.
IMG_WIDTH = IMG_HEIGHT = 416

In [14]:
def load_dataset(
    path: str,
    img_width: Optional[int] = None,
    img_height: Optional[int] = None,
) -> pd.DataFrame:
    """Parse a folder of box-annotation text files into a table of pixel boxes.

    Every non-blank line of every file in ``path`` is read as
    ``<class> <x_center> <y_center> <width> <height>``, where the four box
    values are fractions of the image size (this matches the YOLO text
    annotation layout). Each line becomes one row whose corner coordinates
    are scaled to pixels and truncated to ``int``.

    Args:
        path: Directory containing one label file per image.
        img_width: Image width in pixels used for scaling and stored in the
            ``width`` column. Defaults to the module-level ``IMG_WIDTH``.
        img_height: Image height in pixels used for scaling and stored in the
            ``height`` column. Defaults to the module-level ``IMG_HEIGHT``.

    Returns:
        A DataFrame with columns ``filename, width, height, class, xmin, ymin,
        xmax, ymax``. ``filename`` is the label file's name with its extension
        replaced by ``.jpg``; ``class`` is the file's class id plus one.
        Rows are ordered by label file name, then by line order in the file.
        An empty directory yields an empty frame with the same columns.

    Raises:
        ValueError: If a non-blank line has fewer than five fields or a field
            is not numeric.
    """
    # Resolve defaults at call time so the notebook constants stay the single
    # source of truth when the caller does not override them.
    if img_width is None:
        img_width = IMG_WIDTH
    if img_height is None:
        img_height = IMG_HEIGHT

    columns = ["filename", "width", "height", "class", "xmin", "ymin", "xmax", "ymax"]
    records = []

    # os.listdir returns entries in arbitrary order; sort for reproducible rows.
    for label_name in sorted(os.listdir(path)):
        label_path = os.path.join(path, label_name)
        if not os.path.isfile(label_path):
            continue  # sub-directories cannot be parsed as label files

        # Swap whatever extension the label file has for ".jpg".
        image_name = os.path.splitext(label_name)[0] + ".jpg"

        # The context manager closes the file even if parsing fails below.
        with open(label_path) as label_file:
            lines = label_file.read().splitlines()

        for line_no, line in enumerate(lines, start=1):
            parts = line.split()
            if not parts:
                continue  # blank line, e.g. produced by a trailing newline
            if len(parts) < 5:
                raise ValueError(
                    f"{label_path}:{line_no}: expected 5 fields, got {len(parts)}"
                )

            x_center, y_center, box_width, box_height = map(float, parts[1:5])
            half_width = box_width / 2
            half_height = box_height / 2

            records.append(
                {
                    "filename": image_name,
                    "width": img_width,
                    "height": img_height,
                    # Class ids are shifted by one; presumably id 0 is reserved
                    # downstream (e.g. for background) -- TODO confirm.
                    "class": int(parts[0]) + 1,
                    "xmin": int((x_center - half_width) * img_width),
                    "ymin": int((y_center - half_height) * img_height),
                    "xmax": int((x_center + half_width) * img_width),
                    "ymax": int((y_center + half_height) * img_height),
                }
            )

    # Passing `columns` keeps the schema stable when no rows were collected.
    return pd.DataFrame(records, columns=columns)


# Load Data

In [15]:
# Training split: parse every label file under its labels folder into one table.
path = f"{TRAIN_DATASET_PATH}{LABELS_FOLDER}"

train_labels = load_dataset(path)
train_labels

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,009000_jpg.rf.8c46e1aa5b46a0ad24ee4bcb2945d22a...,416,416,3,259,211,285,239
1,009002_jpg.rf.18bf80f2cfdb51f853da15019f787cef...,416,416,3,220,126,243,155
2,009003_jpg.rf.46963402c4cb6f46a47e508b892c6521...,416,416,3,155,146,196,169
3,009007_jpg.rf.a5143afbb0c741f3b60fc72403fdde6a...,416,416,3,155,108,178,183
4,009012_jpg.rf.bc99877ade8754d2be89119361e7820c...,416,416,3,323,146,345,176
...,...,...,...,...,...,...,...,...
6176,042973_jpg.rf.6792837ba2183435721cd7b4e9674ecd...,416,416,2,127,107,162,143
6177,042982_jpg.rf.79a8b11af76f74faa44312da78ee1486...,416,416,5,138,184,164,254
6178,042984_jpg.rf.5005cade401420afa36aac4b818e8dfa...,416,416,2,151,142,170,164
6179,042994_jpg.rf.141176d36edf4a11d33dad95a76e61b6...,416,416,2,193,215,230,241


In [16]:
# Persist the training boxes next to the split; the row index is not written.
train_labels.to_csv(
    path_or_buf=TRAIN_DATASET_PATH + CSV_LABELS_NAME,
    index=False,
)

In [17]:
# Validation split: parse every label file under its labels folder into one table.
path = f"{VAL_DATASET_PATH}{LABELS_FOLDER}"

val_labels = load_dataset(path)
val_labels

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,009004_jpg.rf.0f740b5e6ccaea6cc4179f6b479938a1...,416,416,3,241,228,271,260
1,009005_jpg.rf.1282241890cc85a75efb33799f4697f3...,416,416,3,245,239,269,273
2,009009_jpg.rf.a9b812f04180caf37d20ba44cf2e1f5f...,416,416,3,241,132,348,153
3,009025_jpg.rf.4b29baf3c21372ab94c409f8705f53bb...,416,416,3,256,176,275,236
4,009029_jpg.rf.1bdc0520750c7e352c088c9fca463669...,416,416,3,201,182,218,268
...,...,...,...,...,...,...,...,...
1761,042914_jpg.rf.81db8c00042e1c9cfd16c252b7d4a512...,416,416,2,179,254,235,281
1762,042923_jpg.rf.9e9a10350da109f12875d26020da1ce0...,416,416,2,188,138,209,157
1763,042945_jpg.rf.eaa2f42a7aacb48dd2ea969c3a07de00...,416,416,2,216,117,276,153
1764,042950_jpg.rf.0cfa30f0f1ef4896e6a1bf5a6738ef45...,416,416,4,230,152,261,238


In [18]:
# Persist the validation boxes next to the split; the row index is not written.
val_labels.to_csv(
    path_or_buf=VAL_DATASET_PATH + CSV_LABELS_NAME,
    index=False,
)

In [19]:
# Test split: parse every label file under its labels folder into one table.
path = f"{TEST_DATASET_PATH}{LABELS_FOLDER}"

test_labels = load_dataset(path)
test_labels

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,009008_jpg.rf.8f9d287571d5d48f46a87116a4a82d56...,416,416,3,194,146,211,245
1,009024_jpg.rf.6de3f50660b30e85323eb009190d7747...,416,416,3,225,143,243,168
2,009030_jpg.rf.c072eddbe5c4df12ba092a3fea353d7c...,416,416,3,205,137,227,167
3,009045_jpg.rf.73baf175ea33c52e85c53979fcc1cde0...,416,416,3,245,226,279,267
4,009072_jpg.rf.ee449c968c7fbd052f338148998aff10...,416,416,3,253,150,272,245
...,...,...,...,...,...,...,...,...
878,042828_jpg.rf.22fccd9a6d1b28c3c2df4f05229e1a94...,416,416,2,206,129,257,165
879,042831_jpg.rf.6a2e31c377a986c2bc3bf7ea7746c321...,416,416,5,260,270,286,327
880,042904_jpg.rf.47d8f51176d6ab5d0e93b66dc563ecc0...,416,416,4,186,158,216,259
881,042916_jpg.rf.bafff2c66a8068e1d5b2029bf4b00510...,416,416,4,262,186,291,259


In [20]:
# Persist the test boxes next to the split; the row index is not written.
test_labels.to_csv(
    path_or_buf=TEST_DATASET_PATH + CSV_LABELS_NAME,
    index=False,
)

# Transform Dataset

In [21]:
# Build one TFRecord per split (train, valid, test -- in that order) from the
# labels CSV and the images folder that sit under the split's root directory.
for split_root in (TRAIN_DATASET_PATH, VAL_DATASET_PATH, TEST_DATASET_PATH):
    generate_tfrecord.run(
        csv_input=split_root + CSV_LABELS_NAME,
        output_path=split_root + RECORD_FILE_NAME,
        image_dir=split_root + IMAGES_FOLDER,
    )

Successfully created the TFRecords: d:\Codes\Hacktiv8\Final Project\p2-final-project-ftds-hck-026\./dataset/train/dataset.record
Successfully created the TFRecords: d:\Codes\Hacktiv8\Final Project\p2-final-project-ftds-hck-026\./dataset/valid/dataset.record
Successfully created the TFRecords: d:\Codes\Hacktiv8\Final Project\p2-final-project-ftds-hck-026\./dataset/test/dataset.record
