In [None]:
import os
from os import path, listdir
import tensorflow as tf
import tensorflow as tf
import cv2
import waymo_open_dataset
from waymo_open_dataset.utils import  frame_utils
from waymo_open_dataset import dataset_pb2 as open_dataset
import utils
from utils import extract_and_serialize_frame, deserialize_example, extract_frame_features
import matplotlib.pyplot as plt
import numpy as np
import tarfile
import math
from PIL import Image

In [None]:
def convert_waymo_data(tar_dir, temp_dir, tgt_dir, start, end):
    """Unpack Waymo tar archives and re-serialize their records into parsed TFRecords.

    For each selected archive: extract it into ``temp_dir``, remove the bundled
    LICENSE file if present, pass every record of the extracted files through
    ``extract_and_serialize_frame`` and write the results to
    ``<tgt_dir>/parsed-NNN.tfrecord``, then delete the extracted files.

    Args:
        tar_dir: Directory containing the tar archives.
        temp_dir: Scratch directory the archives are extracted into. Everything
            found in it after extraction is read as TFRecord input and deleted
            afterwards.
        tgt_dir: Directory receiving the parsed TFRecord files (created if missing).
        start: Slice start into the sorted archive listing.
        end: Slice end (exclusive) into the sorted archive listing.
    """
    # listdir() returns entries in arbitrary order; sort so that [start:end]
    # selects the same archives on every run and on every machine.
    tar_names = sorted(listdir(tar_dir))
    # Absolute positions of the selected archives. They are used to number the
    # output files so that converting in batches (0-16, then 16-32, ...) does
    # not overwrite the files written by an earlier batch.
    selected = range(len(tar_names))[start:end]
    tars = [path.join(tar_dir, tar_names[pos]) for pos in selected]
    print(tars)

    os.makedirs(tgt_dir, exist_ok=True)

    for (tar_idx, tar) in zip(selected, tars):
        print(f"Extracting {tar} to {temp_dir}")
        # 'r:' opens the archive without transparent compression handling.
        with tarfile.open(tar, 'r:') as tar_file:
            # NOTE(review): extractall() trusts the member paths stored in the
            # archive; only run this on archives from a trusted source.
            tar_file.extractall(temp_dir)

        parsed_file = path.join(tgt_dir, f'parsed-{tar_idx:03d}.tfrecord')
        license_file = f'{temp_dir}/LICENSE'

        # The LICENSE file is not a TFRecord and must not be fed to the dataset.
        if path.exists(license_file):
            print(f'Deleting LICENSE file {temp_dir}/LICENSE')
            os.remove(license_file)
        temp_files = sorted(path.join(temp_dir, file) for file in listdir(temp_dir))

        try:
            print(f"Parsing data and writing to {parsed_file}")
            dataset = tf.data.TFRecordDataset(temp_files)
            # The context manager closes the writer even if a record fails to convert.
            with tf.io.TFRecordWriter(parsed_file) as writer:
                for (frame_idx, data) in enumerate(dataset):
                    if frame_idx % 100 == 0:
                        print(f'Processed {frame_idx} frames')
                    writer.write(extract_and_serialize_frame(data))
        finally:
            # Always clear the scratch directory so a failed archive does not
            # leak its files into the next archive's input list.
            print(f"Deleting temp files from {temp_dir}")
            for file in temp_files:
                os.remove(file)

In [None]:
# Convert the first 32 training archives into parsed TFRecords.
convert_waymo_data(
    tar_dir='Data/unprocessed/training',
    temp_dir='Data/temp/training',
    tgt_dir='Data/parsed/training',
    start=0,
    end=32,
)
# Convert the first 8 validation archives into parsed TFRecords.
convert_waymo_data(
    tar_dir='Data/unprocessed/validation',
    temp_dir='Data/temp/validation',
    tgt_dir='Data/parsed/validation',
    start=0,
    end=8,
)

In [None]:
# Default factor applied to both image dimensions when exporting to darknet.
SCALE_PCT = 0.5

def convert_to_darknet(src_dir, tgt_dir, index_file, scale_pct=None):
    """Export parsed TFRecord examples as darknet image/label pairs.

    Every record of every file in ``src_dir`` is deserialized with
    ``deserialize_example``. Its JPEG is rescaled and saved as
    ``<tgt_dir>/obj/example-<id>.jpg``, its boxes are written to
    ``<tgt_dir>/obj/example-<id>.txt`` (one ``class cx cy w h`` line per box,
    coordinates divided by the example's width/height), and
    ``data/obj/example-<id>.jpg`` is appended to ``index_file``.

    Args:
        src_dir: Directory containing the parsed TFRecord files.
        tgt_dir: Darknet data directory; output goes to its ``obj`` subdirectory
            (created if missing).
        index_file: Open, writable text file that receives one image path per line.
        scale_pct: Factor applied to both image dimensions. Defaults to the
            module-level ``SCALE_PCT`` (looked up at call time).
    """
    if scale_pct is None:
        scale_pct = SCALE_PCT

    obj_dir = path.join(tgt_dir, 'obj')
    os.makedirs(obj_dir, exist_ok=True)

    # Sorted so that image ids are assigned in a reproducible order.
    files = sorted(path.join(src_dir, file) for file in listdir(src_dir))
    # Each example produces two files (.jpg + .txt), so half the directory size
    # is the next free id; this lets a later call continue the numbering.
    img_id = len(listdir(obj_dir)) // 2

    for (idx, file) in enumerate(files):
        print(f"Converting {file} to {tgt_dir} ({idx + 1} of {len(files)})")

        dataset = tf.data.TFRecordDataset(file)
        for data in dataset:
            example = deserialize_example(data)
            og_width = example['width'].numpy()
            og_height = example['height'].numpy()

            image = Image.fromarray(tf.io.decode_jpeg(example['raw_image']).numpy())
            # PIL's resize() expects (width, height). Both are taken from the
            # decoded image itself so the aspect ratio is always preserved.
            scaled_size = (int(image.width * scale_pct), int(image.height * scale_pct))
            image = image.resize(scaled_size)

            image_name = f'example-{img_id}.jpg'
            image.save(f'{tgt_dir}/obj/{image_name}')

            classes = example['class'].numpy()
            boxes_center_y = example['box_center_y'].numpy()
            boxes_center_x = example['box_center_x'].numpy()
            boxes_width = example['box_width'].numpy()
            boxes_height = example['box_height'].numpy()
            label_name = f'example-{img_id}.txt'
            with open(f'{tgt_dir}/obj/{label_name}', 'w') as label_file:
                # NOTE(review): 'box_height' is bound to `w` and 'box_width' to `h`
                # below, i.e. the names are crossed relative to the label layout.
                # Kept as in the original because the field semantics are defined
                # in utils -- confirm which field holds the horizontal extent.
                for (clazz, cx, cy, w, h) in zip(classes, boxes_center_x, boxes_center_y, boxes_height, boxes_width):
                    # Labels are normalized by the original size, so they stay
                    # valid for the rescaled image.
                    cx /= og_width
                    cy /= og_height
                    w /= og_width
                    h /= og_height
                    darknet_labels = f'{clazz} {cx} {cy} {w} {h}\n'
                    label_file.write(darknet_labels)

            index_file.write(f'data/obj/{image_name}\n')
            img_id += 1

        

In [None]:
# NOTE(review): the conversion cell earlier in this notebook writes its output to
# 'Data/parsed/<split>', while the calls below read from 'Data/processed/<split>'.
# Confirm that the directories are meant to differ.

# Export the training split and record its image paths in training.txt.
with open('Data/darknet-data/training.txt', mode='w') as training_index:
    convert_to_darknet(
        src_dir='Data/processed/training',
        tgt_dir='Data/darknet-data',
        index_file=training_index,
    )

# Export the validation split and record its image paths in validation.txt.
with open('Data/darknet-data/validation.txt', mode='w') as validation_index:
    convert_to_darknet(
        src_dir='Data/processed/validation',
        tgt_dir='Data/darknet-data',
        index_file=validation_index,
    )