## Dataset folder structure 

- rear_signal_dataset
   - Footage_name
      - Footage_name_XXX
         - Footage_name_XXX_DDD (sequence of class XXX starting from frame number DDD)
            - light_mask
               - frameDDDDDDDD.png (frames with an 8-digit number indicating the frame number)
               - ...
         - Footage_name_XXX_DDD
            - light_mask
               - frameDDDDDDDD.png
               - ...
      - Footage_name_XXX
         - Footage_name_XXX_DDD
            - ...
         - Footage_name_XXX_DDD
            - ...
         - Footage_name_XXX_DDD
            - ...
   - Footage_name
      - ...

\* XXX indicates the label of the signal (ex. "BLO"), and DDD is the frame number at which the sequence starts.

The dataset first divides the data by Footage_name (ex. "20160809_route8-08-09-2016_09-50-36_idx99"). Within each footage directory, there is a sub-directory for each class of label (brake lights on, lights off, etc.). Within each class directory, there is a sub-directory for each brief burst of shots. Each burst of shots captures a single vehicle, so the images within a burst are nearly identical to each other.

In [1]:
from utils import get_immediate_directories, get_label_sequence_from_name, get_split, image_is_valid, get_immediate_images

## Read in the list of easy, medium, hard

In [2]:
from difficulty_levels import DifficultyLevels, write_difficulty_levels

# Load the per-sequence difficulty assignments from the Easy/Moderate/Hard list files.
# NOTE(review): the file format is defined in difficulty_levels.py, which is not visible here.
difficulties = DifficultyLevels('./data/Easy.txt', './data/Moderate.txt', './data/Hard.txt')  


E: 569, M: 72, H: 26


## Get the paths to the images
The path to each image is collected per class.

In [3]:
import os
import random
from collections import defaultdict

# Seed the random image selector so the output is reproducible
random.seed(0)

def get_paths_for_images(one_per_sequence=True):
    """
    Gets the paths of the images found under './data', grouped by class label.

    The tree is walked three levels deep (footage -> class -> sequence) and
    the images are read from the "light_mask" folder of every sequence.

    - one_per_sequence: if this is true, only one randomly chosen image will
      be used from each sequence for a particular class.

    If one_per_sequence, it will return a dictionary of form:
        {label: [(image_path, difficulty), (image_path, difficulty), ...]}

    If not one_per_sequence, it will return a dictionary of form:
        {label: [(image_path, difficulty, split), (image_path, difficulty, split), ...]}

    `difficulty` is whatever `difficulties.get_difficulty_level` returns for
    the sequence directory name (the callers check for "E", "M" and "H").
    `split` is the value of `get_split()`, drawn once per sequence so that
    every image of a sequence shares the same split.

    Sequences whose "light_mask" folder contains no images are skipped with a
    printed message instead of raising IndexError from random.choice.
    """

    # Store the path to each of the image paths per class
    # label => [(path, difficulty)] or [(path, difficulty, split)]
    per_class_image_paths = defaultdict(list)

    data_root = './data'

    # Every listing below is sorted: the order in which the helpers return
    # entries is not visible here and may depend on the file system, which
    # would make the seeded random choices land on different files.
    footage_dirs = sorted(get_immediate_directories(data_root))

    for f_dir in footage_dirs:
        # These are folders corresponding to each class
        path_1 = os.path.join(data_root, f_dir)
        f_class_dirs = sorted(get_immediate_directories(path_1))

        # Loop through all class dirs of form Footage_name_XXX
        for f_class_dir in f_class_dirs:
            # The label of the class ex. "BLO"
            class_label, _ = get_label_sequence_from_name(f_class_dir)
            path_2 = os.path.join(path_1, f_class_dir)
            footage_sequence_dirs = sorted(get_immediate_directories(path_2))

            # Loop through all sequence dirs of form Footage_name_XXX_DDD
            for footage_sequence_dir in footage_sequence_dirs:
                # Only used in keeping the whole sequence. It is still drawn
                # for every sequence so the call pattern matches in both modes.
                split = get_split()

                path_3 = os.path.join(path_2, footage_sequence_dir, "light_mask")

                difficulty_level = difficulties.get_difficulty_level(footage_sequence_dir)

                image_names = sorted(get_immediate_images(path_3))
                image_names = [os.path.join(path_3, i) for i in image_names]

                if not image_names:
                    # random.choice() raises IndexError on an empty list
                    print("No images found in sequence dir: {}".format(path_3))
                    continue

                if one_per_sequence:
                    # Choose one image randomly
                    # All the images are pretty similar, so just need one
                    full_image_path = random.choice(image_names)

                    per_class_image_paths[class_label].append((full_image_path, difficulty_level))
                else:
                    for full_image_path in image_names:
                        per_class_image_paths[class_label].append((full_image_path, difficulty_level, split))
    return per_class_image_paths

## Extract single image from each sequence
The following will extract a single image from each sequence. The extracted images are copied under new names, and new easy, medium, and hard .txt files are written that refer to the new names.

In [4]:
from shutil import copyfile
from utils import rm_mkdir

# One randomly chosen image per sequence, grouped by label
per_class_image_paths = get_paths_for_images()

# NOTE(review): rm_mkdir presumably removes any existing directory and
# recreates it empty — confirm in utils
output_dir = "./output"
rm_mkdir(output_dir)

# One sub-directory per label
for label in per_class_image_paths:
    os.mkdir(os.path.join(output_dir, label))

# New image names, bucketed by difficulty level
new_easy, new_medium, new_hard = [], [], []
names_by_difficulty = {"E": new_easy, "M": new_medium, "H": new_hard}

# Running counter shared by all labels, so every copied image gets a unique name
frame_number = 0

for label, labelled_images in per_class_image_paths.items():
    output_dir_path = os.path.join(output_dir, label)

    for image_path, difficulty in labelled_images:
        # Create a new name for the image
        output_image_name = f'frame_{frame_number:05}.png'
        frame_number += 1

        # Copy the file over
        output_path = os.path.join(output_dir_path, output_image_name)
        copyfile(image_path, output_path)

        # Images whose difficulty is not "E", "M" or "H" are copied but
        # not recorded in any of the difficulty lists
        bucket = names_by_difficulty.get(difficulty)
        if bucket is not None:
            bucket.append(output_image_name)

Unrecognized sequence dir name: 20161013_demo_test-10-13-2016_15-51-02_OLO_00001274
Unrecognized sequence dir name: 20161013_demo_test-10-13-2016_15-51-02_BOO_00001274
Unrecognized sequence dir name: 20161007_demo_surface-10-07-2016_16-14-04_2_BOR_00005877
Unrecognized sequence dir name: 20161007_demo_surface-10-07-2016_16-14-04_2_BOO_00005877
Unrecognized sequence dir name: 20160915_route_demo2-09-15-2016_18-49-23_OOR_00000215
Unrecognized sequence dir name: route-02-23-2016_17-17-51_BOO_9125
Unrecognized sequence dir name: 20160920_route_demo-09-20-2016_18-47-39_BLO00001405


In [5]:
# Record the renamed images per difficulty level for the single-image output.
# NOTE(review): presumably writes the easy/medium/hard .txt files under output_dir — confirm in difficulty_levels.py
write_difficulty_levels(output_dir, new_easy, new_medium, new_hard)

## Use the entire sequence
The following can be used to make use of the entire image sequence. It ensures that each sequence will be used for either training, validation, or testing. This means the same car can only appear in a single split.

In [6]:
from shutil import copyfile
from utils import rm_mkdir

# Every image of every sequence, grouped by label; each entry also carries
# the split that was drawn for its sequence
per_class_image_paths = get_paths_for_images(one_per_sequence=False)

# NOTE(review): assumes get_split() only ever returns one of these names,
# otherwise the copy below targets a directory that was never created — confirm in utils
splits = ['train', 'val', 'test']

output_dir = "./sequence_output"

# NOTE(review): rm_mkdir presumably removes any existing directory and
# recreates it empty — confirm in utils
rm_mkdir(output_dir)

for s in splits:
    # Make sub-directory
    split_directory = os.path.join(output_dir, s)
    os.mkdir(split_directory)

    # Create directories for each label
    for label in per_class_image_paths:
        os.mkdir(os.path.join(split_directory, label))

# New image names, collected per difficulty level
new_easy = []
new_medium = []
new_hard = []

# Running counter shared by all labels and splits, so names never collide
frame_number = 0

# Copy the images into the new paths
for label in per_class_image_paths:
    for image_path, difficulty, split in per_class_image_paths[label]:
        # Derive the destination from output_dir (the directories above were
        # created from it too) instead of repeating the path literal
        output_dir_path = os.path.join(output_dir, split, label)

        # Create a new name for the image
        output_image_name = 'frame_' + f'{frame_number:05}.png'
        frame_number += 1

        output_path = os.path.join(output_dir_path, output_image_name)

        # Copy the file over
        copyfile(image_path, output_path)

        # Add the new name to the difficulty level path; images with any
        # other difficulty value are copied but not listed
        if difficulty == "E":
            new_easy.append(output_image_name)
        elif difficulty == "M":
            new_medium.append(output_image_name)
        elif difficulty == "H":
            new_hard.append(output_image_name)

Unrecognized sequence dir name: 20161013_demo_test-10-13-2016_15-51-02_OLO_00001274
Unrecognized sequence dir name: 20161013_demo_test-10-13-2016_15-51-02_BOO_00001274
Unrecognized sequence dir name: 20161007_demo_surface-10-07-2016_16-14-04_2_BOR_00005877
Unrecognized sequence dir name: 20161007_demo_surface-10-07-2016_16-14-04_2_BOO_00005877
Unrecognized sequence dir name: 20160915_route_demo2-09-15-2016_18-49-23_OOR_00000215
Unrecognized sequence dir name: route-02-23-2016_17-17-51_BOO_9125
Unrecognized sequence dir name: 20160920_route_demo-09-20-2016_18-47-39_BLO00001405


In [7]:
# Record the renamed images per difficulty level for the whole-sequence output.
# Uses output_dir (set to "./sequence_output" in the previous cell) rather than
# repeating the literal, so the lists always land next to the copied images.
write_difficulty_levels(output_dir, new_easy, new_medium, new_hard)