## Imports

In [1]:
import os
import pathlib
import re
import shutil
import sys

import gdown
from PIL import Image
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Used for multiprocessing scripts
NUM_PROC = 20

#### Add parent directory to path to enable access to other scripts)

In [2]:
module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

# Download RUGD Data
Change dataset_path if you want to download to a different directory

In [3]:
dataset_path = os.path.join(os.getcwd(), "..", "datasets", "RUGD")
print(dataset_path)

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD


## Download Images

In [4]:
rugd_images_path = os.path.join(dataset_path, "RUGD_frames-with-annotations.zip")
print(rugd_images_path)
gdown.download("http://rugd.vision/data/RUGD_frames-with-annotations.zip", rugd_images_path)

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\RUGD_frames-with-annotations.zip


## Download Labels

In [5]:
rugd_labels_path = os.path.join(dataset_path, "RUGD_annotations.zip")
print(rugd_labels_path)
gdown.download("http://rugd.vision/data/RUGD_annotations.zip", rugd_labels_path)

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\RUGD_annotations.zip


## Unzip images and labels

In [6]:
rugd_extract_path = os.path.join(dataset_path, "extracted")
print(rugd_extract_path)

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\extracted


In [7]:
gdown.extractall(rugd_images_path, rugd_extract_path)

['d:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_frames-with-annotations/',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_frames-with-annotations/park-1/',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_frames-with-annotations/park-1/park-1_01181.png',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_frames-with-annotations/park-1/park-1_02016.png',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_frames-with-annotations/park-1/park-1_02681.png',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_frames-with-annotations/park-1/park-1_01391.png',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_frames-with

In [8]:
gdown.extractall(rugd_labels_path, rugd_extract_path)

['d:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_annotations/',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_annotations/RUGD_annotation-colormap.txt',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_annotations/trail-9/',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_annotations/trail-9/trail-9_00001.png',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_annotations/trail-9/trail-9_00006.png',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_annotations/trail-9/trail-9_00011.png',
 'd:\\Github Repos\\SJSU\\Offroad-Panoptic-Segmentation\\notebooks\\..\\datasets\\RUGD\\extracted\\RUGD_annotations/trail-9/trail-9_00016.png',
 'd:\\Github Repos\\SJSU\\Offro

# Clean Up File Structure

### Convert images from .png to .jpg

In [9]:
def change_file_ext(labels_dir):
    images = list(pathlib.Path(labels_dir).glob("**/*.png"))
    for image in tqdm(images, desc="Converting Images", position=0):
        im = Image.open(image)
        im.save(str(image).replace(".png", ".jpg"))
        image.unlink()
    del images

In [10]:
rugd_images_extract_path = os.path.join(rugd_extract_path, "RUGD_frames-with-annotations")
print(rugd_images_extract_path)
rugd_labels_extract_path = os.path.join(rugd_extract_path, "RUGD_annotations")
print(rugd_labels_extract_path)

change_file_ext(rugd_images_extract_path)

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\extracted\RUGD_frames-with-annotations
d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\extracted\RUGD_annotations


Converting Images: 100%|██████████| 7436/7436 [01:24<00:00, 88.00it/s] 


### Helper Methods

In [11]:
def split_name(filename):
    parts = re.split(r"\.|/|\\", str(filename))
    return (parts[-2], parts[-1])

def consolidate_files(dataset_dir, new_dir, file_ext):
    if not os.path.exists(new_dir):
        os.mkdir(new_dir)
        files = list(pathlib.Path(dataset_dir).glob(f"**/*.{file_ext}"))
        print(f"{len(files)} files with extension .{file_ext} found")
        for f in tqdm(files):
            file_name = ".".join(split_name(f))
            shutil.move(f, os.path.join(new_dir, file_name))
    else:
        print(f"Directory already exists at path: {new_dir}")

def remove_unpaired_images(image_dir, label_dir):
    if not os.path.exists(image_dir) or not os.path.exists(label_dir):
        return
    
    image_set = set(split_name(image)[0] for image in pathlib.Path(image_dir).glob("*"))
    label_set = set(split_name(label)[0] for label in pathlib.Path(label_dir).glob("*"))
    unpaired_images = image_set.difference(label_set)
    print(f"Deleting {len(unpaired_images)} unpaired images")

    for im in unpaired_images:
        image_path = os.path.join(image_dir, f"{im}.jpg")
        if os.path.exists(image_path):
            os.remove(image_path)

In [12]:
rugd_processed_images_path = os.path.join(dataset_path, "images")
print(rugd_processed_images_path)
rugd_processed_labels_path = os.path.join(dataset_path, "labels")
print(rugd_processed_labels_path)

consolidate_files(rugd_images_extract_path, rugd_processed_images_path, "jpg")
consolidate_files(rugd_labels_extract_path, rugd_processed_labels_path, "png")

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\images
d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels
7436 files with extension .jpg found


100%|██████████| 7436/7436 [00:05<00:00, 1354.96it/s]


7436 files with extension .png found


100%|██████████| 7436/7436 [00:03<00:00, 2197.87it/s]


In [13]:
remove_unpaired_images(rugd_processed_images_path, rugd_processed_labels_path)

Deleting 0 unpaired images


### For splitting data

In [14]:
def get_data_splits(image_dir, test_size=0.10, val_size=0.05, random_state=123):
    files = [split_name(filename)[0] for filename in pathlib.Path(image_dir).glob("*")]
    actual_test_size = val_size + test_size
    actual_val_size  = val_size / actual_test_size
    train, test = train_test_split(files, test_size=actual_test_size, random_state=random_state)
    test, val   = train_test_split(test,  test_size=actual_val_size,  random_state=random_state)
    print(len(train), len(test), len(val))
    return train, test, val

def make_data_dirs(image_dir, label_dir):
    for dir_name in [image_dir, label_dir]:
        if not os.path.exists(dir_name):
            os.mkdir(dir_name)
        for split_name in ["train", "test", "val"]:
            split_dir = os.path.join(dir_name, split_name)
            if not os.path.exists(split_dir):
                os.mkdir(split_dir)

def move_files(train, test, val, image_dir, label_dir):
    make_data_dirs(image_dir, label_dir)

    for dir_name, data_split in [("train", train), ("test", test), ("val", val)]:
        for filename in tqdm(data_split, desc=dir_name):
            image_name = f"{filename}.jpg"
            label_name = f"{filename}.png"

            shutil.move(
                os.path.join(image_dir, image_name),
                os.path.join(image_dir, dir_name, image_name)
            )
            shutil.move(
                os.path.join(label_dir, label_name),
                os.path.join(label_dir, dir_name, label_name)
            )

In [15]:
train, test, val = get_data_splits(rugd_processed_images_path)

6320 744 372


In [16]:
move_files(train, test, val, rugd_processed_images_path, rugd_processed_labels_path)

train: 100%|██████████| 6320/6320 [00:09<00:00, 673.20it/s] 
test: 100%|██████████| 744/744 [00:00<00:00, 1907.69it/s]
val: 100%|██████████| 372/372 [00:00<00:00, 1646.02it/s]


# Run scripts

## Generate Instance Ids

In [17]:
from createPanopticInstanceIds import main as cpii_main

cpii_main([rugd_processed_labels_path, "rugd", NUM_PROC])

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 7436/7436 [05:47<00:00, 21.42it/s]

7436 files generated
TOOK 347.6515681743622 SECONDS!





## Generate Panoptic Annotations

In [18]:
from createPanopticAnnotationsParallel import main as cpa_main

processed_labels_train_path = os.path.join(rugd_processed_labels_path, "train")
print(processed_labels_train_path)
cpa_main([processed_labels_train_path, "rugd", NUM_PROC])

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\train


100%|██████████| 6320/6320 [00:24<00:00, 255.42it/s]



Saving the json file d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\annotations_train_panoptic.json
TOOK 26.060615301132202 SECONDS!


In [19]:
processed_labels_test_path = os.path.join(rugd_processed_labels_path, "test")
print(processed_labels_test_path)
cpa_main([processed_labels_test_path, "rugd", NUM_PROC])

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\test


100%|██████████| 744/744 [00:03<00:00, 215.40it/s]



Saving the json file d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\annotations_test_panoptic.json
TOOK 3.727997303009033 SECONDS!


In [20]:
processed_labels_val_path = os.path.join(rugd_processed_labels_path, "val")
print(processed_labels_val_path)
cpa_main([processed_labels_val_path, "rugd", NUM_PROC])

d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\val


100%|██████████| 372/372 [00:02<00:00, 180.85it/s]



Saving the json file d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\annotations_val_panoptic.json
TOOK 2.2779805660247803 SECONDS!


## Generate Instance Annotations

In [21]:
from createInstances import main as ci_main

ci_main([processed_labels_train_path, "rugd"])

Generating Panoptic Images: 100%|██████████| 6320/6320 [01:50<00:00, 57.24it/s] 



Saving the json file d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\annotations_train_instances.json
TOOK 111.44105291366577 SECONDS!


In [22]:
ci_main([processed_labels_test_path, "rugd"])

Generating Panoptic Images: 100%|██████████| 744/744 [00:13<00:00, 56.29it/s] 



Saving the json file d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\annotations_test_instances.json
TOOK 13.359012603759766 SECONDS!


In [23]:
ci_main([processed_labels_val_path, "rugd"])

Generating Panoptic Images: 100%|██████████| 372/372 [00:06<00:00, 57.29it/s]



Saving the json file d:\Github Repos\SJSU\Offroad-Panoptic-Segmentation\notebooks\..\datasets\RUGD\labels\annotations_val_instances.json
TOOK 6.559033393859863 SECONDS!


## Generate Semantic Segmentation Images

In [24]:
# Clone Panoptic API code
!git clone https://github.com/cocodataset/panopticapi.git ../panopticapi

In [25]:
!python ../panopticapi/converters/panoptic2semantic_segmentation.py \
--input_json_file ../datasets/RUGD/labels/annotations_train_panoptic.json \
--segmentations_folder ../datasets/RUGD/labels/train_panoptic/ \
--semantic_seg_folder ../datasets/RUGD/labels/train_semantic \
--categories_json_file ../datasets/RUGD/labels/categories.json

EXTRACTING FROM...
COCO panoptic format:
	Segmentation folder: ../datasets/RUGD/labels/train_panoptic/
	JSON file: ../datasets/RUGD/labels/annotations_train_panoptic.json
SEMANTIC SEGMENTATION
in PNG format:
	Folder with semnatic segmentations: ../datasets/RUGD/labels/train_semantic
Creating folder ../datasets/RUGD/labels/train_semantic for semantic segmentation PNGs


Number of cores: 20, images per core: 316
Core: 0, 0 from 316 images processed
Core: 1, 0 from 316 images processed
Core: 2, 0 from 316 images processed
Core: 3, 0 from 316 images processed
Core: 4, 0 from 316 images processed
Core: 5, 0 from 316 images processed
Core: 6, 0 from 316 images processed
Core: 7, 0 from 316 images processed
Core: 8, 0 from 316 images processed
Core: 9, 0 from 316 images processed
Core: 10, 0 from 316 images processed
Core: 11, 0 from 316 images processed
Core: 13, 0 from 316 images processed
Core: 12, 0 from 316 images processed
Core: 14, 0 from 316 images processed
Core: 15, 0 from 316 image

In [26]:
!python ../panopticapi/converters/panoptic2semantic_segmentation.py \
--input_json_file ../datasets/RUGD/labels/annotations_test_panoptic.json \
--segmentations_folder ../datasets/RUGD/labels/test_panoptic/ \
--semantic_seg_folder ../datasets/RUGD/labels/test_semantic \
--categories_json_file ../datasets/RUGD/labels/categories.json

EXTRACTING FROM...
COCO panoptic format:
	Segmentation folder: ../datasets/RUGD/labels/test_panoptic/
	JSON file: ../datasets/RUGD/labels/annotations_test_panoptic.json
SEMANTIC SEGMENTATION
in PNG format:
	Folder with semnatic segmentations: ../datasets/RUGD/labels/test_semantic
Creating folder ../datasets/RUGD/labels/test_semantic for semantic segmentation PNGs


Number of cores: 20, images per core: 38
Core: 0, 0 from 38 images processed
Core: 1, 0 from 38 images processed
Core: 2, 0 from 38 images processed
Core: 3, 0 from 38 images processed
Core: 4, 0 from 37 images processed
Core: 5, 0 from 37 images processed
Core: 6, 0 from 37 images processed
Core: 7, 0 from 37 images processed
Core: 8, 0 from 37 images processed
Core: 9, 0 from 37 images processed
Core: 10, 0 from 37 images processed
Core: 11, 0 from 37 images processed
Core: 12, 0 from 37 images processed
Core: 13, 0 from 37 images processed
Core: 14, 0 from 37 images processed
Core: 15, 0 from 37 images processed
Core: 16,

In [27]:
!python ../panopticapi/converters/panoptic2semantic_segmentation.py \
--input_json_file ../datasets/RUGD/labels/annotations_val_panoptic.json \
--segmentations_folder ../datasets/RUGD/labels/val_panoptic/ \
--semantic_seg_folder ../datasets/RUGD/labels/val_semantic \
--categories_json_file ../datasets/RUGD/labels/categories.json

EXTRACTING FROM...
COCO panoptic format:
	Segmentation folder: ../datasets/RUGD/labels/val_panoptic/
	JSON file: ../datasets/RUGD/labels/annotations_val_panoptic.json
SEMANTIC SEGMENTATION
in PNG format:
	Folder with semnatic segmentations: ../datasets/RUGD/labels/val_semantic
Creating folder ../datasets/RUGD/labels/val_semantic for semantic segmentation PNGs


Number of cores: 20, images per core: 19
Core: 0, 0 from 19 images processed
Core: 1, 0 from 19 images processed
Core: 2, 0 from 19 images processed
Core: 3, 0 from 19 images processed
Core: 4, 0 from 19 images processed
Core: 5, 0 from 19 images processed
Core: 6, 0 from 19 images processed
Core: 7, 0 from 19 images processed
Core: 8, 0 from 19 images processed
Core: 9, 0 from 19 images processed
Core: 10, 0 from 19 images processed
Core: 11, 0 from 19 images processed
Core: 12, 0 from 18 images processed
Core: 13, 0 from 18 images processed
Core: 14, 0 from 18 images processed
Core: 15, 0 from 18 images processedCore: 16, 0 fr