In [1]:
from pathlib import Path

# Locations of the data directory and of the json-file holding the labels.
# Edit data_main_folder to match your machine.

#data_main_folder = Path('C:/Users/erikk/Dataset exjobb/BDD100K')
data_main_folder = Path('/home/erik/dataset/bdd100k')
img_folder = data_main_folder / "images" / "train_and_val_192by320"
labels_folder = data_main_folder / "labels"
labels_file = labels_folder / "bdd100k_labels_images_train_and_val.json"

# Attribute specifications have the form
#   [[attribute type 1, attribute 1 key],
#    [attribute type 2, [attribute 2 key 1, attribute 2 key 2]],
#    ...]
# i.e. each entry pairs an attribute type with either one key or a list of keys.
#
# Available attribute types and their keys:
#   weather:   clear, partly cloudy, overcast, rainy, snowy, foggy, undefined
#   scene:     highway, residential, gas stations, parking lot, tunnel, city street, undefined
#   timeofday: daytime, dawn/dusk, night

attributes_normal = [
    ["weather", ["clear", "partly cloudy", "overcast"]],
    ["scene", "highway"],
    ["timeofday", "daytime"],
]
attributes_outlier = [
    ["weather", ["rainy", "snowy", "foggy"]],
    ["scene", "highway"],
    ["timeofday", ["daytime", "dawn/dusk", "night"]],
]

# Settings shared by both loading cells below
image_height, image_width, channels = 192, 320, 3

num_train, num_val, num_test = 5000, 1000, 2000
outlier_frac = 0.5

In [2]:
# Select images through the attribute labels stored in the json-file
from loadbdd100k import load_bdd100k_data_attribute_spec

# Request the result as two sets: normal data and outlier data
norm_data, out_data = load_bdd100k_data_attribute_spec(
    img_folder,
    attributes_normal,
    attributes_outlier,
    labels_file,
    num_train,
    num_val,
    num_test,
    outlier_frac,
    image_height,
    image_width,
    channels,
    save_name_lists=True,
    get_norm_and_out_sets=True,
    shuffle=False,
)

  from ._conv import register_converters as _register_converters


Loading json data ...
Loaded json data (20.16s)
Parsing json data...
Parsing complete (0.10s)
NORMAL filename list complete
Parsing json data...
Parsing complete (0.08s)
OUTLIER filename list complete
Checking for overlap between NORMAL and OUTLIER classes...
Checking number of available vs requested images...
Choosing which images to load...
Initializing datasets...
Loading NORMAL image data...
NORMAL image data loaded (24.52s)
Loading OUTLIER image data...
OUTLIER image data loaded (3.26s)


In [3]:
# Choose images from files with filenames 
from loadbdd100k import load_bdd100k_data_filename_list

# Text files listing one image filename per line.
# NOTE(review): presumably these are the lists written by
# load_bdd100k_data_attribute_spec(..., save_name_lists=True) -- confirm.
norm_file = 'clear_or_partly_cloudy_or_overcast_and_highway_and_daytime.txt'
out_file = 'rainy_or_snowy_or_foggy_and_highway_and_daytime_or_dawndusk_or_night.txt'


def read_filename_list(list_file):
    """Return the lines of ``list_file`` with the trailing newline removed.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the lines have been read, instead of being left open until garbage
    collection. Only '\\n' is stripped, so empty lines are kept as ''.
    """
    with open(list_file, 'r') as handle:
        return [line.rstrip('\n') for line in handle]


norm_filenames = read_filename_list(norm_file)
out_filenames = read_filename_list(out_file)
    
# Request the result as train, val and test sets (outliers appear only in the test set)
train_data, val_data, test_data, test_labels = load_bdd100k_data_filename_list(
    img_folder,
    norm_filenames,
    out_filenames,
    num_train,
    num_val,
    num_test,
    outlier_frac,
    image_height,
    image_width,
    channels,
    get_norm_and_out_sets=False,
    shuffle=False,
)

Checking for overlap between NORMAL and OUTLIER classes...
Checking number of available vs requested images...
Choosing which images to load...
Initializing datasets...
Loading NORMAL image data...
NORMAL image data loaded (19.29s)
Loading OUTLIER image data...
OUTLIER image data loaded (2.74s)
Generated train_data (0.00s)
Generated val_data (0.00s)
Generated test_data (0.12s)
Generated test_labels (0.00s)


In [None]:
# Rescale all images and store on disk
# By default images are 1280by720
#
# Every image found in the source folder that does not yet exist (by filename)
# in the destination folder is read, resized to new_height x new_width and
# written to the destination under the same name. Re-running the cell only
# processes the images that are still missing.

from pathlib import Path
import os
from skimage.transform import rescale, resize, downscale_local_mean
from skimage.io import imread, imsave

# Change to your specific source and destination directory
# NOTE: this reassigns data_main_folder and img_folder, replacing whatever
# values those names held before this cell was run.
data_main_folder = Path('C:/Users/erikk/Dataset exjobb/BDD100K')
img_folder = data_main_folder / "bdd100k_images/bdd100k/images/100k/train_and_val"
resized_img_folder = data_main_folder / "bdd100k_images/bdd100k/images/100k/train_and_val_192by320"

new_height = 192
new_width = 320

# Create the destination on first use; os.listdir would otherwise raise.
resized_img_folder.mkdir(parents=True, exist_ok=True)

all_images = os.listdir(img_folder)
already_resized = os.listdir(resized_img_folder)

# Decide per filename what is missing instead of comparing directory sizes, so
# unrelated files in the destination cannot skew the count. A set keeps the
# membership test constant-time for large folders.
already_resized_names = set(already_resized)
images_to_resize = [img_name for img_name in all_images if img_name not in already_resized_names]
num_to_resize = len(images_to_resize)
counter = 0

if num_to_resize > 0:
    print("Found %d images in source that are not also in destination. Resizing..." % num_to_resize)
    # Report roughly every 1%; clamp to 1 so fewer than 100 images does not
    # lead to a modulo-by-zero.
    progress_step = max(1, num_to_resize // 100)
    for img_name in images_to_resize:
        img = imread(img_folder / img_name)
        resized_img = resize(img, (new_height, new_width), anti_aliasing=True)
        imsave(resized_img_folder / img_name, resized_img)
        counter += 1
        if counter % progress_step == 0:
            print("Resized %d of %d images (%d%%)" % (counter, num_to_resize, 100 * counter / num_to_resize))
    print("Complete!")
else:
    print("All images already resized :)")
    