In [42]:
import os
import shutil
from collections import defaultdict

In [61]:
def _list_jpg_images(dataset_path):
    """Return the sorted listing of dataset_path and the joined paths of its '.jpg' entries."""
    entries = sorted(os.listdir(dataset_path))
    jpg_paths = []
    for entry in entries:
        if entry.endswith('.jpg'):
            jpg_paths.append(os.path.join(dataset_path, entry))
    return entries, jpg_paths


# Original dataset splits.
train_dataset_path = 'data/images/train'
train_images, train_images_paths = _list_jpg_images(train_dataset_path)

val_dataset_path = 'data/images/val'
val_images, val_images_paths = _list_jpg_images(val_dataset_path)

test_dataset_path = 'data/images/test'
test_images, test_images_paths = _list_jpg_images(test_dataset_path)

# Filtered dataset splits. os.listdir raises if a directory is missing, so the
# filtered-data folders must already exist when this cell runs.
filtered_train_dataset_path = 'filtered-data/images/train'
filtered_train_images, filtered_train_images_paths = _list_jpg_images(filtered_train_dataset_path)

filtered_val_dataset_path = 'filtered-data/images/val'
filtered_val_images, filtered_val_images_paths = _list_jpg_images(filtered_val_dataset_path)

filtered_test_dataset_path = 'filtered-data/images/test'
filtered_test_images, filtered_test_images_paths = _list_jpg_images(filtered_test_dataset_path)

In [44]:
def count_classes(image_paths):
    """Count labelled objects per class across the label files of the given images.

    For each image path, the label file is located by replacing "images" with
    "labels" in the directory part of the path and swapping the file extension
    for ".txt". Every non-blank line of a label file counts as one object; its
    class id is the first whitespace-separated token on the line.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        A list of ``(class_name, count)`` tuples sorted by class name. Ids that
        are not in the name table are reported together as "Unknown class".

    Raises:
        FileNotFoundError: If the label file for an image does not exist.
        ValueError: If the first token of a label line is not an integer.
    """
    class_names = {
        0: 'animal',
        1: 'bike',
        2: 'bird',
        3: 'bus',
        4: 'car',
        5: 'dog',
        6: 'face',
        7: 'hydrant',
        8: 'license plate',
        9: 'light',
        10: 'motor',
        11: 'other vehicle',
        12: 'person',
        13: 'rider',
        14: 'scooter',
        15: 'sign',
        16: 'skateboard',
        17: 'stroller',
        18: 'train',
        19: 'truck'
    }

    class_counts = defaultdict(int)
    for image_path in image_paths:
        # Only rewrite the directory part, so a file name that happens to
        # contain "images" still maps to the right label file.
        image_dir, image_name = os.path.split(image_path)
        label_path = os.path.join(
            image_dir.replace("images", "labels"),
            os.path.splitext(image_name)[0] + ".txt",
        )
        with open(label_path, 'r') as file:
            for line in file:
                parts = line.split()
                if not parts:
                    # Blank line: nothing to count.
                    continue
                # Count by name so unknown ids fall back instead of raising KeyError.
                class_name = class_names.get(int(parts[0]), "Unknown class")
                class_counts[class_name] += 1

    return sorted(class_counts.items())

In [54]:
def generate_filtered_data(image_paths, excluded_classes=(0, 2, 5, 14, 16, 17, 18)):
    """Copy images and their label files into a "filtered-data" tree without the excluded classes.

    For each image path:
      * the label file is found by replacing "images" with "labels" in the
        directory part and swapping the file extension for ".txt";
      * the destination directories are obtained by replacing "data" with
        "filtered-data" in the directory parts (file names are kept as-is);
      * label lines whose first token (the class id) is in ``excluded_classes``
        are dropped, as are blank lines; all other lines are written unchanged;
      * the image file is copied byte-for-byte.

    Destination directories are created when missing.

    Args:
        image_paths: Iterable of image file paths whose directory part contains "data".
        excluded_classes: Class ids to remove from the label files.

    Raises:
        ValueError: If the directory part of a path does not contain "data"; the
            destination would then equal the source and overwrite it. Also raised
            when a label line does not start with an integer.
        FileNotFoundError: If an image or its label file does not exist.
    """
    excluded = frozenset(excluded_classes)
    for image_path in image_paths:
        image_dir, image_name = os.path.split(image_path)
        label_dir = image_dir.replace("images", "labels")
        label_name = os.path.splitext(image_name)[0] + ".txt"
        label_path = os.path.join(label_dir, label_name)

        new_image_dir = image_dir.replace("data", "filtered-data")
        new_label_dir = label_dir.replace("data", "filtered-data")
        if new_image_dir == image_dir or new_label_dir == label_dir:
            # Without this guard the source label file would be truncated in place.
            raise ValueError(
                "cannot derive a filtered-data destination for %r: "
                "its directory does not contain 'data'" % (image_path,)
            )

        kept_lines = []
        with open(label_path, 'r') as file:
            for line in file:
                parts = line.split()
                if not parts:
                    # Blank line: nothing to keep.
                    continue
                if int(parts[0]) not in excluded:
                    kept_lines.append(line)

        os.makedirs(new_image_dir, exist_ok=True)
        os.makedirs(new_label_dir, exist_ok=True)

        with open(os.path.join(new_label_dir, label_name), 'w') as file:
            file.writelines(kept_lines)

        shutil.copyfile(image_path, os.path.join(new_image_dir, image_name))

In [62]:
def _report_class_counts(header, image_paths):
    """Count the label classes for image_paths, print them after header, and return them."""
    counts = count_classes(image_paths)
    print(header, counts)
    return counts


# Per-class object counts for each split of the original dataset.
train_labels_count = _report_class_counts("Classes in train dataset: ", train_images_paths)

val_labels_count = _report_class_counts("Classes in val dataset: ", val_images_paths)

test_labels_count = _report_class_counts("Classes in test dataset: ", test_images_paths)

Classes in train dataset:  [('animal', 8), ('bike', 7237), ('bird', 1), ('bus', 2245), ('car', 73623), ('dog', 4), ('face', 752), ('hydrant', 1095), ('license plate', 270), ('light', 16198), ('motor', 1116), ('other vehicle', 1373), ('person', 44527), ('rider', 5951), ('scooter', 15), ('sign', 20770), ('skateboard', 29), ('stroller', 15), ('train', 5), ('truck', 829)]
Classes in val dataset:  [('bike', 170), ('bus', 179), ('car', 7133), ('face', 73), ('hydrant', 94), ('license plate', 17), ('light', 2005), ('motor', 55), ('other vehicle', 63), ('person', 4309), ('rider', 161), ('sign', 2472), ('skateboard', 3), ('stroller', 6), ('truck', 46)]
Classes in test dataset:  [('bike', 113), ('car', 30517), ('dog', 25), ('face', 142), ('hydrant', 277), ('light', 6758), ('motor', 3314), ('other vehicle', 696), ('person', 11242), ('rider', 1081), ('sign', 5660), ('truck', 2634)]


In [58]:
# Disabled one-off step: uncomment to (re)generate the filtered-data copy of each split.
#generate_filtered_data(train_images_paths)
#generate_filtered_data(val_images_paths)
#generate_filtered_data(test_images_paths)

In [63]:
# Per-class object counts for each split of the filtered dataset.
_filtered_counts = []
for _header, _paths in (
    ("Classes in train filtered dataset: ", filtered_train_images_paths),
    ("Classes in val filtered dataset: ", filtered_val_images_paths),
    ("Classes in test filtered dataset: ", filtered_test_images_paths),
):
    _filtered_counts.append(count_classes(_paths))
    print(_header, _filtered_counts[-1])

filtered_train_labels_count, filtered_val_labels_count, filtered_test_labels_count = _filtered_counts

Classes in train filtered dataset:  [('bike', 7237), ('bus', 2245), ('car', 73623), ('face', 752), ('hydrant', 1095), ('license plate', 270), ('light', 16198), ('motor', 1116), ('other vehicle', 1373), ('person', 44527), ('rider', 5951), ('sign', 20770), ('truck', 829)]
Classes in val filtered dataset:  [('bike', 170), ('bus', 179), ('car', 7133), ('face', 73), ('hydrant', 94), ('license plate', 17), ('light', 2005), ('motor', 55), ('other vehicle', 63), ('person', 4309), ('rider', 161), ('sign', 2472), ('truck', 46)]
Classes in test filtered dataset:  [('bike', 113), ('car', 30517), ('face', 142), ('hydrant', 277), ('light', 6758), ('motor', 3314), ('other vehicle', 696), ('person', 11242), ('rider', 1081), ('sign', 5660), ('truck', 2634)]
