In [38]:
import pandas as pd
import os
import shutil
import re

# GTSRB - German Traffic Sign Recognition Benchmark
Link: `https://www.kaggle.com/datasets/meowmeowmeowmeowmeow/gtsrb-german-traffic-sign/code`  

The German Traffic Sign Benchmark is a multi-class, single-image classification challenge held at the International Joint Conference on Neural Networks (IJCNN) 2011. We cordially invite researchers from relevant fields to participate: The competition is designed to allow for participation without special domain knowledge. Our benchmark has the following properties:

### Grouping the test data into seperate folders

In [25]:
df = pd.read_csv('/Users/bristi/Downloads/archive (1)/Test.csv')
df.head()

Unnamed: 0,Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path
0,53,54,6,5,48,49,16,Test/00000.png
1,42,45,5,5,36,40,1,Test/00001.png
2,48,52,6,6,43,47,38,Test/00002.png
3,27,29,5,5,22,24,33,Test/00003.png
4,60,57,5,5,55,52,11,Test/00004.png


In [26]:
# Define class-to-folder mapping
class_to_folder = {
    0: "speedlimit20",
    3: "speedlimit60",
    7: "speedlimit100",
    9: "noovertaking",
    14: "stop",
    17: "noentry",
    33: "turnright",
    34: "turnleft",
    35: "oneway",
    38: "exitright",
    39: "exitleft",
    27: "pedestrian",
    26: "trafficsignalahead",
    13: "giveway",
    40: "roundabout"
}

In [27]:
# Initialising new dataset DataFrame
df_new_dataset = pd.DataFrame(columns=['class_id', 'image_path'])

# Initialising counters per class
counters = {class_id: 1 for class_id in class_to_folder}

In [None]:
# Base paths
src_base = '/Users/bristi/Downloads/archive (1)/'
dest_base = '/Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data'

In [None]:
# Process each row
for _, row in df.iterrows():
    class_id = row['ClassId']
    image_path = os.path.join(src_base, row['Path'])

    if class_id in class_to_folder:
        folder_name = class_to_folder[class_id]
        folder_path = os.path.join(dest_base, folder_name)
        os.makedirs(folder_path, exist_ok=True)

        extension = os.path.splitext(image_path)[1]
        filename = f"{counters[class_id]}{extension}"
        dest_path = os.path.join(folder_path, filename)
        counters[class_id] += 1  # Increment counter after use

        try:
            shutil.move(image_path, dest_path)
            print(f"Moved: {image_path} -> {dest_path}")
            # Append to new dataset
            df_new_dataset = pd.concat([
                df_new_dataset,
                pd.DataFrame({'class_id': [folder_name], 'image_path': [dest_path]})
            ], ignore_index=True)
        except Exception as e:
            print(f"Error moving {image_path}: {e}")


### Grouping the train data into seperate folders

In [37]:
# Base paths
src_base = '/Users/bristi/Downloads/archive (1)/'
dest_base = '/Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data'

In [39]:
# Function to get next counter value from existing files
def get_starting_counter(folder_path):
    if not os.path.exists(folder_path):
        return 1
    existing_files = os.listdir(folder_path)
    numbers = []
    for file in existing_files:
        match = re.match(r'(\d+)\.\w+', file)
        if match:
            numbers.append(int(match.group(1)))
    return max(numbers, default=0) + 1

In [49]:
# Initialise counters based on existing files
counters = {}
for class_id, folder_name in class_to_folder.items():
    folder_path = os.path.join(dest_base, folder_name)
    counters[class_id] = get_starting_counter(folder_path)

In [52]:
df = pd.read_csv('/Users/bristi/Downloads/archive (1)/Train.csv')
df.head()

Unnamed: 0,Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path
0,27,26,5,5,22,20,20,Train/20/00020_00000_00000.png
1,28,27,5,6,23,22,20,Train/20/00020_00000_00001.png
2,29,26,6,5,24,21,20,Train/20/00020_00000_00002.png
3,28,27,5,6,23,22,20,Train/20/00020_00000_00003.png
4,28,26,5,5,23,21,20,Train/20/00020_00000_00004.png


In [54]:
# Process each row
for _, row in df.iterrows():
    class_id = row['ClassId']
    image_path = os.path.join(src_base, row['Path'])

    if class_id in class_to_folder:
        folder_name = class_to_folder[class_id]
        folder_path = os.path.join(dest_base, folder_name)
        os.makedirs(folder_path, exist_ok=True)

        extension = os.path.splitext(image_path)[1]
        filename = f"{counters[class_id]}{extension}"
        dest_path = os.path.join(folder_path, filename)
        counters[class_id] += 1  # Increment counter after use

        try:
            shutil.move(image_path, dest_path)
            print(f"Moved: {image_path} -> {dest_path}")
            # Append to new dataset
            df_new_dataset = pd.concat([
                df_new_dataset,
                pd.DataFrame({'class_id': [folder_name], 'image_path': [dest_path]})
            ], ignore_index=True)
        except Exception as e:
            print(f"Error moving {image_path}: {e}")


Moved: /Users/bristi/Downloads/archive (1)/Train/0/00000_00000_00000.png -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/speedlimit20/38.png
Moved: /Users/bristi/Downloads/archive (1)/Train/0/00000_00000_00001.png -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/speedlimit20/39.png
Moved: /Users/bristi/Downloads/archive (1)/Train/0/00000_00000_00002.png -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/speedlimit20/40.png
Moved: /Users/bristi/Downloads/archive (1)/Train/0/00000_00000_00003.png -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/speedlimit20/41.png
Moved: /Users/bristi/Downloads/archive (1)/Train/0/00000_00000_00004.png -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/speedlimit20/42.png
Moved: /Users/bristi/Downloads/archive (1)/Train/0/00000_00000_00005.png -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/speedlimit20/43.png
Moved: /Us

In [55]:
df_new_dataset.to_csv('traffic_dataset.csv', index=False)

In [58]:
df_new_dataset['class_id'].value_counts()

giveway               2880
exitright             2760
noovertaking          1950
speedlimit100         1890
speedlimit60          1860
oneway                1590
noentry               1470
stop                  1050
turnright              899
trafficsignalahead     780
turnleft               540
roundabout             450
exitleft               390
pedestrian             300
speedlimit20           270
Name: class_id, dtype: int64

# Self-Driving Cars Computer Vision Project
Link: `https://universe.roboflow.com/selfdriving-car-qtywx/self-driving-cars-lfjou/dataset/6`  
Acquiring speed limit 20, speed limit 60, speed limit 100, stop sign

In [59]:
#!/usr/bin/env python3
"""
Move YOLO images into class-named folders, based on the first
number in each label file.

Folder layout expected ───────────────────────────────
dataset_root/
└─ train/
   ├─ images/
   └─ labels/
└─ valid/
   ├─ images/
   └─ labels/
└─ test/
   ├─ images/
   └─ labels/

Result (example) ─────────────────────────────────────
output_root/
└─ Green Light/
   ├─ 1.jpg
   ├─ 2.jpg
   └─ …
└─ Red Light/
   ├─ 1.jpg
   └─ …
…
"""

import os
import shutil
from pathlib import Path
from collections import defaultdict
import re

# ─────────────── CONFIGURE HERE ─────────────────────
DATASET_ROOT = Path('/Users/bristi/Downloads/archive (2)/car')        # ← change me
OUTPUT_ROOT  = Path('/Users/bristi/Desktop/Testing')    # ← change me
MOVE_LABELS  = False                           # also move the .txt files?
SPLITS       = ["train", "valid", "test"]

CLASS_NAMES = [
    'Green Light', 'Red Light', 'Speed Limit 10', 'Speed Limit 100',
    'Speed Limit 110', 'Speed Limit 120', 'Speed Limit 20', 'Speed Limit 30',
    'Speed Limit 40', 'Speed Limit 50', 'Speed Limit 60', 'Speed Limit 70',
    'Speed Limit 80', 'Speed Limit 90', 'Stop'
]
# ────────────────────────────────────────────────────

# Map class index → folder Path (spaces kept; Unix & Windows both OK)
idx_to_folder = {i: OUTPUT_ROOT / name for i, name in enumerate(CLASS_NAMES)}
for folder in idx_to_folder.values():
    folder.mkdir(parents=True, exist_ok=True)

# Track the current highest counter in each folder
counters = defaultdict(int)
counter_pattern = re.compile(r"^(\d+)\.[^.]+$")  # e.g. "42.jpg" → 42

for class_idx, folder in idx_to_folder.items():
    # warm-start counters by scanning existing files
    for f in folder.iterdir():
        if f.is_file():
            m = counter_pattern.match(f.name)
            if m:
                counters[class_idx] = max(counters[class_idx],
                                          int(m.group(1)))

def next_filename(class_idx: int, ext: str) -> str:
    """Return the next integer filename (1,2,3…) for this class."""
    counters[class_idx] += 1
    return f"{counters[class_idx]}{ext}"

for split in SPLITS:
    labels_dir = DATASET_ROOT / split / "labels"
    images_dir = DATASET_ROOT / split / "images"

    for label_path in labels_dir.glob("*.txt"):
        # Read the first token (class index) from label
        with label_path.open() as f:
            first_line = f.readline().strip()
        if not first_line:
            print(f"WARNING: empty label file {label_path}")
            continue

        class_idx_str, *_ = first_line.split()
        try:
            class_idx = int(class_idx_str)
            dest_folder = idx_to_folder[class_idx]
        except (ValueError, KeyError):
            print(f"WARNING: unknown class index '{class_idx_str}' "
                  f"in {label_path}")
            continue

        # Source image path (same stem, extension .jpg or .png etc.)
        src_img = images_dir / (label_path.stem + ".jpg")
        if not src_img.exists():
            print(f"WARNING: image {src_img} not found")
            continue

        # Build destination path
        dest_name = next_filename(class_idx, src_img.suffix)
        dest_img  = dest_folder / dest_name

        shutil.move(src_img, dest_img)
        print(f"Moved {src_img}  →  {dest_img}")

        if MOVE_LABELS:
            dest_lbl = dest_folder / (dest_img.stem + ".txt")
            shutil.move(label_path, dest_lbl)

print("\nDone. Images organised by class in:", OUTPUT_ROOT)


Moved /Users/bristi/Downloads/archive (2)/car/train/images/FisheyeCamera_1_00979_png.rf.e39379024f10ed37daf631a9a0dea599.jpg  →  /Users/bristi/Desktop/Testing/Red Light/1.jpg
Moved /Users/bristi/Downloads/archive (2)/car/train/images/00004_00044_00020_png.rf.0fc993868d47688fd1f3fe01cb383aa7.jpg  →  /Users/bristi/Desktop/Testing/Speed Limit 70/1.jpg
Moved /Users/bristi/Downloads/archive (2)/car/train/images/001810_jpg.rf.54a0c297f7db7b282c91020c3696e216.jpg  →  /Users/bristi/Desktop/Testing/Red Light/2.jpg
Moved /Users/bristi/Downloads/archive (2)/car/train/images/road780_png.rf.f0abfb35666fcd80e31b014cf65940dc.jpg  →  /Users/bristi/Desktop/Testing/Speed Limit 40/1.jpg
Moved /Users/bristi/Downloads/archive (2)/car/train/images/001054_jpg.rf.3476e14f01897dae40344efc8623bc49.jpg  →  /Users/bristi/Desktop/Testing/Speed Limit 100/1.jpg
Moved /Users/bristi/Downloads/archive (2)/car/train/images/00014_00007_00000_png.rf.20e7f27f987413f912a488b8c52e4b75.jpg  →  /Users/bristi/Desktop/Testing/St

In [None]:
# Define class-to-folder mapping
class_to_folder = {
    0: "speedlimit20",
    3: "speedlimit60",
    7: "speedlimit100",
    9: "noovertaking",
    14: "stop",
    17: "noentry",
    33: "turnright",
    34: "turnleft",
    35: "oneway",
    38: "exitright",
    39: "exitleft",
    27: "pedestrian",
    26: "trafficsignalahead",
    13: "giveway",
    40: "roundabout"
}

In [None]:
import os
import shutil

# Source and destination folders
source_folder = '/Users/bristi/Desktop/Testing/stop'
destination_folder = '/Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop'

# Ensure the destination folder exists
os.makedirs(destination_folder, exist_ok=True)

# Move all files from source to destination
for file_name in os.listdir(source_folder):
    source_path = os.path.join(source_folder, file_name)
    new_filename = f"{counters[14]}{extension}"
    destination_path = os.path.join(destination_folder, new_filename)
    counters[14]+=1
    
    if os.path.isfile(source_path):  # Check if it's a file
        shutil.move(source_path, destination_path)
        print(f"Moved: {source_path} -> {destination_path}")

Moved: /Users/bristi/Desktop/Testing/stop/63.jpg -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop/1051.png
Moved: /Users/bristi/Desktop/Testing/stop/189.jpg -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop/1052.png
Moved: /Users/bristi/Desktop/Testing/stop/77.jpg -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop/1053.png
Moved: /Users/bristi/Desktop/Testing/stop/162.jpg -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop/1054.png
Moved: /Users/bristi/Desktop/Testing/stop/176.jpg -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop/1055.png
Moved: /Users/bristi/Desktop/Testing/stop/88.jpg -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop/1056.png
Moved: /Users/bristi/Desktop/Testing/stop/348.jpg -> /Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data/stop/1057.png
Moved: /Users/bristi/Desktop/Testing/stop/360.jpg -

In [65]:
counters

{0: 514,
 3: 2264,
 7: 2206,
 9: 1951,
 14: 1456,
 17: 1471,
 33: 900,
 34: 541,
 35: 1591,
 38: 2761,
 39: 391,
 27: 301,
 26: 781,
 13: 2881,
 40: 451}

# Traffic Sign Dataset
Link: `https://www.kaggle.com/datasets/ahemateja19bec1025/traffic-sign-dataset-classification/data?select=traffic_Data`  
Acquiring Turn left, pedestrian, one way, speed limit 60


# Traffic Signs Classification
Link: `https://www.kaggle.com/datasets/flo2607/traffic-signs-classification`  
Acquiring Turn left, Exit left, speed limit 20, speed limit 60, speed limit 100, stop, no entry, give way, no entry, traffic signal

# Processing

In [7]:
import os
import pandas as pd

# Mapping from class_id to folder name
class_to_folder = {
    0: "speedlimit20",
    3: "speedlimit60",
    7: "speedlimit100",
    9: "noovertaking",
    14: "stop",
    17: "noentry",
    33: "turnright",
    34: "turnleft",
    35: "oneway",
    38: "exitright",
    39: "exitleft",
    27: "pedestrian",
    26: "trafficsignalahead",
    13: "giveway",
    40: "roundabout"
}

# Reverse mapping: folder name → class_id
folder_to_class = {v: k for k, v in class_to_folder.items()}

# Base path
base_path = '/Users/bristi/Desktop/Projects/Split Federated Learning/Traffic Data'

# List to build rows
rows = []

# Walk through each folder
for folder_name in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder_name)
    if not os.path.isdir(folder_path):
        continue

    class_id = folder_to_class.get(folder_name)
    if class_id is None:
        print(f"Skipping unrecognised folder: {folder_name}")
        continue

    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        if os.path.isfile(file_path):
            rows.append({
                'class': folder_name,
                'image_path': file_path
            })

# Create DataFrame
df_from_folders = pd.DataFrame(rows)

# Preview
print(df_from_folders.head())

          class                                         image_path
0  speedlimit60  /Users/bristi/Desktop/Projects/Split Federated...
1  speedlimit60  /Users/bristi/Desktop/Projects/Split Federated...
2  speedlimit60  /Users/bristi/Desktop/Projects/Split Federated...
3  speedlimit60  /Users/bristi/Desktop/Projects/Split Federated...
4  speedlimit60  /Users/bristi/Desktop/Projects/Split Federated...


In [8]:
df_from_folders['class'].value_counts()

exitright             6127
giveway               4889
turnright             4538
noentry               3681
speedlimit60          3056
speedlimit100         2864
oneway                2701
stop                  2599
trafficsignalahead    1629
turnleft              1384
roundabout            1211
speedlimit20          1099
pedestrian            1065
noovertaking           769
exitleft               303
Name: class, dtype: int64

In [4]:
df_from_folders = df_from_folders[~df_from_folders['class'].isin(['noovertaking', 'exitleft'])]

In [5]:
df_from_folders['class'].value_counts()
df_from_folders.to_csv('final_dataset_from_folders.csv', index=False)