
# Project 2: Data Preparation

**How to run:** Python 3.10+, `pip install -r requirements.txt`  
**Dataset:** `/data/project/MSA8395/mapillary_traffic_sign_dataset`  
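**Outputs:** `results/figures/` (created automatically if missing; the notebook resolves it as `../results/figures` relative to its working directory)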


In [None]:

from pathlib import Path
import json, csv
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ARC dataset root, with the image folder and both annotation files
# expected directly beneath it.
ARC_ROOT = Path("/data/project/MSA8395/mapillary_traffic_sign_dataset")
IMAGES_DIR = ARC_ROOT.joinpath("images")
ANN_JSON = ARC_ROOT.joinpath("annotations.json")
ANN_CSV = ARC_ROOT.joinpath("annotations.csv")

# Output folder for figures; relative to the current working directory and
# created up front (including parents) so later saves cannot fail on it.
SAVE_DIR = Path("../results/figures")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# Sanity check: report whether the dataset pieces are reachable.
print("Images dir exists:", IMAGES_DIR.exists())
print(
    "JSON exists:", ANN_JSON.exists(),
    "CSV exists:", ANN_CSV.exists(),
)


In [None]:

def draw_bbox(img_bgr, bbox_xywh, label=None, color=(0,255,0), thickness=2):
    """Draw a bounding box, and optionally a text label, onto an image.

    The drawing is done directly on ``img_bgr`` (no copy is made), and the
    same object is returned for convenient chaining.

    Args:
        img_bgr: Image to draw on; passed straight to ``cv2.rectangle``.
        bbox_xywh: Four values ``(x, y, w, h)``; each is converted with
            ``int()`` before use.
        label: Optional label; rendered via ``str(label)`` just above the
            box's top-left corner. Skipped when ``None``.
        color: Colour used for both the rectangle and the label text.
        thickness: Line thickness of the rectangle.

    Returns:
        The ``img_bgr`` object that was passed in.
    """
    left, top, width, height = (int(value) for value in bbox_xywh)
    top_left = (left, top)
    bottom_right = (left + width, top + height)
    cv2.rectangle(img_bgr, top_left, bottom_right, color, thickness)

    if label is None:
        return img_bgr

    # Text baseline sits 5 px above the box, but never at a negative row.
    text_origin = (left, max(0, top - 5))
    cv2.putText(
        img_bgr,
        str(label),
        text_origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        0.5,
        color,
        1,
        cv2.LINE_AA,
    )
    return img_bgr

def crop(img_bgr, bbox_xywh, pad=4):
    """Return the padded bounding-box region of an image.

    The box is grown by ``pad`` pixels on every side and then clipped to the
    image bounds. A box that falls entirely outside the image yields an
    empty array rather than an unrelated region.

    Args:
        img_bgr: Array whose first two dimensions are (height, width).
        bbox_xywh: Four values ``(x, y, w, h)``; each is converted with
            ``int()`` (truncation) before use.
        pad: Number of pixels added around the box before clipping.

    Returns:
        The slice ``img_bgr[y0:y1, x0:x1]``. For a NumPy array this is a
        view of the input, not a copy.
    """
    x, y, w, h = map(int, bbox_xywh)
    H, W = img_bgr.shape[:2]
    x0, y0 = max(0, x - pad), max(0, y - pad)
    # Clamp the far edges at 0 as well: a negative stop index would be
    # interpreted as "from the end" and silently select the wrong pixels
    # when the box lies completely left of / above the image.
    x1 = max(0, min(W, x + w + pad))
    y1 = max(0, min(H, y + h + pad))
    return img_bgr[y0:y1, x0:x1]


In [None]:

def clahe_rgb(img_bgr):
    """Apply CLAHE to the lightness channel of a colour image.

    Despite the function name, the conversion codes used here treat the
    input as BGR and the result is returned as BGR as well. The image is
    converted to LAB, only the L channel is equalised (clip limit 2.0,
    8x8 tile grid), and the A/B channels are carried over untouched.

    Args:
        img_bgr: Colour image accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2LAB``.

    Returns:
        A new BGR image with contrast-enhanced lightness.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    lightness, chan_a, chan_b = cv2.split(
        cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    )
    enhanced_lab = cv2.merge([equalizer.apply(lightness), chan_a, chan_b])
    return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)

def bilateral(img_bgr):
    """Run OpenCV's bilateral filter over an image with fixed settings.

    Uses a pixel neighbourhood diameter of 9 and a sigma of 75 for both the
    colour and the spatial term.

    Args:
        img_bgr: Image passed as the source to ``cv2.bilateralFilter``.

    Returns:
        The filtered image produced by ``cv2.bilateralFilter``.
    """
    diameter = 9
    sigma = 75  # shared by sigmaColor and sigmaSpace
    return cv2.bilateralFilter(img_bgr, diameter, sigma, sigma)
