In [4]:
import pandas as pd
from pathlib import Path

RAW = Path("data/raw/dft_road_safety_last_5_years")

# RAW is relative, so it is resolved against the current working directory.
# Globbing a missing directory just yields nothing, which looks identical to
# "the directory exists but holds no CSVs" — report the difference explicitly
# and show the absolute path that was actually checked.
if not RAW.is_dir():
    print(f"⚠️ Data directory not found: {RAW.resolve()}")

# List all CSVs under RAW (recursively). Sorted because directory iteration
# order is filesystem-dependent and the name-matching heuristic that consumes
# this list returns the first hit.
all_csvs = sorted(RAW.rglob("*.csv"))
print(f"Found {len(all_csvs)} CSVs under {RAW}:")
for p in all_csvs[:20]:  # cap the listing at 20 entries
    print(" -", p)

# Heuristic: find accident/collision, vehicles, casualties by name
def find_first(cands):
    """Return the first path in ``all_csvs`` whose file name matches a candidate.

    Matching is case-insensitive and done in two passes: first an exact
    file-name comparison, then (only if no exact match exists for any
    candidate) a substring test. Within each pass the candidates are tried
    in the order given, and for each candidate the files are tried in
    ``all_csvs`` order.

    Args:
        cands: Iterable of candidate file names, in priority order.

    Returns:
        The matching path, or ``None`` when nothing matches.
    """

    def _scan(matches):
        # Candidate-major order: an earlier candidate always wins over a
        # later one, regardless of where the files sit in all_csvs.
        for wanted in cands:
            for csv_file in all_csvs:
                if matches(wanted.lower(), csv_file.name.lower()):
                    return csv_file
        return None

    exact_hit = _scan(lambda needle, fname: fname == needle)
    if exact_hit is not None:
        return exact_hit

    # fallback: candidate appears anywhere inside the file name
    return _scan(lambda needle, fname: needle in fname)

# Resolve one CSV per table. The collision table is looked up under both
# names passed below, with "Collisions.csv" taking priority.
acc_path, veh_path, cas_path = (
    find_first(names)
    for names in (
        ["Collisions.csv", "Accidents.csv"],
        ["Vehicles.csv"],
        ["Casualties.csv"],
    )
)

# Report what was (or was not) detected for each table.
print("\nDetected paths:")
for label, found in (
    (" Collisions/Accidents:", acc_path),
    (" Vehicles:", veh_path),
    (" Casualties:", cas_path),
):
    print(label, found)

def safe_head(path):
    """Load a CSV into a DataFrame and show a short preview of it.

    Prints the file name, full path and shape, then renders the first ten
    rows. The pyarrow parser is preferred; because pyarrow is an optional
    pandas dependency, the default parser is used when it is not installed
    instead of failing. Likewise ``display`` only exists inside
    IPython/Jupyter, so plain ``print`` is used when it is not defined.

    Args:
        path: ``pathlib.Path`` of the CSV file to read.

    Returns:
        The full DataFrame read from ``path`` (not just the preview).
    """
    try:
        df = pd.read_csv(path, engine="pyarrow")
    except ImportError:
        # pyarrow missing: fall back to pandas' default parser.
        df = pd.read_csv(path)

    try:
        show = display  # injected into the namespace by IPython/Jupyter
    except NameError:
        show = print

    print(f"\n===== {path.name} =====")
    print("Path:", path)
    print("Shape:", df.shape)
    show(df.head(10))
    return df

# Load every table whose CSV was detected, keyed by the file's name.
dfs = {}
for csv_path in (acc_path, veh_path, cas_path):
    if csv_path is None or not csv_path.exists():
        continue
    dfs[csv_path.name] = safe_head(csv_path)

# If any are still None, print a hint
for missing_path, hint in (
    (acc_path, "\n⚠️ Could not find Accidents/Collisions CSV."),
    (veh_path, "⚠️ Could not find Vehicles CSV."),
    (cas_path, "⚠️ Could not find Casualties CSV."),
):
    if not missing_path:
        print(hint)


Found 0 CSVs under data\raw\dft_road_safety_last_5_years:

Detected paths:
 Collisions/Accidents: None
 Vehicles: None
 Casualties: None

⚠️ Could not find Accidents/Collisions CSV.
⚠️ Could not find Vehicles CSV.
⚠️ Could not find Casualties CSV.
