In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

In [2]:

# === STEP 1: Update your label paths below ===
train_label_dir = "../traffic_data/train/labels"  # e.g. ../traffic_data/train/labels
val_label_dir = "../traffic_data/valid/labels"    # e.g. ../traffic_data/val/labels

# Class list from the dataset (edit if your YAML uses different order)
class_names = [
'ambulance', 'army vehicle', 'auto rickshaw', 'bicycle', 'bus', 'car', 'garbagevan', 'human hauler', 'minibus', 'minivan', 'motorbike', 'pickup', 'policecar', 'rickshaw', 'scooter', 'suv', 'taxi', 'three wheelers -CNG-', 'truck', 'van', 'wheelbarrow'
]


In [3]:

# === STEP 2: Function to load label data ===
def process_labels(label_dir):
    class_counts = defaultdict(int)
    objects_per_image = []

    for file in os.listdir(label_dir):
        if file.endswith(".txt"):
            with open(os.path.join(label_dir, file), "r") as f:
                lines = f.readlines()
                objects_per_image.append(len(lines))
                for line in lines:
                    class_id = int(line.strip().split()[0])
                    class_counts[class_names[class_id]] += 1
    return class_counts, objects_per_image


In [4]:

# === STEP 3: Process your labels ===
train_counts, train_objs = process_labels(train_label_dir)
val_counts, val_objs = process_labels(val_label_dir)

total_counts = train_counts.copy()
for k, v in val_counts.items():
    total_counts[k] += v


In [5]:
# === STEP 4: Create plots ===
sns.set_theme(style="whitegrid")

# Bar Plot
df_counts = pd.DataFrame(total_counts.items(), columns=["Class", "Count"]).sort_values("Count", ascending=False)
plt.figure(figsize=(12, 6))
sns.barplot(data=df_counts, x="Class", y="Count", palette="magma")
plt.xticks(rotation=45)
plt.title("Class Distribution (Train + Val)")
plt.tight_layout()
plt.savefig("class_distribution.png")
plt.close()

# Pie Chart: Top 5
top5 = df_counts.head(5)
plt.figure(figsize=(6, 6))
plt.pie(top5["Count"], labels=top5["Class"], autopct='%1.1f%%', startangle=140)
plt.title("Top 5 Most Frequent Classes")
plt.tight_layout()
plt.savefig("top5_class_pie.png")
plt.close()

# Histogram: Objects per Image
all_objs = train_objs + val_objs
plt.figure(figsize=(8, 5))
sns.histplot(all_objs, bins=15, kde=True, color="teal")
plt.title("Objects per Image")
plt.xlabel("Number of Objects")
plt.ylabel("Number of Images")
plt.tight_layout()
plt.savefig("objects_per_image.png")
plt.close()



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(data=df_counts, x="Class", y="Count", palette="magma")


In [None]:
y