In [None]:
#imports
import json
import os
from collections import defaultdict
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
# Directory that holds the COCO-format annotation JSON files. Each file name
# below is joined onto this directory when the corresponding split is loaded.
annotation_dir = "/shared/vision/dataset/metadata/v8/v3.0onwards_8_cls_70_30split_06_27_15_05/"
# Annotation file name for each split, relative to annotation_dir.
train_file = "extended_train_annotations_coco_fmt.json"
val_file = "val_annotations_coco_fmt.json"
test_file = "test_annotations_coco_fmt.json"

# Module-level accumulators: every call to load_annotations appends to these,
# so after all splits are loaded they hold the combined records.
annotations = []
images = []
videos = []


def load_annotations(dir, file):
    """Parse one annotation JSON file and append its records to the shared lists.

    Args:
        dir: directory containing the file.
        file: file name inside ``dir``.

    Returns:
        The parsed JSON object for this file alone.

    Side effects:
        Extends the module-level ``annotations`` and ``images`` lists (and
        ``videos`` when the file has a "videos" key) and prints a one-line
        summary of the counts found in this file.
    """
    with open(os.path.join(dir, file), "r") as handle:
        payload = json.load(handle)

    # Count first, then append, so a missing required key raises before
    # anything has been added to the shared lists.
    n_instances = len(payload["annotations"])
    n_images = len(payload["images"])
    annotations.extend(payload["annotations"])
    images.extend(payload["images"])

    # "videos" is optional; files without it report zero videos.
    n_videos = 0
    if "videos" in payload:
        videos.extend(payload["videos"])
        n_videos = len(payload["videos"])

    print(f"file {file} has {n_videos} videos {n_images} images and {n_instances} instances")
    return payload

# Load the splits in train -> val -> test order; each call also appends the
# split's records to the shared annotations / images / videos lists.
train_data = load_annotations(dir=annotation_dir, file=train_file)
val_data = load_annotations(dir=annotation_dir, file=val_file)
test_data = load_annotations(dir=annotation_dir, file=test_file)

# Combined counts across every split loaded above.
combined_counts = (len(videos), len(images), len(annotations))
print("total number of {} videos {} images {} annotations: ".format(*combined_counts))

In [None]:
from collections import Counter, OrderedDict
# Histogram of video lengths in the test split: {length: number of videos}.
# "videos" is optional in an annotation file (load_annotations treats it that
# way), so fall back to an empty list instead of raising KeyError.
vid_lengths = Counter(vid["length"] for vid in test_data.get("videos", []))
# A Counter iterates in first-seen order, so wrapping it in OrderedDict alone
# does not sort anything; sort by length explicitly before printing.
# NOTE(review): assumes all "length" values are mutually comparable (e.g. ints).
print(OrderedDict(sorted(vid_lengths.items())))

In [None]:
from collections import defaultdict
import matplotlib.pyplot as plt

def normalize_key(value):
    """Collapse an attribute value into an order-independent string key.

    A list contributes one token per element (each passed through ``str``).
    Anything else is stringified, stripped of ``[``, ``]`` and ``'``
    characters, and split on underscores. Tokens are whitespace-trimmed,
    empty ones are dropped, and the rest are sorted and re-joined with
    underscores. Returns ``'unavailable'`` when no token survives.
    """
    if isinstance(value, list):
        tokens = map(str, value)
    else:
        text = str(value)
        for unwanted in ('[', ']', "'"):
            text = text.replace(unwanted, '')
        tokens = text.split('_')

    cleaned = sorted(tok.strip() for tok in tokens if tok.strip())
    if not cleaned:
        return 'unavailable'
    return '_'.join(cleaned)

assert len(annotations) > 0, "no annotations found"
attributes = ["class_name", "size_cat", "horizon", "occlusion", "clipping", "primary_terrain", "secondary_terrain", "terrain_modifier", "low_visibility", "annotated_weather", "cloud_coverage", "intruder_lateral_view", "intruder_vertical_view", "image_quality"]

# Subplot grid per figure. plots_per_fig is derived from the grid so the two
# can never disagree.
rows, cols = 4, 4
plots_per_fig = rows * cols
plot_count = 0
fig = None
axes = None

for attr in attributes:
    print(f"\n\ndistribution of {attr}:")

    # Count instances per normalized attribute value; annotations lacking the
    # attribute are counted under 'unavailable'.
    dist = defaultdict(int)
    for annot in annotations:
        value = annot["attributes"].get(attr, 'unavailable')
        dist[normalize_key(value)] += 1

    for key, val in dist.items():
        print(f"{key}: {val}, ", end=' ')
    print()  # new line after all values printed

    # Start a new figure whenever the current grid is full (or on first use),
    # showing and releasing the previous one first.
    slot = plot_count % plots_per_fig
    if slot == 0:
        if fig is not None:
            fig.tight_layout()
            plt.show()
            plt.close(fig)
        fig, axes = plt.subplots(rows, cols, figsize=(20, 20))
        axes = axes.flatten()

    ax = axes[slot]
    bars = ax.bar(list(dist.keys()), list(dist.values()), width=0.9)
    ax.set_title(str(attr))
    ax.set_ylabel("number of instances")
    ax.tick_params(axis='x', rotation=90)
    ax.bar_label(bars)

    plot_count += 1

# Show the last figure
if fig is not None:
    # The attribute count need not fill the grid; hide the unused axes so the
    # final figure does not contain empty frames.
    used_slots = (plot_count - 1) % plots_per_fig + 1
    for spare_ax in axes[used_slots:]:
        spare_ax.set_visible(False)
    fig.tight_layout()
    plt.show()
    plt.close(fig)


In [None]:
from collections import defaultdict
import matplotlib.pyplot as plt

def normalize_key(value):
    """Turn an attribute value into a canonical, order-independent key.

    Lists yield one token per element via ``str``; any other value is
    stringified, has ``[``, ``]`` and ``'`` removed, and is split on
    underscores. Tokens are trimmed, blanks discarded, and the remainder is
    sorted and joined with underscores. ``'unavailable'`` is returned when
    nothing is left.
    """
    if isinstance(value, list):
        tokens = map(str, value)
    else:
        text = str(value)
        for unwanted in ('[', ']', "'"):
            text = text.replace(unwanted, '')
        tokens = text.split('_')

    cleaned = sorted(tok.strip() for tok in tokens if tok.strip())
    if not cleaned:
        return 'unavailable'
    return '_'.join(cleaned)

assert len(annotations) > 0, "no annotations found"
attributes = ["class_name", "size_cat", "horizon", "occlusion", "clipping", "annotated_weather", "cloud_coverage", 
              "intruder_lateral_view", "intruder_vertical_view"]

# Subplot grid per figure. plots_per_fig is derived from the grid so the two
# can never disagree.
rows, cols = 4, 2
plots_per_fig = rows * cols
plot_count = 0
fig = None
axes = None

# The class label of an annotation does not depend on which attribute is being
# summarised, so normalise it once up front rather than once per attribute.
class_labels = [
    normalize_key(annot["attributes"].get("class_name", 'unavailable'))
    for annot in annotations
]

for attr in attributes:
    print(f"\n\ndistribution of {attr} (with class_name breakdown):")

    # For class breakdown: attr_value -> class_name -> count
    dist = defaultdict(lambda: defaultdict(int))
    for annot, class_val in zip(annotations, class_labels):
        attr_val = normalize_key(annot["attributes"].get(attr, 'unavailable'))
        dist[attr_val][class_val] += 1

    # Print distribution
    for attr_val, class_counts in dist.items():
        print(f"{attr_val}: ", end='')
        for cls, count in class_counts.items():
            print(f"{cls}: {count}, ", end='')
        print()

    # Plotting (skip for class_name itself since it would be redundant)
    if attr == "class_name":
        continue

    # Start a new figure whenever the current grid is full (or on first use),
    # showing and releasing the previous one first.
    slot = plot_count % plots_per_fig
    if slot == 0:
        if fig is not None:
            fig.tight_layout()
            plt.show()
            plt.close(fig)
        fig, axes = plt.subplots(rows, cols, figsize=(20, 20))
        axes = axes.flatten()

    ax = axes[slot]

    attr_values = list(dist.keys())
    class_names = sorted({cls for counts in dist.values() for cls in counts})
    n_classes = len(class_names)
    bar_width = 0.8 / n_classes  # width of each bar per class
    group_starts = range(len(attr_values))

    # One bar series per class, each shifted right by one bar width.
    for i, cls in enumerate(class_names):
        counts = [dist[val].get(cls, 0) for val in attr_values]
        ax.bar(
            [x + i * bar_width for x in group_starts],
            counts,
            bar_width,
            label=cls
        )

    ax.set_title(str(attr))
    ax.set_ylabel("number of instances")
    # Bars are centre-aligned, so a group's bar centres run from x to
    # x + (n_classes - 1) * bar_width; the tick belongs at the midpoint of
    # that range, not half a bar further right.
    ax.set_xticks([x + bar_width * (n_classes - 1) / 2 for x in group_starts])
    ax.set_xticklabels(attr_values, rotation=90)
    ax.legend(fontsize='small')

    plot_count += 1

# Show the last figure
if fig is not None:
    # Hide any grid cells that did not receive a plot so the final figure has
    # no empty frames.
    used_slots = (plot_count - 1) % plots_per_fig + 1
    for spare_ax in axes[used_slots:]:
        spare_ax.set_visible(False)
    fig.tight_layout()
    plt.show()
    plt.close(fig)

In [None]:
import json
import cv2
import os
import random
import matplotlib.pyplot as plt

# Colour palette for visualisation: 16 three-component tuples with values in
# 0..255. The generator is seeded first so the palette is identical on every run.
random.seed(42)
COLORS = {
    index: tuple(random.randint(0, 255) for _channel in range(3))
    for index in range(16)
}

# Ground-truth file: indexed below by its "categories", "images" and
# "annotations" keys.
with open("/home/rgummadi/YOLOV/gt_refined.json", "r") as f:
    gt_data = json.load(f)

# Prediction file: iterated directly as a sequence of detection records.
with open("/home/rgummadi/YOLOV/refined_pred.json", "r") as f:
    pred_data = json.load(f)

# category id -> class name, taken from the ground-truth categories
category_mapping = dict(
    (category["id"], category["name"]) for category in gt_data["categories"]
)

# image id -> image file name, taken from the ground-truth image list
image_mapping = dict(
    (image_info["id"], image_info["file_name"]) for image_info in gt_data["images"]
)

# Predictions scoring below this value are not drawn
CONF_THRESH = 0.1

def draw_boxes(image, annotations, color, label_prefix="GT", is_prediction=False):
    """Draw a rectangle and a text label on ``image`` for each annotation.

    The image is modified in place; nothing is returned.

    Args:
        image: image array passed to ``cv2.rectangle`` / ``cv2.putText``.
        annotations: iterable of dicts with a ``"bbox"`` of ``[x, y, w, h]``
            and a ``"category_id"``; a ``"score"`` is also read when
            ``is_prediction`` is true. Box values are truncated to ints.
        color: colour tuple handed straight to OpenCV (callers in this file
            pass BGR-ordered tuples).
        label_prefix: text placed before the class name in the label.
        is_prediction: when true, entries scoring below ``CONF_THRESH`` are
            skipped and the score is appended to the label.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    thickness = 2

    for ann in annotations:
        if is_prediction and ann["score"] < CONF_THRESH:
            continue  # Skip low-confidence predictions

        x, y, w, h = map(int, ann["bbox"])
        class_name = category_mapping.get(ann["category_id"], "Unknown")
        label = f"{label_prefix}: {class_name}"

        # Add score to prediction labels
        if is_prediction:
            label += f" ({ann['score']:.2f})"

        cv2.rectangle(image, (x, y), (x + w, y + h), color, thickness)

        # putText anchors text at its bottom-left corner. For a box touching
        # the top edge, y - 5 would put the text above the image where it is
        # not visible, so keep the baseline at least one text-height down.
        (_, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
        label_y = max(y - 5, text_height)
        cv2.putText(image, label, (x, label_y), font, font_scale, color, thickness)

# Create an output directory
os.makedirs("output_visualization", exist_ok=True)

# Group annotations by image id once, instead of rescanning every annotation
# and every prediction for each image. Per-image order matches file order.
gt_by_image = {}
for ann in gt_data["annotations"]:
    gt_by_image.setdefault(ann["image_id"], []).append(ann)

# Predictions are filtered by the confidence threshold while grouping.
pred_by_image = {}
for ann in pred_data:
    if ann["score"] >= CONF_THRESH:
        pred_by_image.setdefault(ann["image_id"], []).append(ann)

# Process each image
for image_id, image_name in image_mapping.items():
    if not os.path.exists(image_name):
        continue  # Skip if image is missing

    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread signals an undecodable file by returning None rather than
        # raising; skip it the same way a missing file is skipped.
        print(f"skipping unreadable image: {image_name}")
        continue

    gt_annotations = gt_by_image.get(image_id, [])
    pred_annotations = pred_by_image.get(image_id, [])

    # Draw GT and prediction boxes (colours are BGR)
    draw_boxes(image, gt_annotations, (0, 255, 0), "GT")  # Green for GT
    draw_boxes(image, pred_annotations, (0, 0, 255), "Pred", is_prediction=True)  # Red for predictions

    # Save the annotated image under its base file name
    output_path = os.path.join("output_visualization", os.path.basename(image_name))
    cv2.imwrite(output_path, image)

    # Optional: Show image (comment this out if running on a server)
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.title(image_name)
    plt.show()
