In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Inspect the Kaggle input copy of the dataset: the dataset root, the first few entries of
# the images and annotation trees, and the first few annotation files of one breed.
!ls /kaggle/input/stanford-dog-database
!ls /kaggle/input/stanford-dog-database/images | head
!ls /kaggle/input/stanford-dog-database/annotation | head
!ls /kaggle/input/stanford-dog-database/annotation/Annotation/n02085620-Chihuahua | head


In [None]:
# 1) Copy the dataset from the read-only input folder to the writable working directory.
#    The copy is guarded so that re-running this cell is a no-op: with a plain
#    `cp -r SRC DST`, a second run finds DST already present and nests a fresh copy
#    at DST/stanford-dog-database instead of refreshing DST.
#    NOTE: delete /kaggle/working/dogs manually if an earlier copy was interrupted.
![ -d /kaggle/working/dogs ] || cp -r /kaggle/input/stanford-dog-database /kaggle/working/dogs

# 2) Verify the copy
!ls /kaggle/working/dogs
!ls /kaggle/working/dogs/annotation/Annotation | head


In [None]:
from pathlib import Path


def rename_extensionless_annotations(root, marker="<annotation", suffix=".xml"):
    """Append `suffix` to extension-less files under `root` that look like annotations.

    A file is renamed when it has no suffix, the first 128 bytes (decoded as
    UTF-8, undecodable bytes ignored) contain `marker`, and no file with the
    target name already exists.

    Args:
        root: Directory to walk recursively (str or Path).
        marker: Text that must appear in the first 128 bytes of a file.
        suffix: Extension appended to the file name.

    Returns:
        Number of files renamed. Files that cannot be read or renamed are
        reported on stdout and skipped.
    """
    root = Path(root)
    if not root.is_dir():
        print(f"Annotation root not found: {root}")
        return 0

    # Snapshot the candidates first so renames never happen while rglob() is
    # still walking the same directories.
    candidates = sorted(p for p in root.rglob("*") if p.is_file() and p.suffix == "")

    renamed = 0
    for path in candidates:
        try:
            # Only the header is needed; avoid loading the whole file.
            with path.open("rb") as fh:
                head = fh.read(128).decode("utf-8", errors="ignore")
        except OSError as exc:
            print(f"Skipping unreadable file {path}: {exc}")
            continue

        if marker not in head:
            continue

        target = path.with_name(path.name + suffix)
        if target.exists():
            continue

        try:
            path.rename(target)
        except OSError as exc:
            print(f"Could not rename {path}: {exc}")
            continue
        renamed += 1
    return renamed


ann_root = Path("/kaggle/working/dogs/annotation/Annotation")  # <-- change me
count = rename_extensionless_annotations(ann_root)
print("Renamed", count, "files to .xml")


In [None]:
# List one breed's annotation files in the working copy
# (presumably to confirm the .xml rename performed earlier — verify).
!ls /kaggle/working/dogs/annotation/Annotation/n02085620-Chihuahua


In [None]:
# List the image files of the same breed in the working copy.
!ls /kaggle/working/dogs/images/Images/n02085620-Chihuahua

In [None]:
import os
from pathlib import Path

img_root = Path("/kaggle/working/dogs/images/Images")
ann_root = Path("/kaggle/working/dogs/annotation/Annotation")

# (breed, basename) pairs whose counterpart file is absent
missing_xml, missing_jpg = [], []
# Number of breeds that were actually compared (i.e. had an annotation folder).
# Counting every entry of img_root would also count stray files and skipped breeds.
breeds_checked = 0

# Loop over each breed folder; non-directory entries are not breeds and are ignored.
for img_dir in sorted(p for p in img_root.iterdir() if p.is_dir()):
    breed = img_dir.name
    ann_dir = ann_root / breed
    if not ann_dir.is_dir():
        print(f"‚ö†Ô∏è No annotation folder for {breed}")
        continue
    breeds_checked += 1

    # Collect basenames (without extensions)
    imgs = {p.stem for p in img_dir.glob("*.jpg")}
    xmls = {p.stem for p in ann_dir.glob("*.xml")}

    # Compare both directions
    no_xml = imgs - xmls
    no_img = xmls - imgs

    missing_xml.extend((breed, n) for n in sorted(no_xml))
    missing_jpg.extend((breed, n) for n in sorted(no_img))

print(f"\n‚úÖ Breeds checked: {breeds_checked}")
print(f"üü• Images with no XML: {len(missing_xml)}")
print(f"üü¶ XMLs with no Image: {len(missing_jpg)}")

# Show first few mismatches for inspection
print("\n--- First 10 images missing XML ---")
for b, n in missing_xml[:10]:
    print(f"{b}/{n}.jpg")

print("\n--- First 10 XMLs missing Image ---")
for b, n in missing_jpg[:10]:
    print(f"{b}/{n}.xml")


In [None]:
#EDA
# ============================================================
# Exploratory Data Analysis (EDA) for Stanford Dogs Dataset
# for Faster R-CNN / RetinaNet / YOLOv8 Comparative Study
# ============================================================

import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from pathlib import Path
import random

# ------------------------------------------------------------
# 1) CONFIGURATION
# ------------------------------------------------------------
DATA_ROOT = "/kaggle/working/dogs"  # Adjust if different
IMG_DIR = Path(DATA_ROOT, "images", "Images")
ANN_DIR = Path(DATA_ROOT, "annotation", "Annotation")

# Directory sanity check: prints True/False for each expected folder
print(f"Images path: {IMG_DIR.exists()}, Annotations path: {ANN_DIR.exists()}")

# ------------------------------------------------------------
# 2) BASIC DATASET STATS
# ------------------------------------------------------------
# Every sub-directory of IMG_DIR is treated as one breed.
breeds = sorted(name for name in os.listdir(IMG_DIR) if (IMG_DIR / name).is_dir())
print(f"‚úÖ Total Breeds: {len(breeds)}")

# Number of .jpg files per breed folder
breed_counts = {name: sum(1 for _ in (IMG_DIR / name).glob("*.jpg")) for name in breeds}

total_images = sum(breed_counts.values())
print(f"‚úÖ Total Images: {total_images}")

# Tabular view; keep only the text after the last "-" of the folder name as the label
breed_df = pd.DataFrame(list(breed_counts.items()), columns=["Breed", "ImageCount"])
breed_df["Breed"] = breed_df["Breed"].str.rsplit("-", n=1).str[-1]

# ------------------------------------------------------------
# 3) CLASS DISTRIBUTION
# ------------------------------------------------------------
top_breeds = breed_df.sort_values("ImageCount", ascending=False).head(30)

plt.figure(figsize=(18,5))
sns.barplot(data=top_breeds, x="Breed", y="ImageCount", palette="viridis")
plt.xticks(rotation=90)
plt.title("Top 30 Breeds by Image Count")
plt.ylabel("Number of Images")
plt.xlabel("Dog Breed")
plt.tight_layout()
plt.show()

# Class imbalance metric: largest class size divided by smallest class size
image_counts = breed_df["ImageCount"]
imbalance_ratio = image_counts.max() / image_counts.min()
print(f"‚öñÔ∏è Class imbalance ratio: {imbalance_ratio:.2f}")

# ------------------------------------------------------------
# 4) IMAGE RESOLUTION DISTRIBUTION
# ------------------------------------------------------------
# A local, seeded RNG makes the sampled breeds (and the reported means) reproducible.
size_rng = random.Random(42)
# random.sample raises ValueError when asked for more items than exist, so cap at len(breeds).
size_breeds = size_rng.sample(breeds, min(10, len(breeds)))  # sample up to 10 breeds for speed

img_sizes = []
for breed in size_breeds:
    for img_path in (IMG_DIR / breed).glob("*.jpg"):
        img = cv2.imread(str(img_path))
        if img is not None:  # skip files OpenCV could not decode
            h, w = img.shape[:2]
            img_sizes.append((w, h))
# reshape keeps the array two-dimensional (N, 2) even when nothing was read, so the
# column indexing below cannot raise IndexError on an empty sample.
img_sizes = np.array(img_sizes, dtype=np.int64).reshape(-1, 2)

plt.figure(figsize=(6,6))
plt.scatter(img_sizes[:,0], img_sizes[:,1], alpha=0.3, color="teal")
plt.xlabel("Width (px)")
plt.ylabel("Height (px)")
plt.title("Image Resolution Distribution (sample of 10 breeds)")
plt.grid(True)
plt.show()

if len(img_sizes):
    print(f"Mean width: {img_sizes[:,0].mean():.1f}px, Mean height: {img_sizes[:,1].mean():.1f}px")
else:
    print("No readable images found in the sampled breeds.")

# ------------------------------------------------------------
# 5) BOUNDING BOX GEOMETRY ANALYSIS
# ------------------------------------------------------------
areas, ratios = [], []

# Seeded local RNG for a reproducible subset; the sample size is capped because
# random.sample raises ValueError when the population is smaller than the request.
box_rng = random.Random(42)
box_breeds = box_rng.sample(breeds, min(10, len(breeds)))  # sample subset for faster analysis
unparsed_files = 0

for breed in box_breeds:
    for xml_file in (ANN_DIR / breed).glob("*.xml"):
        try:
            root = ET.parse(xml_file).getroot()
            for obj in root.findall("object"):
                bbox = obj.find("bndbox")
                xmin, ymin = int(bbox.find("xmin").text), int(bbox.find("ymin").text)
                xmax, ymax = int(bbox.find("xmax").text), int(bbox.find("ymax").text)
                w, h = xmax - xmin, ymax - ymin
                if w > 0 and h > 0:  # ignore degenerate boxes
                    areas.append(w * h)
                    ratios.append(w / h)
        except (OSError, ET.ParseError, AttributeError, TypeError, ValueError):
            # Unreadable/malformed XML, a missing tag, or a non-integer coordinate.
            # Catching these explicitly (instead of a bare except) keeps
            # KeyboardInterrupt and genuine programming errors visible.
            unparsed_files += 1
            continue

if unparsed_files:
    print(f"Skipped {unparsed_files} annotation file(s) that could not be parsed.")

if areas:
    # Bounding box area distribution
    plt.figure(figsize=(6,4))
    sns.histplot(areas, bins=40, color="orange", kde=True)
    plt.title("Bounding Box Area Distribution")
    plt.xlabel("Area (pixels¬≤)")
    plt.ylabel("Count")
    plt.show()

    # Bounding box aspect ratio
    plt.figure(figsize=(6,4))
    sns.histplot(ratios, bins=40, color="green", kde=True)
    plt.title("Bounding Box Aspect Ratio Distribution (w/h)")
    plt.xlabel("Aspect Ratio")
    plt.ylabel("Frequency")
    plt.show()

    print(f"Average box area: {np.mean(areas):.1f}, Average aspect ratio: {np.mean(ratios):.2f}")
else:
    print("No valid bounding boxes found in the sampled breeds; skipping box plots.")

# ------------------------------------------------------------
# 6) SAMPLE VISUALIZATION WITH BOXES
# ------------------------------------------------------------
def show_sample_with_boxes(breed=None):
    """Display one randomly chosen image of `breed` with its annotated boxes drawn.

    Args:
        breed: Name of a breed folder under IMG_DIR. When None, a folder is
            chosen at random from the module-level `breeds` list.

    Returns:
        None. The image is shown with matplotlib. If the folder has no .jpg
        files or the image cannot be decoded, a message is printed and nothing
        is plotted. If the annotation is missing or malformed, the image is
        shown without boxes.
    """
    if breed is None:
        breed = random.choice(breeds)
    img_files = list((IMG_DIR / breed).glob("*.jpg"))
    if not img_files:  # random.choice would raise IndexError on an empty list
        print(f"No .jpg images found for {breed}")
        return
    img_path = random.choice(img_files)
    xml_path = ANN_DIR / breed / (Path(img_path).stem + ".xml")

    bgr = cv2.imread(str(img_path))
    if bgr is None:  # cv2.imread signals failure by returning None
        print(f"Could not read image {img_path}")
        return
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    objects = []
    if xml_path.is_file():
        try:
            objects = ET.parse(xml_path).getroot().findall("object")
        except ET.ParseError as exc:
            print(f"Could not parse {xml_path}: {exc}")
    else:
        print(f"No annotation found for {img_path.name}")

    for obj in objects:
        bbox = obj.find("bndbox")
        if bbox is None:
            continue
        xmin, ymin = int(bbox.findtext("xmin")), int(bbox.findtext("ymin"))
        xmax, ymax = int(bbox.findtext("xmax")), int(bbox.findtext("ymax"))
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        # Keep the label baseline inside the image when the box touches the top edge.
        text_y = max(ymin - 5, 12)
        cv2.putText(img, obj.findtext("name") or "", (xmin, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

    plt.figure(figsize=(6,6))
    plt.imshow(img)
    plt.title(f"Sample: {breed}")
    plt.axis("off")
    plt.show()

# Show three random samples (each call picks a random breed and image)
for _ in range(3):
    show_sample_with_boxes()

# ------------------------------------------------------------
# 7) SAVE METRICS SUMMARY
# ------------------------------------------------------------
# Collect the headline numbers computed in the sections above into one mapping.
# The image-size and box statistics come from the sampled breeds, not the full dataset.
summary = {
    "Total Breeds": len(breeds),
    "Total Images": total_images,
    "Imbalance Ratio": round(imbalance_ratio, 2),
    "Mean Image Width": round(img_sizes[:,0].mean(), 1),
    "Mean Image Height": round(img_sizes[:,1].mean(), 1),
    "Mean Box Area": round(np.mean(areas), 1),
    "Mean Aspect Ratio": round(np.mean(ratios), 2)
}

# Two-column (Metric, Value) table, one row per summary entry
summary_df = pd.DataFrame(list(summary.items()), columns=["Metric", "Value"])
print("\n=== EDA Summary ===")
print(summary_df.to_string(index=False))

# Save the summary as CSV in the current working directory (overwrites an existing file)
summary_df.to_csv("eda_summary.csv", index=False)
print("\n‚úÖ EDA summary saved as eda_summary.csv")


In [None]:
import os
import cv2
import torch
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset
from torchvision import transforms

class StanfordDogsDataset(Dataset):
    """Stanford Dogs detection dataset yielding `(image, target)` pairs.

    Expected layout under `root_dir`:
        images/Images/<breed_folder>/<name>.jpg
        annotation/Annotation/<breed_folder>/<name>.xml

    Only images that have a matching XML file are indexed. Class indices start
    at 1 (0 is left for the background class) and follow the sorted breed
    folder names.
    """

    # Accepted image extensions (compared case-insensitively)
    IMAGE_EXTENSIONS = (".jpg", ".jpeg")

    def __init__(self, root_dir, transforms=None):
        """
        Index every image/annotation pair found under `root_dir`.

        Args:
            root_dir (str): Path to dataset root (contains 'images' and 'annotation' folders)
            transforms (callable, optional): Transformations to apply to images
        """
        self.root_dir = root_dir
        self.img_dir = os.path.join(root_dir, "images", "Images")
        self.ann_dir = os.path.join(root_dir, "annotation", "Annotation")
        self.transforms = transforms

        # --- Build breed-to-index mapping (based on folder names) ---
        # Only directories are breeds; a stray file next to the breed folders must
        # not receive a class index or shift the indices of the real classes.
        self.classes = sorted(
            name for name in os.listdir(self.img_dir)
            if os.path.isdir(os.path.join(self.img_dir, name))
        )
        self.class_to_idx = {cls_name: i + 1 for i, cls_name in enumerate(self.classes)}  # +1 for background

        self.image_paths = []
        self.annotation_paths = []

        # --- Collect all image + XML annotation pairs ---
        # Both levels are iterated in sorted order: os.listdir order is arbitrary,
        # and an unstable sample order would make a seeded random split irreproducible.
        for breed_folder in self.classes:
            img_folder = os.path.join(self.img_dir, breed_folder)
            ann_folder = os.path.join(self.ann_dir, breed_folder)
            if not os.path.isdir(ann_folder):
                continue

            for img_file in sorted(os.listdir(img_folder)):
                if not img_file.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                base = os.path.splitext(img_file)[0]
                xml_file = os.path.join(ann_folder, base + ".xml")
                if os.path.exists(xml_file):
                    self.image_paths.append(os.path.join(img_folder, img_file))
                    self.annotation_paths.append(xml_file)

        print(f"‚úÖ Dataset initialized: {len(self.image_paths)} image-annotation pairs found across {len(self.classes)} breeds.")

    def __len__(self):
        """Return total number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load one image and its corresponding annotation.

        Returns:
            tuple: `(img, target)` where `img` is the RGB image (after
            `self.transforms`, if given) and `target` is a dict with
            `"boxes"` (float32 tensor of shape (N, 4), [xmin, ymin, xmax, ymax])
            and `"labels"` (int64 tensor of shape (N,)).

        Raises:
            OSError: if the image file cannot be decoded.
        """
        img_path = self.image_paths[idx]
        ann_path = self.annotation_paths[idx]

        # --- Load image ---
        bgr = cv2.imread(img_path)
        if bgr is None:  # cv2.imread returns None instead of raising
            raise OSError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        # --- Resolve the label once per image ---
        # The label comes from the breed folder (e.g. n02086079-Pekingese), so the
        # <name> tag inside the XML, and any spelling variant it contains, is not needed.
        folder_name = os.path.basename(os.path.dirname(img_path))
        label = self.class_to_idx.get(folder_name)

        # --- Parse XML annotation ---
        boxes, labels = [], []
        objects = ET.parse(ann_path).getroot().findall("object") if label is not None else []
        for obj in objects:
            bbox = obj.find("bndbox")
            if bbox is None:
                continue
            xmin = int(bbox.find("xmin").text)
            ymin = int(bbox.find("ymin").text)
            xmax = int(bbox.find("xmax").text)
            ymax = int(bbox.find("ymax").text)

            # Zero/negative-size boxes are rejected by detection models; drop them here.
            if xmax <= xmin or ymax <= ymin:
                continue

            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(label)

        # Convert to tensors; reshape keeps boxes at (N, 4) even when N == 0
        # (an empty list would otherwise become a 1-D tensor of shape (0,)).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        if self.transforms:
            img = self.transforms(img)

        return img, target


In [None]:
# transforms.ToTensor() is handed to the dataset as its `transforms` callable,
# so it is applied to every image the dataset loads.
transform = transforms.ToTensor()
# Root of the writable dataset copy (must contain 'images' and 'annotation')
root_path = "/kaggle/working/dogs"
dataset = StanfordDogsDataset(root_path, transforms=transform)
print("‚úÖ Total samples:", len(dataset))


In [None]:
# Sanity check: fetch the first sample and print the image shape and its target tensors.
img, target = dataset[0]
print("Image shape:", img.shape)
print("Boxes:", target["boxes"])
print("Labels:", target["labels"])


In [None]:
from torch.utils.data import random_split, DataLoader

def collate_detection_batch(batch):
    """Turn a list of (image, target) samples into an (images, targets) pair of tuples."""
    return tuple(zip(*batch))


# Seed the global torch RNG so the random split below is repeatable.
torch.manual_seed(42)

# 80 % of the samples go to training; the remainder is the test split.
n_total = len(dataset)
train_size = int(0.8 * n_total)
test_size = n_total - train_size
train_ds, test_ds = random_split(dataset, [train_size, test_size])

# Samples are kept as per-image tuples rather than stacked into one tensor.
train_dl = DataLoader(
    train_ds,
    batch_size=2,
    shuffle=True,
    collate_fn=collate_detection_batch,
)
test_dl = DataLoader(
    test_ds,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_detection_batch,
)

print(f"Train: {len(train_ds)}  Test: {len(test_ds)}")


In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the Faster R-CNN (ResNet-50 FPN) detector, requesting the "COCO_V1" weights.
model = fasterrcnn_resnet50_fpn(weights="COCO_V1")

# Replace the box predictor with a new FastRCNNPredictor sized for this dataset's
# classes; the input width is read from the existing classification layer.
num_classes = len(dataset.classes) + 1   # +1 for background
in_feats = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, num_classes)

# Move the model to the selected device.
# NOTE(review): as the last expression of the cell this presumably echoes the whole
# module repr into the notebook output — confirm whether that is wanted.
model.to(device)


In [None]:
# Optimise only the parameters that still require gradients.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=5e-4)

num_epochs = 2  # start small to test

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (imgs, tgts) in enumerate(train_dl):
        # Move the images and every tensor of every target dict to the training device.
        imgs = [image.to(device) for image in imgs]
        tgts = [
            {key: value.to(device) for key, value in target.items()}
            for target in tgts
        ]

        # With targets supplied, the model call yields a dict of loss terms; add them up.
        loss_dict = model(imgs, tgts)
        loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        step_loss = loss.item()
        running_loss += step_loss
        if i % 10 == 0:
            print(f"Epoch[{epoch+1}/{num_epochs}] Step[{i}] Loss: {step_loss:.4f}")
    print(f"Epoch[{epoch+1}] Avg Loss: {running_loss/len(train_dl):.4f}")


In [None]:
# Visualize predictions
import matplotlib.pyplot as plt

model.eval()
img, _ = test_ds[0]
with torch.no_grad():
    pred = model([img.to(device)])

# The model's outputs live on `device`. Matplotlib cannot convert CUDA tensors to
# NumPy, so copy them to the CPU and work with plain Python numbers from here on.
pred_boxes = pred[0]["boxes"].cpu().tolist()
pred_scores = pred[0]["scores"].cpu().tolist()
pred_labels = pred[0]["labels"].cpu().tolist()

# The image tensor is channel-first; matplotlib expects channel-last.
plt.imshow(img.permute(1,2,0))
for (x1, y1, x2, y2), score, label in zip(pred_boxes, pred_scores, pred_labels):
    if score > 0.5:  # only draw reasonably confident detections
        plt.gca().add_patch(plt.Rectangle((x1,y1), x2-x1, y2-y1,
                                          fill=False, color="lime", lw=2))
        # Labels are 1-based (0 is background), hence the -1 into dataset.classes.
        plt.text(x1, y1-5, f"{dataset.classes[label-1]} ({score:.2f})",
                 color="white", fontsize=8,
                 bbox=dict(facecolor="lime", alpha=0.5))
plt.axis("off"); plt.show()


In [None]:
# Save the model's state_dict to the Kaggle working directory.
torch.save(model.state_dict(), "/kaggle/working/fasterrcnn_dogs.pth")
print("Model saved.")


In [None]:
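# EDA — NOTE(review): this cell is a verbatim repeat of the EDA cell that precedes the
# dataset/training cells; it redefines show_sample_with_boxes and overwrites
# eda_summary.csv. Consider removing it.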
# ============================================================
# Exploratory Data Analysis (EDA) for Stanford Dogs Dataset
# for Faster R-CNN / RetinaNet / YOLOv8 Comparative Study
# ============================================================

import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from pathlib import Path
import random

# ------------------------------------------------------------
# 1) CONFIGURATION
# ------------------------------------------------------------
DATA_ROOT = "/kaggle/working/dogs"  # Adjust if different
IMG_DIR = Path(DATA_ROOT, "images", "Images")
ANN_DIR = Path(DATA_ROOT, "annotation", "Annotation")

# Directory sanity check: prints True/False for each expected folder
print(f"Images path: {IMG_DIR.exists()}, Annotations path: {ANN_DIR.exists()}")

# ------------------------------------------------------------
# 2) BASIC DATASET STATS
# ------------------------------------------------------------
# Every sub-directory of IMG_DIR is treated as one breed.
breeds = sorted(name for name in os.listdir(IMG_DIR) if (IMG_DIR / name).is_dir())
print(f"‚úÖ Total Breeds: {len(breeds)}")

# Number of .jpg files per breed folder
breed_counts = {name: sum(1 for _ in (IMG_DIR / name).glob("*.jpg")) for name in breeds}

total_images = sum(breed_counts.values())
print(f"‚úÖ Total Images: {total_images}")

# Tabular view; keep only the text after the last "-" of the folder name as the label
breed_df = pd.DataFrame(list(breed_counts.items()), columns=["Breed", "ImageCount"])
breed_df["Breed"] = breed_df["Breed"].str.rsplit("-", n=1).str[-1]

# ------------------------------------------------------------
# 3) CLASS DISTRIBUTION
# ------------------------------------------------------------
top_breeds = breed_df.sort_values("ImageCount", ascending=False).head(30)

plt.figure(figsize=(18,5))
sns.barplot(data=top_breeds, x="Breed", y="ImageCount", palette="viridis")
plt.xticks(rotation=90)
plt.title("Top 30 Breeds by Image Count")
plt.ylabel("Number of Images")
plt.xlabel("Dog Breed")
plt.tight_layout()
plt.show()

# Class imbalance metric: largest class size divided by smallest class size
image_counts = breed_df["ImageCount"]
imbalance_ratio = image_counts.max() / image_counts.min()
print(f"‚öñÔ∏è Class imbalance ratio: {imbalance_ratio:.2f}")

# ------------------------------------------------------------
# 4) IMAGE RESOLUTION DISTRIBUTION
# ------------------------------------------------------------
# A local, seeded RNG makes the sampled breeds (and the reported means) reproducible.
size_rng = random.Random(42)
# random.sample raises ValueError when asked for more items than exist, so cap at len(breeds).
size_breeds = size_rng.sample(breeds, min(10, len(breeds)))  # sample up to 10 breeds for speed

img_sizes = []
for breed in size_breeds:
    for img_path in (IMG_DIR / breed).glob("*.jpg"):
        img = cv2.imread(str(img_path))
        if img is not None:  # skip files OpenCV could not decode
            h, w = img.shape[:2]
            img_sizes.append((w, h))
# reshape keeps the array two-dimensional (N, 2) even when nothing was read, so the
# column indexing below cannot raise IndexError on an empty sample.
img_sizes = np.array(img_sizes, dtype=np.int64).reshape(-1, 2)

plt.figure(figsize=(6,6))
plt.scatter(img_sizes[:,0], img_sizes[:,1], alpha=0.3, color="teal")
plt.xlabel("Width (px)")
plt.ylabel("Height (px)")
plt.title("Image Resolution Distribution (sample of 10 breeds)")
plt.grid(True)
plt.show()

if len(img_sizes):
    print(f"Mean width: {img_sizes[:,0].mean():.1f}px, Mean height: {img_sizes[:,1].mean():.1f}px")
else:
    print("No readable images found in the sampled breeds.")

# ------------------------------------------------------------
# 5) BOUNDING BOX GEOMETRY ANALYSIS
# ------------------------------------------------------------
areas, ratios = [], []

# Seeded local RNG for a reproducible subset; the sample size is capped because
# random.sample raises ValueError when the population is smaller than the request.
box_rng = random.Random(42)
box_breeds = box_rng.sample(breeds, min(10, len(breeds)))  # sample subset for faster analysis
unparsed_files = 0

for breed in box_breeds:
    for xml_file in (ANN_DIR / breed).glob("*.xml"):
        try:
            root = ET.parse(xml_file).getroot()
            for obj in root.findall("object"):
                bbox = obj.find("bndbox")
                xmin, ymin = int(bbox.find("xmin").text), int(bbox.find("ymin").text)
                xmax, ymax = int(bbox.find("xmax").text), int(bbox.find("ymax").text)
                w, h = xmax - xmin, ymax - ymin
                if w > 0 and h > 0:  # ignore degenerate boxes
                    areas.append(w * h)
                    ratios.append(w / h)
        except (OSError, ET.ParseError, AttributeError, TypeError, ValueError):
            # Unreadable/malformed XML, a missing tag, or a non-integer coordinate.
            # Catching these explicitly (instead of a bare except) keeps
            # KeyboardInterrupt and genuine programming errors visible.
            unparsed_files += 1
            continue

if unparsed_files:
    print(f"Skipped {unparsed_files} annotation file(s) that could not be parsed.")

if areas:
    # Bounding box area distribution
    plt.figure(figsize=(6,4))
    sns.histplot(areas, bins=40, color="orange", kde=True)
    plt.title("Bounding Box Area Distribution")
    plt.xlabel("Area (pixels¬≤)")
    plt.ylabel("Count")
    plt.show()

    # Bounding box aspect ratio
    plt.figure(figsize=(6,4))
    sns.histplot(ratios, bins=40, color="green", kde=True)
    plt.title("Bounding Box Aspect Ratio Distribution (w/h)")
    plt.xlabel("Aspect Ratio")
    plt.ylabel("Frequency")
    plt.show()

    print(f"Average box area: {np.mean(areas):.1f}, Average aspect ratio: {np.mean(ratios):.2f}")
else:
    print("No valid bounding boxes found in the sampled breeds; skipping box plots.")

# ------------------------------------------------------------
# 6) SAMPLE VISUALIZATION WITH BOXES
# ------------------------------------------------------------
def show_sample_with_boxes(breed=None):
    """Display one randomly chosen image of `breed` with its annotated boxes drawn.

    Args:
        breed: Name of a breed folder under IMG_DIR. When None, a folder is
            chosen at random from the module-level `breeds` list.

    Returns:
        None. The image is shown with matplotlib. If the folder has no .jpg
        files or the image cannot be decoded, a message is printed and nothing
        is plotted. If the annotation is missing or malformed, the image is
        shown without boxes.
    """
    if breed is None:
        breed = random.choice(breeds)
    img_files = list((IMG_DIR / breed).glob("*.jpg"))
    if not img_files:  # random.choice would raise IndexError on an empty list
        print(f"No .jpg images found for {breed}")
        return
    img_path = random.choice(img_files)
    xml_path = ANN_DIR / breed / (Path(img_path).stem + ".xml")

    bgr = cv2.imread(str(img_path))
    if bgr is None:  # cv2.imread signals failure by returning None
        print(f"Could not read image {img_path}")
        return
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    objects = []
    if xml_path.is_file():
        try:
            objects = ET.parse(xml_path).getroot().findall("object")
        except ET.ParseError as exc:
            print(f"Could not parse {xml_path}: {exc}")
    else:
        print(f"No annotation found for {img_path.name}")

    for obj in objects:
        bbox = obj.find("bndbox")
        if bbox is None:
            continue
        xmin, ymin = int(bbox.findtext("xmin")), int(bbox.findtext("ymin"))
        xmax, ymax = int(bbox.findtext("xmax")), int(bbox.findtext("ymax"))
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        # Keep the label baseline inside the image when the box touches the top edge.
        text_y = max(ymin - 5, 12)
        cv2.putText(img, obj.findtext("name") or "", (xmin, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 1)

    plt.figure(figsize=(6,6))
    plt.imshow(img)
    plt.title(f"Sample: {breed}")
    plt.axis("off")
    plt.show()

# Show three random samples (each call picks a random breed and image)
for _ in range(3):
    show_sample_with_boxes()

# ------------------------------------------------------------
# 7) SAVE METRICS SUMMARY
# ------------------------------------------------------------
# Collect the headline numbers computed in the sections above into one mapping.
# The image-size and box statistics come from the sampled breeds, not the full dataset.
summary = {
    "Total Breeds": len(breeds),
    "Total Images": total_images,
    "Imbalance Ratio": round(imbalance_ratio, 2),
    "Mean Image Width": round(img_sizes[:,0].mean(), 1),
    "Mean Image Height": round(img_sizes[:,1].mean(), 1),
    "Mean Box Area": round(np.mean(areas), 1),
    "Mean Aspect Ratio": round(np.mean(ratios), 2)
}

# Two-column (Metric, Value) table, one row per summary entry
summary_df = pd.DataFrame(list(summary.items()), columns=["Metric", "Value"])
print("\n=== EDA Summary ===")
print(summary_df.to_string(index=False))

# Save the summary as CSV in the current working directory (overwrites an existing file)
summary_df.to_csv("eda_summary.csv", index=False)
print("\n‚úÖ EDA summary saved as eda_summary.csv")
