In [1]:
import os
import json
import urllib.request
from tqdm.notebook import tqdm

# Set up folders (created on first run, reused afterwards)
base_dir = os.path.expanduser("~/intruder_dataset")
os.makedirs(base_dir, exist_ok=True)
img_dir = os.path.join(base_dir, "images")
ann_dir = os.path.join(base_dir, "annotations")
os.makedirs(img_dir, exist_ok=True)
os.makedirs(ann_dir, exist_ok=True)


def _download(url, dest_path):
    """Download ``url`` to ``dest_path`` via a temporary ``.part`` file.

    The data is written next to the destination and only renamed into place
    once the transfer has finished. An interrupted download therefore never
    leaves a truncated file that the ``os.path.exists`` checks below would
    mistake for a complete one on the next run.
    """
    part_path = dest_path + ".part"
    try:
        urllib.request.urlretrieve(url, part_path)
        os.replace(part_path, dest_path)
    finally:
        # Only still present when the transfer or the rename failed.
        if os.path.exists(part_path):
            os.remove(part_path)


# Download annotations archive (skipped when a complete copy is already on disk)
ann_url = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"
ann_zip = os.path.join(ann_dir, "annotations_trainval2017.zip")
if not os.path.exists(ann_zip):
    print("Downloading annotations...")
    _download(ann_url, ann_zip)
    print("Annotations downloaded.")

# Unzip annotations
import zipfile
with zipfile.ZipFile(ann_zip, 'r') as zip_ref:
    zip_ref.extractall(ann_dir)

# Load the annotation JSON
with open(os.path.join(ann_dir, "annotations/instances_train2017.json")) as f:
    coco = json.load(f)

# Define target categories
target_classes = ["person", "backpack", "handbag", "suitcase", "cell phone", "laptop"]

# Map class names to COCO category IDs; names absent from the file are dropped
category_id_map = {cat["name"]: cat["id"] for cat in coco["categories"]}
target_ids = [category_id_map[name] for name in target_classes if name in category_id_map]
print("Target class IDs:", target_ids)

# Get image IDs that contain those classes.
# A set is used for the membership test; `target_ids` stays a list so that
# its order (and the printed output above) is unchanged.
target_id_set = set(target_ids)
image_ids = set()
for ann in coco["annotations"]:
    if ann["category_id"] in target_id_set:
        image_ids.add(ann["image_id"])

# Corresponding image metadata
image_map = {img["id"]: img for img in coco["images"]}
target_images = [image_map[i] for i in image_ids]
print(f"Found {len(target_images)} images with target classes.")

# Download images one-by-one.
# This cell is a smoke test: it fetches only the first image, prints its
# name and URL, and stops.
base_img_url = "http://images.cocodataset.org/train2017/"
for img in tqdm(target_images, desc="Downloading images"):
    filename = img["file_name"]
    img_url = base_img_url + filename
    dest_path = os.path.join(img_dir, filename)
    if not os.path.exists(dest_path):
        _download(img_url, dest_path)
    print( filename, img_url)
    break
    


Downloading annotations...
Annotations downloaded.
Target class IDs: [1, 27, 31, 33, 77, 73]
Found 67910 images with target classes.


Downloading images:   0%|          | 0/67910 [00:00<?, ?it/s]

000000262145.jpg http://images.cocodataset.org/train2017/000000262145.jpg


In [1]:
import os
import json
import urllib.request
from tqdm import tqdm  

# Step 1: Set up folders (created on first run, reused afterwards)
base_dir = os.path.expanduser("~/intruder_dataset")
os.makedirs(base_dir, exist_ok=True)
img_dir = os.path.join(base_dir, "images")
ann_dir = os.path.join(base_dir, "annotations")
os.makedirs(img_dir, exist_ok=True)
os.makedirs(ann_dir, exist_ok=True)


def _download(url, dest_path):
    """Download ``url`` to ``dest_path`` via a temporary ``.part`` file.

    The data is written next to the destination and only renamed into place
    once the transfer has finished. An interrupted download therefore never
    leaves a truncated file that the ``os.path.exists`` checks below would
    mistake for a complete one on the next run.
    """
    part_path = dest_path + ".part"
    try:
        urllib.request.urlretrieve(url, part_path)
        os.replace(part_path, dest_path)
    finally:
        # Only still present when the transfer or the rename failed.
        if os.path.exists(part_path):
            os.remove(part_path)


# Step 2: Download annotations archive (skipped when already on disk)
ann_url = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"
ann_zip = os.path.join(ann_dir, "annotations_trainval2017.zip")

if not os.path.exists(ann_zip):
    print("Downloading annotations...")
    _download(ann_url, ann_zip)
    print("Annotations downloaded.")

# Step 3: Unzip annotations
import zipfile
with zipfile.ZipFile(ann_zip, 'r') as zip_ref:
    zip_ref.extractall(ann_dir)

# Step 4: Load the annotation JSON
with open(os.path.join(ann_dir, "annotations/instances_train2017.json")) as f:
    coco = json.load(f)

# Step 5: Define target categories
target_classes = ["person", "backpack", "handbag", "suitcase", "cell phone", "laptop"]

# Map class names to COCO category IDs; names absent from the file are dropped
category_id_map = {cat["name"]: cat["id"] for cat in coco["categories"]}
target_ids = [category_id_map[name] for name in target_classes if name in category_id_map]

print("Target class IDs:", target_ids)

# Step 6: Get image IDs that contain those classes.
# A set is used for the membership test; `target_ids` itself stays a list so
# that its order (and the printed output above) is unchanged.
target_id_set = set(target_ids)
image_ids = set()
for ann in coco["annotations"]:
    if ann["category_id"] in target_id_set:
        image_ids.add(ann["image_id"])

# Step 7: Get corresponding image metadata
image_map = {img["id"]: img for img in coco["images"]}
target_images = [image_map[i] for i in image_ids]

print(f"Found {len(target_images)} images with target classes.")

# Step 8: Download images one-by-one; files already present are skipped,
# so the cell can be re-run to resume an interrupted download.
base_img_url = "http://images.cocodataset.org/train2017/"
for img in tqdm(target_images, desc="Downloading images"):
    filename = img["file_name"]
    img_url = base_img_url + filename
    dest_path = os.path.join(img_dir, filename)
    if not os.path.exists(dest_path):
        _download(img_url, dest_path)


🎯 Target class IDs: [1, 27, 31, 33, 77, 73]
Found 67910 images with target classes.


Downloading images: 100%|██████████| 67910/67910 [00:01<00:00, 37288.15it/s]


In [2]:
import os
import json
from collections import defaultdict
from tqdm import tqdm

# Dataset layout: images/ and annotations/ live under the same root folder.
base_dir = os.path.expanduser("~/intruder_dataset")
img_dir = os.path.join(base_dir, "images")
ann_dir = os.path.join(base_dir, "annotations")

# YOLO label files (one .txt per image) are written into labels/.
label_dir = os.path.join(base_dir, "labels")
os.makedirs(label_dir, exist_ok=True)

# Parse the COCO train2017 instance annotations.
with open(os.path.join(ann_dir, "annotations/instances_train2017.json")) as f:
    coco = json.load(f)

# Resolve the target class names to COCO category ids (unknown names are dropped).
target_classes = ["person", "backpack", "handbag", "suitcase", "cell phone", "laptop"]
category_id_map = {}
for cat in coco["categories"]:
    category_id_map[cat["name"]] = cat["id"]
target_ids = [category_id_map[name] for name in target_classes if name in category_id_map]

# YOLO class index = position of the COCO category id inside target_ids.
target_id_to_index = dict(zip(target_ids, range(len(target_ids))))

# Group the annotations of the target classes by the image they belong to.
ann_map = defaultdict(list)
for ann in coco["annotations"]:
    if ann["category_id"] not in target_ids:
        continue
    ann_map[ann["image_id"]].append(ann)

# Metadata of every image that has at least one target annotation.
image_map = {img["id"]: img for img in coco["images"]}
target_images = [image_map[image_id] for image_id in ann_map.keys()]

# Write one label file per image. Each line is
# "<class> <x_center> <y_center> <width> <height>", computed from the COCO
# [x, y, w, h] bbox and divided by the image width/height.
for img in tqdm(target_images, desc="Converting annotations"):
    width = img["width"]
    height = img["height"]
    stem, _ext = os.path.splitext(img["file_name"])

    yolo_lines = []
    for ann in ann_map[img["id"]]:
        x, y, w, h = ann["bbox"]
        values = (
            (x + w / 2) / width,   # x_center
            (y + h / 2) / height,  # y_center
            w / width,             # normalised width
            h / height,            # normalised height
        )
        class_id = target_id_to_index[ann["category_id"]]
        yolo_lines.append(
            f"{class_id} {values[0]:.6f} {values[1]:.6f} {values[2]:.6f} {values[3]:.6f}"
        )

    # Label file shares the image's base name.
    with open(os.path.join(label_dir, stem + ".txt"), "w") as f:
        f.write("\n".join(yolo_lines))

print("COCO annotations successfully converted to YOLO format!")


Converting annotations: 100%|██████████| 67910/67910 [00:10<00:00, 6236.50it/s]

COCO annotations successfully converted to YOLO format!





In [3]:
# Imported here so the cell also runs on a fresh kernel instead of relying
# on an `os` import left over from an earlier cell.
import os

# Dataset root with "~" expanded. YAML has no notion of home-directory
# expansion, so a literal "~/intruder_dataset" would reach the consumer
# unexpanded.
# NOTE(review): YOLO-style loaders typically resolve non-absolute `path`
# values relative to their own datasets directory - confirm for the trainer
# actually used.
dataset_root = os.path.expanduser("~/intruder_dataset")
os.makedirs(dataset_root, exist_ok=True)

# Define the YAML content as a string.
# `nc` and `names` must list the classes in the same order as the class
# indices used in the label files.
yaml_content = f"""
path: {dataset_root}
train: images/train
val: images/val

nc: 6
names: [ "person", "backpack", "handbag", "suitcase", "cell phone", "laptop" ]
"""

# Set the path
yaml_path = os.path.expanduser("~/intruder_dataset/dataset.yaml")

# Write the content to the YAML file
with open(yaml_path, "w") as yaml_file:
    yaml_file.write(yaml_content)

print(f"YAML file created at: {yaml_path}")

YAML file created at: /root/intruder_dataset/dataset.yaml


In [1]:
import os
import random
import shutil

base_dir = os.path.expanduser('~/intruder_dataset')
img_dir = os.path.join(base_dir, 'images')
label_dir = os.path.join(base_dir, 'labels')

# Tunables for the split
SPLIT_SEED = 42        # fixed seed so the same images land in the same split on every run
TRAIN_FRACTION = 0.8   # 80% for training, the rest for validation

# Make train/val folders
for split in ['train', 'val']:
    os.makedirs(os.path.join(img_dir, split), exist_ok=True)
    os.makedirs(os.path.join(label_dir, split), exist_ok=True)


def _label_name(img_name):
    """Return the label file name for an image (same base name, .txt extension)."""
    return os.path.splitext(img_name)[0] + '.txt'


# List all images still sitting directly in images/ (already-split ones live
# in the train/ and val/ subfolders and are not listed). The names are sorted
# first because os.listdir returns them in arbitrary order; together with the
# seeded shuffle this makes the split reproducible.
images = sorted(f for f in os.listdir(img_dir) if f.endswith('.jpg'))
random.Random(SPLIT_SEED).shuffle(images)

# Check every label BEFORE moving anything: a missing label would otherwise
# abort the loop halfway and leave images moved without their labels.
missing_labels = [
    img for img in images
    if not os.path.isfile(os.path.join(label_dir, _label_name(img)))
]
if missing_labels:
    raise FileNotFoundError(
        f"{len(missing_labels)} image(s) have no label file in {label_dir} "
        f"(e.g. {missing_labels[0]}); nothing was moved."
    )

split_idx = int(TRAIN_FRACTION * len(images))
train_imgs = images[:split_idx]
val_imgs = images[split_idx:]

# Move each image together with its label into the split's subfolder
for split, split_imgs in (('train', train_imgs), ('val', val_imgs)):
    for img in split_imgs:
        label = _label_name(img)
        shutil.move(os.path.join(img_dir, img), os.path.join(img_dir, split, img))
        shutil.move(os.path.join(label_dir, label), os.path.join(label_dir, split, label))

print("Images and labels split into train/val folders.")

Images and labels split into train/val folders.


In [2]:
import os
import json
from collections import defaultdict

# Setup: dataset locations and the output folder for the regenerated labels
base_dir = os.path.expanduser("~/intruder_dataset")
img_dir = os.path.join(base_dir, "images")
ann_dir = os.path.join(base_dir, "annotations")
label_dir = os.path.join(base_dir, "modified_annotation")
os.makedirs(label_dir, exist_ok=True)

# COCO category id -> YOLO class index
coco_to_yolo_index = {
    1: 0,
    27: 1,
    31: 2,
    33: 3,
    77: 4,
    73: 5,
}

# Load annotations
print("Loading COCO annotations...")
annotation_file = os.path.join(ann_dir, "annotations/instances_train2017.json")
with open(annotation_file) as f:
    coco = json.load(f)

# Keep only annotations of the mapped categories, grouped by image id
print("Filtering annotations by target category IDs...")
target_ids = set(coco_to_yolo_index.keys())
ann_map = defaultdict(list)
for ann in coco["annotations"]:
    if ann["category_id"] not in target_ids:
        continue
    ann_map[ann["image_id"]].append(ann)

print(f"Matched annotations for {len(ann_map)} images.")

# Look up the metadata record of every image that has matching annotations
image_map = {}
for img in coco["images"]:
    image_map[img["id"]] = img
target_images = [image_map[image_id] for image_id in ann_map]


def _yolo_row(ann, width, height):
    """Format one COCO annotation as a YOLO label line.

    The COCO bbox is [x, y, w, h]; the line holds the mapped class index
    followed by box centre and size, each divided by the image width/height.
    """
    x, y, w, h = ann["bbox"]
    class_id = coco_to_yolo_index[ann["category_id"]]
    x_center = (x + w / 2) / width
    y_center = (y + h / 2) / height
    w_norm = w / width
    h_norm = h / height
    return f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}"


# Convert and write one .txt file per image
print("Converting to YOLO format and writing .txt files...")
file_count = 0
ann_count = 0

for img in target_images:
    width, height = img["width"], img["height"]
    lines = [_yolo_row(ann, width, height) for ann in ann_map[img["id"]]]
    ann_count += len(lines)

    # Output file shares the image's base name
    out_name = os.path.splitext(img["file_name"])[0] + ".txt"
    with open(os.path.join(label_dir, out_name), "w") as f_out:
        f_out.write("\n".join(lines))
    file_count += 1

print(f"\n Done! {file_count} YOLO annotation files created with {ann_count} total annotations.")
print(f"Saved to: {label_dir}")


Loading COCO annotations...
Filtering annotations by target category IDs...
Matched annotations for 67910 images.
Converting to YOLO format and writing .txt files...

 Done! 67910 YOLO annotation files created with 301135 total annotations.
Saved to: /root/intruder_dataset/modified_annotation


In [6]:
# Show the notebook's current working directory.
!pwd
# List the contents of the dataset root to check which folders exist there.
!ls -l ~/intruder_dataset/

/Project
total 2216
drwxr-xr-x 3 root root    4096 May  4 16:37 annotations
drwxr-xr-x 2 root root    4096 May  4 16:38 images
drwxr-xr-x 2 root root 2256896 May  4 18:09 modified_annotations


In [3]:
import shutil
import os

# Move the regenerated label folder from the dataset root into the
# notebook's working directory.
src = os.path.expanduser("~/intruder_dataset/modified_annotation")
dst = os.path.join(os.getcwd(), "modified_annotation")

if os.path.exists(dst):
    # shutil.move() into an existing directory would place `src` INSIDE it
    # (dst/modified_annotation), so a re-run must not move again.
    print(f"Destination already exists, nothing moved: {dst}")
elif not os.path.exists(src):
    raise FileNotFoundError(f"Source folder not found: {src}")
else:
    shutil.move(src, dst)
    print(f"Moved to Jupyter folder: {dst}")

Moved to Jupyter folder: /Project/modified_annotation


In [3]:
import os

# Folder holding the YOLO .txt annotation files to inspect
label_dir = os.path.expanduser("/Project/modified_annotation")  # or your actual folder

# Distinct class ids seen in the first column of any label line
class_ids_found = set()

# Walk every .txt file and collect the leading integer of each non-blank line
for file in os.listdir(label_dir):
    if not file.endswith(".txt"):
        continue
    with open(os.path.join(label_dir, file)) as f:
        for line in f:
            fields = line.split()
            if fields:
                class_ids_found.add(int(fields[0]))

# Stays at -1 when no label line was found at all
max_class_id = max(class_ids_found, default=-1)

print("✅ Class IDs found in annotation files:", sorted(class_ids_found))
print("🔢 Maximum class ID:", max_class_id)


✅ Class IDs found in annotation files: [0, 1, 2, 3, 4, 5]
🔢 Maximum class ID: 5


In [6]:
import os
import shutil

# Paths
# NOTE(review): this points at /intruder_dataset (filesystem root), while the
# other cells use ~/intruder_dataset - confirm which location is intended.
base_dir = os.path.expanduser('/intruder_dataset')
img_dir = os.path.join(base_dir, 'images')
label_dir = os.path.join(base_dir, 'labels')
src_ann_dir = '/Project/modified_annotation'  # source of your fixed annotations
splits = ['train', 'val']

# 0. Validate the inputs BEFORE deleting anything. Step 1 wipes the existing
#    labels, so a missing source or image folder must stop the cell first
#    rather than leave the dataset without labels.
required_dirs = [src_ann_dir] + [os.path.join(img_dir, split) for split in splits]
missing_dirs = [d for d in required_dirs if not os.path.isdir(d)]
if missing_dirs:
    raise FileNotFoundError(
        f"Required folder(s) not found, existing labels left untouched: {missing_dirs}"
    )

# 1. Clear existing label folders (files only; subfolders are left alone)
for split in splits:
    split_dir = os.path.join(label_dir, split)
    if os.path.exists(split_dir):
        for f in os.listdir(split_dir):
            f_path = os.path.join(split_dir, f)
            if os.path.isfile(f_path):
                os.remove(f_path)
    else:
        os.makedirs(split_dir)

# 2. Copy annotations based on image names: each .jpg in images/<split>
#    gets the same-named .txt from the source folder into labels/<split>
copied_count = 0
images_without_label = []
for split in splits:
    img_split_dir = os.path.join(img_dir, split)
    label_split_dir = os.path.join(label_dir, split)

    for fname in os.listdir(img_split_dir):
        if fname.endswith('.jpg'):
            label_file = os.path.splitext(fname)[0] + '.txt'
            src_label_path = os.path.join(src_ann_dir, label_file)
            dst_label_path = os.path.join(label_split_dir, label_file)

            if os.path.exists(src_label_path):
                shutil.copy(src_label_path, dst_label_path)
                copied_count += 1
            else:
                # Recorded so the gap is reported instead of passing silently
                images_without_label.append(os.path.join(split, fname))

if images_without_label:
    print(f"Warning: {len(images_without_label)} image(s) had no annotation file "
          f"in {src_ann_dir} (e.g. {images_without_label[0]}).")
print(f"Copied {copied_count} annotation file(s).")
print("✅ Annotations split and copied based on image folders.")


✅ Annotations split and copied based on image folders.
