In [1]:
# Dependencies
import numpy as np
import matplotlib.pyplot as plt
import os
import requests
import zipfile
from pycocotools.coco import COCO
import os
import shutil

In [7]:
# Download locations of the COCO 2017 archives on images.cocodataset.org.
# Image archives live under /zips/, the annotation archive under /annotations/.
train_url = "http://images.cocodataset.org/zips/train2017.zip"
val_url = "http://images.cocodataset.org/zips/val2017.zip"
test_url = "http://images.cocodataset.org/zips/test2017.zip"  # test images; no annotation archive is listed for this split here
annotations_url = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"

In [8]:
# Local file names the downloaded archives are written to
# (relative paths, so they are resolved against the current working directory).
train_filename = "coco_train2017.zip"
val_filename = "coco_val2017.zip"
test_filename = "coco_test2017.zip"  # archive holding the test images
annotations_filename = "coco_ann2017.zip"

In [9]:
# Directory names for the extracted datasets (relative to the working directory).
# NOTE(review): the annotation and image paths used further down in this
# notebook point under ./coco_datasets/ rather than at these directories,
# and `annotations_dir` is rebound there — keep the two in sync.
train_dir = "train2017"
val_dir = "val2017"
test_dir = "test2017"  # directory for the test images
annotations_dir = "annotations"

In [10]:
# Ensure each dataset directory exists; exist_ok keeps the cell re-runnable
for _dataset_dir in (train_dir, val_dir, test_dir, annotations_dir):
    os.makedirs(_dataset_dir, exist_ok=True)

In [11]:
# Function to download and extract datasets
def download_and_extract(url, filename, extract_dir, timeout=60):
    """Download a zip archive and unpack it.

    The response body is streamed to ``filename`` in 8 KiB chunks, then the
    resulting file is opened as a zip archive and all members are extracted
    into ``extract_dir``. Progress messages are printed along the way.

    Args:
        url: Address of the zip archive to download.
        filename: Local path the archive is written to; an existing file at
            that path is overwritten.
        extract_dir: Directory the archive members are extracted into.
        timeout: Value forwarded to ``requests.get`` as ``timeout`` (seconds).
            Pass ``None`` to wait indefinitely, which was the previous behaviour.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    print(f"Downloading {filename}...")
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before touching the disk: otherwise an error page
        # would be saved under `filename` and only surface later, less
        # clearly, as a BadZipFile during extraction.
        response.raise_for_status()
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:  # skip empty chunks
                    f.write(chunk)
    print(f"{filename} downloaded.")

    print(f"Extracting {filename}...")
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)
    print(f"{filename} extracted.")

In [12]:
# All archives are unpacked into one shared root. The annotation and image
# cells further down read from ./coco_datasets/annotations,
# ./coco_datasets/train2017 and ./coco_datasets/val2017, so extracting into
# separate per-split directories in the working directory (as this cell did
# before) left those paths missing on a fresh Restart & Run All.
# NOTE(review): assumes each official archive unpacks into a top-level folder
# named after its content (train2017/, val2017/, test2017/, annotations/) — confirm.
coco_root = "coco_datasets"

# Download and extract train dataset
download_and_extract(train_url, train_filename, coco_root)

# Download and extract validation dataset
download_and_extract(val_url, val_filename, coco_root)

# Download and extract test dataset
download_and_extract(test_url, test_filename, coco_root)

# Download and extract annotations
download_and_extract(annotations_url, annotations_filename, coco_root)


Downloading coco_train2017.zip...
coco_train2017.zip downloaded.
Extracting coco_train2017.zip...
coco_train2017.zip extracted.
Downloading coco_val2017.zip...
coco_val2017.zip downloaded.
Extracting coco_val2017.zip...
coco_val2017.zip extracted.
Downloading coco_test2017.zip...
coco_test2017.zip downloaded.
Extracting coco_test2017.zip...
coco_test2017.zip extracted.
Downloading coco_ann2017.zip...
coco_ann2017.zip downloaded.
Extracting coco_ann2017.zip...
coco_ann2017.zip extracted.


In [14]:
# Root folder the rest of the notebook reads the COCO data from.
data_dir = './coco_datasets/'  # Assuming 'coco_datasets' is in the current directory
# NOTE: this rebinds `annotations_dir`, which an earlier cell set to "annotations";
# from here on the name refers to the folder inside `data_dir`.
annotations_dir = os.path.join(data_dir, 'annotations')

In [15]:
# Absolute paths of the instance-annotation JSON files (train first, then val)
train_ann_file, val_ann_file = (
    os.path.abspath(os.path.join(annotations_dir, ann_name))
    for ann_name in ('instances_train2017.json', 'instances_val2017.json')
)

In [16]:
# Show the resolved annotation paths so a wrong working directory is easy to spot
for _label, _ann_path in (
    ("Train Annotation File Path:", train_ann_file),
    ("Validation Annotation File Path:", val_ann_file),
):
    print(_label, _ann_path)

Train Annotation File Path: C:\Users\Pontus\Desktop\DL_Projects\DL_Car_Detection\coco_datasets\annotations\instances_train2017.json
Validation Annotation File Path: C:\Users\Pontus\Desktop\DL_Projects\DL_Car_Detection\coco_datasets\annotations\instances_val2017.json


In [18]:
# Build one COCO API object per split from its annotation file
# (each constructor call loads the annotations and creates an index)
train_coco = COCO(annotation_file=train_ann_file)
val_coco = COCO(annotation_file=val_ann_file)

loading annotations into memory...
Done (t=18.10s)
creating index...
index created!
loading annotations into memory...
Done (t=0.78s)
creating index...
index created!


In [21]:
# Select the images of interest: resolve the category names to ids once
# (via the train annotations) and query both splits with those ids.
# NOTE(review): with more than one name in `categories`, getImgIds may return
# only images containing *all* of them — confirm before extending the list.
categories = ['car']
cat_ids = train_coco.getCatIds(catNms=categories)
img_ids_train, img_ids_val = (
    coco_split.getImgIds(catIds=cat_ids) for coco_split in (train_coco, val_coco)
)

In [28]:
# Destination directories the filtered images are copied into
train_filtered_dir = './filtered_images/train_fil/'
val_filtered_dir = './filtered_images/val_fil/'
# Source directories holding the full image sets the copies are taken from
train_image_dir = './coco_datasets/train2017/'
val_image_dir = './coco_datasets/val2017/'

In [29]:
# Function to copy filtered images to new directory
def copy_filtered_images(coco, img_ids, image_dir, output_dir):
    """Copy the image files belonging to ``img_ids`` into ``output_dir``.

    Args:
        coco: Object whose ``loadImgs(img_id)`` returns a sequence whose first
            element is a mapping with a ``'file_name'`` entry.
        img_ids: Iterable of image ids to copy.
        image_dir: Directory the source image files are read from.
        output_dir: Destination directory; created if it does not exist.

    Errors raised by ``shutil.copy`` (for example when a source file is
    missing) are not caught and propagate to the caller.
    """
    os.makedirs(output_dir, exist_ok=True)
    # Lazily resolve each id to its file name so lookup and copy stay interleaved
    file_names = (coco.loadImgs(image_id)[0]['file_name'] for image_id in img_ids)
    for file_name in file_names:
        source_path = os.path.join(image_dir, file_name)
        shutil.copy(source_path, output_dir)

In [30]:
# Copy the selected images for each split: train first, then validation
for _coco, _img_ids, _src_dir, _dst_dir in (
    (train_coco, img_ids_train, train_image_dir, train_filtered_dir),
    (val_coco, img_ids_val, val_image_dir, val_filtered_dir),
):
    copy_filtered_images(_coco, _img_ids, _src_dir, _dst_dir)

print("Filtered images copied successfully.")

Filtered images copied successfully.
