# Import Libraries

In [1]:
from pycocotools.coco import COCO
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import os
import json
import tqdm  # Progress bar


In [2]:
# Sanity check: display whether PyTorch reports a usable CUDA device.
torch.cuda.is_available()

True

In [3]:
# Select the compute device once; later cells move the model and image tensors onto it.
# Falls back to the CPU when CUDA is not available.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


# Dataset Preparation

In [4]:
# Function to initialize the COCO API
def initialize_coco(data_dir,data_type):
    """Build a COCO API handle for the captions annotations of one split.

    Args:
        data_dir: Directory that contains the ``annotations`` sub-folder.
        data_type: Split name inserted into the annotation file name
            (``captions_<data_type>.json``).

    Returns:
        The ``COCO`` object constructed from that annotation file.
    """
    captions_filename = f'captions_{data_type}.json'
    annotations_path = os.path.join(data_dir, 'annotations', captions_filename)
    return COCO(annotations_path)


In [5]:
# Define the image preprocessing transformations
transform = transforms.Compose([
    # 1) Force every image to the fixed 224x224 resolution fed to the ResNet backbone.
    transforms.Resize(size=(224, 224)),
    # 2) Convert the PIL image to a tensor.
    transforms.ToTensor(),
    # 3) Normalize each channel with the ImageNet mean / standard deviation.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Function to load and preprocess the image
def load_image(image_path):
    """Load an image from disk and turn it into a model-ready batch of one.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The image after ``transform`` has been applied, with a leading batch
        dimension added, placed on the module-level ``device``.
    """
    # Open inside a context manager so the underlying file handle is released
    # deterministically; convert() returns an independent, fully loaded image
    # that stays valid after the file is closed.
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert('RGB')
    return transform(rgb_image).unsqueeze(0).to(device)  # unsqueeze adds the batch dimension

# Feature Extraction (CNN)

In [6]:
# Define the CNN model for feature extraction (e.g., ResNet-50)
class CNNFeatureExtractor(nn.Module):
    """ResNet-50 backbone with its final fully connected layer removed.

    The remaining layers (up to and including the global average pool) are
    wrapped in an ``nn.Sequential`` and moved to the module-level ``device``.
    """

    def __init__(self, weights='ResNet50_Weights.DEFAULT'):
        """Build the truncated backbone.

        Args:
            weights: Weights specification forwarded unchanged to
                ``models.resnet50``.
        """
        super().__init__()
        backbone = models.resnet50(weights=weights)
        # Drop the last child (the classification head) and keep everything before it.
        kept_layers = list(backbone.children())[:-1]
        self.resnet = nn.Sequential(*kept_layers)
        self.resnet = self.resnet.to(device)

    def forward(self, images):
        """Return one flattened feature vector per input image.

        Args:
            images: Batch of image tensors; moved to ``device`` before use.

        Returns:
            Tensor of shape ``(batch_size, -1)`` holding the flattened
            backbone output for each image.
        """
        pooled = self.resnet(images.to(device))  # (batch_size, 2048, 1, 1)
        batch_size = pooled.size(0)
        return pooled.view(batch_size, -1)  # collapse the trailing dimensions



In [7]:
# Function to extract features
def extract_coco_features(coco,data_dir,data_type):
    """Extract a CNN feature vector for every image of a COCO split.

    Args:
        coco: COCO API handle; must provide ``getImgIds()`` and ``loadImgs()``.
        data_dir: Root directory; images are read from
            ``<data_dir>/<data_type>/<file_name>``.
        data_type: Split name. Used both as the image sub-folder and as the
            path component of the URL that keys each result.

    Returns:
        dict mapping ``http://images.cocodataset.org/<data_type>/<file_name>``
        to that image's feature vector as a plain list of floats
        (JSON-serializable).
    """
    # Build the extractor once and switch it to evaluation mode.
    model = CNNFeatureExtractor()
    model.eval()

    all_features = {}
    img_ids = coco.getImgIds()

    # Gradients are never needed here, so disable them once for the whole loop
    # instead of re-entering the context on every iteration.
    with torch.no_grad():
        for img_id in tqdm.tqdm(img_ids, desc="Extracting features"):
            file_name = coco.loadImgs(img_id)[0]['file_name']

            # The URL is only used as a dictionary key; pixels are read from local disk.
            image_url = f"http://images.cocodataset.org/{data_type}/{file_name}"
            image_path = os.path.join(data_dir, data_type, file_name)

            # Reuse the shared loader rather than duplicating its open/transform steps.
            image = load_image(image_path)
            features = model(image)

            # Drop the batch dimension of 1 and convert to a list for serialization.
            all_features[image_url] = features.squeeze(0).tolist()

    return all_features


# Save Extracted Features

In [8]:
# function to save and store features as a JSON file
def save_coco_features(output_json,data_dir,features):
    """Write extracted features to ``<data_dir>/extracted_features/<output_json>``.

    Args:
        output_json: File name of the JSON file to write.
        data_dir: Root directory under which ``extracted_features`` lives.
        features: JSON-serializable object to dump (here, a dict of
            feature lists).

    Returns:
        None. Prints the path of the written file.
    """
    output_dir = os.path.join(data_dir, 'extracted_features')
    # Create the target folder if needed; otherwise open() raises
    # FileNotFoundError and the (expensive) extraction result is lost.
    os.makedirs(output_dir, exist_ok=True)

    output_location = os.path.join(output_dir, output_json)
    with open(output_location, 'w') as f:
        json.dump(features, f)

    print(f"Features saved to {output_location}")

# Execute Feature Extractions

### Train Set

In [9]:
# Training split: locate the data, extract features, then persist them.
data_dir = os.path.join('data', 'COCO')
data_type = 'train2017'
output_json = 'train_image_features.json'

coco = initialize_coco(data_dir=data_dir, data_type=data_type)
image_features = extract_coco_features(
    coco=coco,
    data_dir=data_dir,
    data_type=data_type,
)
save_coco_features(
    output_json=output_json,
    data_dir=data_dir,
    features=image_features,
)


loading annotations into memory...
Done (t=1.35s)
creating index...
index created!


Extracting features: 100%|██████████| 118287/118287 [1:21:12<00:00, 24.28it/s]


Features saved to data\COCO\extracted_features\train_image_features.json


### Validation Set

In [10]:
# Validation split: same pipeline as the training split, with its own output file.
data_dir = os.path.join('data', 'COCO')
data_type = 'val2017'
output_json = 'val_image_features.json'

coco = initialize_coco(data_dir=data_dir, data_type=data_type)
image_features = extract_coco_features(
    coco=coco,
    data_dir=data_dir,
    data_type=data_type,
)
save_coco_features(
    output_json=output_json,
    data_dir=data_dir,
    features=image_features,
)

loading annotations into memory...
Done (t=0.14s)
creating index...
index created!


Extracting features: 100%|██████████| 5000/5000 [02:50<00:00, 29.28it/s]


Features saved to data\COCO\extracted_features\val_image_features.json
