In [None]:
# Mount Google Drive into the Colab runtime so that its files become
# reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import glob
import numpy as np
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import h5py  # Import h5py for handling HDF5 files

# SPECIFY PATH TO THE DATASET
# Root directory of the dataset. main() treats every sub-directory of this
# path as one category and reads the *.jpg files located directly inside it.
path_to_dataset = 'H:/My Drive/EE 243/Assignments/Assignment 3/tiny-UCF101/tiny-UCF101'

def main():
    """Extract ResNet-50 features for every dataset image and save them to HDF5.

    Each sub-directory of ``path_to_dataset`` is treated as one category and
    every ``*.jpg`` file directly inside it as one sample. Each image is
    resized, center-cropped, normalized and passed through an ImageNet
    pretrained ResNet-50 whose final fully connected layer is replaced by an
    identity, so the pooled feature vector is returned instead of class
    scores.

    The results are written to ``ucf101dataset_extraction.h5`` in the current
    working directory as two datasets:

    * ``feature`` -- one feature vector per successfully loaded image.
    * ``label``   -- the index of the image's category in the sorted list of
      category directories.

    Returns:
        None. The function returns early (without writing a file) when the
        dataset path does not exist or when no image could be processed.
    """
    # Initialize feature and label lists
    feature = []
    label = []

    # Check if the dataset path exists
    if not os.path.exists(path_to_dataset):
        print(f"Dataset path {path_to_dataset} does not exist.")
        return

    # Keep only sub-directories as categories. Filtering *before* numbering
    # guarantees contiguous labels 0..C-1 even when the dataset folder holds
    # stray files (e.g. desktop.ini, .DS_Store) that would otherwise consume
    # an index and leave gaps in the label range.
    categories = []
    for entry in sorted(os.listdir(path_to_dataset)):
        entry_path = os.path.join(path_to_dataset, entry)
        if os.path.isdir(entry_path):
            categories.append(entry)
        else:
            print(f'Skipping non-directory category: {entry_path}')
    print(f'Found categories: {categories}')

    # Define the transformation pipeline
    transform_test = transforms.Compose([
        transforms.Resize(256),  # Resize the shorter side to 256 pixels
        transforms.CenterCrop(224),  # Crop a central square patch of 224x224 pixels
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalization for ResNet
    ])

    # Load the pretrained ResNet50 model. Newer torchvision releases deprecate
    # ``pretrained=True`` in favour of the ``weights`` argument; IMAGENET1K_V1
    # is the checkpoint that ``pretrained=True`` refers to, so the extracted
    # features stay the same. Older releases lack the enum, hence the fallback.
    resnet50_weights = getattr(models, 'ResNet50_Weights', None)
    if resnet50_weights is not None:
        extractor = models.resnet50(weights=resnet50_weights.IMAGENET1K_V1)
    else:
        extractor = models.resnet50(pretrained=True)
    extractor.fc = torch.nn.Identity()  # Modify the model to output 2048-dimensional features
    extractor.eval()  # Set the model to evaluation mode
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    extractor.to(device)  # Move model to GPU if available
    print(f'Model loaded and moved to device: {device}')

    # Iterate over each category; ``i`` is the class label stored for its images
    for i, c in enumerate(categories):
        category_path = os.path.join(path_to_dataset, c)

        path_to_images = sorted(glob.glob(os.path.join(category_path, '*.jpg')))
        print(f'Processing category: {c}, Number of images: {len(path_to_images)}')

        if not path_to_images:
            print(f'No images found in category directory: {category_path}')
            continue

        # Process each image in the category
        for p in path_to_images:
            try:
                # The context manager closes the underlying file handle as
                # soon as the pixel data has been decoded; ``convert`` returns
                # an independent in-memory image.
                with Image.open(p) as raw_image:
                    img = raw_image.convert('RGB')  # Load and convert to RGB
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not abort
                # the whole extraction run.
                print(f'Error loading image {p}: {e}')
                continue

            input_tensor = transform_test(img)  # Apply transformations
            input_tensor = input_tensor.unsqueeze(0)  # Add batch dimension

            # Move tensor to the appropriate device
            input_tensor = input_tensor.to(device)

            with torch.no_grad():  # Disable gradient computation
                output = extractor(input_tensor)  # Get the model output

            # Remove batch dimension and move data to CPU
            image_feature = output.squeeze(0).cpu().numpy()
            feature.append(image_feature)
            label.append(i)  # Append category index

    # Do not overwrite a previously extracted file with empty datasets.
    if not feature:
        print('No features were extracted; nothing was saved.')
        return

    # Save the features and labels to an HDF5 file
    with h5py.File('ucf101dataset_extraction.h5', 'w') as h5f:
        h5f.create_dataset('feature', data=np.array(feature))
        h5f.create_dataset('label', data=np.array(label))
    print('Features and labels saved to ucf101dataset_extraction.h5')

# Run the feature extraction only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


Found categories: ['ApplyEyeMakeup', 'ApplyLipstick', 'Archery', 'BabyCrawling', 'BalanceBeam', 'BandMarching', 'BaseballPitch', 'Basketball', 'BasketballDunk', 'BenchPress', 'Biking', 'Billiards', 'BlowDryHair', 'BlowingCandles', 'BodyWeightSquats', 'Bowling', 'BoxingPunchingBag', 'BoxingSpeedBag', 'BreastStroke', 'BrushingTeeth', 'CleanAndJerk', 'CliffDiving', 'CricketBowling', 'CricketShot', 'CuttingInKitchen', 'Diving', 'Drumming', 'Fencing', 'FieldHockeyPenalty', 'FloorGymnastics', 'FrisbeeCatch', 'FrontCrawl', 'GolfSwing', 'Haircut', 'HammerThrow', 'Hammering', 'HandStandPushups', 'HandstandWalking', 'HeadMassage', 'HighJump', 'HorseRace', 'HorseRiding', 'HulaHoop', 'IceDancing', 'JavelinThrow', 'JugglingBalls', 'JumpRope', 'JumpingJack', 'Kayaking', 'Knitting', 'LongJump', 'Lunges', 'MilitaryParade', 'Mixing', 'MoppingFloor', 'Nunchucks', 'ParallelBars', 'PizzaTossing', 'PlayingCello', 'PlayingDaf', 'PlayingDhol', 'PlayingFlute', 'PlayingGuitar', 'PlayingPiano', 'PlayingSitar', 