In [31]:
import torch
from torchvision.models import resnet50
import numpy as np
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm  # Import tqdm for progress bar

In [32]:
# Load pre-trained ResNet50
# Instantiates torchvision's ResNet-50 architecture with pre-trained weights.
# NOTE(review): `resnet` is not referenced by any of the cells visible here —
# confirm a later cell uses it; otherwise this load is unnecessary work.
# NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favor of the
# `weights=` argument; consider migrating once the installed version is known.
resnet = resnet50(pretrained=True)

In [33]:
def process_image(image_path):
    """Read a saved NumPy array from disk and return it as a PyTorch tensor.

    Args:
        image_path: Path to a file that ``np.load`` can read.

    Returns:
        A tensor built from the loaded array via ``torch.tensor``.
    """
    return torch.tensor(np.load(image_path))


In [34]:
# Directories holding the preprocessed input arrays and the tensors written
# from them. The defaults are the original machine-specific locations; set
# PREPROCESSED_IMAGES_DIR / TOKENIZED_IMAGES_DIR in the environment to run the
# notebook on another machine without editing this cell.
preprocessed_images_dir = os.environ.get(
    "PREPROCESSED_IMAGES_DIR",
    "/home/sklaptop/Downloads/OneDrive_2023-12-18/P25 - Visual Search/Split_Folder_5",
)
tokenized_images_dir = os.environ.get(
    "TOKENIZED_IMAGES_DIR",
    "/home/sklaptop/Downloads/OneDrive_2023-12-18/P25 - Visual Search/tokenized_images",
)

In [35]:
# Collect the name of every entry in the input directory that ends in ".npy".
# These are bare file names, not full paths.
image_files = [
    entry_name
    for entry_name in os.listdir(preprocessed_images_dir)
    if entry_name.endswith('.npy')
]

In [36]:
# Convert every preprocessed array to a tensor on a thread pool and save each
# result into the output directory, reporting progress with tqdm.

# torch.save does not create missing parent directories, so make sure the
# output directory exists before the first result is written.
os.makedirs(tokenized_images_dir, exist_ok=True)

with ThreadPoolExecutor() as executor:
    # Map each submitted future back to the file name it was created for.
    future_to_image = {
        executor.submit(process_image, os.path.join(preprocessed_images_dir, file)): file
        for file in image_files
    }
    for future in tqdm(as_completed(future_to_image), total=len(image_files), desc="Tokenizing Images"):
        # Pop instead of indexing so the mapping drops its reference to the
        # finished future (and the tensor it holds) as soon as it is consumed,
        # rather than keeping every loaded tensor alive for the whole run.
        # Note: despite its name, `image_path` is the bare file name.
        image_path = future_to_image.pop(future)
        # Re-raises here any exception that was raised inside process_image.
        tokenized_image = future.result()

        # Remove the input naming marker from the file name.
        # Modify this line according to the prefix used.
        filename_without_prefix = image_path.replace('preprocessed_single_image_', '')
        # The output keeps the source ".npy" suffix even though it is written
        # by torch.save, so it must be read back with torch.load, not np.load.
        tokenized_file_path = os.path.join(tokenized_images_dir, f"tokenized_images_{filename_without_prefix}")
        torch.save(tokenized_image, tokenized_file_path)

Tokenizing Images: 100%|██████████| 5896/5896 [00:08<00:00, 722.82it/s]
