In [1]:
from transformers import ViTImageProcessor, ViTModel
from PIL import Image
import pickle
import os
import torch


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Using GPU.")
else:
    print("CUDA is not available. Using CPU.")

CUDA is not available. Using CPU.


In [2]:
images = {}

COCO = "/Users/mihnea/_workspace_/_uni/workshop/BOLD5000/Scene_Stimuli/Presented_Stimuli/COCO"  #directory with the COCO presented stimuli
ImageNet = "/Users/mihnea/_workspace_/_uni/workshop/BOLD5000/Scene_Stimuli/Presented_Stimuli/ImageNet" #directory with the ImageNet presented stimuli
Scene = "/Users/mihnea/_workspace_/_uni/workshop/BOLD5000/Scene_Stimuli/Presented_Stimuli/Scene"   #directory with the Scene presented stimuli

paths = [COCO, ImageNet, Scene]

for path in paths:
    for filename in os.listdir(path):
        img_path = os.path.join(path, filename)

        # Use PIL to open and load the image
        img = Image.open(img_path)

        # Optionally convert the image to RGB if it's not already
        img = img.convert("RGB")

        # Store the image in the dictionary with its filename as key
        images[filename] = img
    

In [3]:
for image in images:
    print (images[image])

<PIL.Image.Image image mode=RGB size=375x375 at 0x16C5C5E50>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16CF0C1D0>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DCE59D0>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2A4D0>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2B110>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2B250>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16CEA3490>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16C29B590>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2B5D0>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2B6D0>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2B810>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2B950>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2BA90>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2BBD0>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2BD10>
<PIL.Image.Image image mode=RGB size=375x375 at 0x16DE2BE50>
<PIL.Image.Image image m

In [4]:
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')



In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Using GPU")
else:
    print("Using CPU")
model.to(device)  # Move the model to GPU if available

Using CPU


ViTModel(
  (embeddings): ViTEmbeddings(
    (patch_embeddings): ViTPatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): ViTEncoder(
    (layer): ModuleList(
      (0-11): 12 x ViTLayer(
        (attention): ViTAttention(
          (attention): ViTSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): ViTSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (intermediate): ViTIntermediate(
          (dense): Linear(in_features=768, out_features=3072, bias=True)
          (intermediate_act_fn): GELUActivation(

In [5]:
one_dim_embeddings = {}
max_length = 500

for image_filename in images:
    if len(one_dim_embeddings) == max_length:
        break
    else:
        # Process images and prepare inputs using the ViT image processor
        inputs = processor(images=images[image_filename], return_tensors="pt")
        inputs = inputs.to(device)  # Move input tensors to GPU if available

        # Compute model outputs
        outputs = model(**inputs)

        # Store embeddings, making sure to detach and move them to CPU for storage
        one_dim_embeddings[image_filename] = outputs.last_hidden_state.detach().cpu().reshape(-1)


In [6]:
#saves the one dimensional embeddings as a .pkl file in the current working directory
embeddings_base_folder = "/Users/mihnea/_workspace_/_uni/workshop/BOLD5000/embeddings"  # Base folder 
dataset_subfolders = ["COCO", "ImageNet", "Scene"]  # Names of dataset subfolders

# Create folders if they don't exist
for subfolder in dataset_subfolders:
    folder_path = os.path.join(embeddings_base_folder, subfolder)
    os.makedirs(folder_path, exist_ok=True)

# Save embeddings with proper naming and absolute paths
for image, embedding in one_dim_embeddings.items():
    # Determine the dataset subfolder based on the image name
    for subfolder in dataset_subfolders:
        if subfolder in image:  
            break  # Exit the loop once the subfolder is found
    embedding_filename = "embedding_" + image + ".pkl"
    embedding_path = os.path.join(embeddings_base_folder, subfolder, embedding_filename)
    with open(embedding_path, 'wb') as emb:
        pickle.dump(embedding, emb)
