In [1]:
import torch
import clip 
from PIL import Image

##### Loading CLIP-RN50 model

In [2]:
# Load the CLIP "RN50" model together with its matching image preprocessing
# transform; run on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("RN50", device=device)


# Re-assign the visual attention-pool layer after moving it to `device`.
# NOTE(review): clip.load was already called with device=device, so this is
# presumably a no-op -- confirm and drop if so.
model.visual.attnpool = model.visual.attnpool.to(device)

##### Defining the function for extracting the model representations

In [3]:
# Function to get the latent visual representation for a single image
def get_latent_representation(image_path):
    """Encode one image with the CLIP image encoder and return unit-norm features.

    Parameters
    ----------
    image_path : str
        Path of an image file that PIL can open.

    Returns
    -------
    numpy.ndarray
        L2-normalised output of ``model.encode_image`` for a batch of one image
        (the leading axis has length 1).

    Notes
    -----
    Relies on the notebook-level globals ``model``, ``preprocess`` and ``device``.
    """
    # Context manager releases the file handle as soon as preprocessing is done.
    with Image.open(image_path) as image:
        image_input = preprocess(image).unsqueeze(0).to(device)  # add batch dimension
    with torch.no_grad():
        image_features = model.encode_image(image_input)
        # Normalise inside no_grad, consistent with the batch variant below.
        image_features /= image_features.norm(dim=-1, keepdim=True)
    return image_features.cpu().numpy()

In [4]:
from  os.path import join as pjoin
def get_batch_latent_representation(
        image_base_dir : str,
        image_path: list
):
    """Encode a list of images with the CLIP image encoder in one batch.

    Parameters
    ----------
    image_base_dir : str
        Directory that contains the image files.
    image_path : list
        File names (relative to ``image_base_dir``) of the images to encode.

    Returns
    -------
    numpy.ndarray
        One L2-normalised ``model.encode_image`` feature vector per image,
        in the order given by ``image_path``.

    Notes
    -----
    Relies on the notebook-level globals ``model``, ``preprocess`` and ``device``.
    All images are pushed through the model in a single forward pass.
    """
    # Preprocess every image; a distinct loop name avoids shadowing the
    # `image_path` parameter, and the context manager closes each file promptly.
    image_tensors = []
    for image_name in image_path:
        with Image.open(pjoin(image_base_dir, image_name)) as image:
            image_tensors.append(preprocess(image))
    image_batch = torch.stack(image_tensors).to(device)

    # Encode and L2-normalise without tracking gradients.
    with torch.no_grad():
        image_features = model.encode_image(image_batch)
        image_features /= image_features.norm(dim=-1, keepdim=True)
    return image_features.cpu().numpy()

In [None]:
# Capture the output of CLIP's visual attention-pool layer with a forward hook.
# The most recent output is kept in the module-level dict `attnpool_output`.
attnpool_output={}
def get_intermediate_output(module,input,output):
    """Forward hook: store the layer's latest output under the 'attnpool' key."""
    attnpool_output['attnpool']=output
# Register the hook on the attention-pool layer. The handle returned by
# register_forward_hook is not stored, so the hook cannot be removed through it.
attnpool_layer=model.visual.attnpool
attnpool_layer.register_forward_hook(get_intermediate_output)

In [6]:
#defining the function to extract the attention-pool representation
def extract_attnpool_repr(
    image_base_dir : str,
    image_path: list
):
    """Return the hooked attention-pool output for a list of images.

    Parameters
    ----------
    image_base_dir : str
        Directory that contains the image files.
    image_path : list
        File names (relative to ``image_base_dir``) of the images to encode.

    Returns
    -------
    numpy.ndarray
        Contents of ``attnpool_output['attnpool']`` after one forward pass of
        ``model.encode_image`` over the whole batch.

    Notes
    -----
    Relies on the notebook-level globals ``model``, ``preprocess``, ``device``
    and ``attnpool_output`` (filled by the forward hook registered earlier).
    """
    # Preprocess every image; a distinct loop name avoids shadowing the
    # `image_path` parameter, and the context manager closes each file promptly.
    image_tensors = []
    for image_name in image_path:
        with Image.open(pjoin(image_base_dir, image_name)) as image:
            image_tensors.append(preprocess(image))
    image_batch = torch.stack(image_tensors).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image_batch)
        # NOTE(review): this division is in place. If encode_image hands back
        # the very tensor the hook captured (presumably the case when attnpool
        # is the last visual layer -- confirm), the hooked tensor is normalised
        # too. Kept as-is so the returned values do not change.
        image_features /= image_features.norm(dim=-1, keepdim=True)
    # Output of the attention-pool layer as stored by the forward hook.
    return attnpool_output['attnpool'].cpu().numpy()




#### Loading the image data filenames

In [7]:
# Directory holding the stimulus images and the file-name index.
# NOTE(review): absolute, machine-specific path -- consider a configurable base dir.
image_dir='/DATA1/satwick22/Documents/fMRI/multimodal_concepts/images'
# Load the array of test-image file names.
import numpy as np
from os.path import join as pjoin
# allow_pickle=True lets np.load unpickle object arrays; only use on trusted files.
image_names=np.load(pjoin(image_dir,'perceptionTest-image_filenames.npy'),allow_pickle=True)
# Sub-directory that holds the test images themselves.
test_image_dir=pjoin(image_dir,'test')



In [8]:
#extract the attention pool representation
image_repr_attn=extract_attnpool_repr(test_image_dir,image_names)

In [9]:
img_file=pjoin(test_image_dir,image_names[0])

In [None]:
import torch
import clip
from PIL import Image

# Load the model and preprocessing function.
# NOTE: this rebinds `device`, `model` and `preprocess` from the earlier cells,
# so the hook registered on the previous `model` object no longer applies here.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("RN50", device=device)

# Dictionary to store the outputs of intermediate layers
intermediate_outputs = {}

# Function to save intermediate outputs.
# NOTE: redefines `get_intermediate_output`; this version writes to
# `intermediate_outputs` instead of `attnpool_output`.
def get_intermediate_output(module, input, output):
    """Forward hook: store the layer's latest output under the 'attnpool' key."""
    intermediate_outputs['attnpool'] = output

# Register the hook on the freshly loaded model's attention-pool layer.
attnpool_layer = model.visual.attnpool
attnpool_layer.register_forward_hook(get_intermediate_output)

# Function to get the attention-pool representation for a single image
def get_attnpool_representation(image_path):
    """Run one image through ``model.encode_image`` and return the hooked
    attention-pool output as a NumPy array (batch of one image)."""
    image = Image.open(image_path)
    image_input = preprocess(image).unsqueeze(0).to(device)  # Add batch dimension and move to device
    with torch.no_grad():
        # Return value is discarded; the forward hook captures the layer output.
        model.encode_image(image_input)
    return intermediate_outputs['attnpool'].cpu().numpy()

# Example usage (`img_file` is set in the preceding cell).
# NOTE(review): print() dumps the whole array; `.shape` is shown in the next cell.
attnpool_representation = get_attnpool_representation(img_file)
print(attnpool_representation)


In [None]:
attnpool_representation.shape

#### Getting latent representations of each image individually

In [12]:
#read one image
img_file=pjoin(test_image_dir,image_names[0])

In [None]:
latent_repr=get_latent_representation(img_file)
print(latent_repr.shape)

#### Getting the image representations for all test images

In [14]:
#send the list and get the latent representation
image_repr=get_batch_latent_representation(test_image_dir,image_names)

In [None]:
image_repr.shape

#### Define the function for saving the representations to disk

In [16]:
def save_representation(bidsrooot:str,
                        outdirname:str,
                        image_batch:np.ndarray):
    """Save an array of model representations as a ``.npy`` file under a BIDS root.

    The array is written to
    ``<bidsrooot>/derivatives/model-representations/<outdirname>/PerceptionTest-image_representation.npy``.
    Missing directories are created; an existing file is overwritten.

    Parameters
    ----------
    bidsrooot : str
        Root directory of the BIDS dataset (spelling kept for backward
        compatibility with keyword callers).
    outdirname : str
        Output sub-directory, may be nested (e.g. ``'resnet/avgpool'``). It is
        appended after ``derivatives/model-representations``, so it must not
        repeat that prefix.
    image_batch : np.ndarray
        Array of representations to store.
    """
    import os
    save_path_dir=pjoin(bidsrooot,'derivatives','model-representations',outdirname)
    # exist_ok=True replaces the racy "check, then create" sequence.
    os.makedirs(save_path_dir, exist_ok=True)
    out_file=pjoin(save_path_dir,'PerceptionTest-image_representation.npy')
    np.save(out_file,image_batch)

In [17]:
bidsroot='/DATA1/satwick22/Documents/fMRI/multimodal_concepts/generic_object_decoding_bids'
# save_representation already inserts 'derivatives/model-representations', so the
# sub-directory must not repeat that prefix (the other model sections pass e.g.
# 'virtex/avgpool'). NOTE(review): files written by earlier runs live under the
# doubled '.../model-representations/model-representations/CLIP/attention_pool'.
outdirname='CLIP/attention_pool'
save_representation(bidsroot,outdirname,image_repr_attn)

#### Virtex

In [None]:
import torch
model = torch.hub.load("kdexd/virtex", "resnet50", pretrained=True)

# This is a torchvision-like resnet50 model, with ``avgpool`` and ``fc``
# layers replaced with ``nn.Identity`` module.
image_batch = torch.randn(1, 3, 224, 224)  # batch tensor of one image.
features_batch = model(image_batch)  # shape: (1, 2048, 7, 7)

In [19]:
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image

In [20]:
#hook onto a layer (e.g. avgpool) and remember its latest output
def create_forward_hook(module):
    """Register a forward hook on ``module`` that records its most recent output.

    Parameters
    ----------
    module : torch.nn.Module
        Layer whose output should be captured.

    Returns
    -------
    callable
        The hook function. ``hook.output`` is ``None`` until the first forward
        pass and then holds the detached output of the latest one.
        ``hook.handle`` is the handle returned by ``register_forward_hook``;
        call ``hook.handle.remove()`` to detach the hook again.
    """
    def hook(module, input, output):
        hook.output = output.detach()
    hook.output = None
    # Keep the handle: without it the hook could never be removed and repeated
    # registrations would pile up on the layer.
    hook.handle = module.register_forward_hook(hook)
    return hook
# output={}
# def hook_fn(module,input,output):
#     output['layer']=output


# def get_latent_representation(image_path):
#     image = Image.open(image_path)
#     image_input = preprocess(image).unsqueeze(0)
#     with torch.no_grad():
#         model(image_input)
#     return output['layer'].cpu().numpy()

In [None]:
# Load the VirTex ResNet-50 backbone from torch.hub and switch to eval mode.
# NOTE: rebinds the global `model` that earlier cells used for CLIP.
model = torch.hub.load("kdexd/virtex", "resnet50", pretrained=True)
model.eval()

# Capture the output of `model.avgpool`; the result is exposed as `hook.output`.
hook= create_forward_hook(model.avgpool)

# This is a torchvision-like resnet50 model, with ``avgpool`` and ``fc``
# layers replaced with ``nn.Identity`` module.
# Smoke test on random input. NOTE(review): this forward pass is not wrapped in
# torch.no_grad().
image_batch = torch.randn(1, 3, 224, 224)  # batch tensor of one image.
features_batch = model(image_batch)  # shape: (1, 2048, 7, 7)

In [22]:
def get_latent_representation(
        image_base_dir : str,
        image_path : list
):
    """Return the hooked layer output of the global ``model`` for a list of images.

    NOTE: this definition replaces the single-image CLIP helper of the same
    name defined earlier in the notebook.

    Parameters
    ----------
    image_base_dir : str
        Directory that contains the image files.
    image_path : list
        File names (relative to ``image_base_dir``) of the images to encode.

    Returns
    -------
    numpy.ndarray
        ``hook.output`` (the detached output of the hooked layer) after one
        forward pass over the whole batch.

    Notes
    -----
    Relies on the notebook-level globals ``model`` and ``hook``.
    """
    from torchvision import transforms
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    # Feed the batch on whatever device the model currently lives on.
    model_device = next(model.parameters()).device

    # Preprocess every image. convert("RGB") guards the 3-channel Normalize
    # against grayscale/CMYK/RGBA files; the distinct loop name avoids
    # shadowing the `image_path` parameter.
    image_tensors = []
    for image_name in image_path:
        with Image.open(pjoin(image_base_dir, image_name)) as image:
            image_tensors.append(preprocess(image.convert("RGB")))
    image_batch = torch.stack(image_tensors).to(model_device)

    with torch.no_grad():
        # Return value is discarded; the forward hook captures the layer output.
        model(image_batch)
    return hook.output.cpu().numpy()



In [23]:

#image_path
image_dir='/DATA1/satwick22/Documents/fMRI/multimodal_concepts/images'
#load the numpy array of image
import numpy as np
from os.path import join as pjoin
image_names=np.load(pjoin(image_dir,'perceptionTest-image_filenames.npy'),allow_pickle=True)
test_image_dir=pjoin(image_dir,'test')


In [24]:
image_repr_avgpool=get_latent_representation(test_image_dir,image_names)

In [None]:
image_repr_avgpool.shape

In [None]:
#save the image representations
outdirname='virtex/avgpool'
save_representation(bidsroot,outdirname,image_repr_avgpool)

#### Utility functions for saving the representations

In [6]:
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image

In [7]:
#function for creating the forward hook
def create_forward_hook(module):
    """Attach a forward hook to ``module`` that keeps its latest output.

    Parameters
    ----------
    module : torch.nn.Module
        Layer to observe.

    Returns
    -------
    callable
        The hook itself, carrying two attributes: ``output`` (``None`` before
        the first forward pass, afterwards the detached output of the most
        recent one) and ``handle`` (the object returned by
        ``register_forward_hook``; ``hook.handle.remove()`` detaches the hook).
    """
    def hook(module, input, output):
        hook.output = output.detach()
    hook.output = None
    # Store the handle so callers can unregister the hook when they are done.
    hook.handle = module.register_forward_hook(hook)
    return hook

In [8]:
#function for extracting the latent representation from the hooked layer
def extract_representation(model,pool_layer,preprocess,image_base_dir,image_path,batch_size=None):
    """Run images through ``model`` and return the output of ``pool_layer``.

    Parameters
    ----------
    model : torch.nn.Module
        Network to run; the caller is responsible for ``model.eval()`` and for
        placing it on the notebook-level ``device``.
    pool_layer : torch.nn.Module
        Sub-module of ``model`` whose output is captured.
    preprocess : callable
        Maps an RGB PIL image to a tensor.
    image_base_dir : str
        Directory that contains the image files.
    image_path : sequence of str
        File names (relative to ``image_base_dir``) of the images to encode.
    batch_size : int, optional
        Number of images per forward pass. ``None`` (default) keeps the
        original behaviour of a single pass over all images.

    Returns
    -------
    numpy.ndarray
        Captured layer outputs, concatenated along the first axis in the order
        of ``image_path``.

    Raises
    ------
    ValueError
        If ``image_path`` is empty or ``batch_size`` is smaller than 1.
    """
    n_images = len(image_path)
    if n_images == 0:
        raise ValueError("image_path is empty; there is nothing to extract")
    if batch_size is None:
        batch_size = n_images
    elif batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    captured = {}

    def _capture(module, inputs, output):
        captured['output'] = output.detach()

    # Register the hook for this call only. The original registered a new hook
    # on every call and never removed it, so hooks accumulated on the layer.
    handle = pool_layer.register_forward_hook(_capture)
    chunks = []
    try:
        with torch.no_grad():
            for start in range(0, n_images, batch_size):
                image_tensors = []
                for image_name in image_path[start:start + batch_size]:
                    with Image.open(pjoin(image_base_dir, image_name)) as image:
                        # convert("RGB") guards 3-channel normalisation against
                        # grayscale/CMYK/RGBA files.
                        image_tensors.append(preprocess(image.convert("RGB")))
                #move the images to the device
                image_batch = torch.stack(image_tensors).to(device)
                model(image_batch)
                chunks.append(captured['output'].cpu())
    finally:
        handle.remove()
    return torch.cat(chunks).numpy()

In [9]:
# Locate the stimulus images and load the list of test-image file names.
# NOTE(review): absolute, machine-specific path -- consider a configurable base dir.
image_dir='/DATA1/satwick22/Documents/fMRI/multimodal_concepts/images'
# Load the array of test-image file names.
import numpy as np
from os.path import join as pjoin
# allow_pickle=True lets np.load unpickle object arrays; only use on trusted files.
image_names=np.load(pjoin(image_dir,'perceptionTest-image_filenames.npy'),allow_pickle=True)
# Sub-directory that holds the test images themselves.
test_image_dir=pjoin(image_dir,'test')

In [10]:
#function for saving the representation

def save_representation(bidsrooot:str,
                        outdirname:str,
                        image_batch:np.ndarray):
    """Write an array of representations to the BIDS derivatives tree.

    Output file:
    ``<bidsrooot>/derivatives/model-representations/<outdirname>/PerceptionTest-image_representation.npy``.
    Missing directories are created; an existing file is overwritten.

    Parameters
    ----------
    bidsrooot : str
        Root directory of the BIDS dataset (spelling kept so keyword callers
        keep working).
    outdirname : str
        Output sub-directory below ``derivatives/model-representations``; may
        be nested (e.g. ``'BiT-M/head.avg'``).
    image_batch : np.ndarray
        Array of representations to store.
    """
    import os
    save_path_dir=pjoin(bidsrooot,'derivatives','model-representations',outdirname)
    # exist_ok=True replaces the racy "check, then create" sequence.
    os.makedirs(save_path_dir, exist_ok=True)
    out_file=pjoin(save_path_dir,'PerceptionTest-image_representation.npy')
    np.save(out_file,image_batch)

In [11]:
#loading in the important directories

bidsroot='/DATA1/satwick22/Documents/fMRI/multimodal_concepts/generic_object_decoding_bids'

#### For Virtex

In [None]:
model = torch.hub.load("kdexd/virtex", "resnet50", pretrained=True)
model.eval()

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print('Model loaded successfully')

In [48]:
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [49]:
#get the model avgpool layer reference
pool_layer=model.avgpool

In [50]:
#extract the latent representation
image_repr_avgpool=extract_representation(model,pool_layer,preprocess,test_image_dir,image_names)

In [51]:
#save the image representations
outdirname='virtex/avgpool'
save_representation(bidsroot,outdirname,image_repr_avgpool)

#### For Resnet 50


In [None]:
# Create a torchvision ResNet-50 with pretrained weights and switch it to
# evaluation mode.
# NOTE(review): newer torchvision releases replace `pretrained=True` with a
# `weights=` argument -- confirm against the installed version.
model = models.resnet50(pretrained=True)
model.eval()

# Move the model to the GPU when available, otherwise keep it on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print('Model loaded succesfully')

In [5]:
# get the avgpool layer
avgpool_layer = model.avgpool

In [7]:
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [12]:
#calling the function to extract the latent representation
image_repr_avgpool=extract_representation(model,avgpool_layer,preprocess,test_image_dir,image_names)

In [None]:
image_repr_avgpool.shape

In [16]:
#save the image representations
save_representation(bidsroot,'resnet/avgpool',image_repr_avgpool)

#### For BiT-M

In [None]:
#load the model
import timm  
# Load the pretrained BiT-M model
model = timm.create_model('resnetv2_50x1_bitm', pretrained=True)
model.eval()

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print('Model loaded successfully')



In [9]:
#extract the target layer object
head_avg_layer = model.get_submodule('head.global_pool')

In [10]:
# Preprocessing pipeline for BiT-M: resize, centre-crop to 224x224, convert to
# a tensor and normalise every channel with mean 0.5 / std 0.5.
# NOTE(review): resize/crop sizes are copied from the other model sections;
# confirm they match the data configuration of the pretrained timm checkpoint.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),  # Mean and std for BiT-M
])

In [11]:
#calling the function to extract the latent representation
image_repr_avgpool=extract_representation(model,head_avg_layer,preprocess,test_image_dir,image_names)

In [12]:
#save the image representations
save_representation(bidsroot,'BiT-M/head.avg',image_repr_avgpool)

#### For TSM model 

In [7]:
import sys
sys.path.append('/DATA1/satwick22/Documents/fMRI/multimodal_concepts/code/temporal-shift-module')

In [9]:
from ops.models import TSN
from ops.transforms import *

In [None]:
# Build the TSM ResNet-50 model from the local temporal-shift-module checkout.
# NOTE: no checkpoint is loaded in this cell; the only weight-related argument
# is pretrain='imagenet'. NOTE(review): presumably that initialises just the
# backbone -- confirm in ops.models.
num_class = 400  # Number of classes, e.g., 400 for Kinetics-400
# Second positional argument (1) is presumably the number of segments -- TODO confirm.
model = TSN(num_class, 1, 'RGB',
            base_model='resnet50',
            consensus_type='avg',
            img_feature_dim=256,
            print_spec=False,
            pretrain='imagenet',
            is_shift=True, shift_div=8, shift_place='blockres',
            fc_lr5=True)

# Set model to evaluation mode
model.eval()

# Move the model to the GPU when available, otherwise keep it on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


In [None]:
# Alternative route: load a pretrained TSM ResNet-50 through pytorchvideo.
# NOTE: rebinds `model`, replacing the TSN instance built in the previous cell.
# NOTE(review): confirm that `tsm_resnet50` is actually exported by
# pytorchvideo.models.hub in the installed version.
from pytorchvideo.models.hub import tsm_resnet50
#from pytorchvideo.models.hub import tsm_resnet50
# Load the pretrained TSM ResNet-50 model
model = tsm_resnet50(pretrained=True)
model.eval()

# Move the model to the GPU when available, otherwise keep it on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


#### For ICMLM-attfc

In [22]:
#importing the necessary libraries
import torch.nn as nn
class ICMLMattFCModel(nn.Module):
    """Placeholder ICMLM-attfc network: a small convolutional stem (``cnn``)
    followed by a two-layer fully connected head (``step``).

    The head is sized for a 64 x 56 x 56 feature map, i.e. the stem's output
    for 224 x 224 inputs.
    """

    def __init__(self):
        super().__init__()
        stem_layers = [
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            # Add more layers to match the 'cnn' part of the state_dict if needed
        ]
        self.cnn = nn.Sequential(*stem_layers)

        flat_features = 64 * 56 * 56  # assumes the stem output is 64x56x56
        head_layers = [
            nn.Linear(flat_features, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 1000),  # adjust the output dimension to the actual model
        ]
        self.step = nn.Sequential(*head_layers)

    def forward(self, x):
        """Apply the stem, flatten everything but the batch axis, apply the head."""
        feature_map = self.cnn(x)
        flattened = torch.flatten(feature_map, 1)
        return self.step(flattened)

In [None]:
# Load the ICMLM-attfc checkpoint
from os.path import join as pjoin

device = "cuda" if torch.cuda.is_available() else "cpu"
# map_location puts the stored tensors on `device` while loading.
model_att_fc=torch.load(
    pjoin('/DATA1/satwick22/Documents/fMRI/multimodal_concepts/models','icmlm-attfc_r50_coco_5K.pth'),
    map_location=device,
)
# The next cell indexes this same file with ['cnn'], i.e. the checkpoint is a
# mapping rather than an nn.Module, so calling .to(device) on it unconditionally
# fails. Only treat it as a module when it really is one.
if isinstance(model_att_fc, torch.nn.Module):
    model_att_fc.to(device)
    model_att_fc.eval()

In [None]:
state_dict=torch.load(pjoin('/DATA1/satwick22/Documents/fMRI/multimodal_concepts/models','icmlm-attfc_r50_coco_5K.pth'))
print(state_dict['cnn'])

In [None]:
import torch
import torch.nn as nn

# Define the model architecture
class ICMLMattFCModel(nn.Module):
    """Placeholder ICMLM-attfc network: convolutional stem, one linear
    "attention" layer and a linear output layer.

    Parameters
    ----------
    input_size : int, default 224
        Height/width of the (square) input images. It sizes the first linear
        layer so that it matches the flattened stem output; the original
        hard-coded 512 input features, which made ``forward`` fail with a
        shape mismatch.
    hidden_dim : int, default 512
        Width of the attention layer (example dimension).
    num_outputs : int, default 10
        Width of the output layer (example dimension).
    """

    def __init__(self, input_size=224, hidden_dim=512, num_outputs=10):
        super(ICMLMattFCModel, self).__init__()

        # Assuming `cnn` is a pre-trained model, like ResNet50
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            # Add more layers to match the actual architecture...
        )

        # Spatial size after the 7x7/stride-2 conv and the 3x3/stride-2 max-pool
        # (224 -> 112 -> 56), then the flattened feature count (64 channels).
        after_conv = self._conv_out_size(input_size, kernel=7, stride=2, padding=3)
        after_pool = self._conv_out_size(after_conv, kernel=3, stride=2, padding=1)
        flat_features = 64 * after_pool * after_pool

        # Define the attention mechanism
        self.attention = nn.Linear(in_features=flat_features, out_features=hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=num_outputs)

    @staticmethod
    def _conv_out_size(size, kernel, stride, padding):
        """Output length of a conv/pool window along one spatial axis."""
        return (size + 2 * padding - kernel) // stride + 1

    def forward(self, x):
        """Stem -> flatten -> attention layer + ReLU -> output layer."""
        x = self.cnn(x)

        # Flatten the output from CNN
        x = torch.flatten(x, 1)

        # Apply attention mechanism
        x = self.attention(x)
        x = torch.relu(x)

        # Fully connected layer
        x = self.fc(x)

        return x

# Initialize the model
model = ICMLMattFCModel()

# strict=False silently skips every key that does not match, so print the
# report of missing/unexpected keys to see whether any weights were loaded.
load_result = model.load_state_dict(state_dict, strict=False)
print(load_result)

# Inference only: evaluation mode (fixed BatchNorm statistics) and no autograd.
model.eval()

# Now you can use the model to extract representations
input_image = torch.randn(1, 3, 224, 224)  # Example input image
with torch.no_grad():
    output_representation = model(input_image)

print(output_representation)


#### For Adversarially Trained Models


In [1]:
import torch
from robustness.model_utils import make_and_restore_model
import torchvision.transforms as transforms
from PIL import Image
import os

In [2]:
from os.path import join as pjoin
base_weight_path='/DATA1/satwick22/Documents/fMRI/multimodal_concepts/models/AR'

In [19]:

# Dummy dataset object for loading the model
from robustness import datasets

class DummyImageNet(datasets.ImageNet):
    """Stand-in dataset object used only so the model loader has a dataset.

    Positional and keyword arguments are accepted but ignored; the parent
    constructor always receives the placeholder path below.
    """
    def __init__(self, *args, **kwargs):
        # Placeholder path: no images are read from it in this notebook.
        super().__init__('/path/to/dummy/dataset')

# Shared instance; later cells pass it to make_and_restore_model and read its
# `mean` / `std` attributes for input normalisation.
dummy_dataset = DummyImageNet()

##### For ARL2 

In [20]:
# specify the weights path
weight_path = pjoin(base_weight_path, 'imagenet_l2_3_0.pt')


In [None]:
#load the model
# Load the model
model, _ = make_and_restore_model(arch='resnet50', dataset=dummy_dataset, resume_path=weight_path)
model.eval()
model = model.model  # Extract the inner model

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
print("Model loaded successfully")

In [23]:
# Define the preprocessing transformation
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=dummy_dataset.mean.tolist(), std=dummy_dataset.std.tolist()),
])

In [24]:
#extract the avgpool layer
avgpool_layer = model.avgpool

In [25]:
#extract the representations from the avgpool layer
image_repr_avgpool=extract_representation(model,avgpool_layer,preprocess,test_image_dir,image_names)

In [26]:
save_representation(bidsroot,'madryl23/avgpool',image_repr_avgpool)

##### For AR linf , $ \epsilon = 8/255$

In [27]:
#load the weight path
weight_path = pjoin(base_weight_path, 'imagenet_linf_8.pt')

In [None]:
#extract the model
model, _ = make_and_restore_model(arch='resnet50', dataset=dummy_dataset, resume_path=weight_path)
model.eval()
model = model.model  # Extract the inner model

In [None]:
device='cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
print("Model loaded successfully")


In [31]:

# Define the preprocessing transformation
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=dummy_dataset.mean.tolist(), std=dummy_dataset.std.tolist()),
])

In [32]:
avgpool_layer = model.avgpool

In [33]:
#extract the representations from the avgpool layer
image_repr_avgpool=extract_representation(model,avgpool_layer,preprocess,test_image_dir,image_names)

In [34]:
save_representation(bidsroot,'madryli8/avgpool',image_repr_avgpool)

##### For AR linf , $ \epsilon = 4/255$

In [35]:
weight_path=pjoin(base_weight_path,'imagenet_linf_4.pt')

In [None]:
#extract the model
model, _ = make_and_restore_model(arch='resnet50', dataset=dummy_dataset, resume_path=weight_path)
model.eval()
model = model.model  # Extract the inner model

In [None]:
#move the model to the device
device='cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
print("Model loaded successfully")


In [38]:
# Define the preprocessing transformation
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=dummy_dataset.mean.tolist(), std=dummy_dataset.std.tolist()),
])

In [39]:
#extract the avgpool layer
avgpool_layer = model.avgpool

In [40]:
#extract the representations from the avgpool layer
image_repr_avgpool=extract_representation(model,avgpool_layer,preprocess,test_image_dir,image_names)

In [41]:
#save the image representations
save_representation(bidsroot,'madryli4/avgpool',image_repr_avgpool)

#### For Stylized ImageNet Trained Resnet Models

In [45]:
#adding the path of the models 
import sys
sys.path.append('/DATA1/satwick22/Documents/fMRI/multimodal_concepts/texture-vs-shape/models')

##### SIN Model

In [None]:
#import the model
from load_pretrained_models import load_model
model_name="resnet50_trained_on_SIN"
model=load_model(model_name)

In [None]:
#move the model to device and switch to inference mode
device='cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
# Every other vision model in this notebook is put in eval mode before feature
# extraction; without it BatchNorm/dropout may run in training mode.
# NOTE(review): harmless if load_model already returns the model in eval mode.
model.eval()
print("Model loaded successfully")

In [49]:
# Define the preprocessing transformation
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [52]:
#extract the avgpool layer
avgpool_layer = model.module.avgpool


In [53]:
#extract the representations from the avgpool layer
image_repr_avgpool=extract_representation(model,avgpool_layer,preprocess,test_image_dir,image_names)

In [54]:
#save the image representations
save_representation(bidsroot,'geirhos_sin/avgpool',image_repr_avgpool)

##### For SIN_IN trained models

In [None]:

#import the model
from load_pretrained_models import load_model
model_name="resnet50_trained_on_SIN_and_IN"
model=load_model(model_name)

In [None]:
#move the model to device and switch to inference mode
device='cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
# Every other vision model in this notebook is put in eval mode before feature
# extraction; without it BatchNorm/dropout may run in training mode.
# NOTE(review): harmless if load_model already returns the model in eval mode.
model.eval()
print("Model loaded successfully")

In [70]:
# Define the preprocessing transformation
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [71]:
#extract the avgpool layer
avgpool_layer = model.module.avgpool

In [72]:
#extract the representations from the avgpool layer
image_repr_avgpool=extract_representation(model,avgpool_layer,preprocess,test_image_dir,image_names)

In [73]:
#save the image representations
save_representation(bidsroot,'geirhos_sinin/avgpool',image_repr_avgpool)

##### For SIN_IN_IN trained models

In [None]:
#load the model

from load_pretrained_models import load_model
model_name="resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN"
model=load_model(model_name)

In [None]:

#move the model to device and switch to inference mode
device='cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
# Every other vision model in this notebook is put in eval mode before feature
# extraction; without it BatchNorm/dropout may run in training mode.
# NOTE(review): harmless if load_model already returns the model in eval mode.
model.eval()
print("Model loaded successfully")

In [63]:
# Define the preprocessing transformation
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [64]:
#extract the avgpool layer
avgpool_layer = model.module.avgpool

In [65]:

#extract the representations from the avgpool layer
image_repr_avgpool=extract_representation(model,avgpool_layer,preprocess,test_image_dir,image_names)

In [67]:
#save the image representations
save_representation(bidsroot,'geirhos_sininfin/avgpool',image_repr_avgpool)

### For Language Models

#### For GPT-2

In [None]:
from transformers import GPT2Model, GPT2Tokenizer
import torch

# Load pre-trained GPT-2 model and tokenizer
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2Model.from_pretrained(model_name)


In [None]:
#transfer the model to the device
model.eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print("Model loaded successfully")

In [23]:
def preprocess_texts(texts,tokenizer):
    """Tokenise ``texts`` with ``tokenizer`` and move the tensors to ``device``.

    The tokenizer is asked for PyTorch tensors, padded to a common length and
    truncated. Returns a plain dict that maps each tokenizer output name to its
    tensor on the notebook-level ``device``.
    """
    encoded = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
    # Transfer every returned tensor to the target device.
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)
    return on_device
# Use the end-of-sequence token as the padding token so that padding=True in
# preprocess_texts has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): `text_input` is not assigned in any cell visible in this
# notebook -- TODO define it (the texts to encode) before this call.
text_inputs=preprocess_texts(text_input,tokenizer)

In [28]:
# Disable gradient calculation for efficiency
with torch.no_grad():
    outputs = model(**text_inputs)

# Extract the hidden states
last_hidden_state = outputs.last_hidden_state

# Average the hidden states of the real tokens only. The batch is padded to a
# common length, so a plain mean over the sequence axis would mix the states of
# padding positions into the representation of every shorter text.
token_mask = text_inputs["attention_mask"].unsqueeze(-1).to(last_hidden_state.dtype)
summed_states = (last_hidden_state * token_mask).sum(dim=1)
token_counts = token_mask.sum(dim=1).clamp(min=1)  # guard against division by zero
latent_representations = (summed_states / token_counts).cpu().numpy()

In [None]:
latent_representations.shape

In [31]:
#save the image representations
save_representation(bidsroot,'GPT2/avg',latent_representations)

#### For BERT

In [None]:
from transformers import BertModel, BertTokenizer
import torch

# Load pre-trained BERT model and tokenizer
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)
# Set the model to evaluation mode
model.eval()
# Move the model to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("Model loaded successfully")



In [35]:
def preprocess_texts(texts):
    """Tokenise ``texts`` with the notebook-level ``tokenizer`` and move the
    tensors to ``device``.

    NOTE: replaces the earlier two-argument ``preprocess_texts(texts, tokenizer)``.
    The tokenizer is asked for PyTorch tensors, padded to a common length and
    truncated to at most 512 tokens. Returns a plain dict that maps each
    tokenizer output name to its tensor on ``device``.
    """
    encoded = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # Transfer every returned tensor to the target device.
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(device)
    return on_device
text_inputs = preprocess_texts(text_input)

In [37]:
# Disable gradient calculation for efficiency
with torch.no_grad():
    outputs = model(**text_inputs)

# Extract the hidden states
last_hidden_state = outputs.last_hidden_state

# Average the hidden states of the real tokens only. The batch is padded to a
# common length, so a plain mean over the sequence axis would mix the states of
# padding positions into the representation of every shorter text.
token_mask = text_inputs["attention_mask"].unsqueeze(-1).to(last_hidden_state.dtype)
summed_states = (last_hidden_state * token_mask).sum(dim=1)
token_counts = token_mask.sum(dim=1).clamp(min=1)  # guard against division by zero
latent_representations = (summed_states / token_counts).cpu().numpy()

In [38]:
save_representation(bidsroot,'BERT/avg',latent_representations)

#### For CLIP-L

In [43]:
import torch
import clip

# Load the CLIP model and tokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The original call passed an empty model name, which clip.load cannot resolve.
# NOTE(review): "CLIP-L" is taken to mean the ViT-L/14 checkpoint -- confirm
# against clip.available_models() (e.g. "ViT-L/14@336px" is a different model).
model, preprocess = clip.load("ViT-L/14", device=device)

def preprocess_texts(texts):
    """Tokenise ``texts`` with CLIP's tokenizer and move the ids to ``device``.

    NOTE: replaces the earlier ``preprocess_texts`` definitions in this notebook.
    """
    # truncate=True cuts over-long texts to CLIP's context length instead of
    # raising, consistent with truncation=True in the GPT-2/BERT sections.
    inputs = clip.tokenize(texts, truncate=True).to(device)
    return inputs

# Tokenise the batch of texts.
# NOTE(review): `text_input` is not assigned in any cell visible in this
# notebook -- TODO define it before this call.
text_inputs = preprocess_texts(text_input)

# Disable gradient calculation for efficiency
with torch.no_grad():
    # Get the text embeddings
    text_embeddings = model.encode_text(text_inputs).cpu().numpy()

#save the text representations
save_representation(bidsroot,'CLIP-L/avg',text_embeddings)
