In [26]:
from transformers import RobertaTokenizer, RobertaModel
import torch
import pandas as pd
import numpy as np

In [30]:
# Load the train and test frames from their pickles (train first, then test).
# NOTE: unpickling can execute arbitrary code, so only open trusted files.
train_df, test_df = map(
    pd.read_pickle,
    ("fhm_features_train.pkl", "fhm_features_test.pkl"),
)

In [35]:
# Load the BLIP-2 augmented train and test frames (train first, then test).
# NOTE: unpickling can execute arbitrary code, so only open trusted files.
train_df_blip2, test_df_blip2 = map(
    pd.read_pickle,
    ("blip2_augmented_fhm_train.pkl", "blip2_augmented_fhm_test.pkl"),
)

In [28]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [29]:
# Tokenizer and encoder are loaded from the same 'roberta-large' checkpoint.
tokenizer = RobertaTokenizer.from_pretrained("roberta-large")

# Load the encoder weights first, then move the module to the chosen device.
model = RobertaModel.from_pretrained("roberta-large")
model = model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [41]:
def encode_text(row):
    """Embed ``row["text"]`` with the notebook-level RoBERTa ``model``.

    Relies on the globals ``tokenizer``, ``model`` and ``device`` defined in
    earlier cells.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide a ``"text"`` entry holding the string to encode.

    Returns
    -------
    numpy.ndarray
        Last-layer hidden state at token position 0. The leading batch axis
        is kept, so a single string yields an array of shape
        ``(1, hidden_size)``.
    """
    # truncation=True clips inputs to the tokenizer's maximum length; without
    # it an over-long text overflows the model's position embeddings and the
    # forward pass raises, aborting the whole DataFrame.apply.
    encoded_input = tokenizer(
        row["text"], return_tensors='pt', truncation=True
    ).to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**encoded_input)

    # Keep only the first token's vector as the sequence-level embedding.
    last_hidden_states = outputs.last_hidden_state
    embedding = last_hidden_states[:, 0, :].cpu().numpy()

    return embedding

def encode_caption(row):
    """Embed ``row["blip2_caption"]`` with the notebook-level RoBERTa ``model``.

    Relies on the globals ``tokenizer``, ``model`` and ``device`` defined in
    earlier cells.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide a ``"blip2_caption"`` entry holding the caption string.

    Returns
    -------
    numpy.ndarray
        Last-layer hidden state at token position 0. The leading batch axis
        is kept, so a single string yields an array of shape
        ``(1, hidden_size)``.
    """
    # truncation=True clips inputs to the tokenizer's maximum length; without
    # it an over-long caption overflows the model's position embeddings and
    # the forward pass raises, aborting the whole DataFrame.apply.
    encoded_input = tokenizer(
        row["blip2_caption"], return_tensors='pt', truncation=True
    ).to(device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**encoded_input)

    # Keep only the first token's vector as the sequence-level embedding.
    last_hidden_states = outputs.last_hidden_state
    embedding = last_hidden_states[:, 0, :].cpu().numpy()

    return embedding

In [33]:
# Run every row through encode_text (one forward pass per row) and keep the
# result in a 'text_features' column on the same frame.
train_df["text_features"] = train_df.apply(encode_text, axis="columns")
test_df["text_features"] = test_df.apply(encode_text, axis="columns")

In [42]:
# For each split: reuse the text embeddings already stored on the base frame,
# then embed the BLIP-2 caption of every row with encode_caption.
# NOTE(review): column assignment from another frame aligns on index labels;
# this presumably relies on the BLIP-2 frames sharing the base frames' index
# -- verify, since mismatched labels would silently produce NaN.

# Train split.
train_df_blip2["text_features"] = train_df["text_features"]
train_df_blip2["blip_features"] = train_df_blip2.apply(encode_caption, axis="columns")

# Test split.
test_df_blip2["text_features"] = test_df["text_features"]
test_df_blip2["blip_features"] = test_df_blip2.apply(encode_caption, axis="columns")

In [47]:
# Load the frames holding the image features (train first, then test).
# NOTE: unpickling can execute arbitrary code, so only open trusted files.
blip2_img_train, blip2_img_test = map(
    pd.read_pickle,
    ("fhm_blip_features_train.pkl", "fhm_blip_features_test.pkl"),
)

# Copy the image-feature column onto the BLIP-2 frames.
# NOTE(review): the assignment aligns on index labels -- confirm both frames
# share the same index.
train_df_blip2["vit-l-14-img_features"] = blip2_img_train["vit-l-14-img_features"]
test_df_blip2["vit-l-14-img_features"] = blip2_img_test["vit-l-14-img_features"]

In [48]:
# Persist the base frames, which now carry the 'text_features' column.
train_df.to_pickle(path="fhm_features_train_roberta.pkl")
test_df.to_pickle(path="fhm_features_test_roberta.pkl")

# Persist the BLIP-2 frames with the text, caption and image feature columns.
train_df_blip2.to_pickle(path="fhm_blip_features_train_roberta.pkl")
test_df_blip2.to_pickle(path="fhm_blip_features_test_roberta.pkl")