In [2]:
import torch
from transformers import CLIPProcessor, CLIPModel
import pandas as pd

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

emotion_templates = [
    "This artwork conveys a feeling of anger.",
    "This artwork conveys a feeling of disgust.",
    "This artwork conveys a feeling of fear.",
    "This artwork conveys a feeling of sadness.",
    "This artwork conveys a feeling of amusement.",
    "This artwork conveys a feeling of awe.",
    "This artwork conveys a feeling of contentment.",
    "This artwork conveys a feeling of excitement.",
]

df = pd.read_csv('1k_batch.csv')

MAX_SEQ_LENGTH = 75

results = []

for index, row in df.iterrows():
    utterance = row['utterance']

    emotions = []
    for template in emotion_templates:
        full_text = f"{utterance} {template}"
        tokens = processor.tokenizer(full_text, truncation=True, max_length=MAX_SEQ_LENGTH, return_tensors="pt")["input_ids"]
        truncated_text = processor.tokenizer.decode(tokens[0], skip_special_tokens=True)
        emotions.append(truncated_text)

    inputs = processor(text=emotions, return_tensors="pt", padding=True)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        text_features = model.get_text_features(inputs['input_ids'])

    probs = torch.softmax(text_features @ text_features.T, dim=-1)
    
    similarities = probs.cpu().numpy()[0]
    max_idx = similarities.argmax()
    predicted_emotion = emotion_templates[max_idx]

    results.append({
        'painting': row['painting'],
        'predicted_emotion': predicted_emotion,
        'similarity': similarities[max_idx]
    })

    print(f"Processed {index + 1}/{len(df)}: {row['painting']} -> {predicted_emotion}")

results_df = pd.DataFrame(results)

results_df.to_csv('predicted_emotions_only_on_utterance.csv', index=False)


Processed 1/1000: agostino-carracci_venus-and-mars-1600 -> This artwork conveys a feeling of anger.
Processed 2/1000: diego-velazquez_self-portrait-1 -> This artwork conveys a feeling of anger.
Processed 3/1000: felicien-rops_tienne-soubre -> This artwork conveys a feeling of disgust.
Processed 4/1000: koloman-moser_animal-motif-for-a-picture-book -> This artwork conveys a feeling of disgust.
Processed 5/1000: martiros-saryan_woman-with-mask-s-i-dymshits-1913 -> This artwork conveys a feeling of disgust.
Processed 6/1000: max-pechstein_am-ufer-at-the-riverbank-1920 -> This artwork conveys a feeling of disgust.
Processed 7/1000: andy-warhol_mobil -> This artwork conveys a feeling of fear.
Processed 8/1000: joshua-reynolds_john-russel-4th-duke-of-bedford-1762 -> This artwork conveys a feeling of disgust.
Processed 9/1000: marc-chagall_peasant-with-a-clock-1968 -> This artwork conveys a feeling of anger.
Processed 10/1000: allen-jones_chest-1968 -> This artwork conveys a feeling of anger.