In [None]:
import torch
from transformers import IdeficsForVisionText2Text, AutoProcessor
import pandas as pd
from tqdm import tqdm
import time
import re

In [1]:
device = "cuda" if torch.cuda.is_available() else "cpu"

checkpoint = "HuggingFaceM4/idefics-9b-instruct"
model = IdeficsForVisionText2Text.from_pretrained(checkpoint, torch_dtype=torch.bfloat16).to(device)
processor = AutoProcessor.from_pretrained(checkpoint)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:47<00:00, 23.66s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
def check_inference(model, processor, prompts, max_new_tokens=50):
    tokenizer = processor.tokenizer
    bad_words = ["<image>", "<fake_token_around_image>"]
    if len(bad_words) > 0:
        bad_words_ids = tokenizer(bad_words, add_special_tokens=False).input_ids

    eos_token = "</s>"
    eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)

    inputs = processor(prompts, return_tensors="pt").to(device)
    generated_ids = model.generate(**inputs, eos_token_id=[eos_token_id], bad_words_ids=bad_words_ids, max_new_tokens=max_new_tokens, early_stopping=False)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_text

## Run Inference on 1k_batch

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# Load Sample Dataset

data = '../Dataset/1k_batch.csv'
sample_df = pd.read_csv(data)
sample_df = sample_df.sample(frac=1).reset_index(drop=True) # shuffle the batch
sample_df = sample_df.head(10)

Unnamed: 0,painting,art_style,emotion,repetition,emotion_label,pne,Link,utterance
0,henri-matisse_goldfish-1911,Post_Impressionism,contentment,6,2,0,https://uploads6.wikiart.org/images/henri-mati...,the flowers look pretty with the orange color ...
1,august-macke_street-with-church-in-kandern,Expressionism,excitement,5,3,0,https://uploads3.wikiart.org/images/august-mac...,This piece is very Impressionistic and the col...
2,el-greco_marriage-at-cana,Mannerism_Late_Renaissance,excitement,5,3,0,https://uploads4.wikiart.org/images/el-greco/m...,Looks like a fun and fancy party where they ar...
3,michelangelo_study-of-christ-on-the-cross-betw...,High_Renaissance,sadness,6,7,1,https://uploads3.wikiart.org/images/michelange...,The simplicity in the colors of the crussifict...
4,edvard-munch_from-thuringewald-1905,Expressionism,contentment,5,2,0,https://uploads5.wikiart.org/images/edvard-mun...,The contrast of colors reminds me of trips tak...
5,marc-chagall_meditation-1979,Naive_Art_Primitivism,sadness,6,7,1,https://uploads7.wikiart.org/images/marc-chaga...,The dark images are boring and hard to see. He...
6,max-pechstein_modellpause-1925,Expressionism,contentment,10,2,0,https://uploads0.wikiart.org/images/max-pechst...,the figure here seems to be at peace with the ...
7,miriam-schapiro_popova-1992,Cubism,disgust,5,5,1,https://uploads6.wikiart.org/images/miriam-sch...,I hate this picture because the yellow color i...
8,orest-kiprensky_portrait-of-alexander-pushkin-...,Romanticism,contentment,5,2,0,https://uploads2.wikiart.org/images/orest-kipr...,The man has innocent mutton chops and his arms...
9,marc-chagall_blue-horse-with-the-couple-1982,Naive_Art_Primitivism,contentment,5,2,0,https://uploads5.wikiart.org/images/marc-chaga...,The expressions of the characters look childis...


In [14]:
# Iterate through each URL and run inference
results = []
start_time = time.time()

tqdm.pandas()

for index, row in tqdm(sample_df.iterrows(), total=sample_df.shape[0]):
  utterance = row['utterance']
  url = row['Link']
  prompts = [
    [
        "User:",
        url,
        f"{utterance}. Choose one emotion from: Amusement, Awe, Contentment, Excitement, Fear, Sadness, Anger.",
        "\nAssistant:",
    ],
]
  generated_text = check_inference(model, processor, prompts)
  print(generated_text)
  results.append(generated_text)

end_time = time.time()
elapsed_time = end_time - start_time

sample_df['generated_emotion_image'] = results

  0%|          | 0/1000 [02:53<?, ?it/s]


KeyboardInterrupt: 

In [9]:
sample_df.head()

Unnamed: 0,painting,art_style,emotion,repetition,emotion_label,pne,Link,utterance,generated_emotion_image
0,gustave-loiseau_cliffs-of-etretat-1902,Post_Impressionism,contentment,5,2,0,https://uploads8.wikiart.org/images/gustave-lo...,Gorgeous- Like coming home time and time again...,User: the figures are grotesque and animal-lik...
1,edgar-degas_mary-cassatt-at-the-louvre,Impressionism,contentment,5,2,0,https://uploads8.wikiart.org/images/edgar-dega...,I like the shaping of the ladies figures and t...,User: the figures are grotesque and animal-lik...
2,thomas-gainsborough_portrait-of-grace-dalrympl...,Rococo,awe,5,1,0,https://uploads7.wikiart.org/images/thomas-gai...,The woman appears to be lost in thought while ...,User: the figures are grotesque and animal-lik...
3,sandro-botticelli_young-man-greeted-by-seven-l...,Early_Renaissance,contentment,5,2,0,https://uploads8.wikiart.org/images/sandro-bot...,The calming colors as well as the contentment ...,User: the figures are grotesque and animal-lik...
4,pablo-picasso_jacqueline-with-flowers-1954,Cubism,contentment,6,2,0,https://uploads3.wikiart.org/images/pablo-pica...,I love the bold primary colors and the abstrac...,User: the figures are grotesque and animal-lik...


## Clean up the results

In [43]:
def extract_assistant_emotion(text):
    # Find all occurrences of "Assistant: " in the text
    parts = re.split(r'Assistant: ', text)

    # If there are at least two occurrences
    if len(parts) > 1:
        # Extract the part after the second "Assistant: "
        last_assistant_part = parts[-1]

        # Find the first emotion word from the specified list
        emotions = ['Amusement', 'Awe', 'Contentment', 'Excitement', 'Fear', 'Sadness', 'Anger', 'Disgust']
        for emotion in emotions:
            if emotion in last_assistant_part:
                return emotion

    # Return None if no emotion is found
    return None

In [12]:
sample_df['second_assistant_emotion'] = sample_df['generated_emotion_image'].apply(extract_assistant_emotion)

## Evaluation

In [17]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report

In [19]:
sample_df = sample_df.drop('generated_emotion_image',axis=1)

In [26]:
sample_df.rename(columns={'second_assistant_emotion':'generated_emotion_image'}, inplace=True)

In [27]:
sample_df.head()

Unnamed: 0,painting,art_style,emotion,repetition,emotion_label,pne,Link,utterance,generated_emotion_image
0,gustave-loiseau_cliffs-of-etretat-1902,Post_Impressionism,contentment,5,2,0,https://uploads8.wikiart.org/images/gustave-lo...,Gorgeous- Like coming home time and time again...,
1,edgar-degas_mary-cassatt-at-the-louvre,Impressionism,contentment,5,2,0,https://uploads8.wikiart.org/images/edgar-dega...,I like the shaping of the ladies figures and t...,Disgust
2,thomas-gainsborough_portrait-of-grace-dalrympl...,Rococo,awe,5,1,0,https://uploads7.wikiart.org/images/thomas-gai...,The woman appears to be lost in thought while ...,
3,sandro-botticelli_young-man-greeted-by-seven-l...,Early_Renaissance,contentment,5,2,0,https://uploads8.wikiart.org/images/sandro-bot...,The calming colors as well as the contentment ...,Awe
4,pablo-picasso_jacqueline-with-flowers-1954,Cubism,contentment,6,2,0,https://uploads3.wikiart.org/images/pablo-pica...,I love the bold primary colors and the abstrac...,Amusement


In [28]:
sample_df.to_csv('/content/drive/My Drive/EC_Project/data/1k_generation1.csv', index=False)

In [29]:
sample_df = sample_df.dropna(subset=['generated_emotion_image', 'emotion'])

In [31]:
sample_df['generated_emotion_image'] = sample_df['generated_emotion_image'].str.lower()

In [33]:
# Calculate accuracy, F1 score, precision, recall
accuracy = accuracy_score(sample_df['emotion'], sample_df['generated_emotion_image'])
f1 = f1_score(sample_df['emotion'], sample_df['generated_emotion_image'], average='weighted')
precision = precision_score(sample_df['emotion'], sample_df['generated_emotion_image'], average='weighted')
recall = recall_score(sample_df['emotion'], sample_df['generated_emotion_image'], average='weighted')
report = classification_report(sample_df['emotion'], sample_df['generated_emotion_image'])

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print("Classification Report:")
print(report)

Accuracy: 0.228486646884273
F1 Score: 0.21218983167778258
Precision: 0.41614024692086754
Recall: 0.228486646884273
Classification Report:
              precision    recall  f1-score   support

   amusement       0.27      0.12      0.17        80
       anger       0.00      0.00      0.00         1
         awe       0.29      0.47      0.36       111
 contentment       0.67      0.06      0.11       242
     disgust       0.07      0.50      0.13        28
  excitement       0.21      0.14      0.17        44
        fear       0.40      0.56      0.47        90
     sadness       0.23      0.10      0.14        78

    accuracy                           0.23       674
   macro avg       0.27      0.24      0.19       674
weighted avg       0.42      0.23      0.21       674

