In [None]:
# Mount Google Drive at /content/drive; the output paths used further down
# in this notebook live underneath that mount point.
from google.colab import drive

drive.mount("/content/drive")

In [None]:
#  %%capture
!pip install datasets
!pip install accelerate
!pip install xlsxwriter

In [None]:
import random
from datasets import load_dataset

# Hub identifier of the A-OKVQA dataset. No split is requested here, so every
# available split is loaded and later cells pick the one they need by name.
DATASET_ID = "HuggingFaceM4/A-OKVQA"
dataset = load_dataset(DATASET_ID)

In [None]:
# Display the loaded dataset object; its repr is rendered as this cell's output.
dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

def imshow(img, ax=None, caption=""):
    """Draw one image on a matplotlib axis with the axis frame hidden.

    Args:
        img: A PIL image, a NumPy array, or any other object that
            ``np.asarray`` can convert to an array.
        ax: Axis to draw on. A new figure/axis pair is created when omitted.
        caption: Text used as the axis title.

    Returns:
        The axis the image was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots()

    # Accept anything array-like (PIL images included), not only PIL/ndarray.
    if not isinstance(img, np.ndarray):
        img = np.asarray(img)

    if img.dtype != np.uint8 and img.dtype != np.float32:
        # Floating-point images normalised to [0, 1] must stay floating point:
        # casting them to uint8 would truncate every pixel to 0 or 1 and the
        # picture would come out black. Everything else keeps the previous
        # behaviour and is cast to uint8.
        unit_range_float = (
            np.issubdtype(img.dtype, np.floating)
            and img.size > 0
            and img.min() >= 0.0
            and img.max() <= 1.0
        )
        img = img.astype(np.float32 if unit_range_float else np.uint8)

    # 2-D (grayscale) arrays are passed through unchanged; matplotlib renders
    # them directly through its colormap, so no channel axis is needed.
    ax.imshow(img)
    ax.set_title(caption)
    ax.axis('off')
    return ax

def plot_images_with_captions(images, captions):
    """Show images stacked in a single column, each titled with its caption.

    Args:
        images: Iterable of images accepted by ``imshow``.
        captions: Iterable of title strings, matched to ``images`` by
            position. Surplus items on either side are ignored.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    pairs = list(zip(images, captions))
    if not pairs:
        # plt.subplots cannot build a grid with zero rows; nothing to draw.
        return

    n_rows = len(pairs)
    # squeeze=False always yields a 2-D axes array, so one image needs no
    # special case. The figure keeps its 10x10 size for up to two images and
    # grows taller beyond that so each image keeps a usable height.
    fig, axes = plt.subplots(
        nrows=n_rows,
        ncols=1,
        squeeze=False,
        figsize=(10, max(10, 5 * n_rows)),
    )
    for ax, (img, caption) in zip(axes[:, 0], pairs):
        # imshow sets the title itself, so it is not set a second time here.
        imshow(img, ax, caption)
    plt.show()

# Preview the first two training samples. Each row is looked up once and both
# the image and the question are taken from that single lookup, instead of
# fetching the same row a second time just for its question.
preview_rows = [dataset['train'][i] for i in range(2)]
images = [np.array(row['image']) for row in preview_rows]
captions = [row['question'] for row in preview_rows]
plot_images_with_captions(images, captions)


# Load model 🏋

In [None]:
import torch
# Load model directly
from transformers import AutoProcessor, AutoModelForPreTraining, LlavaForConditionalGeneration

MODEL_ID = "llava-hf/llava-1.5-7b-hf"

# Load the weights and the matching processor from the single MODEL_ID
# constant. The results workbook is named after MODEL_ID, so using it here
# keeps the file name and the model that produced the results in sync.
model = LlavaForConditionalGeneration.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)

# NOTE(review): no torch_dtype is passed, so the weights load in the library's
# default precision; confirm the target GPU has enough memory for that.
model.to("cuda")

In [None]:
# Add up the element count of every parameter tensor in the model and print
# the total expressed in millions with two decimals.
total_params = sum(map(lambda tensor: tensor.numel(), model.parameters()))
human_readable_params = "{:.2f}M".format(total_params / 1e6)
print("Total Parameters: " + human_readable_params)


# Running inference on A-OKVQA 🏃

In [None]:
from typing import List, Dict, Any

In [None]:
import warnings
# Suppress all warnings for the remainder of the kernel session.
warnings.filterwarnings(action="ignore")

In [None]:
def inference(image, question, mode = "qa", hyperparams = None, max_new_tokens=40):
  """Run beam-search generation for one image/prompt pair and decode the result.

  Args:
    image: Image handed to the processor as ``images``.
    question: Complete prompt text handed to the processor as ``text``.
    mode: ``"qa"`` (length_penalty=-1, which favours short answers) or
      ``"rationale"`` (length_penalty=1.1, which favours longer explanations).
    hyperparams: Optional dict of additional ``model.generate`` keyword
      arguments; its entries override the preset selected by ``mode``.
    max_new_tokens: Upper bound on the number of newly generated tokens.

  Returns:
    The first generated sequence decoded to a string with special tokens
    skipped. Callers in this notebook strip the echoed prompt from it.

  Raises:
    ValueError: If ``mode`` is not one of the supported modes.
  """
  presets = {
      "qa": {"num_beams": 5, "length_penalty": -1},
      "rationale": {"num_beams": 5, "length_penalty": 1.1},
  }
  # Reject an unknown mode before any tokenisation or GPU work is done,
  # rather than failing later on an unassigned `outputs` variable.
  if mode not in presets:
    raise ValueError(f"Unknown mode {mode!r}; expected one of {sorted(presets)}")

  generate_kwargs = dict(presets[mode], max_new_tokens=max_new_tokens)
  if hyperparams:
    generate_kwargs.update(hyperparams)

  inputs = processor(text=question, images=image, return_tensors="pt").to("cuda")
  outputs = model.generate(**inputs, **generate_kwargs)

  # Only the best beam (first returned sequence) is decoded.
  return processor.decode(outputs[0], skip_special_tokens=True)

def make_prompt(x):
  """Build the question text for one sample.

  The sample's question is followed by the literal " Choices: " and the
  sample's choices joined with ", ". Kept as a separate function so that
  question preprocessing can be added in one place later.
  """
  question_text = x['question']
  options = ', '.join(x['choices'])
  return "{} Choices: {}".format(question_text, options)


# Helper methods

# Generating Sheets for Annotation

In [None]:
import pandas as pd
from tqdm.auto import tqdm
from PIL import Image
import numpy as np

RES_PATH = "/content/drive/MyDrive/<hidden>/results/"
SAVE_PATH = "/content/drive/MyDrive/<hidden>/img"

import os
from itertools import islice

# Make sure both output folders exist before anything is written. Images are
# saved under SAVE_PATH, which is not inside RES_PATH, so it has to be created
# explicitly; exist_ok makes the cell safe to re-run.
os.makedirs(RES_PATH, exist_ok=True)
os.makedirs(SAVE_PATH, exist_ok=True)

meta_prompt =  "Please explain the reasoning behind your answer?"
N_SAMPLES = 250 # use -1 (or None) to process the whole split
N = len(dataset['train'])
# Number of rows actually processed, never more than the split holds.
n_to_process = N if N_SAMPLES is None or N_SAMPLES < 0 else min(N_SAMPLES, N)

ASSISTANT_TAG = "ASSISTANT:"


def _reply_only(decoded, prompt):
  """Return the text of `decoded` that follows the prompt's last ASSISTANT tag.

  The decoded generation echoes the prompt. Splitting on the tag as many times
  as the tag occurs in the prompt isolates everything generated after it, with
  no reliance on exact character offsets. Surrounding whitespace is removed.
  If the tag is missing from `decoded`, the whole stripped string is returned.
  """
  return decoded.split(ASSISTANT_TAG, prompt.count(ASSISTANT_TAG))[-1].strip()


results = []

# islice stops after n_to_process rows without fetching one extra row.
for idx, x in enumerate(tqdm(islice(dataset['train'], n_to_process), total=n_to_process)):
  image = x['image']

  # Show the image
  plt.imshow(image)
  plt.axis('off')
  plt.show()

  # Save the image so the annotation sheet can embed it later
  image_path = f'{SAVE_PATH}/{idx}.jpg'
  image.save(image_path)

  # Step 1: ask the multiple-choice question
  question = make_prompt(x)
  print(question)
  qa_question = f"<image>\nUSER:Question: {question}.\nASSISTANT:"
  qa_answer = _reply_only(inference(image, qa_question, mode = "qa"), qa_question)
  print("QA_ANSWER:")
  print(qa_answer)

  # Step 2: feed the predicted answer back and ask for the reasoning
  ra_question = f"<image>\nUSER:Question: {question}. Answer: {qa_answer}. {meta_prompt}\nASSISTANT:"
  answer = _reply_only(
      inference(image, ra_question, mode="rationale", max_new_tokens=100),
      ra_question,
  )
  print()
  print(f"question: {ra_question}\nrationale: {answer}")

  correct_answer = x['choices'][x['correct_choice_idx']]
  results.append({
      'question': question,
      'correct_answer' : correct_answer,
      'predicted_answer' : qa_answer,
      # Case-insensitive exact match against the ground-truth choice
      'is_correct': 1 if qa_answer.lower() == correct_answer.lower() else 0,
      'groundtruth_rationale' : x['rationales'],
      'direct_answer' : x['direct_answers'],
      'rationale_prompt' : ra_question,
      'generated_rationale' : answer,
      'image_path': image_path,
  })

  # print('-'*100)




In [None]:
RES_NAME = "test.xlsx"
df = pd.DataFrame(results)

# Write through a context manager: leaving the `with` block closes the writer,
# which is the step that saves the workbook and releases the file handle.
with pd.ExcelWriter(RES_PATH + RES_NAME, engine='xlsxwriter') as writer:
    # Convert the dataframe to an XlsxWriter Excel object.
    df.to_excel(writer, sheet_name='Sheet1')

    # Get the xlsxwriter workbook and worksheet objects.
    workbook  = writer.book
    worksheet = writer.sheets['Sheet1']

In [None]:
import xlsxwriter

# Name the workbook after the checkpoint: the part of MODEL_ID after the last "/".
checkpoint_name = MODEL_ID.split('/')[-1].strip()
RES_NAME = f"{checkpoint_name}_inference.xlsx"
df = pd.DataFrame(results)
print(RES_PATH, RES_NAME)

with pd.ExcelWriter(RES_PATH + RES_NAME, engine='xlsxwriter') as writer:
    # Every result column, including 'image_path', is written; the DataFrame
    # index is left out.
    df.to_excel(writer, sheet_name='Sheet1', index=False)

    # Handles to the underlying xlsxwriter objects of the sheet just written.
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']

    # Excel row 1 holds the header, so the record at position k (0-based) sits
    # on row k + 2; its image is anchored in column R of that same row.
    # NOTE(review): images are inserted without scaling options, so large
    # images may extend over neighbouring rows - consider x_scale / y_scale.
    for excel_row, record in enumerate(df.to_dict('records'), start=2):
        worksheet.insert_image(f'R{excel_row}', record['image_path'])
