In [None]:
! pip install openai pandas scikit-learn nltk rouge-score

In [2]:
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from sklearn.metrics import accuracy_score

In [3]:
import os

# Dataset Loading

In [4]:
# Load the SPIQA table (path is relative to this notebook); later cells read its
# 'question', 'answer', 'caption', 'q_id', 'doc_id' and 'reference_figure' columns.
df = pd.read_csv('../../../../Data/VisDoM-main/spiqa/spiqa.csv')

# Helper Functions

In [5]:
# Evaluation metrics
def compute_metrics(pred, true):
    """Score one generated answer against its reference answer.

    Args:
        pred: Generated answer text.
        true: Reference (gold) answer text.

    Returns:
        dict with three entries:
          "bleu"        - sentence-level BLEU computed on whitespace-split tokens,
          "rougeL"      - ROUGE-L F-measure (stemming enabled),
          "exact_match" - 1 when both strings are equal after strip + lower-casing, else 0.
    """
    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
    rouge_l = scorer.score(true, pred)['rougeL'].fmeasure

    reference_tokens = true.split()
    candidate_tokens = pred.split()
    bleu = sentence_bleu([reference_tokens], candidate_tokens)

    normalized_pred = pred.strip().lower()
    normalized_true = true.strip().lower()
    is_exact = 1 if normalized_pred == normalized_true else 0

    return {"bleu": bleu, "rougeL": rouge_l, "exact_match": is_exact}


# Running Answer Generation

## Text only

### Generation Function

In [None]:
from openai import OpenAI

# Point the client at the local OpenAI-compatible server (LM Studio), matching the
# other cells in this notebook; a bare OpenAI() would target api.openai.com and
# require OPENAI_API_KEY to be set.
client = OpenAI(base_url="http://127.0.0.1:1235/v1", api_key="lm-studio")

def generate_answer_mistral(question, caption):
    """Answer a question from a figure caption using the "mistral" chat model.

    Note the parameter order: question first, caption second.

    Args:
        question: Question text inserted after "Question: " in the prompt.
        caption: Caption text inserted after "Caption: " in the prompt.

    Returns:
        The stripped text content of the first completion choice.
    """
    # Prompt sections are separated by blank lines.
    prompt_sections = [
        f"Caption: {caption}",
        "Please provide a brief and accurate answer to the following question based on the above caption.",
        f"Question: {question}",
        "Answer:",
    ]
    user_message = {"role": "user", "content": "\n\n".join(prompt_sections)}

    completion = client.chat.completions.create(
        model="mistral",  # make sure this matches your LM Studio model name
        messages=[user_message],
        temperature=0.7,
        max_tokens=512,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

# 🔍 Example: smoke-test the text-only generator on one hand-written caption/question pair.
caption = "Figure 4. Visualizations of the preferred inputs for different class units on layer fc8..."
question = "How many hyperparameter combinations were used for the random hyperparameter search?"

# generate_answer_mistral takes the question first, then the caption.
print(generate_answer_mistral(question, caption))



### Evaluation function

In [7]:
def evaluate_text_only_spiqa(df):
    """Generate and score a text-only answer for every row of ``df``.

    Each row must provide 'question', 'caption', 'answer' and 'q_id'. The answer
    is produced by ``generate_answer_mistral`` and scored by ``compute_metrics``.

    Args:
        df: DataFrame of samples; 'caption' is expected to be pre-extracted.

    Returns:
        A new DataFrame with one row per sample holding q_id, question,
        true_answer, generated_answer and every metric returned by
        ``compute_metrics``.
    """
    records = []
    for _, sample in df.iterrows():
        question, caption, true_answer = sample['question'], sample['caption'], sample['answer']

        pred_answer = generate_answer_mistral(question, caption)
        scores = compute_metrics(pred_answer, true_answer)

        record = {
            "q_id": sample['q_id'],
            "question": question,
            "true_answer": true_answer,
            "generated_answer": pred_answer,
        }
        record.update(scores)
        records.append(record)

    return pd.DataFrame(records)

### Simple Example Run

In [None]:
# Single hand-written caption/question pair sent to the text-only generator.
caption = "Figure 4. Visualizations of the preferred inputs for different class units on layer fc8..."
question = "How many hyperparameter combinations were used for the random hyperparameter search?"

# Argument order is (question, caption).
print(generate_answer_mistral(question, caption))

In [None]:
# Hand-written sample with a gold answer, used to exercise generation and scoring together.
example = {
    "question": "How many hyperparameter combinations were used for the random hyperparameter search?",
    "answer": "300 sets of possible hyperparameter combinations then choose four of them that complement each other well.",
    "caption": "To pick a reasonable set of hyperparameters for all methods at once, we ran a random hyperparameter search of 300 possible combinations and settled on four that complement each other well. The four selected combinations are listed in Table 1..."
}

# Run: generate an answer, then score it against the gold answer.
question = example["question"]
caption = example["caption"]
true_answer = example["answer"]

generated = generate_answer_mistral(question, caption)
# compute_metrics expects (prediction, reference) in that order.
metrics = compute_metrics(generated, true_answer)

print("Q:", question)
print("Generated:", generated)
print("True:", true_answer)
print("Metrics:", metrics)

In [None]:
# Try the text-only generator on the first dataset row.
row = df.iloc[0]
# Only relevant to the commented-out LLaVA call below; "/your/images/folder" is a placeholder.
image_path = os.path.join("/your/images/folder", row["reference_figure"])
caption = row["caption"]
question = row["question"]
true_answer = row["answer"]

# LLaVA example
# gen_answer = generate_llava_answer(image_path, caption, question)

# Mistral example
# NOTE: generate_answer_mistral takes (question, caption) - the reverse of
# generate_mistral_answer(caption, question) - so the question must come first.
gen_answer = generate_answer_mistral(question, caption)

print("Question:", question)
print("Generated:", gen_answer)
print("Gold:", true_answer)


### Running the Code in Batches

In [13]:
import pandas as pd
import os
from openai import OpenAI
from tqdm.auto import tqdm

# === Config ===
# Dataset CSV read by the batch cell below.
CSV_PATH = "../../../../Data/VisDoM-main/spiqa/spiqa.csv"
# IMAGE_FOLDER = "/path/to/your/image/folder"  # not used here, but kept for consistency
# Directory that receives one CSV file per processed batch.
RESULTS_PATH = "../../../../Data/spiqa_eval/mistral_only"
# Number of dataset rows handled per run.
BATCH_SIZE = 10

# === Connect to LM Studio ===
# OpenAI-compatible endpoint on localhost port 1235; the api_key is presumably a
# placeholder for the local server - confirm against your server settings.
client = OpenAI(base_url="http://127.0.0.1:1235/v1", api_key="lm-studio")

In [11]:
# === Mistral generation function ===
def generate_mistral_answer(caption, question):
    """Answer a question from a caption with the "mistral" chat model, never raising.

    Note the parameter order: caption first, question second.

    Args:
        caption: Caption text placed at the top of the prompt.
        question: Question text placed after the instruction line.

    Returns:
        The stripped completion text, or the string "[Error] <message>" when the
        request (or reading its result) raises any Exception.
    """
    prompt = (
        f"Caption: {caption}\n\n"
        "Please answer the following question based on the caption above:\n"
        f"Question: {question}\n\n"
        "Answer:"
    )
    user_message = {"role": "user", "content": prompt}
    try:
        completion = client.chat.completions.create(
            model="mistral",
            messages=[user_message],
            temperature=0.7,
            max_tokens=512,
        )
        answer_text = completion.choices[0].message.content
        return answer_text.strip()
    except Exception as err:
        # Best effort: the failure is reported in-band so a batch run keeps going.
        return f"[Error] {err!s}"

In [12]:
# Row offset of the batch to process; the batch covers rows START_INDEX up to
# START_INDEX + BATCH_SIZE (exclusive).
START_INDEX = 0  # Change this for each run: 0, 10, 20, etc.

In [None]:
# === Ensure output folder exists ===
os.makedirs(RESULTS_PATH, exist_ok=True)
# Exclusive upper bound of the batch. NOTE(review): not clamped to the dataset
# length here, so the last batch's file name may overstate the rows it contains.
end_index = START_INDEX + BATCH_SIZE
# Output CSV for this batch; the name encodes the row range.
batch_file = os.path.join(RESULTS_PATH, f"mistral_generated_batch_{START_INDEX}_{end_index}.csv")

In [14]:

# === Load the dataset and select the batch ===
df = pd.read_csv(CSV_PATH)
# Clamp to the dataset length so the final (short) batch gets an accurate file name.
end_index = min(START_INDEX + BATCH_SIZE, len(df))
batch_df = df.iloc[START_INDEX:end_index]

# Derive the output location here so this cell does not rely on values left over
# from another cell.
os.makedirs(RESULTS_PATH, exist_ok=True)
batch_file = os.path.join(RESULTS_PATH, f"mistral_generated_batch_{START_INDEX}_{end_index}.csv")

# === Process each row in the batch ===
results = []
# iterrows() is a generator, so tqdm needs an explicit total to show progress and ETA.
for idx, row in tqdm(batch_df.iterrows(), total=len(batch_df)):
    # generate_mistral_answer takes (caption, question) and returns an
    # "[Error] ..." string instead of raising on failure.
    generated = generate_mistral_answer(row["caption"], row["question"])
    results.append({
        "q_id": row["q_id"],
        "doc_id": row["doc_id"],
        "question": row["question"],
        "true_answer": row["answer"],
        "generated_answer": generated,
        "reference_figure": row["reference_figure"],
        "caption": row["caption"]
    })

# === Save batch results ===
pd.DataFrame(results).to_csv(batch_file, index=False)
print(f"✅ Mistral batch saved to {batch_file}")


0it [00:00, ?it/s]

✅ Mistral batch saved to ../../../../Data/spiqa_eval/mistral_only\mistral_generated_batch_0_10.csv


Mistral only: 1st batch

## Vision Model Function and Prompt

### Generation Function

In [8]:
import base64

def image_to_base64(image_path, size=(336, 336)):
    """Resize an image and return it as a base64-encoded PNG string.

    The resized image is serialized as a PNG file in memory before encoding, so
    the result can be embedded in a ``data:image/png;base64,...`` URL. Encoding
    ``img.tobytes()`` directly would yield raw pixel data with no image header.

    Args:
        image_path: Path of the image file to read.
        size: Target (width, height) passed to ``Image.resize``.

    Returns:
        Base64 text (UTF-8 str) of the PNG-encoded, resized image.
    """
    import io

    from PIL import Image  # Pillow; `Image` is not imported anywhere else in this notebook

    with Image.open(image_path) as img:
        resized = img.resize(size)
        buffer = io.BytesIO()
        resized.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


In [9]:
from openai import OpenAI
import base64

# Client for the OpenAI-compatible server on localhost port 1235 (same endpoint as the batch cells).
client = OpenAI(base_url="http://127.0.0.1:1235/v1", api_key="lm-studio")

def generate_answer_llava(image_path, caption, question, prompt_template):
    """Ask the LLaVA model a question about one figure and its caption.

    Args:
        image_path: Path of the image file; its bytes are base64-encoded and sent
            as a ``data:image/png`` URL.
        caption: Text substituted for ``<caption>`` in ``prompt_template``.
        question: Text substituted for ``<question>`` in ``prompt_template``.
        prompt_template: Prompt text containing the two placeholders above.

    Returns:
        The stripped text content of the first completion choice.
    """
    filled_prompt = prompt_template.replace("<caption>", caption).replace("<question>", question)

    # Read the file as-is and wrap it in a data URL for the image part of the message.
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    data_url = "data:image/png;base64," + base64.b64encode(raw_bytes).decode("utf-8")

    image_part = {"type": "image_url", "image_url": {"url": data_url}}
    text_part = {"type": "text", "text": filled_prompt}

    completion = client.chat.completions.create(
        model="llava-v1.5-7b@q5_k_m",  # replace with your model name
        messages=[{"role": "user", "content": [image_part, text_part]}],
        temperature=0.7,
        max_tokens=512,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

### Evaluation Function

In [10]:
# Evaluation loop
def evaluate_generation(df, image_base_path, prompt_template=None):
    """Generate and score a LLaVA answer for every row of ``df``.

    Each row must provide 'question', 'answer', 'caption', 'q_id', 'doc_id' and
    'reference_figure'. The figure is looked up at
    ``<image_base_path>/<doc_id>/<reference_figure>``, the layout the other
    cells of this notebook use.

    Args:
        df: DataFrame of samples.
        image_base_path: Root folder containing one sub-folder per doc_id.
        prompt_template: Optional prompt containing ``<caption>`` and
            ``<question>`` placeholders; when omitted, the notebook's standard
            LLaVA prompt is used.

    Returns:
        A new DataFrame with one row per sample holding q_id, question,
        true_answer, generated_answer and every metric from ``compute_metrics``.
    """
    if prompt_template is None:
        prompt_template = (
            "<image>\n Caption: <caption> Please provide a brief answer to the following "
            "question after looking into the input image and caption. "
            "Question: <question>.\nASSISTANT:"
        )

    results = []
    for idx, row in df.iterrows():
        question = row['question']
        true_answer = row['answer']

        # 'reference_figure' holds a single file name, so build one path rather
        # than iterating over it (which would walk the string character by character).
        image_path = os.path.join(image_base_path, row['doc_id'], row['reference_figure'])

        # Generate using LLaVA; argument order is (image_path, caption, question, prompt).
        pred_answer = generate_answer_llava(image_path, row['caption'], question, prompt_template)

        metrics = compute_metrics(pred_answer, true_answer)
        results.append({
            "q_id": row['q_id'],
            "question": question,
            "true_answer": true_answer,
            "generated_answer": pred_answer,
            **metrics
        })

    return pd.DataFrame(results)

### Simple Example Run

In [None]:
# Root folder of the SPIQA test-B images; ends with "/" because paths are built by string concatenation.
IMG_FOLDER_PATH = "../../../../Data/spiqa/test-B/Images/SPIQA_testB_Images/SPIQA_testB_Images/"

In [23]:
# Single hand-written sample: caption, question and the matching figure file.
caption = "Figure 4. Visualizations of the preferred inputs for different class units on layer fc8..."
question = "How many hyperparameter combinations were used for the random hyperparameter search?"
image_path = IMG_FOLDER_PATH+"1b5a24639fa80056d1a17b15f6997d10e76cc731/7-Figure4-1.png"

# Prompt template; <caption> and <question> are replaced inside generate_answer_llava.
_PROMPT = """<image>\n Caption: <caption> Please provide a brief answer to the following question after looking into the input image and caption. Question: <question>.\nASSISTANT:"""

answer = generate_answer_llava(image_path, caption, question, _PROMPT)
print("Answer:", answer)


Answer: The text in the image is not clear enough to discern the number of hyperparameter combinations that were used for the random hyperparameter search.


In [19]:
# Same hand-written sample, sent through the LLaVA generator.
image_path = IMG_FOLDER_PATH+"1b5a24639fa80056d1a17b15f6997d10e76cc731/7-Figure4-1.png"
caption = "Figure 4. Visualizations of the preferred inputs for different class units on layer fc8..."
question = "How many hyperparameter combinations were used for the random hyperparameter search?"

# generate_answer_llava requires the prompt template as its fourth argument.
print(generate_answer_llava(image_path, caption, question, _PROMPT))

25


In [31]:
# Try the LLaVA generator on the first dataset row.
row = df.iloc[0]
# Figures are stored as <test-A image root>/<doc_id>/<reference_figure>.
image_path = os.path.join("../../../../Data/spiqa/test-A/SPIQA_testA_Images/SPIQA_testA_Images",row['doc_id'], row["reference_figure"])
caption = row["caption"]
question = row["question"]
true_answer = row["answer"]

# LLaVA example
gen_answer = generate_answer_llava(image_path, caption, question, _PROMPT)

# Mistral example
# gen_answer = generate_mistral_answer(caption, question)

print("Question:", question)
print("Generated:", gen_answer)
print("Gold:", true_answer)


Question:  

What is the difference between the original and pre-processed SMD Navigate data? 
Generated: The original SMD Navigate data contains unstructured textual information, which is difficult to process and analyze. On the other hand, the pre-processed SMD Navigate data has been transformed into a structured format that can be easily understood and analyzed. This transformation involves steps such as tokenization, lemmatization, stemming or stopword removal, and converting the text into numerical vectors suitable for machine learning algorithms. By doing so, the pre-processed data becomes more accessible and easier to work with, which facilitates tasks like classification or clustering of the textual content in the dataset.
Gold:  

The pre-processed SMD Navigate data combines all the properties (such as distance, address) of a point of interest (POI) into a single subject with the object being "poi". The original data had separate entries for each property. 


### Total Answer Generation for SPIQA - VisDoMRAG

In [None]:
# Load modified SPIQA DataFrame here:
df = pd.read_csv("../../../../Data/VisDoM-main/spiqa/spiqa.csv")  # or .json, or load however you have it

# Prompt template; <caption> and <question> are replaced inside generate_answer_llava.
_PROMPT = """<image>\n Caption: <caption> Please provide a brief answer to the following question after looking into the input image and caption. Question: <question>.\nASSISTANT:"""

# Path to image folder
image_folder = "../../../../Data/spiqa/test-A/SPIQA_testA_Images/SPIQA_testA_Images"

# Evaluate all rows: one model request per dataset row, with no batching or checkpointing.
results = []
for idx, row in df.iterrows():
    # Figures are stored as <image_folder>/<doc_id>/<reference_figure>.
    image_path = os.path.join(image_folder,row['doc_id'], row["reference_figure"])
    generated = generate_answer_llava(image_path, row["caption"], row["question"],_PROMPT)
    results.append({
        "q_id": row["q_id"],
        "doc_id": row["doc_id"],
        "question": row["question"],
        "true_answer": row["answer"],
        "generated_answer": generated,
        "reference_figure": row["reference_figure"],
        "caption": row["caption"]
    })

# Save to CSV or further process (the file is written to the current working directory).
results_df = pd.DataFrame(results)
results_df.to_csv("llava_generated_answers_spiqa_visdom_vision.csv", index=False)


: 

### Batchwise Generation for LLaVA on SPIQA

In [5]:
from tqdm.auto import tqdm

In [1]:
import pandas as pd
import os
import base64
from openai import OpenAI

# Client for the OpenAI-compatible server on localhost port 1235.
client = OpenAI(base_url="http://127.0.0.1:1235/v1", api_key="lm-studio")

# === Config ===
# Dataset CSV read by the batch cell below.
CSV_PATH = "../../../../Data/VisDoM-main/spiqa/spiqa.csv"
# Directory that receives one CSV file per processed batch.
RESULTS_PATH = "../../../../Data/spiqa_eval/llava_only"
# Image root; figures are looked up as <IMAGE_FOLDER>/<doc_id>/<reference_figure>.
IMAGE_FOLDER = "../../../../Data/spiqa/test-A/SPIQA_testA_Images/SPIQA_testA_Images"


In [2]:

# === Prompt ===
# Template for the LLaVA request; the <caption> and <question> placeholders are
# filled in by generate_answer_llava via str.replace.
_PROMPT = """<image>\n Caption: <caption> Please provide a brief answer to the following question after looking into the input image and caption. Question: <question>.\nASSISTANT:"""

# === Generation Function ===
def generate_answer_llava(image_path, caption, question, prompt_template):
    """Ask the "llava" model a question about one figure, never raising on request errors.

    Args:
        image_path: Path of the image file; its bytes are base64-encoded and sent
            as a ``data:image/png`` URL.
        caption: Text substituted for ``<caption>`` in ``prompt_template``.
        question: Text substituted for ``<question>`` in ``prompt_template``.
        prompt_template: Prompt text containing the two placeholders above.

    Returns:
        The stripped completion text; "[Image not found]" when the image file
        does not exist; or "[Error] <message>" when the request (or reading its
        result) raises any Exception.
    """
    filled_prompt = prompt_template.replace("<caption>", caption).replace("<question>", question)

    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
    except FileNotFoundError:
        # A missing figure is reported in-band so a batch run keeps going.
        return "[Image not found]"
    image_b64 = base64.b64encode(raw_bytes).decode("utf-8")

    user_message = {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "data:image/png;base64," + image_b64}},
            {"type": "text", "text": filled_prompt},
        ],
    }

    try:
        completion = client.chat.completions.create(
            model="llava",
            messages=[user_message],
            temperature=0.7,
            max_tokens=512,
        )
        answer_text = completion.choices[0].message.content
        return answer_text.strip()
    except Exception as err:
        return f"[Error] {err!s}"


In [3]:
# Number of dataset rows handled per run.
BATCH_SIZE = 10
# Row offset of the batch to process.
START_INDEX = 0  # Change this for each run: 0, 10, 20...


In [6]:

# === Load Data ===
df = pd.read_csv(CSV_PATH)
# Clamp to the dataset length so the final batch may be shorter than BATCH_SIZE.
end_index = min(START_INDEX + BATCH_SIZE, len(df))
batch_df = df.iloc[START_INDEX:end_index]

# === Process Batch ===
results = []
# iterrows() is a generator, so tqdm needs an explicit total to show progress and ETA.
for idx, row in tqdm(batch_df.iterrows(), total=len(batch_df)):
    # Figures are stored as <IMAGE_FOLDER>/<doc_id>/<reference_figure>.
    image_path = os.path.join(IMAGE_FOLDER, row['doc_id'], row["reference_figure"])
    # Returns "[Image not found]" or "[Error] ..." instead of raising on failure.
    generated = generate_answer_llava(image_path, row["caption"], row["question"], _PROMPT)
    results.append({
        "q_id": row["q_id"],
        "doc_id": row["doc_id"],
        "question": row["question"],
        "true_answer": row["answer"],
        "generated_answer": generated,
        "reference_figure": row["reference_figure"],
        "caption": row["caption"]
    })


0it [00:00, ?it/s]

In [7]:

# === Save Batch ===
os.makedirs(RESULTS_PATH, exist_ok=True)
# The file name encodes the processed row range [START_INDEX, end_index).
batch_file = os.path.join(RESULTS_PATH,f"llava_generated_batch_{START_INDEX}_{end_index}.csv")
pd.DataFrame(results).to_csv(batch_file, index=False)
print(f"✅ Batch saved to {batch_file}")


✅ Batch saved to ../../../../Data/spiqa_eval/llava_only\llava_generated_batch_0_10.csv


In [None]:
# 1st batch  - 82m 35s - llava only