In [1]:
import os
import time
from tqdm import tqdm
from image_generator import LlavaGenerator
from utils import load_data, save_batch_output, log_time
from tqdm.auto import tqdm

In [2]:
# Config
BATCH_SIZE = 10  # rows per batch; the batch loop steps through the table in strides of this size
INPUT_CSV = "../Data/VisDoM-main/spiqa/spiqa.csv"  # the batch loop reads the 'old_question', 'doc_id' and 'reference_figure' columns
IMAGE_FOLDER = "../Data/spiqa/test-A/SPIQA_testA_Images/SPIQA_testA_Images"     # image root; files are looked up as <IMAGE_FOLDER>/<doc_id>/<reference_figure>
OUTPUT_DIR = "../Eval_outputs/SPIQA/vision_only"  # batch outputs are written here and scanned to find batches that are already done
# Create the output directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)


In [6]:
# Base URL of the model server, taken from the LOCAL_URL environment variable.
BASE_URL = os.getenv('LOCAL_URL')
if not BASE_URL:
    # os.getenv returns None when the variable is unset. Say so here rather than
    # leaving it to be discovered later, when the generator is first used.
    print("Warning: environment variable LOCAL_URL is not set; BASE_URL is empty.")

In [7]:
# API key passed to LlavaGenerator.
# NOTE(review): "lm-studio" looks like a placeholder for a local server rather than a
# real secret — confirm, and load a real key from the environment if a hosted API is used.
API_KEY = "lm-studio"

In [8]:
# Prompt template for the vision-only run.
# It contains two markers, "<image>" and "<question>"; presumably LlavaGenerator
# fills them in for each row — TODO confirm in image_generator.
vision_prompt = "\n".join([
    "<image>",
    "",
    "Use the image provided to answer the question as accurately as possible.",
    "",
    "Question: <question>",
    "",
    "Answer:",
])

In [9]:
# Build the answer generator from the endpoint, key and prompt template defined in
# the earlier cells, then read the input table that the batch loop iterates over.
generator = LlavaGenerator(
    base_url=BASE_URL,
    api_key=API_KEY,
    prompt_template=vision_prompt,
)
df = load_data(INPUT_CSV)

In [10]:
# Detect completed batches
# Saved files are named "batch_<start>_<timestamp>.csv", where <start> is the row
# offset the batch loop uses as its key. Collecting those offsets lets a re-run
# skip work that is already on disk.
existing_batches = set()
for fname in os.listdir(OUTPUT_DIR):
    if not (fname.startswith("batch_") and fname.endswith(".csv")):
        continue
    # Strip the prefix and extension before splitting, so a name with no timestamp
    # part ("batch_10.csv") yields "10" rather than "10.csv".
    start_token = fname[len("batch_"):-len(".csv")].split("_")[0]
    try:
        existing_batches.add(int(start_token))
    except ValueError:
        # A stray file such as "batch_notes.csv" must not abort resume detection.
        print(f"Ignoring unrecognised file in {OUTPUT_DIR}: {fname}")
existing_batches

{0,
 10,
 20,
 30,
 40,
 50,
 60,
 70,
 80,
 90,
 100,
 110,
 120,
 130,
 140,
 150,
 160,
 170,
 180,
 190,
 200,
 210,
 220,
 230,
 240,
 250,
 260,
 270,
 280}

In [13]:
# Batch processing
# Walk the table in strides of BATCH_SIZE. Each batch is keyed by its starting row
# offset `i`; offsets already present in `existing_batches` are skipped so an
# interrupted run can be resumed.
for i in tqdm(range(0, len(df), BATCH_SIZE)):
    if i in existing_batches:
        print(f"Skipping batch {i} (already processed)")
        continue

    batch_df = df.iloc[i:i + BATCH_SIZE]
    results = []
    failed_rows = []  # index labels of rows whose generation raised
    start = time.time()

    for idx, row in tqdm(batch_df.iterrows(), total=len(batch_df), desc=f"Batch {i}"):
        try:
            image_path = os.path.join(IMAGE_FOLDER, row["doc_id"], row["reference_figure"])
            question = row["old_question"]
            answer = generator.generate_answer(image_path, question)
        except Exception as e:
            # Best-effort per row: one bad row must not abort the whole batch,
            # but remember it so the batch summary below can report it.
            print(f"Error processing row {idx}: {e}")
            failed_rows.append(idx)
        else:
            results.append({
                "index": idx,
                "question": question,
                "image": image_path,
                "response": answer
            })

    duration = time.time() - start

    if failed_rows:
        print(f"Batch {i}: {len(failed_rows)}/{len(batch_df)} rows failed: {failed_rows}")

    if not results:
        # Saving an empty batch would create a batch_<i>_*.csv file, and the next run
        # would then skip this batch as "already processed" without ever producing
        # an answer. Leave nothing on disk so the batch is retried.
        print(f"Batch {i}: no rows succeeded; not saving so the batch is retried on the next run")
        continue

    save_batch_output(results, OUTPUT_DIR, i)
    log_time(OUTPUT_DIR, i, duration, prefix="vision_only")

  0%|          | 0/59 [00:00<?, ?it/s]

Skipping batch 0 (already processed)
Skipping batch 10 (already processed)
Skipping batch 20 (already processed)
Skipping batch 30 (already processed)
Skipping batch 40 (already processed)
Skipping batch 50 (already processed)
Skipping batch 60 (already processed)
Skipping batch 70 (already processed)
Skipping batch 80 (already processed)
Skipping batch 90 (already processed)
Skipping batch 100 (already processed)
Skipping batch 110 (already processed)
Skipping batch 120 (already processed)
Skipping batch 130 (already processed)
Skipping batch 140 (already processed)
Skipping batch 150 (already processed)
Skipping batch 160 (already processed)
Skipping batch 170 (already processed)
Skipping batch 180 (already processed)
Skipping batch 190 (already processed)
Skipping batch 200 (already processed)
Skipping batch 210 (already processed)
Skipping batch 220 (already processed)
Skipping batch 230 (already processed)
Skipping batch 240 (already processed)
Skipping batch 250 (already processe

Batch 290:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_290_20250422_090943.csv


Batch 300:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_300_20250422_092112.csv


Batch 310:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_310_20250422_093259.csv


Batch 320:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_320_20250422_094521.csv


Batch 330:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_330_20250422_095724.csv


Batch 340:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_340_20250422_100544.csv


Batch 350:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_350_20250422_101347.csv


Batch 360:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_360_20250422_102247.csv


Batch 370:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_370_20250422_103158.csv


Batch 380:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_380_20250422_104005.csv


Batch 390:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_390_20250422_104741.csv


Batch 400:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_400_20250422_105552.csv


Batch 410:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_410_20250422_110355.csv


Batch 420:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_420_20250422_111210.csv


Batch 430:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_430_20250422_112156.csv


Batch 440:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_440_20250422_113205.csv


Batch 450:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_450_20250422_114028.csv


Batch 460:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_460_20250422_114917.csv


Batch 470:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_470_20250422_115713.csv


Batch 480:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_480_20250422_120522.csv


Batch 490:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_490_20250422_121431.csv


Batch 500:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_500_20250422_122327.csv


Batch 510:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_510_20250422_123337.csv


Batch 520:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_520_20250422_124910.csv


Batch 530:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_530_20250422_132534.csv


Batch 540:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_540_20250422_133702.csv


Batch 550:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_550_20250422_134707.csv


Batch 560:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_560_20250422_135513.csv


Batch 570:   0%|          | 0/10 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_570_20250422_140454.csv


Batch 580:   0%|          | 0/6 [00:00<?, ?it/s]

File saved to:  ../Eval_outputs/SPIQA/vision_only/batch_580_20250422_141201.csv
