In [None]:
!pip install wandb

In [None]:
# Authenticate with Weights & Biases from inside the kernel.
# The CLI form (`!wandb login`) runs in a subprocess whose API-key prompt
# generally cannot be answered from a notebook cell; `wandb.login()` prompts
# through the notebook itself and reuses already-stored credentials if present.
import wandb

wandb.login()

In [None]:
import wandb

# Metadata for this experiment, named up front so the init call stays short.
run_tags = ["pixtral", "prompting", "cardd"]
run_config = {"model": "Pixtral12B - 2", "prompting": "FT"}

# Start the wandb run; the `wandb.log` / `wandb.finish` calls further down
# act on the run opened here.
wandb.init(
    project="Prompting-Experiments",
    group="prompting-experiments",
    name="Cardd- Pixtral12B - 2",
    notes="FT",
    tags=run_tags,
    config=run_config,
)

In [None]:
import itertools
from inf_util import evaluate_batch
import pandas as pd
from load_dataframe import create_image_caption_dataset
import torch
import time
import os

# Empty the CUDA cache before measuring, then confirm on the console.
torch.cuda.empty_cache()
print("✅ Cleared GPU cache")

# Baseline VRAM figures, converted from bytes to GB (1024**3 bytes each).
bytes_per_gb = 1024**3
print("📊 Initial VRAM Usage:")
for vram_label, vram_probe in (
    ("Reserved", torch.cuda.memory_reserved),
    ("Allocated", torch.cuda.memory_allocated),
):
    print(f"  {vram_label}: {vram_probe() / bytes_per_gb:.2f} GB")

# Announce the load first; building the dataset may take a moment.
print("🔄 Loading CarDD dataset...")

# Locations of the CarDD test images and of their caption annotations.
captions_json = "/workspace/Test_data/test_set.json"
image_folder = "/workspace/Test_data/test_dataset"

# Build the image/caption dataset (presumably a pandas DataFrame, given the
# `.head()` / `.iloc` usage in this notebook) and preview its first rows.
test_dataset = create_image_caption_dataset(image_folder, captions_json)
print("Dataset head:")
print(test_dataset.head())

# Positions (0-based, used with `.iloc`) of the test images evaluated in this run.
test_indices = [1, 3, 5, 7, 9, 12, 13, 22, 24, 35, 38]

# Reject every out-of-range position up front. Checking each index (instead of
# only the maximum) also catches negative values, which pandas-style `.iloc`
# would resolve from the end of the dataset instead of failing, and keeps an
# empty selection from crashing inside `max()`.
dataset_size = len(test_dataset)
invalid_indices = [i for i in test_indices if not 0 <= i < dataset_size]
if invalid_indices:
    raise ValueError(
        f"Test indices {invalid_indices} are out of range for dataset size {dataset_size}"
    )

# Pair each index with its filename, falling back to a synthetic name when the
# dataset has no "filename" entry. The membership check is done once, not per row.
has_filename = "filename" in test_dataset
image_details = [
    (i, test_dataset.iloc[i]["filename"] if has_filename else f"image_{i}")
    for i in test_indices
]

# Format each line once and reuse it for both the console and the manifest file.
selection_lines = [f"Index: {idx}, Filename: {filename}" for idx, filename in image_details]
for selection_line in selection_lines:
    print(selection_line)

# Explicit UTF-8 so non-ASCII filenames are written the same way on every platform.
with open("selected_images.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{selection_line}\n" for selection_line in selection_lines)
print("✅ Saved selected images to selected_images.txt")

# The single prompt applied to every selected image.
prompt_list = ["Describe this image"]

# One entry per test index; every entry refers to the same `prompt_list` object.
prompts_total_list = [prompt_list] * len(test_indices)

# CarDD has single reference captions
multiple_refs = False

# Run inference with timing
print("\n🔄 Running Inference...")
# Reset the peak counters so the "Peak Allocated" figure below reflects only
# what happens from this point on.
torch.cuda.reset_peak_memory_stats()

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring intervals; time.time() follows the wall clock and can jump when
# the system time is adjusted, which would corrupt the measured duration.
start_time = time.perf_counter()
# `dfs` is indexed and iterated further down; presumably a list of per-run
# result DataFrames (confirm against inf_util.evaluate_batch).
dfs = evaluate_batch(prompts_total_list, test_dataset, test_indices, multiple_refs)
end_time = time.perf_counter()
total_inference_time = end_time - start_time
# Guard against an empty selection so the average never divides by zero.
avg_inference_time = total_inference_time / len(test_indices) if test_indices else 0

# Log inference time
print("\n⏱️ Inference Time:")
print(f"  Total: {total_inference_time:.2f} seconds")
print(f"  Average per image: {avg_inference_time:.2f} seconds")

# Log final VRAM usage (bytes converted to GB via 1024**3)
print("\n📊 Final VRAM Usage:")
print(f"  Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
print(f"  Peak Allocated: {torch.cuda.max_memory_allocated() / 1024**3:.2f} GB")

# Print aggregate and per-sample results for every result frame in `dfs`.
print("\nResults for Run (Prompt: 'Describe this image') ")

# Score columns that may be absent (their average then defaults to 0.0) versus
# bookkeeping columns that are read unconditionally.
optional_metrics = ("CIDEr", "METEOR", "semantic_similarity", "SPICE")
required_stats = ("inference_time", "vram_reserved_gb", "vram_allocated_gb")

for df in dfs:
    # Unwrap list-valued generations to their first element. This is written
    # back onto the frame because the CSV export and the wandb table later in
    # this cell read the same 'generated' column.
    df['generated'] = df['generated'].apply(
        lambda value: value[0] if isinstance(value, list) else value
    )

    # Compute every average before printing anything.
    averages = {
        column: (df[column].mean() if column in df else 0.0)
        for column in optional_metrics
    }
    averages.update((column, df[column].mean()) for column in required_stats)

    print(f"  Average CIDEr: {averages['CIDEr']:.4f}")
    print(f"  Average METEOR: {averages['METEOR']:.4f}")
    print(f"  Average Semantic Similarity: {averages['semantic_similarity']:.4f}")
    print(f"  Average SPICE: {averages['SPICE']:.4f}")
    print(f"  Average Inference Time: {averages['inference_time']:.2f} seconds")
    print(f"  Average VRAM Reserved: {averages['vram_reserved_gb']:.2f} GB")
    print(f"  Average VRAM Allocated: {averages['vram_allocated_gb']:.2f} GB")

    # Per-sample dump so generated captions can be checked against references.
    print("  Sample Captions and Metrics:")
    for _row_label, row in df.iterrows():
        original_idx = int(row['sample_index'])
        sample_report = (
            f"    Index {original_idx}: Generated: '{row['generated']}', Reference: '{row['reference_captions']}', "
            f"Inference Time: {row['inference_time']:.2f} s, VRAM Reserved: {row['vram_reserved_gb']:.2f} GB, "
            f"VRAM Allocated: {row['vram_allocated_gb']:.2f} GB"
        )
        print(sample_report)

# Persist the first result frame to CSV, leaving the row index out of the file.
first_run_results = dfs[0]
first_run_results.to_csv("evaluation_results_run1.csv", index=False)
print("✅ Saved evaluation results to evaluation_results_run1.csv")

# Per-sample wandb table: identifier and captions first, then the metric scores.
table = wandb.Table(columns=["sample_index", "reference", "generated", "semantic_similarity", "spice", "cider"])

# Add one table row per sample of the first result frame.
for _row_label, sample in dfs[0].iterrows():
    cells = [
        str(int(sample['sample_index'])),
        sample['reference_captions'],
        sample['generated'],
    ]
    # Scores that are missing from the results are recorded as 0.0.
    for metric in ("semantic_similarity", "SPICE", "CIDEr"):
        cells.append(sample[metric] if metric in sample else 0.0)
    table.add_data(*cells)

# Attach the finished table to the active run under the "Results" key.
wandb.log({"Results": table})

# Close the wandb run.
wandb.finish()