In [2]:
import torch
from torch import nn
device_count = torch.cuda.device_count()
print(f"Number of GPUs available: {device_count}")

Number of GPUs available: 2


In [3]:
import json
from transformers import pipeline, AutoTokenizer, LlavaForConditionalGeneration, AutoProcessor, AutoModelForImageTextToText
from PIL import Image
import accelerate

In [4]:
# Function to load the pipeline
def get_pipeline():
    return pipeline(task="image-text-to-text", model="bczhou/tiny-llava-v1-hf", device_map="auto")

In [5]:
def format_input(image_path, question):
    return f"<image> {image_path} </image>\n<user> {question} </user>\n<assistant>"

In [6]:
# Load JSON file
input_file = "/home/g2/ChartQA/ChartQA Dataset/test/test_human.json"  # Replace with your input JSON file path
output_file = "output.json"  # File to save generated answers

In [7]:
from transformers import pipeline

pipe = pipeline(task="image-text-to-text", model="Salesforce/blip-image-captioning-base")
pipe("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png", text="A photo of")

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


[{'input_text': 'A photo of', 'generated_text': 'A photo of two birds'}]

In [15]:
import torch
from transformers import pipeline, AutoTokenizer
from PIL import Image
import json

# Function to load the pipeline for MiniLava
def get_mini_lava_pipeline():
    return pipeline(task="image-text-to-text", model="bczhou/mini-lava-v1-hf", device_map="auto")

# Initialize pipeline and tracking variables
tokenizer = AutoTokenizer.from_pretrained("bczhou/mini-lava-v1-hf")  # Change to MiniLava model
pipe = get_mini_lava_pipeline()
previous_file_name = None

# Load JSON file
input_file = "/home/g2/ChartQA/ChartQA Dataset/test/test_human.json"  # Replace with your input JSON file path
output_file = "output.json"  # File to save generated answers

with open(input_file, "r") as f:
    data = json.load(f)

# List to store results
results = []

# Process each entry in the JSON file
for entry in data:
    file_name = entry["imgname"]
    file_path = "/home/g2/ChartQA/ChartQA Dataset/test/png/{}".format(file_name)
    question = entry["query"]
    expected_answer = entry["label"]

    print(type(question))

    # Reload pipeline if the file name changes
    if file_name != previous_file_name:
        print(f"Loading new image: {file_name}")
        pipe = get_mini_lava_pipeline()  # Reload MiniLava pipeline
        previous_file_name = file_name  # Update tracking variable

    # Open the image and verify it's valid
    try:
        image = Image.open(file_path)
        width, height = image.size
        # Validate image dimensions (width and height must not be None)
        if width is None or height is None:
            print(f"Invalid image dimensions for {file_path}")
            continue  # Skip this image if dimensions are invalid
    except (IOError, SyntaxError) as e:
        print(f"Error loading image {file_path}: {e}")
        continue  # Skip this image and move to the next one

    # Tie weights before using the model for inference (if required by MiniLava)
    try:
        model = pipe.model
        model.tie_weights()  # Tie the weights explicitly before inference

        # Use mixed precision to save memory
        with torch.no_grad():
            # Pass the image to the pipeline
            output = pipe(image, question)

        # Extract the generated answer
        generated_answer = output if isinstance(output, str) else output[0]["generated_text"]

        # Store the result
        results.append({
            "imgname": file_name,
            "query": question,
            "label": expected_answer,
            "generated_answer": generated_answer
        })

        # Print results
        print(f"Question: {question}")
        print(f"Expected Answer: {expected_answer}")
        print(f"Generated Answer: {generated_answer}")
        print("-" * 50)

    except Exception as e:
        print(f"Error during pipeline processing for image {file_name}: {e}")
        continue  # Skip this image

    # Clear memory after each inference
    torch.cuda.empty_cache()

# Save results to output JSON
with open(output_file, "w") as f:
    json.dump(results, f, indent=4)

print(f"Results saved to {output_file}")


OSError: bczhou/mini-lava-v1-hf is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`