In [2]:
from io import BytesIO

import pandas as pd
import requests
from PIL import Image
from tqdm import tqdm
from transformers import AutoProcessor, AutoModelForCausalLM

# Checkpoint id shared by the processor and the captioning model
MODEL_ID = "microsoft/git-base-coco"

# Input CSV and the name of its column that holds the image URLs
csv_file = 'sampled_products_5_each_img_front.csv'  # Adjust to your file path
url_column = 'ItemDocumentValue'

# Load the processor and the model from the same checkpoint
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file)

# Accumulates one generated caption (or error message) per processed URL
captions = []

# Process each image URL with a progress bar.
# The request headers are identical for every download, so build them once
# instead of on every iteration.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Upper bound in seconds for connecting and for each read, so that a single
# unresponsive host cannot stall the whole run.
request_timeout = 30

for url in tqdm(df[url_column], desc="Processing images"):
    try:
        # Download the full body. Unlike response.raw, response.content has
        # any Content-Encoding (gzip/deflate) already decoded, and reading it
        # completely lets the connection be released rather than left open
        # as happens with an unclosed stream=True response.
        response = requests.get(url, headers=headers, timeout=request_timeout)
        response.raise_for_status()  # Ensure the request was successful

        # Decode the image from memory and normalise it to RGB before
        # handing it to the processor.
        image = Image.open(BytesIO(response.content)).convert("RGB")
        pixel_values = processor(images=image, return_tensors="pt").pixel_values

        # Generate caption
        generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
        generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # Deliberately broad: any failure (download, decoding, generation) is
        # recorded in place so that captions keeps exactly one entry per row.
        captions.append(f"Error processing image: {e}")
    else:
        # Append the caption to the list
        captions.append(generated_caption)

# Keep only the SKU and URL columns and attach the captions as a third
# column, one caption per row in the original row order
output_df = df[['ItemSku', 'ItemDocumentValue']].assign(generated_captions=captions)

# Write the result to a CSV file, leaving out the DataFrame index
output_df.to_csv('MS-git_coco_output_captions.csv', index=False)

Processing images: 100%|███████████████████████████████████████████████████████████████| 40/40 [01:24<00:00,  2.11s/it]
