In [8]:
import os
import io
import torch
import boto3
import sagemaker
import pandas as pd
import datetime as dt
from PIL import Image
from io import BytesIO
from transformers import pipeline
from transformers import BlipProcessor, BlipForConditionalGeneration

In [2]:

# S3 client shared by the listing, download and upload steps of this notebook.
s3 = boto3.client(service_name="s3")

# Bucket that holds the dataset, and the key prefix the images are listed under.
bucket_name = "sagemaker-studio-619071335465-8h7owh9eftx"
main_image_dir = "image datasets/"


def get_all_images(bucket, prefix, extensions=('.jpg',), client=None):
    """Return the keys of all image objects stored under an S3 prefix.

    Args:
        bucket: Name of the S3 bucket to list.
        prefix: Key prefix to list under.
        extensions: File suffix, or iterable of suffixes (with leading dot),
            that count as images. Matching is case-insensitive, so ``.JPG``
            is accepted alongside ``.jpg``.
        client: S3 client to list with. Defaults to the notebook-level ``s3``
            client when omitted.

    Returns:
        list[str]: Matching object keys, in the order the listing yields them.
    """
    s3_client = s3 if client is None else client

    # Accept a single suffix string as well as an iterable of suffixes.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # The paginator follows continuation tokens for us, replacing the manual
    # IsTruncated / NextContinuationToken loop.
    paginator = s3_client.get_paginator('list_objects_v2')

    image_keys = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # 'Contents' may be missing from a page, so default to an empty list.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if key.lower().endswith(suffixes):
                image_keys.append(key)

    return image_keys


image_paths = get_all_images(bucket_name, main_image_dir)


# Report how many images were found and preview the first few keys.
if image_paths:
    print(f"Total images found: {len(image_paths)}")
    print("Example image paths:")
    print(*image_paths[:5], sep="\n")

Total images found: 5807
Example image paths:
image datasets/fastfood images/Fastfood Images/Highly Rated/-0CTxYw82SWnJfzPOBBIOQ.jpg
image datasets/fastfood images/Fastfood Images/Highly Rated/-0fa0mOVKrJW90MFFxVImg.jpg
image datasets/fastfood images/Fastfood Images/Highly Rated/-4PTjFxdyR-tkxDhVeuAfQ.jpg
image datasets/fastfood images/Fastfood Images/Highly Rated/-6wM47iMcw_wjW3gZYaz-g.jpg
image datasets/fastfood images/Fastfood Images/Highly Rated/-7Z1mIroHNK6IJKHMLfnJg.jpg


In [10]:
import time


# Captioning models to compare. "type" selects how the model is loaded and
# called; "model" is the checkpoint name passed to transformers.
models_to_test = [
    {"type": "pipeline", "model": "nlpconnect/vit-gpt2-image-captioning"},
    {"type": "blip", "model": "Salesforce/blip-image-captioning-large"},
    {"type": "blip", "model": "Salesforce/blip-image-captioning-base"}
]


def _load_captioning_model(model_info, use_gpu):
    """Instantiate one captioning model described by a ``models_to_test`` entry.

    Args:
        model_info: Dict with a ``"type"`` (``"pipeline"`` or ``"blip"``) and
            a ``"model"`` checkpoint name.
        use_gpu: Whether to place the model on the GPU; otherwise it stays on
            the CPU.

    Returns:
        dict: ``type``, ``model_name`` and ``model`` entries, plus a
        ``processor`` entry for BLIP models.

    Raises:
        ValueError: If ``model_info["type"]`` is not a supported type.
    """
    model_type = model_info["type"]
    model_name = model_info["model"]

    if model_type == "pipeline":
        return {
            "type": "pipeline",
            "model_name": model_name,
            # Pipelines take a device index: 0 is the first GPU, -1 is the CPU.
            # Without it the pipeline stays on the CPU even when a GPU exists.
            "model": pipeline(
                "image-to-text",
                model=model_name,
                device=0 if use_gpu else -1,
            ),
        }

    if model_type == "blip":
        processor = BlipProcessor.from_pretrained(model_name)
        model = BlipForConditionalGeneration.from_pretrained(model_name).to(
            "cuda" if use_gpu else "cpu"
        )
        return {
            "type": "blip",
            "model_name": model_name,
            "processor": processor,
            "model": model,
        }

    # Fail loudly rather than silently dropping a misconfigured entry.
    raise ValueError(f"Unsupported model type: {model_type!r}")


# Use the GPU for every model when one is available; fall back to the CPU
# instead of crashing on a hard-coded "cuda" device.
use_gpu = torch.cuda.is_available()

loaded_models = [_load_captioning_model(info, use_gpu) for info in models_to_test]


# Accumulator for caption rows; caption_image_with_models_s3 appends to it.
results = []


def load_image_from_s3(s3_key):
    """Download an object from ``bucket_name`` and decode it as an RGB image.

    Args:
        s3_key: Key of the image object inside the bucket.

    Returns:
        The image opened with PIL and converted to ``"RGB"`` mode.
    """
    payload = s3.get_object(Bucket=bucket_name, Key=s3_key)['Body'].read()
    return Image.open(io.BytesIO(payload)).convert("RGB")

def caption_image_with_models_s3(s3_key):
    """Caption one S3 image with every loaded model and record the results.

    One row per entry in ``loaded_models`` is added to the module-level
    ``results`` list, with keys ``photo_id`` (base name of the S3 key),
    ``model_name`` and ``caption``. Rows are recorded only after every model
    has produced a caption for the image.

    Args:
        s3_key: S3 key of the image to caption.

    Returns:
        list[dict]: The rows recorded for this image.

    Raises:
        ValueError: If an entry in ``loaded_models`` has an unsupported
            ``type``.
    """
    raw_image = load_image_from_s3(s3_key)
    image_id = os.path.basename(s3_key)

    rows = []
    for model_data in loaded_models:
        model_type = model_data["type"]

        if model_type == "pipeline":
            result = model_data["model"](raw_image)
            caption_text = result[0]['generated_text']
        elif model_type == "blip":
            processor = model_data["processor"]
            model = model_data["model"]

            # Move the inputs to whatever device the model lives on rather
            # than assuming "cuda".
            inputs = processor(raw_image, return_tensors="pt").to(model.device)
            out = model.generate(**inputs)
            caption_text = processor.decode(out[0], skip_special_tokens=True)
        else:
            # Without this branch caption_text would be unbound, or would
            # silently reuse the previous model's caption.
            raise ValueError(f"Unsupported model type: {model_type!r}")

        rows.append({
            "photo_id": image_id,
            "model_name": model_data["model_name"],
            "caption": caption_text
        })

    results.extend(rows)
    return rows


# Print a progress line every this many images.
PROGRESS_EVERY = 100

# perf_counter() is monotonic, so the reported elapsed time cannot jump if the
# system clock is adjusted while this long-running loop is executing.
start_time = time.perf_counter()

for i, s3_key in enumerate(image_paths, start=1):
    caption_image_with_models_s3(s3_key)

    if i % PROGRESS_EVERY == 0:
        elapsed_time = time.perf_counter() - start_time
        print(f"Processed {i} images so far. Elapsed time: {elapsed_time:.2f} seconds.")


# Turn the accumulated caption rows into a table.
df_results = pd.DataFrame(results)



Processed 100 images so far. Elapsed time: 146.15 seconds.
Processed 200 images so far. Elapsed time: 288.40 seconds.
Processed 300 images so far. Elapsed time: 431.32 seconds.
Processed 400 images so far. Elapsed time: 575.35 seconds.
Processed 500 images so far. Elapsed time: 721.07 seconds.
Processed 600 images so far. Elapsed time: 866.12 seconds.
Processed 700 images so far. Elapsed time: 1012.02 seconds.
Processed 800 images so far. Elapsed time: 1155.97 seconds.
Processed 900 images so far. Elapsed time: 1299.35 seconds.
Processed 1000 images so far. Elapsed time: 1441.80 seconds.
Processed 1100 images so far. Elapsed time: 1586.78 seconds.
Processed 1200 images so far. Elapsed time: 1731.87 seconds.
Processed 1300 images so far. Elapsed time: 1876.10 seconds.
Processed 1400 images so far. Elapsed time: 2019.75 seconds.
Processed 1500 images so far. Elapsed time: 2163.18 seconds.
Processed 1600 images so far. Elapsed time: 2305.31 seconds.
Processed 1700 images so far. Elapsed t

In [11]:
# Size check as (rows, columns); each captioned image adds one row per loaded model.
df_results.shape

(17421, 3)

In [12]:
# Preview the first rows: photo_id, model_name and the generated caption.
df_results.head()

Unnamed: 0,photo_id,model_name,caption
0,-0CTxYw82SWnJfzPOBBIOQ.jpg,nlpconnect/vit-gpt2-image-captioning,a refrigerator with a picture of a pizza on it
1,-0CTxYw82SWnJfzPOBBIOQ.jpg,Salesforce/blip-image-captioning-large,there is a large salad bar with a bunch of veg...
2,-0CTxYw82SWnJfzPOBBIOQ.jpg,Salesforce/blip-image-captioning-base,a kitchen with a large sign above it
3,-0fa0mOVKrJW90MFFxVImg.jpg,nlpconnect/vit-gpt2-image-captioning,a hot dog with mustard and ketchup on a bun
4,-0fa0mOVKrJW90MFFxVImg.jpg,Salesforce/blip-image-captioning-large,araffe with a pickle and a side of french fries


In [13]:
# Write the captions to a local CSV, then copy that file into the bucket.
df_results.to_csv("final_image_to_text_results.csv", index=False)
s3.upload_file(
    Filename='final_image_to_text_results.csv',
    Bucket=bucket_name,
    Key='training/image classification/final_image_to_text_results.csv',
)
print("All images processed. Results saved to 'final_image_to_text_results.csv'.")

All images processed. Results saved to 'final_image_to_text_results.csv'.
