In [1]:
import os
import torch
from PIL import Image
from tqdm import tqdm
from transformers import Blip2Processor, Blip2ForConditionalGeneration, BitsAndBytesConfig
from pythainlp.translate import Translate
import pandas as pd

In [None]:
# PyThaiNLP translator used to turn the English captions into Thai
# ('en' is the source language, 'th' the target). use_gpu=True requests GPU
# execution for the translation model.
translate_model = Translate('en', 'th', use_gpu=True)

In [None]:
# Single source of truth for the checkpoint so the processor and the model
# can never be loaded from two different ids by accident.
BLIP2_CHECKPOINT = "Salesforce/blip2-opt-6.7b-coco"

# Pre-/post-processor (image transforms + tokenizer) paired with the checkpoint.
processor = Blip2Processor.from_pretrained(BLIP2_CHECKPOINT)

# 4-bit NF4 weight quantization, no double quantization.
# bnb_4bit_compute_dtype is set explicitly to float16 so the de-quantized
# matmuls run in the same dtype as the float16 inputs / torch_dtype below;
# left at its float32 default, bitsandbytes warns about (and suffers from)
# slow inference when fed float16 activations.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantized BLIP-2 model directly onto the GPU; the non-quantized
# modules are kept in float16.
model = Blip2ForConditionalGeneration.from_pretrained(
    BLIP2_CHECKPOINT,
    quantization_config=bnb_config,
    device_map='cuda',
    torch_dtype=torch.float16,
)

In [None]:
def process_image(image_path):
    """Caption one image with BLIP-2 and translate the caption to Thai.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        A ``(image_id, caption_th)`` tuple, where ``image_id`` is the base
        name of ``image_path`` and ``caption_th`` is the translated caption.
        If any step fails, the error is printed and ``(None, None)`` is
        returned instead of raising, so one bad file does not abort a batch.
    """
    try:
        image_id = os.path.basename(image_path)

        # Open inside a context manager so the file handle is released
        # deterministically; convert() returns an independent in-memory image
        # that remains valid after the file is closed.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        # Send the inputs to wherever the model actually lives rather than
        # assuming the literal device string "cuda".
        inputs = processor(images=image, return_tensors="pt").to(model.device, torch.float16)
        outputs = model.generate(**inputs)

        # Decoded text can carry leading/trailing whitespace or a newline;
        # strip it so it reaches neither the translator nor the CSV.
        caption_en = processor.decode(outputs[0], skip_special_tokens=True).strip()
        caption_th = translate_model.translate(caption_en)

        return image_id, caption_th

    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller skip it.
        print(f"Error processing image {image_path}: {e}")
        return None, None

In [None]:
def process_images_in_folder(folder_path):
    """Caption every image file located directly inside ``folder_path``.

    Files are selected by extension (.jpg, .jpeg, .png, matched
    case-insensitively) and processed in sorted file-name order so that
    repeated runs produce rows in the same order.

    Args:
        folder_path: Directory to scan; sub-directories are not descended into.

    Returns:
        A list of ``(image_id, caption)`` tuples, one per image for which
        ``process_image`` returned a non-empty id and caption. Images that
        failed to process are skipped.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')

    # os.listdir() returns entries in arbitrary order, so sort for a
    # reproducible result; lower-casing the name also picks up files such as
    # "IMG_001.JPG" that a case-sensitive check would silently ignore.
    image_paths = [
        os.path.join(folder_path, f)
        for f in sorted(os.listdir(folder_path))
        if f.lower().endswith(image_extensions)
    ]
    results = []

    for image_path in tqdm(image_paths):
        image_id, caption = process_image(image_path)
        # process_image signals failure with (None, None); drop those rows.
        if image_id and caption:
            results.append((image_id, caption))

    return results

In [None]:
# Driver cell: caption every image in the input folder and write the
# (image_id, caption) pairs to submission.csv in the working directory.
if __name__ == '__main__':

    # Input directory; judging by the path, the COCO 2017 test split as
    # mounted on Kaggle.
    path_img = "/kaggle/input/coco-2017-dataset/coco2017/test2017"
    results = process_images_in_folder(path_img)

    # Save the results to a CSV file (index=False keeps the row index out of it)
    submission = pd.DataFrame(results, columns=['image_id', 'caption'])
    submission.to_csv('submission.csv', index=False)
