In [None]:
!pip install gradio

In [None]:
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr

# Pick the compute device: CUDA when a GPU is visible to torch, CPU otherwise
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Load the BLIP processor and captioning model from the same checkpoint,
# then move the model weights onto the selected device
_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT)
model = model.to(device)

# Standard size that input images are resized to, as (width, height)
STANDARD_SIZE = (384, 384)

# Caption an uploaded image with and without a text prompt
def generate_captions(image):
    """Generate a prompted and an unprompted BLIP caption for one image.

    Args:
        image: PIL image delivered by the Gradio image input. Gradio passes
            ``None`` when the user submits without uploading anything.

    Returns:
        A tuple ``(caption_conditional, caption_unconditional)`` of decoded
        caption strings. The conditional caption is seeded with the prompt
        ``"a photography of"``.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of an ``AttributeError`` traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image before requesting captions.")

    # Convert to RGB *before* resizing: Pillow forces nearest-neighbour
    # resampling for palette ("P") and bilevel ("1") images, so resizing
    # first would degrade those inputs.
    image = image.convert("RGB").resize(STANDARD_SIZE)

    # Conditional captioning: the prompt tokens are fed alongside the image
    text = "a photography of"
    inputs = processor(image, text, return_tensors="pt").to(device)
    out_cond = model.generate(**inputs)
    caption_conditional = processor.decode(out_cond[0], skip_special_tokens=True)

    # Unconditional captioning: reuse the already-preprocessed pixel tensor
    # rather than running the image through the processor a second time
    out_uncond = model.generate(pixel_values=inputs["pixel_values"])
    caption_unconditional = processor.decode(out_uncond[0], skip_special_tokens=True)

    return caption_conditional, caption_unconditional

# Gradio UI: one image input sized to STANDARD_SIZE, two caption text outputs
_ui_width, _ui_height = STANDARD_SIZE

_image_input = gr.Image(
    type="pil",
    label="Upload Image",
    image_mode="RGB",
    width=_ui_width,
    height=_ui_height,
)

_caption_outputs = [
    gr.Textbox(label="Conditional Caption (with prompt: 'a photography of')"),
    gr.Textbox(label="Unconditional Caption"),
]

interface = gr.Interface(
    fn=generate_captions,
    inputs=_image_input,
    outputs=_caption_outputs,
    title="Image Captioning with BLIP",
    description="Upload an image to get both conditional and unconditional captions using the BLIP model. All images will be resized to 384x384 pixels.",
)

# Start the Gradio UI only when this file is executed as the main program
if __name__ == "__main__":
    interface.launch()
