In [None]:
!pip install transformers fastapi uvicorn pyngrok
!pip install "git+https://github.com/huggingface/transformers.git"  # for latest BLIP2 support


In [None]:
#  load vision model
import os
import tempfile
import threading

import torch
import uvicorn
from PIL import Image
from transformers import Blip2ForConditionalGeneration, Blip2Processor
from transformers import BlipProcessor, BlipForConditionalGeneration

# Checkpoint to serve. This is a BLIP-2 checkpoint, so it must be loaded with
# the Blip2* classes; the BLIP-1 classes (BlipProcessor /
# BlipForConditionalGeneration) expect a different architecture and tokenizer.
MODEL_ID = "Salesforce/blip2-opt-2.7b"

device = "cuda" if torch.cuda.is_available() else "cpu"
# float16 on GPU, float32 on CPU.
dtype = torch.float16 if device == "cuda" else torch.float32

processor = Blip2Processor.from_pretrained(MODEL_ID)
model = Blip2ForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype=dtype)
model.to(device)

def ask_question(image_path, question):
    """Answer a free-text question about the image at ``image_path``.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (a path or a binary
            file object).
        question: Text passed to the processor together with the image.

    Returns:
        str: The first generated sequence decoded with special tokens
        removed and surrounding whitespace stripped.

    Relies on the module-level ``processor``, ``model`` and ``device``.
    """
    target_dtype = torch.float16 if device == "cuda" else torch.float32

    # The context manager closes the source file deterministically; convert()
    # returns an independent in-memory image, so it stays usable afterwards.
    with Image.open(image_path) as source:
        image = source.convert('RGB')

    # Keyword arguments make the image/text pairing explicit instead of
    # depending on the processor's positional parameter order.
    inputs = processor(images=image, text=question, return_tensors="pt").to(device, target_dtype)
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True).strip()



In [None]:
# Build the FastAPI app
from fastapi import FastAPI, File, UploadFile, Form
import shutil

app = FastAPI()


@app.post("/ask/")
def ask(file: UploadFile = File(...), question: str = Form(...)):
    """Answer ``question`` about the uploaded image.

    Args:
        file: The uploaded image (multipart form field ``file``).
        question: The question text (form field ``question``).

    Returns:
        dict: ``{"answer": <text returned by ask_question>}``.

    Declared with plain ``def`` rather than ``async def``: the body is
    blocking (disk write + model inference), and FastAPI runs plain ``def``
    handlers in a worker thread so the event loop is not stalled.
    """
    # A unique temporary file per request replaces the fixed "temp.jpg", so
    # overlapping requests cannot overwrite each other's upload.
    # delete=False because the file is reopened by path after this handle closes.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        shutil.copyfileobj(file.file, tmp)
        tmp_path = tmp.name
    try:
        answer = ask_question(tmp_path, question)
    finally:
        # Always remove the upload, including when inference raises.
        os.remove(tmp_path)
    return {"answer": answer}


In [None]:
# tunnel with ngrok
from pyngrok import ngrok

# ngrok.connect() returns a tunnel object in current pyngrok releases; keep it
# for later management and expose the plain URL string separately so the
# printed message shows a usable address rather than the object's repr.
# NOTE(review): ngrok generally requires an auth token to be configured
# beforehand — supply it via the environment, never hardcode it here.
tunnel = ngrok.connect(8000)
public_url = tunnel.public_url
print("Your API is live at:", public_url)

In [None]:
# Run API server
# `!uvicorn app:app` would launch a separate process that tries to import a
# module named `app`; no such module exists because the FastAPI instance lives
# only in this kernel. Serve the in-memory `app` object instead, on a
# background thread so this cell returns and later cells can still run.
server = uvicorn.Server(uvicorn.Config(app, host="0.0.0.0", port=8000))
server_thread = threading.Thread(target=server.run, daemon=True)
server_thread.start()
# To stop the server later: server.should_exit = True


---

In [None]:
# Calling the API from a client (replace abc123.ngrok.io with the URL printed by the ngrok cell)
# curl -X POST https://abc123.ngrok.io/ask/ \
#   -F "file=@/path/to/image.jpg" \
#   -F "question=What is the person doing?"


# or, equivalently, from Python:
import requests

# Placeholder host: substitute the public URL reported by the ngrok tunnel.
url = "https://abc123.ngrok.io/ask/"
data = {'question': "What is happening in this picture?"}

# Open the image in a context manager so the handle is closed once the
# request has been sent.
with open('image.jpg', 'rb') as image_file:
    files = {'file': image_file}
    # Timeout prevents the cell from hanging forever if the tunnel is down;
    # it is generous because model inference can be slow.
    res = requests.post(url, files=files, data=data, timeout=120)

# Surface HTTP errors explicitly instead of failing later inside .json().
res.raise_for_status()
print(res.json())

