In [59]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_core.tools import tool

import base64
import io
import sys
from pathlib import Path
from typing import List, Literal, Optional

from PIL import Image
from pydantic import BaseModel, Field

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.messages import HumanMessage
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

In [60]:
# Read key/value pairs from a .env file into the process environment
# (presumably where the Google API credentials live -- verify against your .env).
load_dotenv()

# Gemini chat model that the image-analysis chain sends the photo to.
chat_model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",
    temperature=0.7,
)

In [61]:
# .env is loaded again here; presumably so this cell can be run on its own.
load_dotenv()

# Hugging Face text-generation endpoint for the hosted model ...
llm = HuggingFaceEndpoint(repo_id="openai/gpt-oss-120b", task="text-generation")

# ... wrapped so it can be used through LangChain's chat-model interface.
model = ChatHuggingFace(llm=llm)

In [62]:
# Structured output schema
class PlaceGuess(BaseModel):
    # Best-effort location guess. Every field is optional and defaults to None,
    # so an unknown or only partially known place can still be represented.
    # NOTE(review): documented with # comments rather than a docstring because
    # pydantic is expected to copy a class docstring into the JSON schema, and
    # that schema is embedded in the prompt via the parser's format instructions.
    name: Optional[str] = None  # name of the place (e.g. a landmark), if any
    city: Optional[str] = None  # city the place is in, if any
    country: Optional[str] = None  # country the place is in, if any


class PlaceAnalysis(BaseModel):
    # Structured result requested from the model for a single photo.
    # NOTE(review): documented with # comments rather than a docstring because
    # pydantic is expected to copy a class docstring into the JSON schema, and
    # that schema is embedded in the prompt via the parser's format instructions.

    # Classification of the photo; restricted to exactly these three literals.
    input_type: Literal["place_photo", "not_a_place", "ambiguous"]
    # Nested location guess (required, but each of its fields may be None).
    place_guess: PlaceGuess
    # Validated to lie in the closed interval [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
    # List of strings; presumably the visual observations backing the guess.
    what_i_see: List[str]
    # List of strings; the prompt asks for these only for a confidently
    # identified place.
    significance: List[str]
    # Free-text string; presumably the natural-language answer shown to the
    # user -- confirm intended use.
    response: str

In [63]:
# JSON output parser tied to the PlaceAnalysis schema; its format instructions
# are injected into the prompt. Note that the value it produces is a plain
# dict, not a PlaceAnalysis instance -- construct the model from the dict
# explicitly when pydantic methods such as model_dump_json() are needed.
parser = JsonOutputParser(pydantic_object=PlaceAnalysis)

In [64]:
#  Image to data URL (base64) helper
def image_to_data_url(image_path: str, max_size: tuple = (1600, 1600), quality: int = 90) -> str:
    """Encode an image file as a base64 JPEG ``data:`` URL.

    The image is converted to RGB, shrunk (aspect ratio preserved) so that it
    is no larger than ``max_size``, re-encoded as JPEG in memory, and returned
    as a data URL string.

    Args:
        image_path: Path to the image file on disk.
        max_size: ``(width, height)`` bound handed to ``Image.thumbnail``.
        quality: JPEG quality handed to ``Image.save``.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    p = Path(image_path)
    if not p.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Image.open() is lazy and keeps the file handle open; the context manager
    # guarantees it is released. convert() returns an independent in-memory
    # copy, so `img` stays usable after the source file is closed.
    with Image.open(p) as src:
        img = src.convert("RGB")

    # thumbnail() resizes in place and never enlarges the image.
    img.thumbnail(max_size)

    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=quality)
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

    return f"data:image/jpeg;base64,{b64}"

In [65]:
# Instruction text sent alongside the image. Its only template variable is
# {format_instructions}, which is filled with the output parser's JSON
# formatting instructions when the message is built.
prompt = PromptTemplate(
    template=(
        "You are a careful image analyst. The user provides ONLY a photo.\n\n"
        "Task:\n"
        "1) Decide if this photo is a PLACE or NOT A PLACE (object/person/food/document/etc.) or AMBIGUOUS.\n"
        "2) If it is a place, guess the most likely location (name/city/country) and explain the evidence.\n"
        "3) If you cannot identify confidently, say ambiguous and do NOT invent details.\n"
        # Fixed mis-encoded apostrophe (was mojibake) so the model receives clean text.
        "4) Provide significance ONLY when you are confident it's a real identified place.\n"
        "5) Output MUST be valid JSON and match the schema.\n\n"
        "Return only JSON. No markdown. No extra keys.\n\n"
        "{format_instructions}\n"
    ),
    input_variables=["format_instructions"],
)

In [66]:
# Photo to analyse. This is a relative path, so it is resolved against the
# notebook's current working directory.
image_path = "img/img3.jpg"

In [67]:
def to_multimodal_message(inputs: dict) -> list:
    """Build the chat input for one image.

    ``inputs`` must carry an ``"image_path"`` entry. The instruction prompt
    (with the parser's format instructions filled in) and the image, encoded
    as a data URL, are packed into a single ``HumanMessage``.

    Returns a one-element list holding that message, suitable for passing to
    a chat model's ``invoke``.
    """
    # Text part first, then the image part -- same order as they are sent.
    text_part = {
        "type": "text",
        "text": prompt.format(format_instructions=parser.get_format_instructions()),
    }
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_to_data_url(inputs["image_path"])},
    }
    return [HumanMessage(content=[text_part, image_part])]

In [68]:
# Pipeline: {"image_path": ...} -> [HumanMessage] -> chat model reply -> parsed JSON (dict).
# The reply message is piped straight into the parser: LangChain output parsers
# accept a chat message as input and extract its text themselves, so no manual
# `.content` step is needed (and `.content` is not guaranteed to be a plain str;
# it may be a list of content blocks).
chain = (
    RunnableLambda(to_multimodal_message)
    | chat_model
    | parser
)

In [None]:
# The chain ends in a JsonOutputParser, which yields a plain dict rather than a
# PlaceAnalysis instance; the annotation reflects that so pydantic-only methods
# (e.g. model_dump_json) are not called on it by mistake.
result: dict = chain.invoke({"image_path": image_path})

AttributeError: 'dict' object has no attribute 'model_dump_json'

In [70]:
# Show the raw parsed dict returned by the chain.
print(result)

{'input_type': 'place_photo', 'place_guess': {'name': 'Aparajeyo Bangla', 'city': 'Dhaka', 'country': 'Bangladesh'}, 'confidence': 0.8, 'what_i_see': ['A tall, abstract brick monument with a central opening and two towering vertical elements.', 'Paved pathways made of red brick leading towards the monument.', 'Manicured green lawns and hedges surrounding the pathways and monument.', 'Buildings with brick exteriors in the background, suggesting an institutional or educational setting.', 'A bright blue sky with scattered white clouds.'], 'significance': ['Aparajeyo Bangla is a significant sculpture located at the University of Dhaka, Bangladesh.', 'It symbolizes the resistance and sacrifice of the people during the Bangladesh Liberation War of 1971.', 'The sculpture was designed by sculptor Syed Abdullah Khalid.'], 'response': 'The photo appears to be of Aparajeyo Bangla, a well-known monument at the University of Dhaka in Bangladesh. The distinctive red brick architecture, the style of 

In [71]:
# Validate the parsed dict against the schema. model_validate() reports a
# malformed or non-dict parser result as a pydantic ValidationError instead of
# a TypeError from ** unpacking.
result1 = PlaceAnalysis.model_validate(result)

# Pretty-print the validated model as indented JSON.
print(result1.model_dump_json(indent=2))

{
  "input_type": "place_photo",
  "place_guess": {
    "name": "Aparajeyo Bangla",
    "city": "Dhaka",
    "country": "Bangladesh"
  },
  "confidence": 0.8,
  "what_i_see": [
    "A tall, abstract brick monument with a central opening and two towering vertical elements.",
    "Paved pathways made of red brick leading towards the monument.",
    "Manicured green lawns and hedges surrounding the pathways and monument.",
    "Buildings with brick exteriors in the background, suggesting an institutional or educational setting.",
    "A bright blue sky with scattered white clouds."
  ],
  "significance": [
    "Aparajeyo Bangla is a significant sculpture located at the University of Dhaka, Bangladesh.",
    "It symbolizes the resistance and sacrifice of the people during the Bangladesh Liberation War of 1971.",
    "The sculpture was designed by sculptor Syed Abdullah Khalid."
  ],
  "response": "The photo appears to be of Aparajeyo Bangla, a well-known monument at the University of Dhaka