In [1]:
import base64
import io
import json
import os
from pathlib import Path
from typing import List, Literal, Optional, Tuple

import exifread
import requests
from dotenv import load_dotenv
from geopy.geocoders import Nominatim
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from pydantic import BaseModel, Field

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.runnables import RunnableLambda

# Load variables from a .env file into the process environment. Later cells read
# IMGBB_API_KEY and SERPAPI_API_KEY with os.getenv().
load_dotenv()

True

In [2]:
# -----------------------------
# 1) Output schema
# -----------------------------
# Best-effort identification of the photographed place. Every field is optional
# and defaults to None, so an unidentified place is represented by all-None values.
# NOTE(review): documented with comments rather than a docstring because a class
# docstring would presumably end up in the JSON schema sent to the model — confirm.
class PlaceGuess(BaseModel):
    name: Optional[str] = None  # specific place name, when one can be given
    city: Optional[str] = None  # city of the place, when known
    country: Optional[str] = None  # country of the place, when known


# Structured result the model must return; `parser` validates replies against it.
# NOTE(review): documented with comments rather than a docstring because a class
# docstring would presumably end up in the JSON schema sent to the model — confirm.
class PlaceAnalysis(BaseModel):
    input_type: Literal["place_photo", "not_a_place", "ambiguous"]  # one of three allowed labels
    place_guess: PlaceGuess  # nested place identification (fields may all be None)
    confidence: float = Field(ge=0.0, le=1.0)  # validated to lie within [0.0, 1.0]
    what_i_see: List[str]  # list of short strings describing visible cues/evidence
    significance: List[str]  # list of strings about the place's significance
    response: str  # free-text answer


# Output parser bound to PlaceAnalysis. It supplies the format instructions that
# build_message() embeds in the prompt and is the last step of `chain`.
parser = PydanticOutputParser(pydantic_object=PlaceAnalysis)

In [3]:
# -----------------------------
# 2) Image preprocessing (shared by the ImgBB upload and the Gemini data URL)
# -----------------------------
def preprocess_image_to_b64(
    image_path: str, max_side: int = 1600, jpeg_quality: int = 90
) -> str:
    """Load an image, downscale it, re-encode it as JPEG and return it as base64 text.

    The JPEG is encoded into an in-memory buffer. Nothing is written next to the
    source image, so the function works in read-only directories, cannot
    overwrite or delete an unrelated ``<name>.tmp.jpg`` file, and leaves no
    temporary file behind if encoding fails.

    Args:
        image_path: Path to the image file on disk.
        max_side: Upper bound (pixels) for width and height; the image is shrunk
            in place with ``Image.thumbnail`` when it exceeds this bound.
        jpeg_quality: JPEG quality setting passed to ``Image.save``.

    Returns:
        The base64-encoded JPEG bytes as a UTF-8 string (no ``data:`` prefix).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    p = Path(image_path)
    if not p.exists():
        raise FileNotFoundError(f"Image not found: {image_path}")

    # convert() returns a new, fully loaded image, so the source file handle
    # can be closed as soon as the conversion is done.
    with Image.open(p) as src:
        img = src.convert("RGB")
    img.thumbnail((max_side, max_side))

    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=jpeg_quality)
    return base64.b64encode(buf.getvalue()).decode("utf-8")

In [4]:
def image_to_data_url(image_path: str) -> str:
    """Return the preprocessed image at ``image_path`` as a JPEG ``data:`` URL."""
    return "data:image/jpeg;base64," + preprocess_image_to_b64(image_path)


In [5]:
# -----------------------------
# 3) EXIF GPS extraction
# -----------------------------
def _ratio_to_float(r):
    # exifread Ratio -> float
    return float(r.num) / float(r.den)

def _dms_to_deg(dms_vals, ref):
    """Convert a degrees/minutes/seconds triple to signed decimal degrees.

    Args:
        dms_vals: Sequence whose first three items are ratio-like objects
            (degrees, minutes, seconds) accepted by ``_ratio_to_float``.
        ref: Hemisphere reference. "S" and "W" make the result negative; any
            other value leaves it positive.

    Returns:
        Decimal degrees as a float, negative for the southern/western hemisphere.
    """
    d = _ratio_to_float(dms_vals[0])
    m = _ratio_to_float(dms_vals[1])
    s = _ratio_to_float(dms_vals[2])
    deg = d + (m / 60.0) + (s / 3600.0)

    # Normalize before comparing: a padded or lowercase ref such as "S " or "w"
    # would otherwise fall through and silently yield the wrong hemisphere.
    if str(ref).strip().upper() in ("S", "W"):
        deg = -deg
    return deg

def extract_exif_gps(image_path: str) -> Optional[Tuple[float, float]]:
    """Read GPS coordinates from an image's EXIF data.

    Uses exifread (more robust than PIL for GPSInfo).

    Args:
        image_path: Path to the image file to inspect.

    Returns:
        ``(lat, lon)`` in signed decimal degrees when all four GPS tags
        (latitude, longitude and their hemisphere refs) are present and
        well-formed; otherwise ``None``. Malformed GPS data (zero denominators,
        fewer than three D/M/S components, out-of-range values) is treated the
        same as missing GPS instead of raising.
    """
    with open(image_path, "rb") as f:
        tags = exifread.process_file(f, details=False)

    lat_tag = tags.get("GPS GPSLatitude")
    lat_ref_tag = tags.get("GPS GPSLatitudeRef")
    lon_tag = tags.get("GPS GPSLongitude")
    lon_ref_tag = tags.get("GPS GPSLongitudeRef")

    if not (lat_tag and lat_ref_tag and lon_tag and lon_ref_tag):
        return None

    try:
        lat = _dms_to_deg(lat_tag.values, str(lat_ref_tag.values))
        lon = _dms_to_deg(lon_tag.values, str(lon_ref_tag.values))
    except (ZeroDivisionError, IndexError, AttributeError, TypeError, ValueError):
        # Placeholder or corrupt GPS blocks (e.g. 0/0 ratios, truncated value
        # lists) are not usable coordinates; report "no GPS" so callers can
        # fall back to other evidence.
        return None

    # Values outside the valid coordinate ranges cannot be reverse-geocoded.
    if not (-90.0 <= lat <= 90.0 and -180.0 <= lon <= 180.0):
        return None

    return (lat, lon)

def reverse_geocode(lat: float, lon: float, timeout: float = 10.0) -> dict:
    """Reverse-geocode coordinates with the free OpenStreetMap Nominatim service.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        timeout: Seconds to wait for the Nominatim response. Set explicitly
            because the library's default timeout is short and easily exceeded
            on the public endpoint.

    Returns:
        A dict with ``display_name``, ``city`` (city, town or village, whichever
        is present first), ``state``, ``country`` and ``country_code``; an empty
        dict when the service returns no location.

    Note:
        Network or service errors raised by geopy are not caught here and
        propagate to the caller.
    """
    geolocator = Nominatim(user_agent="place_detector_exif", timeout=timeout)
    loc = geolocator.reverse((lat, lon), language="en", zoom=16)
    if not loc:
        return {}

    # Tolerate a missing or null "address" entry in the raw payload.
    addr = loc.raw.get("address") or {}
    return {
        "display_name": loc.raw.get("display_name"),
        "city": addr.get("city") or addr.get("town") or addr.get("village"),
        "state": addr.get("state"),
        "country": addr.get("country"),
        "country_code": addr.get("country_code"),
    }

In [6]:
# -----------------------------
# 4) ImgBB + SerpApi fallback
# -----------------------------
def upload_to_imgbb(image_path: str) -> str:
    """Upload the preprocessed image to ImgBB and return a public URL for it.

    The API key is sent in the POST body rather than the query string. With the
    key in the URL, any ``requests.HTTPError`` message (which includes the
    request URL) would expose the secret in tracebacks and saved notebook output.

    Args:
        image_path: Path to the local image; it is downscaled and re-encoded by
            ``preprocess_image_to_b64`` before upload.

    Returns:
        The ``display_url`` from the ImgBB response, or ``url`` when
        ``display_url`` is absent or empty.

    Raises:
        RuntimeError: If ``IMGBB_API_KEY`` is not set, if ImgBB reports a failed
            upload, or if the response carries no image URL.
        requests.HTTPError: If the HTTP response status indicates an error.
    """
    api_key = os.getenv("IMGBB_API_KEY")
    if not api_key:
        raise RuntimeError("Missing IMGBB_API_KEY in environment")

    b64 = preprocess_image_to_b64(image_path)

    r = requests.post(
        "https://api.imgbb.com/1/upload",
        data={"key": api_key, "image": b64},
        timeout=60,
    )
    r.raise_for_status()
    payload = r.json()

    if not payload.get("success"):
        raise RuntimeError(f"ImgBB upload failed: {payload}")

    data_obj = payload.get("data") or {}
    public_url = data_obj.get("display_url") or data_obj.get("url")
    if not public_url:
        # Surface a descriptive error instead of a bare KeyError.
        raise RuntimeError(f"ImgBB upload failed: no image URL in response: {payload}")
    return public_url


def serpapi_google_lens(public_image_url: str) -> dict:
    """Query SerpApi's ``google_lens`` engine for a publicly reachable image URL.

    Returns the decoded JSON body of the response. Raises ``RuntimeError`` when
    ``SERPAPI_API_KEY`` is not set, and lets ``raise_for_status`` raise on an
    HTTP error status.
    """
    key = os.getenv("SERPAPI_API_KEY")
    if not key:
        raise RuntimeError("Missing SERPAPI_API_KEY in environment")

    response = requests.get(
        "https://serpapi.com/search",
        params=dict(
            engine="google_lens",
            url=public_image_url,
            type="all",
            hl="en",
            api_key=key,
        ),
        timeout=90,
    )
    response.raise_for_status()
    return response.json()


def extract_serp_evidence(serp_json: dict, max_items: int = 8) -> dict:
    """Reduce a SerpApi response to the compact evidence dict given to the LLM.

    Args:
        serp_json: Decoded SerpApi JSON response.
        max_items: Maximum number of entries taken from each of
            ``exact_matches`` and ``visual_matches``.

    Returns:
        A dict with three keys:
        - ``knowledge_graph``: the non-empty values among title, type,
          description, website, address and location. The source field may be a
          single object or a list of objects; for a list, the first object is
          used. Previously a list was silently ignored.
        - ``exact_matches`` / ``visual_matches``: up to ``max_items`` dicts
          holding only ``title``, ``source`` and ``link``.
    """
    evidence = {"knowledge_graph": {}, "exact_matches": [], "visual_matches": []}

    kg = serp_json.get("knowledge_graph") or {}
    if isinstance(kg, list):
        # Use the first object in the list, if any.
        kg = next((entry for entry in kg if isinstance(entry, dict)), {})
    if isinstance(kg, dict):
        for k in ("title", "type", "description", "website", "address", "location"):
            if kg.get(k):
                evidence["knowledge_graph"][k] = kg[k]

    for section in ("exact_matches", "visual_matches"):
        rows = serp_json.get(section) or []
        if not isinstance(rows, list):
            rows = []
        evidence[section] = [
            {
                "title": item.get("title"),
                "source": item.get("source"),
                "link": item.get("link"),
            }
            for item in rows[:max_items]
            if isinstance(item, dict)  # skip malformed entries instead of crashing
        ]

    return evidence

In [7]:
# -----------------------------
# 5) Prompt + LLM
# -----------------------------
# Prompt for the classification step. It has two placeholders:
#   {evidence_json}       - JSON dump of the EXIF / reverse-image evidence
#   {format_instructions} - schema instructions produced by `parser`
# The text defines the three input_type labels, the rules for when a specific place
# may be named, the confidence bands, and what each output field must contain.
# The wording is runtime behavior: any edit changes what the model is asked to do.
prompt = PromptTemplate(
    input_variables=["format_instructions", "evidence_json"],
    template=(
        "You are a strict, evidence-based photo classifier and place verifier.\n"
        "The user provides ONLY a photo. You also receive EXIF GPS and/or reverse-image evidence.\n"
        "Your job is to classify correctly and NEVER invent a specific place.\n\n"
        "Definitions:\n"
        "- place_photo: the main subject is a real-world location/scene (landmark, street, beach, hill, cityscape, interior of a venue).\n"
        "- not_a_place: the main subject is an object/person/animal/food/document/screenshot/product, not a location.\n"
        "- ambiguous: cannot reliably decide, OR seems like a place but cannot identify with enough evidence.\n\n"
        "Hard rules (follow exactly):\n"
        "1) If the image is an object/person/animal/food/document/screenshot/product → input_type=not_a_place.\n"
        "2) If it is a scene/place but you cannot identify a specific place name from evidence → input_type=ambiguous.\n"
        "3) Only set input_type=place_photo if you are confident it is a place scene.\n"
        "4) You may name a specific place ONLY if evidence strongly supports it.\n"
        "   Strong evidence means at least ONE of these:\n"
        "   - EXIF GPS exists AND reverse geocode provides country/city (high trust).\n"
        "   - Reverse-image evidence has consistent agreement: the same place name appears across >=2 independent sources\n"
        "     (different domains/sources) OR a knowledge_graph title clearly matches.\n"
        "5) If evidence conflicts (different place names) or is generic/noisy → input_type=ambiguous and keep confidence <= 0.5.\n"
        "6) Confidence calibration (0..1):\n"
        "   - 0.85–1.00: EXIF GPS + reverse geocode present OR multiple sources agree on same place.\n"
        "   - 0.60–0.84: place scene is clear, but identification is partial (e.g., only city/country).\n"
        "   - 0.30–0.59: likely a place scene, but weak/contradictory evidence.\n"
        "   - 0.00–0.29: not a place OR extremely uncertain.\n\n"
        "What to output (VERY IMPORTANT):\n"
        "- what_i_see: short bullet strings describing visible cues and evidence used.\n"
        "- place_guess:\n"
        "  - If not_a_place: set name/city/country to null.\n"
        "  - If ambiguous: you MAY set only country/city if supported by EXIF; otherwise null.\n"
        "  - If place_photo and identified: fill name/city/country as supported.\n\n"
        "Significance policy (do not violate):\n"
        "- You MUST NOT provide historical significance unless a specific place is confidently identified.\n"
        "- If place_photo and confidently identified:\n"
        "  - If the place is historically/culturally significant: provide factual significance bullets.\n"
        "  - If it is primarily a tourist spot: provide practical visit suggestions (what to do/see), not fake history.\n"
        "- If it is a generic place with no notable significance (e.g., generic hill/sea view with no identifiable landmark):\n"
        "  - respond with significance bullets stating it is \"not a place of significance\" and explain why.\n"
        "- If not_a_place or ambiguous:\n"
        "  - significance must explain why significance cannot be determined.\n\n"
        "Evidence (JSON):\n"
        "{evidence_json}\n\n"
        "Return ONLY valid JSON matching this schema exactly (no extra keys, no markdown):\n"
        "{format_instructions}\n"
    ),
)

In [8]:
# Gemini chat model that performs the final classification, with temperature 0.2.
# NOTE(review): presumably the Google API key is read from the environment
# populated by load_dotenv() — confirm which variable the client expects.
model = ChatGoogleGenerativeAI(model="gemini-2.5-flash-lite", temperature=0.2)

In [9]:
def build_message(inputs: dict):
    """Assemble the single multimodal HumanMessage (prompt text + image) for the model.

    Evidence is gathered in two tiers:
      1. EXIF GPS, reverse-geocoded when present.
      2. Only when tier 1 yields no country: upload the image to ImgBB and run a
         SerpApi Google Lens lookup on the resulting public URL.

    The image part references the public ImgBB URL when an upload happened;
    otherwise the local file is embedded as a base64 data URL so nothing is
    uploaded unnecessarily.

    Args:
        inputs: Mapping that must contain ``"image_path"``.

    Returns:
        A one-element list holding the HumanMessage.
    """
    photo = inputs["image_path"]

    # --- Tier 1: EXIF GPS + reverse geocode
    exif_evidence = {}
    coords = extract_exif_gps(photo)
    if coords:
        latitude, longitude = coords
        exif_evidence = {
            "gps": {"lat": latitude, "lon": longitude},
            "reverse_geocode": reverse_geocode(latitude, longitude),
        }

    evidence = {"exif": exif_evidence, "serpapi": {}}
    has_country = bool(exif_evidence.get("reverse_geocode", {}).get("country"))

    # --- Tier 2: reverse-image search, only when EXIF did not give a country
    if has_country:
        image_ref = image_to_data_url(photo)
    else:
        image_ref = upload_to_imgbb(photo)
        lens_json = serpapi_google_lens(image_ref)
        evidence["serpapi"] = extract_serp_evidence(lens_json)

    text_part = {
        "type": "text",
        "text": prompt.format(
            format_instructions=parser.get_format_instructions(),
            evidence_json=json.dumps(evidence, ensure_ascii=False, indent=2),
        ),
    }
    image_part = {"type": "image_url", "image_url": {"url": image_ref}}

    return [HumanMessage(content=[text_part, image_part])]

In [10]:
# End-to-end pipeline: build the multimodal message (evidence + image), call the
# chat model, take the reply's `.content`, and parse it into a PlaceAnalysis.
chain = (
    RunnableLambda(build_message)
    | model
    | (lambda ai_msg: ai_msg.content)  # hand the parser the reply content, not the message object
    | parser
)

In [15]:
image_path = "img/img7.jpg"  # Example image; a relative path, so it resolves against the current working directory

In [16]:
# Run the full pipeline on the example image and pretty-print the parsed result as JSON.
result: PlaceAnalysis = chain.invoke({"image_path": image_path})
print(result.model_dump_json(indent=2))

{
  "input_type": "ambiguous",
  "place_guess": {
    "name": null,
    "city": null,
    "country": null
  },
  "confidence": 0.35,
  "what_i_see": [
    "A rural landscape with a field of green crops, possibly rice.",
    "Dense vegetation in the foreground.",
    "A utility pole with wires crossing the sky.",
    "Distant trees and some scattered structures.",
    "A clear blue sky with scattered white clouds."
  ],
  "significance": [
    "Significance cannot be determined as the specific location is not identifiable.",
    "The image depicts a generic rural agricultural scene."
  ],
  "response": "The provided reverse image search results are highly generic, showing unrelated social media profiles and YouTube videos that do not consistently identify a specific location. There is no EXIF GPS data available to assist in verification. Therefore, the location cannot be confidently identified."
}


In [70]:
import json

def test_all(image_path: str):
    """Smoke-test each pipeline stage in order, printing a short status per stage.

    Stages: image preprocessing, EXIF GPS extraction (plus reverse geocoding when
    GPS is found), ImgBB upload, SerpApi Google Lens lookup, and evidence
    extraction. The upload and lookup stages always run, whether or not GPS was
    found. Nothing is returned; an exception from any stage propagates.
    """
    print("\n[1] preprocess_image_to_b64")
    encoded = preprocess_image_to_b64(image_path)
    print("  ok | b64 length:", len(encoded))

    print("\n[2] extract_exif_gps")
    coords = extract_exif_gps(image_path)
    if not coords:
        print("  ok | no GPS found (expected for many images)")
    else:
        latitude, longitude = coords
        print(f"  ok | GPS found: lat={latitude}, lon={longitude}")

        print("\n[2b] reverse_geocode")
        place = reverse_geocode(latitude, longitude)
        print("  ok | reverse geocode:", place)

    print("\n[3] upload_to_imgbb")
    hosted_url = upload_to_imgbb(image_path)
    print("  ok | url:", hosted_url)

    print("\n[4] serpapi_google_lens")
    lens_json = serpapi_google_lens(hosted_url)
    print("  ok | returned keys:", list(lens_json.keys())[:12])

    print("\n[5] extract_serp_evidence")
    summary = extract_serp_evidence(lens_json)
    preview = json.dumps(summary, indent=2, ensure_ascii=False)[:1200]
    print("  ok | evidence preview:\n", preview)

    # The end-to-end chain.invoke step is not exercised by this helper.

# Exercise every stage on the example image. Steps [3] and [4] perform real
# network requests (ImgBB upload, SerpApi lookup).
test_all(image_path)


[1] preprocess_image_to_b64
  ok | b64 length: 462760

[2] extract_exif_gps
  ok | GPS found: lat=23.78084897222222, lon=90.407559

[2b] reverse_geocode
  ok | reverse geocode: {'display_name': 'Road Number 01, Korail, Gulshan, Dhaka, Dhaka Metropolitan, Dhaka District, Dhaka Division, 2467, Bangladesh', 'city': 'Dhaka', 'state': 'Dhaka Division', 'country': 'Bangladesh', 'country_code': 'bd'}

[3] upload_to_imgbb
  ok | url: https://i.ibb.co/4RXBVmdK/ac9d3faaf8ef.jpg

[4] serpapi_google_lens


KeyboardInterrupt: 