In [None]:
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import json
from google import genai
from google.genai import types
import time
import glob
import os

In [None]:
# Collect every JPEG in the floor folder. glob returns matches in arbitrary,
# filesystem-dependent order, so sort them to get the same processing (and
# resume) order on every run.
paths = sorted(glob.glob("data/design/floor_2_2/*.jpg"))
len(paths)

In [None]:
# Peek at the first three matches to confirm the glob pattern hit the intended files.
preview_paths = paths[:3]
preview_paths

In [None]:
# Never commit API keys to the notebook: read the key from the environment.
# Any key that was ever hard-coded in this cell must be considered leaked and
# should be revoked in the Google AI Studio / Cloud console.
gemini_api = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not gemini_api:
    raise RuntimeError(
        "Gemini API key not found. Set the GEMINI_API_KEY environment variable "
        "before running this notebook."
    )

client = genai.Client(api_key=gemini_api)

In [None]:
# Instruction sent with every image; the model is asked to answer with a JSON
# object that holds an "apartment_codes" list.
prompt = """
You are analyzing a real estate masterplan map. Find all apartment codes visible in this image. Each code consists of uppercase letters and numbers (e.g., S6.06, GH-02A, E2).
Return strictly valid JSON in this format:
{
    "apartment_codes": ["code1", "code2", ...]
}
If no codes are found, return:
{
    "apartment_codes": []
}

Do not include any other text or characters outside the JSON.
"""
results_path = "results.json"


def _save_results(data, path):
    """Write `data` as JSON to `path` through a temp file so an interrupted write cannot corrupt it."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    os.replace(tmp_path, path)


# Resume support: file names already present in results.json are skipped below.
if os.path.exists(results_path):
    with open(results_path, "r", encoding="utf-8") as f:
        results = json.load(f)
else:
    results = {}
print(f"Đã load {len(results)} kết quả thành công trước đó.")

max_retries = 5
quota_exhausted = False

for img_path in tqdm(paths):
    file_name = os.path.basename(img_path)
    if file_name in results:
        continue

    # Send the JPEG file exactly as stored on disk. The previous decode ->
    # BGR2RGB -> cv2.imencode round trip swapped the red and blue channels
    # (imencode expects BGR input), re-compressed the image, and crashed on
    # unreadable files because cv2.imread returns None for them.
    try:
        with open(img_path, "rb") as f:
            img_bytes = f.read()
    except OSError as e:
        print(f"Tile {file_name} could not be read: {e}")
        continue
    if not img_bytes:
        print(f"Tile {file_name} is empty, skipping.")
        continue

    image_part = types.Part.from_bytes(data=img_bytes, mime_type="image/jpeg")
    for attempt in range(max_retries):
        try:
            response = client.models.generate_content(
                model="gemini-2.5-flash-lite",
                contents=[prompt, image_part],
                config=types.GenerateContentConfig(
                    response_mime_type="application/json"
                )
            )

            # An empty or malformed answer counts as a failed attempt so it is
            # retried instead of the tile being dropped silently.
            if not response.text:
                raise ValueError("empty response")
            parsed = json.loads(response.text)
            if not isinstance(parsed, dict) or not isinstance(parsed.get("apartment_codes"), list):
                raise ValueError(f"unexpected response shape: {response.text[:200]!r}")

        except Exception as e:
            error_msg = str(e).lower()
            if "quota" in error_msg or "429 resource_exhausted" in error_msg:
                print(f"Tile {file_name} failed due to quota: {e}")
                quota_exhausted = True
                break
            print(f"Tile {file_name} attempt {attempt+1}/{max_retries} failed: {e}")
            # Exponential backoff between attempts; no point sleeping after the last one.
            if attempt + 1 < max_retries:
                time.sleep(2 ** attempt)

        else:
            results[file_name] = parsed
            # Persist right away so a crash or kernel restart loses no finished tile.
            _save_results(results, results_path)
            time.sleep(3)  # fixed pause between successful requests
            break

    if quota_exhausted:
        # Every remaining request would fail the same way; stop and resume later.
        print("Stopping early: quota exhausted. Re-run this cell later to resume.")
        break

In [None]:
# Number of image files that currently have a stored answer.
n_results = len(results)
n_results

In [None]:
# Serialize first, then write: open(..., "w") truncates the file immediately,
# so a failure inside json.dump would otherwise leave a broken results.json
# that can no longer be loaded when resuming.
serialized_results = json.dumps(results, ensure_ascii=False, indent=4)
with open("results.json", "w", encoding="utf-8") as f:
    f.write(serialized_results)

In [None]:
# Invert results.json (file name -> codes) into code -> list of file names
# and store it, sorted by code, in mapping.json.
result = {}
with open("results.json", "r", encoding="utf-8") as f:
    data = json.load(f)
for tile, entry in data.items():
    # Tolerate entries that lack the expected {"apartment_codes": [...]} shape.
    codes = entry.get("apartment_codes") if isinstance(entry, dict) else None
    if not isinstance(codes, list):
        continue

    # The model may list the same code several times for one image; record
    # each image at most once per code.
    seen_in_tile = set()
    for raw_code in codes:
        # Normalise to a stripped string so sorting never mixes key types.
        code = str(raw_code).strip()
        if not code or code in seen_in_tile:
            continue
        seen_in_tile.add(code)
        result.setdefault(code, []).append(tile)

sorted_data = dict(sorted(result.items(), key=lambda x: x[0]))
print(sorted_data)

with open("mapping.json", "w", encoding="utf-8") as f:
    json.dump(sorted_data, f, indent=2, ensure_ascii=False)

{'01': ['tile_38.jpg', 'tile_76.jpg', 'tile_8.jpg', 'tile_8.jpg', 'tile_86.jpg', 'tile_87.jpg', 'tile_87.jpg', 'tile_87.jpg', 'tile_87.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_99.jpg'], '02': ['tile_76.jpg', 'tile_8.jpg', 'tile_8.jpg', 'tile_86.jpg', 'tile_87.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_99.jpg'], '03': ['tile_76.jpg', 'tile_8.jpg', 'tile_8.jpg', 'tile_86.jpg', 'tile_87.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_99.jpg'], '04': ['tile_76.jpg', 'tile_8.jpg', 'tile_8.jpg', 'tile_86.jpg', 'tile_87.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_99.jpg'], '05': ['tile_76.jpg', 'tile_8.jpg', 'tile_8.jpg', 'tile_86.jpg', 'tile_87.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_88.jpg', 'tile_99.jpg'], '06': ['tile_76.jpg', 'tile_8.jpg', 'tile_8.jpg', 'tile_86.jpg', 'tile_87.jpg', 'til