In [1]:
from pathlib import Path
import fitz 
import re
import cairosvg
import numpy as np 
import cv2
import json
import os
import glob
import matplotlib.pyplot as plt

In [2]:
def split_tiles(image, tile_size: int, overlap_pct: float):
    """Cut an image into overlapping square tiles, scanning row by row.

    Tiles start every ``tile_size - int(tile_size * overlap_pct)`` pixels along
    both axes. Tiles that would run past the right or bottom edge are cropped
    to the image bounds, so edge tiles may be smaller than ``tile_size``.

    Args:
        image: Array whose first two dimensions are (height, width); only
            ``image.shape[:2]`` and 2-D slicing are used.
        tile_size: Side length of a full tile, in pixels.
        overlap_pct: Fraction of ``tile_size`` shared by neighbouring tiles.
            Must be non-negative and small enough to leave a positive stride.

    Returns:
        ``(offsets, tiles)``: two parallel lists. ``offsets[k]`` is the
        ``(x, y)`` top-left corner of ``tiles[k]`` in the source image.
        Tiles are slices (views) of ``image``, not copies.

    Raises:
        ValueError: If ``overlap_pct`` is negative or the resulting stride is
            not positive.
    """
    if overlap_pct < 0:
        # A negative overlap would make the stride larger than the tile and
        # leave uncovered gaps between tiles.
        raise ValueError(f"overlap_pct must be >= 0, got {overlap_pct}")

    H, W = image.shape[:2]
    overlap = int(tile_size * overlap_pct)
    step = tile_size - overlap
    if step <= 0:
        # step == 0 makes range() raise a cryptic error; step < 0 would make
        # range() empty and silently return no tiles at all.
        raise ValueError(
            f"tile_size ({tile_size}) minus overlap ({overlap}) must be positive"
        )

    offsets = []
    tiles = []
    for y in range(0, H, step):
        for x in range(0, W, step):
            # Clamp to the image bounds so edge tiles are cropped, not padded.
            tile = image[y:min(y + tile_size, H), x:min(x + tile_size, W)]
            offsets.append((x, y))
            tiles.append(tile)
    return offsets, tiles

In [None]:
# Tile the map image and record where each tile sits in the original.
out_dir  = "data/map/"
os.makedirs(out_dir, exist_ok=True)

img_path = "data/map.JPG"
image = cv2.imread(img_path)
if image is None:
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising, which would otherwise fail later inside split_tiles.
    raise FileNotFoundError(f"Could not read image: {img_path}")
offsets, tiles = split_tiles(image, 1536, 0.5)

# Maps tile file name -> top-left offset and actual size of that tile.
metadata = {}
for tile_index, (offset, tile) in enumerate(zip(offsets, tiles)):
    tile_filename = f"tile_{tile_index}.jpg"

    cv2.imwrite(
        os.path.join(out_dir, tile_filename),
        tile,
        [cv2.IMWRITE_JPEG_QUALITY, 100, cv2.IMWRITE_JPEG_OPTIMIZE, 1]
    )
    metadata[tile_filename] = {
        "x": offset[0],
        "y": offset[1],
        # Edge tiles are cropped, so record the real size rather than 1536.
        "width": tile.shape[1],
        "height": tile.shape[0]
    }

with open("data/metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=2, ensure_ascii=False)

Đã lưu 182 ảnh và file metadata.json vào data/map/


In [4]:
# Render the labelled pages of the design PDF to JPEG files, one per page,
# named after the floor range found in the page text.
pdf_path = "data/design/S606.pdf"
out_dir = "data/design/"

pdf = Path(pdf_path)
doc = fitz.open(pdf_path)
try:
    # Page index -> output name. Keyed by page index so that a page without a
    # label cannot shift the names of the pages that follow it.
    page_names = {}
    # Matches "TẦNG <n>" or "TẦNG <n>-<m>" in the page text.
    pattern = r"TẦNG\s*(\d+)(?:-(\d+))?"
    for i, page in enumerate(doc):
        text = page.get_text("text")
        match = re.search(pattern, text)
        if match:
            start = int(match.group(1))
            # A single number means the range starts and ends on that number.
            end = int(match.group(2)) if match.group(2) else start
            page_names[i] = f'floor_{start}_{end}'
    floor_pages = list(page_names.values())
    print(floor_pages)

    # Only pages with a recognised label are rendered; others are skipped.
    for i, page_name in page_names.items():
        page = doc[i]
        try:
            svg = page.get_svg_image()
        except AttributeError as exc:
            raise RuntimeError("Phiên bản PyMuPDF của bạn không hỗ trợ get_svg_image()") from exc
        # Rasterise the page SVG at 8x scale on a white background.
        png_bytes = cairosvg.svg2png(
            bytestring=svg.encode("utf-8"),
            scale=8,
            background_color="#FFFFFF"
        )

        nparr = np.frombuffer(png_bytes, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imdecode returns None when the buffer cannot be decoded.
            raise RuntimeError(f"Could not decode rendered PNG for page {i}")
        out_file_jpg = Path(out_dir) / f"{page_name}.jpg"
        cv2.imwrite(str(out_file_jpg), img, [cv2.IMWRITE_JPEG_QUALITY, 100])
finally:
    # Close the document even if rendering a page fails.
    doc.close()

['floor_2_2', 'floor_3_29']


In [None]:
# Tile every rendered design image and write one metadata file per image.
paths = glob.glob("data/design/*.jpg")
# Skip files that are themselves tiles; test the file name only, so a
# directory component containing "tile" cannot exclude everything.
paths = sorted(p for p in paths if "tile" not in os.path.basename(p))
output_dir = "data/design/"

for img_path in paths:
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None for a missing or unreadable file.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    name = os.path.splitext(os.path.basename(img_path))[0]
    # Use this cell's own output_dir rather than a variable left over from
    # another cell.
    save_dir = os.path.join(output_dir, name)
    os.makedirs(save_dir, exist_ok=True)

    # Fresh dict per image so each metadata file lists only that image's
    # tiles, not tiles accumulated from earlier images or other cells.
    floor_metadata = {}
    offsets, tiles = split_tiles(image, 1536, 0.5)
    for tile_index, (offset, tile) in enumerate(zip(offsets, tiles)):
        tile_filename = f"{name}_tile_{tile_index}.jpg"

        cv2.imwrite(
            os.path.join(save_dir, tile_filename),
            tile,
            [cv2.IMWRITE_JPEG_QUALITY, 100, cv2.IMWRITE_JPEG_OPTIMIZE, 1]
        )
        floor_metadata[tile_filename] = {
            "x": offset[0],
            "y": offset[1],
            # Edge tiles are cropped, so record the real size of each tile.
            "width": tile.shape[1],
            "height": tile.shape[0]
        }
    with open(os.path.join(output_dir, f"{name}_metadata.json"), "w", encoding="utf-8") as f:
        json.dump(floor_metadata, f, indent=2, ensure_ascii=False)

Ảnh floor_2_2: đã lưu 199 tiles và offsets.json vào data/design/floor_2_2
Ảnh floor_3_29: đã lưu 199 tiles và offsets.json vào data/design/floor_3_29
