Converting a 'shp' file to the 'gpkg' format
- Change the file paths below to match your environment

In [1]:
import geopandas as gpd
# Location of the annotation GeoPackage; adjust to your environment.
gpkg_path = '/Users/ilseoplee/cape_town_annotation_checker/db_pipeline/final_annotations.gpkg'
# Read the file into a GeoDataFrame.
gdf = gpd.read_file(gpkg_path)
# Bare last expression: the notebook renders the frame as the cell output.
gdf

Unnamed: 0,id,PV_normal,PV_heater,PV_pool,uncertflag,area,annotator,centroid_latitude,centroid_longitude,image_name,nw_corner_of_image_latitude,nw_corner_of_image_longitude,se_corner_of_image_latitude,se_corner_of_image_longitude,geometry
0,1,1.0,,,,23.162657,biz,-3.769763e+06,-19991.996891,2023_RGB_8cm_W16C_21,-3769000.0,-20000.0,-3770000.0,-19000.0,"POLYGON ((-19993.55 -3769759.6, -19988.801 -37..."
1,2,,,1.0,,22.464224,biz,-3.775459e+06,-7333.463276,2023_RGB_8cm_W07C_3,-3775000.0,-8000.0,-3776000.0,-7000.0,"POLYGON ((-7338.039 -3775460.804, -7330.294 -3..."
2,3,,,1.0,,71.734771,biz,-3.775556e+06,-4412.644364,2023_RGB_8cm_W07D_1,-3775000.0,-5000.0,-3776000.0,-4000.0,"POLYGON ((-4418.027 -3775551.846, -4406.117 -3..."
3,4,,,1.0,,16.578681,biz,-3.780474e+06,-9142.853580,2023_RGB_8cm_W08A_1,-3780000.0,-10000.0,-3781000.0,-9000.0,"POLYGON ((-9145.183 -3780476.685, -9142.692 -3..."
4,5,,,1.0,,27.684287,biz,-3.780033e+06,-8917.850802,2023_RGB_8cm_W08A_2,-3780000.0,-9000.0,-3781000.0,-8000.0,"POLYGON ((-8922.826 -3780035.448, -8914.535 -3..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19730,19731,1.0,,,,13.599248,mejia,-3.758774e+06,-49022.331892,2023_RGB_8cm_W45C_16,-3758000.0,-50000.0,-3759000.0,-49000.0,"POLYGON ((-49023.481 -3758771.941, -49020.398 ..."
19731,19732,1.0,,,,4.845304,mejia,-3.758791e+06,-49072.211780,2023_RGB_8cm_W45C_16,-3758000.0,-50000.0,-3759000.0,-49000.0,"POLYGON ((-49074.803 -3758789.825, -49069.39 -..."
19732,19733,1.0,,,,4.371103,mejia,-3.758792e+06,-49072.717572,2023_RGB_8cm_W45C_16,-3758000.0,-50000.0,-3759000.0,-49000.0,"POLYGON ((-49075.145 -3758791.263, -49069.938 ..."
19733,19734,1.0,,,,4.169215,mejia,-3.758794e+06,-49072.796580,2023_RGB_8cm_W45C_16,-3758000.0,-50000.0,-3759000.0,-49000.0,"POLYGON ((-49075.214 -3758792.771, -49070.075 ..."


In [None]:
import geopandas as gpd
import rasterio
from PIL import Image, ImageTk, ImageDraw
import tkinter as tk
from tkinter import ttk
import numpy as np
import os

# --- Paths -------------------------------------------------------------
# Input annotations, the folder holding the "<image_name>.tif" tiles, and
# the GeoPackage that receives the QC results.
GPKG_PATH = "/Users/ilseoplee/cape_town_annotation_checker/db_pipeline/final_annotations.gpkg"
IMAGE_FOLDER = "/Users/ilseoplee/cape_town_annotation_checker/db_pipeline/download/images"
OUTPUT_PATH = "/Users/ilseoplee/cape_town_annotation_checker/annotation_qc_results_sample.gpkg"

# --- Annotations -------------------------------------------------------
gdf = gpd.read_file(GPKG_PATH)
# Keep the row index as an explicit column.
# NOTE(review): "fid" is also the usual primary-key column name in GeoPackage
# layers - confirm that to_file() writes this column as intended.
gdf["fid"] = gdf.index

# --- QC flag columns ---------------------------------------------------
# One flag per possible reviewer verdict; columns already present in the
# input are left untouched, missing ones start at 0.
qc_cols = [
    "PV_normal_qc",
    "PV_heater_qc",
    "PV_pool_qc",
    "uncertflag_qc",
    "delete_qc",
]
for col in qc_cols:
    if col in gdf.columns:
        continue
    gdf[col] = 0

class QCChecker:
    """Tkinter GUI for reviewing annotation polygons one row at a time.

    For the current row of the module-level ``gdf`` the window shows a crop of
    the row's image tile (at most 600 x 600 px around the polygon centroid)
    with the polygon outlined in red, the row's existing labels, and one
    button per entry of ``qc_cols``. Pressing a button stores the verdict,
    writes ``gdf`` to ``OUTPUT_PATH`` and advances to the next row that can be
    displayed.

    Relies on the module-level names ``gdf``, ``qc_cols``, ``IMAGE_FOLDER``
    and ``OUTPUT_PATH``.
    """

    # Half-size, in pixels, of the crop taken around the polygon centroid.
    HALF_CROP = 300
    # Largest width/height, in pixels, of the image shown in the window.
    MAX_DISPLAY = 800

    def __init__(self, master):
        """Build the widgets inside ``master`` and display the first row.

        Args:
            master: the Tk root (or toplevel) window that hosts the checker.
        """
        self.master = master
        self.index = 0  # position (not label) of the row under review
        self.tk_img = None  # reference kept so Tk does not lose the image

        self.label = tk.Label(master)
        self.label.pack()

        self.info = tk.Label(master, text="", font=("Arial", 12), justify="left")
        self.info.pack()

        button_frame = ttk.Frame(master)
        button_frame.pack()

        for i, col in enumerate(qc_cols):
            # `c=col` binds the current column name; a plain closure would see
            # only the last value of the loop variable.
            ttk.Button(button_frame, text=col, command=lambda c=col: self.mark(c)).grid(row=0, column=i, padx=5)

        self.next()

    def mark(self, col):
        """Record ``col`` as the verdict for the current row, save, and advance.

        The chosen QC column is set to 1 and every other QC column to 0, then
        the whole frame is written to ``OUTPUT_PATH``. A failed save is
        reported on stdout and does not stop the review.

        Args:
            col: name of the QC column (an entry of ``qc_cols``) to set.
        """
        # Nothing left to mark: assigning through .at with a non-existent
        # label would silently append a new row to the frame.
        if self.index >= len(gdf):
            return

        # self.index is positional, .at is label based - translate explicitly
        # so both refer to the same row whatever the index looks like.
        row_label = gdf.index[self.index]
        for qc in qc_cols:
            gdf.at[row_label, qc] = 1 if qc == col else 0
        try:
            gdf.to_file(OUTPUT_PATH, driver="GPKG")
            print(f"Saved after ID {gdf.iloc[self.index].get('id', self.index)}")
        except Exception as e:
            print(f"Save failed: {e}")
        self.index += 1
        self.next()

    def _render_crop(self, row, image_path):
        """Return a PIL image of the tile around ``row.geometry`` with the outline drawn.

        Raises:
            Exception: anything raised while opening/reading the raster or
                handling the geometry; ``ValueError`` when the centroid falls
                outside the image so that the clipped crop is empty.
        """
        geom = row.geometry
        with rasterio.open(image_path) as src:
            # Inverse affine transform: map coordinates -> (column, row).
            centroid = geom.centroid
            cx, cy = ~src.transform * (centroid.x, centroid.y)

            # Crop box around the centroid, clipped to the image bounds.
            left = max(0, int(cx - self.HALF_CROP))
            top = max(0, int(cy - self.HALF_CROP))
            right = min(src.width, int(cx + self.HALF_CROP))
            bottom = min(src.height, int(cy + self.HALF_CROP))
            if right <= left or bottom <= top:
                raise ValueError("polygon centroid lies outside the image")

            window = rasterio.windows.Window(
                col_off=left,
                row_off=top,
                width=right - left,
                height=bottom - top,
            )
            data = src.read([1, 2, 3], window=window)
            win_transform = src.window_transform(window)

        # (bands, rows, cols) -> (rows, cols, bands) as expected by PIL.
        rgb = np.nan_to_num(np.transpose(data, (1, 2, 0)))
        if rgb.dtype != np.uint8:
            # Scale in float so signed integer rasters cannot wrap around.
            # np.ptp() is used because ndarray.ptp() no longer exists in
            # NumPy 2.x.
            rgb = rgb.astype(np.float64)
            rgb = ((rgb - rgb.min()) / (np.ptp(rgb) + 1e-6) * 255).astype(np.uint8)

        img = Image.fromarray(rgb)
        draw = ImageDraw.Draw(img)

        # Outline every part that has an exterior ring: a single polygon is
        # its own only part, multi-part geometries expose `.geoms`.
        for part in getattr(geom, "geoms", [geom]):
            if not hasattr(part, "exterior"):
                continue
            pixels = [~win_transform * (x, y) for x, y, *_ in part.exterior.coords]
            pixels = [(int(px), int(py)) for px, py in pixels]
            if len(pixels) > 2:
                draw.polygon(pixels, outline="red", width=3)

        if img.width > self.MAX_DISPLAY or img.height > self.MAX_DISPLAY:
            img.thumbnail((self.MAX_DISPLAY, self.MAX_DISPLAY), Image.LANCZOS)
        return img

    def _show(self, img, row, image_name):
        """Put ``img`` and the labels of ``row`` into the window."""
        self.tk_img = ImageTk.PhotoImage(img)
        self.label.configure(image=self.tk_img)
        self.label.image = self.tk_img

        self.info.config(
            text=(
                f"ID: {row.get('id', 'NA')} | image: {image_name} | annotator: {row.get('annotator', 'NA')}\n"
                f"PV_normal: {row.get('PV_normal')}, "
                f"PV_heater: {row.get('PV_heater')}, "
                f"PV_pool: {row.get('PV_pool')}, "
                f"uncertflag: {row.get('uncertflag')}"
            )
        )

    def next(self):
        """Display the row at ``self.index``, skipping rows that cannot be shown.

        Rows without an image name, or whose image cannot be loaded or
        rendered, are reported on stdout and skipped. Once every row has been
        visited the frame is written to ``OUTPUT_PATH`` and the window is
        closed.
        """
        while self.index < len(gdf):
            row = gdf.iloc[self.index]
            image_name = row.get("image_name")

            # A missing name (None/NaN) cannot be turned into a file path.
            if not isinstance(image_name, str) or not image_name:
                print(f"Skipping row {self.index}: missing image_name")
                self.index += 1
                continue

            image_path = os.path.join(IMAGE_FOLDER, image_name + ".tif")
            try:
                img = self._render_crop(row, image_path)
                self._show(img, row, image_name)
                return
            except Exception as e:
                # Best effort: report the problem and move on to the next row.
                print(f"Error loading {image_path}: {e}")

            self.index += 1

        gdf.to_file(OUTPUT_PATH, driver="GPKG")
        print("Save!")
        # destroy() closes the window and ends the event loop. quit() only
        # stops a loop that is already running, so it does nothing when this
        # point is reached from __init__ (before mainloop() has started).
        self.master.destroy()

# Create the top-level Tk window that hosts the checker.
root = tk.Tk()
root.title("Annotation QC Checker")
# Constructing QCChecker builds the widgets and calls next() to show the first row.
app = QCChecker(root)
# Run the Tk event loop; this call blocks until the loop is ended.
root.mainloop()


: 