# In [None]:
"""
Script 4: LayerAnnotator

Description:
This script provides a semi-automated interface to manually assign layer numbers to polygon geometries
extracted from georeferenced stratigraphic section drawings. It overlays each polygon on its corresponding
section drawing (based on template matching and ROI extraction), and allows the user to:

- Enter the correct layer number (`Layer_No`)
- Assign the `Section_ID` based on the filename
- Mark all layers in a section as valid or erroneous (`Error` column)

To use this script correctly, edit the values in the USER SETTINGS section below before running.

The script uses a pre-existing shapefile that already contains the polygons and a `Raster` column linking each polygon
to its source raster image. Visual verification of geometry placement is done through matplotlib overlays.

Notes:
- The template must match the scale bar appearance and size used in the dataset.
- Ensure the shapefile contains or allows adding columns: `Layer_No`, `Section_ID`, `Error`.
- Polygons with `Layer_No = 0` (including those skipped by pressing Enter at the prompt) are removed from the final shapefile upon saving.
"""
import os
import time
import cv2
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
import rasterio
from rasterio.plot import reshape_as_image

# ---------------------------------------------
# USER SETTINGS (EDIT BEFORE RUNNING)
# ---------------------------------------------
# Path to the folder containing the rectified GeoTIFFs.
input_folder = r"C:\path_to_imput_folder"
# Path to the shapefile with the polygon geometries and the "Raster" column.
shapefile_path = r"C:\path_to_LAYER.shp"
# Image of the 1 m scale bar; it is used to locate the drawing area for ROI extraction.
template_path = r"C:\path_to_template"

# Width (in pixels) of the cropped drawing area. Adjust based on the form layout.
roi_width = 300
# Factor by which the scale-bar template is resized before matching. It must match
# the scale appearance in the rectified drawings; try adjusting it if template
# matching fails.
scale_template_resize_factor = 2.35
# Optional horizontal offset (in pixels) that shifts the ROI further to the right
# of the scale bar. Use it if the ROI is misaligned due to form variation.
roi_x_offset_pixels = 0
# ---------------------------------------------
# FUNCTIONS
# ---------------------------------------------
def template_matching(img, template_path):
    """Locate the scale bar in a raster image using template matching.

    The template is loaded as a grayscale image, rescaled by the module-level
    ``scale_template_resize_factor`` and matched against the grayscale version
    of ``img`` with ``cv2.TM_CCOEFF_NORMED``.

    Args:
        img: Colour image array that ``cv2.cvtColor`` can convert with
            ``cv2.COLOR_BGR2GRAY``.
        template_path: Path to the scale-bar template image.

    Returns:
        A tuple ``(max_loc, template_size)``: ``max_loc`` is the ``(x, y)``
        location of the highest matching score as reported by
        ``cv2.minMaxLoc`` and ``template_size`` is the ``(width, height)`` of
        the rescaled template.

    Raises:
        FileNotFoundError: If the template image cannot be read.
        ValueError: If the rescaled template is larger than the image.
    """
    template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
    if template is None:
        # cv2.imread does not raise on failure; it returns None, which would
        # otherwise surface later as an unrelated AttributeError.
        raise FileNotFoundError(
            f"Template image could not be read (missing or unsupported file): {template_path}"
        )

    template = cv2.resize(template, (
        int(template.shape[1] * scale_template_resize_factor),
        int(template.shape[0] * scale_template_resize_factor)))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # matchTemplate needs the template to fit inside the searched image.
    if template.shape[0] > img_gray.shape[0] or template.shape[1] > img_gray.shape[1]:
        raise ValueError(
            f"Rescaled template {template.shape[::-1]} is larger than the image "
            f"{img_gray.shape[::-1]}; lower scale_template_resize_factor."
        )

    result = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(result)
    # shape is (rows, cols); reversing it yields (width, height).
    return max_loc, template.shape[::-1]

def extract_roi(img, scale_loc, scale_size, roi_width):
    """Crop the Region of Interest (ROI) that starts right of the scale bar.

    The crop begins at the right edge of the scale bar, shifted by the
    module-level ``roi_x_offset_pixels``, starts at the scale bar's row, is as
    tall as the scale bar and ``roi_width`` columns wide.

    Args:
        img: Image array indexed as ``img[rows, cols]``.
        scale_loc: ``(x, y)`` position of the scale bar in the image.
        scale_size: ``(width, height)`` of the scale bar.
        roi_width: Number of columns to crop.

    Returns:
        A tuple ``(roi, origin)`` where ``roi`` is the cropped array and
        ``origin`` is ``(roi_x, roi_y, height, roi_width)``.
    """
    bar_x, bar_y = scale_loc
    bar_width, bar_height = scale_size

    left = bar_x + bar_width + roi_x_offset_pixels
    top = bar_y

    row_span = slice(top, top + bar_height)
    col_span = slice(left, left + roi_width)
    return img[row_span, col_span], (left, top, bar_height, roi_width)

def overlay_geometry_on_roi(roi, geom, transform, roi_origin):
    """Plot a shapely geometry outline on top of the ROI image for visual inspection.

    The exterior ring of a ``Polygon`` is drawn in red. A ``MultiPolygon`` is
    drawn part by part. Any other geometry (including ``None``) is not drawn
    and only the ROI image is shown.

    Args:
        roi: ROI image array; it is converted with ``cv2.COLOR_BGR2RGB``
            before being displayed.
        geom: Geometry whose coordinates are in the raster's coordinate system.
        transform: Affine transform of the raster; its inverse (``~transform``)
            maps geometry coordinates to ``(col, row)`` image coordinates.
        roi_origin: Tuple whose first two items are the ROI's ``(x, y)`` origin
            in the full image; the remaining items are ignored.
    """
    fig, ax = plt.subplots(figsize=(5, 6))
    ax.imshow(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))

    # Transform polygon coordinates to image space
    inv_transform = ~transform
    roi_x, roi_y, _, _ = roi_origin

    # Collect the polygon parts to draw. Multipart features are handled here so
    # they are no longer silently skipped.
    if isinstance(geom, Polygon):
        parts = [geom]
    elif getattr(geom, "geom_type", None) == "MultiPolygon":
        parts = list(geom.geoms)
    else:
        parts = []

    for part in parts:
        x, y = part.exterior.coords.xy
        px = []
        py = []
        for xi, yi in zip(x, y):
            col, row = inv_transform * (xi, yi)
            # Shift from full-image pixels to ROI-relative pixels.
            px.append(col - roi_x)
            py.append(row - roi_y)
        ax.plot(px, py, color='red', linewidth=2)

    ax.axis('off')
    plt.tight_layout()
    plt.show()
# ---------------------------------------------
# MAIN PROCESS
# ---------------------------------------------
def _read_raster_as_bgr(raster_path):
    """Read a raster and return ``(transform, image)`` with the image in BGR order."""
    with rasterio.open(raster_path) as src:
        transform = src.transform
        img = reshape_as_image(src.read())

    # The cv2 calls downstream (COLOR_BGR2GRAY / COLOR_BGR2RGB) expect BGR data.
    # NOTE(review): assumes 3/4-band rasters store their bands as R, G, B(, A),
    # the usual GeoTIFF layout - confirm against the dataset.
    band_count = img.shape[2]
    if band_count == 1:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif band_count == 3:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    elif band_count == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
    return transform, img


def _prompt_layer_number():
    """Ask for a layer number until the input is valid; return 0 when skipped."""
    while True:
        layer_input = input("Layer number (or press Enter to skip): ").strip()
        if layer_input == "":
            return 0
        # isdecimal() only accepts characters that int() can parse, whereas
        # isdigit() also accepts e.g. superscript digits that make int() raise.
        if layer_input.isdecimal():
            return int(layer_input)
        print("Invalid input. Please enter a number.")


def main():
    """Run the interactive layer-annotation session.

    For every ``.tif``/``.tiff`` file in ``input_folder`` (case-insensitive)
    that has polygons in the shapefile (matched through the ``Raster`` column),
    each polygon is displayed over the extracted ROI and the user is asked for
    its layer number. ``Section_ID`` is set to the digits contained in the
    raster file name, and the whole section is finally marked ``"OK"`` or
    ``"Error"``.

    Polygons with ``Layer_No == 0`` (skipped ones) are dropped and the result
    overwrites ``shapefile_path``.

    Raises:
        KeyError: If the shapefile has no ``Raster`` column.
    """
    start_time = time.time()

    gdf = gpd.read_file(shapefile_path)

    if "Raster" not in gdf.columns:
        raise KeyError(f"Shapefile has no 'Raster' column: {shapefile_path}")

    # Add the annotation columns if they do not exist yet
    for col in ["Layer_No", "Section_ID", "Error"]:
        if col not in gdf.columns:
            gdf[col] = None

    for filename in sorted(os.listdir(input_folder)):
        # Case-insensitive so that e.g. ".TIF" files are not silently ignored.
        if not filename.lower().endswith((".tif", ".tiff")):
            continue

        raster_name = os.path.splitext(filename)[0]
        raster_mask = gdf["Raster"] == raster_name
        matching_polys = gdf[raster_mask]

        if matching_polys.empty:
            print(f"⚠️ No polygons found for raster: {raster_name}")
            continue

        print(f"🔍 Processing {raster_name} with {len(matching_polys)} layers")

        raster_path = os.path.join(input_folder, filename)
        transform, img = _read_raster_as_bgr(raster_path)

        scale_loc, scale_size = template_matching(img, template_path)
        roi, roi_origin = extract_roi(img, scale_loc, scale_size, roi_width)

        # The section id only depends on the file name, so compute it once.
        section_id = ''.join(filter(str.isdigit, raster_name))

        for idx in matching_polys.index:
            polygon = gdf.loc[idx, "geometry"]
            overlay_geometry_on_roi(roi, polygon, transform, roi_origin)

            gdf.loc[idx, "Layer_No"] = _prompt_layer_number()
            gdf.loc[idx, "Section_ID"] = section_id

        confirm = input(f"✅ Confirm {raster_name} (Enter = OK, 'e' = mark all as error): ").strip().lower()
        gdf.loc[raster_mask, "Error"] = "Error" if confirm == 'e' else "OK"

    # Remove polygons with Layer_No = 0
    gdf = gdf[gdf["Layer_No"] != 0]

    # Save changes (overwrites the input shapefile)
    gdf.to_file(shapefile_path, driver="ESRI Shapefile")
    elapsed = time.time() - start_time
    print(f"✅ Updated and saved shapefile: {shapefile_path}")
    print(f"⏱️ Elapsed time: {elapsed:.2f} seconds")

# Start the annotation session only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
