In [31]:
import rasterio
import numpy as np
import pandas as pd
from scipy.spatial import distance
from tqdm import tqdm

In [32]:
# Paths are relative to the notebook's working directory.
# Input: the LULC raster whose first three bands are read as RGB.
LULC_FILE = "../../data/raw/LULC/uk_lulc.tif"
# Output: single-band uint8 raster of class ids (0 = unknown/unmapped).
LULC_OUT_TIF_FILE = "../../data/preprocessed/LULC/uk_lulc_classified.tif"
# Output: CSV legend listing class id, class name and legend RGB colour.
LULC_OUT_CSV_LEGEND_FILE = "../../data/preprocessed/LULC/uk_lulc_legend.csv"

In [33]:
# === 1️⃣ Load RGB raster ===
# Only bands 1-3 are read (treated as R, G, B); any further band is ignored.
# The dataset profile is captured as well so the georeferencing metadata can
# be reused when the classified raster is written.
with rasterio.open(LULC_FILE) as src:
    profile = src.profile
    img = src.read(indexes=[1, 2, 3])  # band-first array: (3, H, W)

In [34]:
# Move the band axis last so every pixel is one contiguous RGB triple:
# (3, H, W) -> (H, W, 3).
rgb = img.transpose(1, 2, 0)
H, W, _ = rgb.shape
print(f"Loaded image: {H}x{W}")

Loaded image: 10000x10000


In [35]:
# === 2️⃣ Legend: reference RGB colour → LULC class name ===
# Insertion order matters: a class id is the 1-based position of its colour
# in this dict (id 0 is reserved for unknown / unmapped pixels).
legend = {
    # Built-up
    (255, 0, 0): "Builtup, Urban",
    (168, 0, 0): "Builtup, Rural",
    (200, 133, 68): "Builtup, Mining",
    # Agriculture
    (255, 255, 115): "Agriculture, Crop land",
    (253, 243, 23): "Agriculture, Plantation",
    (255, 255, 181): "Agriculture, Fallow",
    # Forest
    (38, 115, 0): "Forest, Evergreen / Semi evergreen",
    (80, 187, 62): "Forest, Deciduous",
    (121, 200, 0): "Forest, Forest Plantation",
    (150, 231, 138): "Forest, Scrub Forest",
    (76, 230, 166): "Forest, Swamp / Mangroves",
    # Grassland
    (181, 214, 41): "Grass / Grazing",
    # Wasteland
    (210, 10, 255): "Wasteland, Gullied / Ravinous Land",
    (255, 30, 250): "Wasteland, Scrub land",
    (229, 207, 255): "Wasteland, Sandy area",
    (255, 150, 232): "Wasteland, Barren rocky",
    # Wetland and water
    (0, 168, 132): "Wetland, Inland Wetland",
    (0, 61, 222): "Waterbody, River / Stream / Canal",
    (99, 153, 255): "Waterbody, Reservoir / Lakes / Ponds",
    # Snow
    (225, 225, 225): "Snow and Glacier",
}

# (n_classes, 3) array of the legend colours, in the same order as `legend`.
legend_colors = np.array(list(legend))

In [36]:
# === 3️⃣ Extract unique colors from the image ===
# Flatten to one RGB row per pixel, then deduplicate whole rows (axis=0).
# `inverse` holds, for each pixel row, the index of its colour in the
# unique array returned by this call.
flat_rgb = rgb.reshape((-1, 3))
unique_colors, inverse = np.unique(flat_rgb, return_inverse=True, axis=0)

In [37]:
# Remove pure black background if present
# A colour is kept when at least one of its channels is non-zero.
valid_mask = (unique_colors != 0).any(axis=1)
unique_colors = unique_colors[valid_mask]
print(f"Unique colors found (excluding black): {len(unique_colors)}")

Unique colors found (excluding black): 95238


In [38]:
# === 4️⃣ Vectorized nearest color matching ===
print("Mapping colors to LULC classes...")
# Pairwise Euclidean distances in RGB space: one row per unique image colour,
# one column per legend colour.
distances = distance.cdist(unique_colors, legend_colors, metric="euclidean")
# Per image colour: column of the closest legend colour and how far away it is.
nearest_indices = distances.argmin(axis=1)
min_distances = distances.min(axis=1)

Mapping colors to LULC classes...


In [39]:
# Optional: apply tolerance threshold (in RGB units)
# Colours whose closest legend colour is farther than `tolerance` are not
# trusted; their index is overwritten in place with the sentinel -1.
tolerance = 25
unknown_mask = tolerance < min_distances
nearest_indices[unknown_mask] = -1  # Mark as unknown

In [40]:
# Map color → class_id
# A class id is the 1-based position of a colour in `legend`; 0 means unknown.
# The id of every legend_colors row is resolved once up front, instead of
# rebuilding and linearly searching list(legend.keys()) for each unique colour.
legend_ids = {color: class_id for class_id, color in enumerate(legend, start=1)}
row_to_id = np.array([legend_ids[tuple(row)] for row in legend_colors.tolist()])

# -1 marks colours rejected by the tolerance check; they become class 0.
# Indexing row_to_id with -1 is harmless: np.where discards that value.
class_ids = np.where(nearest_indices == -1, 0, row_to_id[nearest_indices]).tolist()

# Lookup from each unique image colour (as a tuple) to its class id.
color_to_id = {tuple(k): v for k, v in zip(unique_colors, class_ids)}

In [41]:
# === 5️⃣ Create classified map ===
def _classify_pixels(pixels, lookup, default=0):
    """Return the uint8 class id of every RGB row in `pixels`.

    Parameters
    ----------
    pixels : array-like of shape (N, 3)
        One RGB triple per pixel.
    lookup : dict
        Maps (r, g, b) tuples to class ids.
    default : int
        Class id given to colours that are not a key of `lookup`.

    Returns
    -------
    numpy.ndarray of shape (N,) and dtype uint8.

    For uint8 pixels the three channels are packed into one 24-bit integer
    and resolved through a 2**24-entry table with a single fancy-indexing
    step, avoiding a Python-level tuple + dict lookup per pixel. Any other
    dtype falls back to the per-pixel dictionary lookup.
    """
    pixels = np.asarray(pixels)
    if not lookup:
        return np.full(len(pixels), default, dtype=np.uint8)
    if pixels.dtype != np.uint8:
        # Channels wider than 8 bits cannot be packed into a 24-bit key.
        return np.array(
            [lookup.get(tuple(p), default) for p in pixels], dtype=np.uint8
        )

    known = np.array(list(lookup.keys()), dtype=np.int64).reshape(-1, 3)
    ids = np.array(list(lookup.values()), dtype=np.uint8)
    # Keys outside 0..255 can never equal a uint8 pixel, so drop them rather
    # than let them index outside (or alias into) the table.
    usable = ((known >= 0) & (known <= 255)).all(axis=1)
    known, ids = known[usable], ids[usable]

    # table[packed colour] -> class id; colours not in `lookup` keep `default`.
    table = np.full(1 << 24, default, dtype=np.uint8)
    table[(known[:, 0] << 16) | (known[:, 1] << 8) | known[:, 2]] = ids

    # Widen before shifting so the packed key does not overflow uint8.
    packed = pixels[:, 0].astype(np.uint32) << 16
    packed |= pixels[:, 1].astype(np.uint32) << 8
    packed |= pixels[:, 2]
    return table[packed]


# Pixels whose colour is not in color_to_id (e.g. black background) get id 0.
classified = _classify_pixels(flat_rgb, color_to_id).reshape(H, W)

In [43]:
# === 6️⃣ Save classified raster ===
# Reuse the source profile, overriding only what differs for the output:
# a single band of uint8 class ids, written with LZW compression.
profile.update({"count": 1, "dtype": rasterio.uint8, "compress": "lzw"})

with rasterio.open(LULC_OUT_TIF_FILE, mode="w", **profile) as dst:
    dst.write(classified, indexes=1)

print(f"Saved: {LULC_OUT_TIF_FILE}")

Saved: ../../data/preprocessed/LULC/uk_lulc_classified.tif


In [44]:
# === 7️⃣ Export legend CSV ===
# One record per legend entry; Class_ID is the 1-based position of the colour
# in `legend`, matching the ids written to the classified raster.
legend_rows = [
    {"Class_ID": class_id, "Class_Name": name, "RGB": f"{r},{g},{b}"}
    for class_id, ((r, g, b), name) in enumerate(legend.items(), start=1)
]
# Id 0 is what pixels receive when no legend colour matched; it is listed last.
legend_rows.append({"Class_ID": 0, "Class_Name": "Unknown/Unmapped", "RGB": "NA"})

# Build the frame once from all records instead of appending a row afterwards.
df = pd.DataFrame(legend_rows, columns=["Class_ID", "Class_Name", "RGB"])
df.to_csv(LULC_OUT_CSV_LEGEND_FILE, index=False)

# Report the path that was actually written (the previous message named a
# file, lulc_class_legend.csv, that this notebook never creates).
print(f"Saved: {LULC_OUT_CSV_LEGEND_FILE}")
print("✅ Classification complete!")


Saved: lulc_class_legend.csv
✅ Classification complete!
