In [1]:
import os, sys, tempfile, getpass
from pathlib import Path
# ---- User configuration -------------------------------------------------
# NOTE(review): all paths below are absolute and user-specific; adjust them per machine.
QGIS_PREFIX       = r"C:\Users\B464518\AppData\Local\Programs\OSGeo4W"   # QGIS / OSGeo4W install folder
DEM               = r"C:\Users\B464518\drainage\raster\arcticDEM_500m_ice_sheet.tif"   # input DEM raster
icesheet          = r"C:\Users\B464518\drainage\vector\02_PROMICE-2022-IceMask-polygon.gpkg"   # ice-mask polygons, imported later and used as the raster mask
OUT               = r"C:\Users\B464518\drainage\output\moede_mandag"   # output folder (created at the end of this block if missing)
STREAM_THRESHOLD  = 200          # passed as `threshold` to r.stream.extract. For subglacial basins the DEM has a different resolution: raise the threshold to the equivalent area (m2).

# ---- GRASS environment bootstrap ------------------------------------------
# These variables and the sys.path entry are set before `grass.script` is imported below,
# so that the GRASS build bundled with the QGIS install is the one that gets used.
GISBASE = fr"{QGIS_PREFIX}\apps\grass\grass84"
os.environ["GISBASE"]      = GISBASE
os.environ["GRASS_PYTHON"] = sys.executable   # point GRASS at the interpreter running this notebook
os.environ["PROJ_LIB"]     = fr"{QGIS_PREFIX}\share\proj"
os.environ["GDAL_DATA"]    = fr"{QGIS_PREFIX}\share\gdal"
# Prepend the GRASS and QGIS binary folders to PATH (existing PATH is kept at the end).
os.environ["PATH"] = os.pathsep.join([
    fr"{GISBASE}\bin", fr"{GISBASE}\extrabin",
    fr"{QGIS_PREFIX}\bin", fr"{QGIS_PREFIX}\apps\Qt5\bin",
    os.environ["PATH"],
])
# Make the GRASS Python package importable.
sys.path.insert(0, fr"{GISBASE}\etc\python")

import grass.script as gs
import grass.script.setup as gsetup

# Ensure the output folder exists before any cell writes into it.
Path(OUT).mkdir(parents=True, exist_ok=True)

def start_grass_from_raster(raster_path, location="dem_loc", mapset="PERMANENT"):
    """Open a GRASS session in a per-user database under the system temp folder.

    The database folder is ``<tempdir>/grassdata_<username>``. If the folder for
    `location` does not exist yet, the location is created from `raster_path`;
    an existing location folder is reused as-is. The session is then initialised
    on the `mapset` path inside that location.

    Returns:
        tuple: ``(gisdbase, location, mapset)`` where `gisdbase` is the database
        folder as a string.
    """
    gisdbase = os.path.join(tempfile.gettempdir(), f"grassdata_{getpass.getuser()}")
    db_root = Path(gisdbase)
    db_root.mkdir(parents=True, exist_ok=True)

    location_dir = db_root / location
    if not location_dir.exists():
        # First use: derive the location from the raster file.
        gs.core.create_location(
            dbase=gisdbase,
            location=location,
            filename=raster_path,
            overwrite=True,
        )

    gsetup.init(path=str(location_dir / mapset))
    return gisdbase, location, mapset
    
def import_dem_native(input_path, out_name="dem"):
    """Load a raster file into GRASS as a native raster and set the region to it.

    The file is first imported with ``r.in.gdal``. If that raises, the raster is
    linked with ``r.external`` as ``<out_name>_ext`` and cloned into a native map
    via ``r.mapcalc`` (some modules dislike r.external maps). The reason for the
    fallback is printed instead of being swallowed silently.

    Args:
        input_path: Path to the raster file; backslashes are converted to
            forward slashes before being handed to GRASS.
        out_name: Name of the resulting GRASS raster.

    Raises:
        Whatever the fallback commands raise if they fail as well.
    """
    source = input_path.replace("\\", "/")  # normalise Windows separators once
    try:
        gs.run_command("r.in.gdal", input=source, output=out_name, flags="o", overwrite=True)
    except Exception as err:
        # Deliberate best-effort fallback, but tell the user why it was needed.
        print(f"⚠ r.in.gdal failed ({err!r}); falling back to r.external + clone")
        linked = f"{out_name}_ext"
        gs.run_command("r.external", input=source, output=linked, flags="o", overwrite=True)
        # Region must match the linked raster before it is cloned cell by cell.
        gs.run_command("g.region", raster=linked)
        gs.mapcalc(f"{out_name} = {linked} * 1.0", overwrite=True)
        print(f"✓ r.external + clone → {out_name}")
    else:
        print(f"✓ r.in.gdal → {out_name}")
    gs.run_command("g.region", raster=out_name)


def have(module_name):
    """Return True when `module_name` resolves to an executable on PATH."""
    import shutil

    resolved = shutil.which(module_name)
    return resolved is not None

# === 1) Start the GRASS session, then load the DEM as the native raster 'dem' ===
GISDBASE, LOCATION, MAPSET = start_grass_from_raster(raster_path=DEM)
import_dem_native(input_path=DEM, out_name="dem")

# Sanity check before continuing: print the version of the GRASS session in use.
version_info = gs.read_command("g.version")
print(version_info)

✓ r.in.gdal → dem
GRASS 8.4.1 (2025)



In [2]:
# ---------- Mask ----------
# Import the ice-mask polygons and apply them as the active raster mask.
mask_source = icesheet.replace("\\", "/")
gs.run_command("v.import", input=mask_source, output="icesheet_mask", overwrite=True)
gs.run_command("r.mask", vector="icesheet_mask", maskcats=None, overwrite=True)

## Udregning af hydraulisk potentiale (ikke relevant for supraglacialt) ## 

In [35]:
import geopandas as gpd

# ---- Files ----
ice_file = r"C:\Users\B464518\drainage\raster\bedmachine_surface.tif"      # ice surface elevation
bed_file = r"C:\Users\B464518\drainage\raster\bedmachine_bed.tif"          # bed elevation
output_file = r"C:\Users\B464518\drainage\raster\hydraulic_potential.tif"  # exported result

# ---- Physical constants ----
rho_w = 1000    # density of water (kg/m³)
rho_i = 917     # density of ice (kg/m³)
g = 9.81        # gravitational acceleration (m/s²)

# ---- Import bed and surface rasters (bed first, then surface) ----
for source_file, map_name in ((bed_file, 'bed_raster'), (ice_file, 'ice_raster')):
    gs.run_command('r.import', input=source_file, output=map_name, overwrite=True)

# ---- Smoothing (optional; can be left out) ----
# 15-cell moving average of the surface; switch the input to 'dem' for surface basins.
gs.run_command(
    'r.neighbors',
    input="ice_raster",
    output='dem_smoothed',
    method='average',
    size=15,
    overwrite=True,
)

# ---- Hydraulic potential (computed within the active mask) ----
# rho_w*g*bed + rho_i*g*(surface - bed), divided by 1e6 (presumably Pa -> MPa; confirm units).
potential_expr = (
    "hydraulic_potential = "
    f"(({rho_w} * {g} * bed_raster) + ({rho_i} * {g} * (dem_smoothed-bed_raster)))"
    "/1000000"
)
gs.mapcalc(potential_expr, overwrite=True)

# ---- Export ----
gs.run_command(
    'r.out.gdal',
    input='hydraulic_potential',
    output=output_file,
    format='GTiff',
    type='Float64',
    overwrite=True,
)

In [None]:
## STREAMS AND ACCUMULATION MAP ##

# Flow accumulation, created explicitly from the DEM.
gs.run_command("r.watershed", elevation="dem", accumulation="accum", overwrite=True)

# ---------- Fill sinks + flow direction ----------
gs.run_command(
    "r.fill.dir",
    input="dem",
    output="dem_filled",
    direction="flow_dir",
    overwrite=True,
)

# ---------- Extract streams ----------
# threshold, d8cut and mexp are the only tuning variables set explicitly in this script.
# NOTE(review): `direction` here is given the same map name ('flow_dir') that r.fill.dir
# wrote above, with overwrite enabled; confirm which module's directions are meant to
# feed r.stream.order / r.stream.basins further down.
gs.run_command(
    "r.stream.extract",
    elevation="dem_filled",
    direction="flow_dir",
    accumulation="accum",
    threshold=STREAM_THRESHOLD,
    d8cut=0,
    mexp=0.0,
    stream_raster="streams",
    stream_vector="streams_vect",
    overwrite=True,
)

## OUTLET DEFINITIONS ##

import geopandas as gpd

# Files used for the round-trip through geopandas.
# The raw export stays relative to the kernel's working directory (unchanged);
# the cleaned file is written to OUT and must be re-imported from that same path.
outlets_raw_path = "outlets.gpkg"
outlets_clean_path = Path(OUT) / "outlets_clean.gpkg"

#---------- Stream ordering: needed to find the outlet streams (next_stream = -1) ----------#
gs.run_command('r.stream.order',
               stream_rast='streams',
               direction='flow_dir',
               elevation='dem_filled',
               accumulation='accum',
               strahler='strahler',
               stream_vect='streams_vect',
               overwrite=True)

#---------- Keep only the segments that have no next stream ----------#
gs.run_command('v.extract',
               input='streams_vect',
               output='outlet_segment',
               where="next_stream = -1",
               overwrite=True)

#---------- End points of those segments ----------#
gs.run_command('v.to.points',
               input='outlet_segment',
               output='outlet_point',
               use='end',  # downstream end
               flags='r',
               overwrite=True)

#---------- Export to GeoPackage so the points can be cleaned with geopandas ----------#
gs.run_command('v.out.ogr', input='outlet_point', format='GPKG',
               output=outlets_raw_path, overwrite=True)

# 1. Read the exported points.
gdf = gpd.read_file(outlets_raw_path)
if gdf.empty:
    # Fail here with a clear message instead of a confusing error further down.
    raise RuntimeError("No outlet points were exported; check the 'next_stream = -1' selection.")

# 2. Running number starting at 1, in file order.
gdf['new_id'] = range(1, len(gdf) + 1)

# 3. Keep one point per stream: the one with the highest running number
#    (the lowest id is apparently always the start of the stream, the highest its end).
gdf_clean = gdf.loc[gdf.groupby('stream')['new_id'].idxmax()]

# 4. Save the cleaned outlets and import exactly that file back into GRASS.
#    Writing to OUT while importing the bare file name from the working directory
#    would read a missing or stale file, so one path is used for both steps.
gdf_clean.to_file(str(outlets_clean_path), driver="GPKG")
gs.run_command('v.import', input=outlets_clean_path.as_posix(),
               output='outlet_point_clean', overwrite=True)

## BASINS DELINEATION ##

# Delineate basins from the outlet points. To skip the outlet extraction entirely,
# use the streams raster as input instead; the r.stream.basins documentation describes this.
gs.run_command('r.stream.basins',
               direction='flow_dir',
               basins='basins',
               points='outlet_point_clean',
               overwrite=True)

In [5]:
# ---------- Export ----------
# GRASS raster name -> GeoTIFF file name inside OUT (exported in this order).
export_targets = {
    "dem": "dem_arctic_200.tif",
    "accum": "accum__arctic_200.tif",
    "flow_dir": "flowdir_arctic_200.tif",
    "streams": "streams_arctic_200.tif",
    "basins": "basins_arctic_200.tif",
}
for raster_name, tif_name in export_targets.items():
    # Build the Windows path, then hand it to GRASS with forward slashes.
    tif_path = (OUT + "\\" + tif_name).replace("\\", "/")
    gs.run_command(
        "r.out.gdal",
        input=raster_name,
        output=tif_path,
        format="GTiff",
        createopt="COMPRESS=LZW",
        overwrite=True,
    )

## Script from Rasmus to merge basins that are too small and assign cells with no basin to nearest basin. 

In [6]:
def _parse_stat_value(tok: str):
    """Parse r.stats category token which can be '13', '13.0', or '13-13'."""
    tok = tok.strip()
    if tok in ("", "*"):
        return None
    if "-" in tok:                  # e.g., '13-13'
        tok = tok.split("-", 1)[0]  # take the left edge
    try:
        return int(round(float(tok)))
    except Exception:
        return None

# ================= Configuration =================
# `dem` is kept because other cells may read it, but note that it is a plain Python
# string: the r.mapcalc expressions in this cell address the extent raster through
# EXTENT_MAP. Set EXTENT_MAP = dem to restrict to the hydraulic-potential raster instead.
dem = 'hydraulic_potential'
EXTENT_MAP = "dem"             # GRASS raster whose non-NULL cells define the working extent
MIN_BASIN_SIZE_KM2 = 5         # set your threshold (e.g., 10000 km²)
res = 500                      # DEM working resolution (m)

# Optional: exclave cleanup after merge. Set 0 to disable, or a smaller area than MIN_BASIN_SIZE_KM2.
# NOTE(review): the value below (10) is larger than MIN_BASIN_SIZE_KM2 (5), contrary to
# the guidance above; confirm this is intended.
EXCLAVE_MAX_KM2   = 10         # e.g., 50 to only remove tiny specks; 0 disables
MAX_EXCLAVE_ITERS = 4          # safety cap on the number of cleanup passes
# =================================================

# Integer basin labels restricted to the extent raster, written in a single pass
# (avoids overwriting a map with an expression that reads the same map).
gs.mapcalc(f"basins0 = if(isnull({EXTENT_MAP}), null(), int(basins))", overwrite=True)

# Thresholds converted from km² to cell counts.
cell_area_km2 = (res*res)/1e6
min_cells     = int(round(MIN_BASIN_SIZE_KM2 / cell_area_km2))
exclave_cells = int(round(EXCLAVE_MAX_KM2 / cell_area_km2)) if EXCLAVE_MAX_KM2 > 0 else 0
print(f"→ Merge threshold: {MIN_BASIN_SIZE_KM2} km² ≈ {min_cells} cells at {res} m")
if exclave_cells > 0:
    print(f"→ Exclave cleanup threshold: {EXCLAVE_MAX_KM2} km² ≈ {exclave_cells} cells")

# ---------- Split big vs small by basin area ----------
stats = gs.read_command("r.stats", input="basins0", flags="cn", separator=",").strip().splitlines()
if not stats:
    raise RuntimeError("No basins found within DEM extent.")
sizes = {int(cat): int(n) for cat, n in (ln.split(",") for ln in stats)}
big_ids   = {cat for cat, n in sizes.items() if n >= min_cells}
small_ids = sorted(set(sizes) - big_ids)

if not big_ids:
    # Nothing reaches the threshold: seed with the largest basin so there is a merge target.
    largest = max(sizes.items(), key=lambda kv: kv[1])[0]
    big_ids = {largest}
    small_ids = sorted(set(sizes) - big_ids)
    print(f"ℹ️ All basins < threshold; seeding with largest basin {largest}")

print(f"→ Big basins: {len(big_ids)}  |  Small basins: {len(small_ids)}")

# If nothing to merge → carry forward
if not small_ids:
    gs.mapcalc("basins_after_merge = basins0", overwrite=True)
else:
    # 1) big-only raster (others NULL)
    rules_big = Path(OUT)/"big_reclass.txt"
    with open(rules_big, "w", encoding="utf-8") as f:
        for cat in sizes:
            f.write(f"{cat} = {cat}\n" if cat in big_ids else f"{cat} = NULL\n")
    gs.run_command("r.reclass", input="basins0", output="big_only",
                   rules=str(rules_big), overwrite=True)

    # 2) nearest big ID per cell
    gs.run_command("r.grow.distance", input="big_only", value="nearest_big_id",
                   flags="m", overwrite=True)
    # Integer copy, so that r.stats reports exact basin IDs rather than value
    # sub-ranges (whose left edge is not necessarily a real ID).
    gs.mapcalc("nearest_big_int = int(nearest_big_id)", overwrite=True)

    # 3) One cross-tabulation (small basin x nearest big ID -> cell count) replaces a
    #    temporary raster plus an r.stats call per small basin.
    xtab = gs.read_command("r.stats", input="basins0,nearest_big_int", flags="cn",
                           separator=",").strip().splitlines()
    small_set = set(small_ids)
    votes = {}   # small basin id -> {big basin id: cell count}
    for ln in xtab:
        parts = ln.split(",")
        if len(parts) != 3:
            continue
        sid = _parse_stat_value(parts[0])
        bid = _parse_stat_value(parts[1])
        if sid not in small_set or bid is None:
            continue
        try:
            cnt = int(parts[2])
        except ValueError:
            continue
        tally = votes.setdefault(sid, {})
        tally[bid] = tally.get(bid, 0) + cnt

    #    Build reclass rules: small_id = chosen_big_id; big_id = big_id (identity)
    rules_path = Path(OUT) / "whole_basin_reclass.txt"
    with open(rules_path, "w", encoding="utf-8") as f:
        # identity for bigs
        for bid in sorted(big_ids):
            f.write(f"{bid} = {bid}\n")

        # each small basin goes, as a whole, to the big basin that is nearest for most of its cells
        for sid in small_ids:
            tally = votes.get(sid)
            if not tally:
                # degenerate: no vote — keep itself
                f.write(f"{sid} = {sid}\n")
                continue
            chosen = max(sorted(tally), key=tally.get)   # ties resolve to the lowest ID
            f.write(f"{sid} = {chosen}\n")

    # 4) Reclass whole basins in one shot (guarantees NO splitting)
    gs.run_command("r.reclass", input="basins0", output="basins_after_merge",
                   rules=str(rules_path), overwrite=True)

# ---------- Full extent coverage (fill any NULLs) ----------
gs.mapcalc(f"basins_after_merge = if(isnull({EXTENT_MAP}), null(), basins_after_merge)", overwrite=True)
gs.run_command("r.grow.distance", input="basins_after_merge", value="fill_from",
               flags="m", overwrite=True)
gs.mapcalc(f"basins_filled = if(isnull(basins_after_merge) && !isnull({EXTENT_MAP}), fill_from, basins_after_merge)",
           overwrite=True)

# ---------- Optional: Iterative EXCLAVE cleanup (small disconnected patches) ----------
current = "basins_filled"
if exclave_cells > 0:
    for it in range(1, MAX_EXCLAVE_ITERS+1):
        print(f"\n→ Exclave pass {it}")
        # Label connected patches and count their cells.
        gs.run_command("r.clump", input=current, output="clumps", overwrite=True)
        cstats = gs.read_command("r.stats", input="clumps", flags="cn",
                                 separator=",").strip().splitlines()
        if not cstats:
            print("  (no clumps?)")
            break
        small_clump_ids = [int(cid) for cid, n in (ln.split(",") for ln in cstats)
                           if int(n) < exclave_cells]
        print(f"  small clumps found: {len(small_clump_ids)}")

        if not small_clump_ids:
            print("  ✓ no exclaves under threshold remain; stopping")
            break

        # Blank out the small patches, then refill them from the nearest remaining basin.
        expr = " || ".join([f"clumps == {cid}" for cid in small_clump_ids])
        gs.mapcalc(f"smallmask = if({expr}, 1, null())", overwrite=True)
        gs.mapcalc(f"{current}_nulled = if(!isnull(smallmask), null(), {current})", overwrite=True)

        gs.run_command("r.grow.distance", input=f"{current}_nulled", value="refill",
                       flags="m", overwrite=True)
        gs.mapcalc(f"{current} = if(isnull({current}_nulled) && !isnull({EXTENT_MAP}), refill, {current}_nulled)",
                   overwrite=True)
else:
    print("→ Exclave cleanup disabled (EXCLAVE_MAX_KM2=0).")

# ---------- Final export ----------
final_map = current
gs.mapcalc("basins_merged_final = int({})".format(final_map), overwrite=True)
gs.run_command("r.out.gdal",
               input="basins_merged_final",
               output=str(Path(OUT)/"basins_merged_arctic_200.tif"),
               format="GTiff", createopt="COMPRESS=LZW", overwrite=True)

print("✅ Done →", Path(OUT)/"basins_merged_arctic_200.tif")


→ Merge threshold: 5 km² ≈ 20 cells at 500 m
→ Exclave cleanup threshold: 10 km² ≈ 40 cells
→ Big basins: 117  |  Small basins: 250

→ Exclave pass 1
  small clumps found: 1094

→ Exclave pass 2
  small clumps found: 979

→ Exclave pass 3
  small clumps found: 963

→ Exclave pass 4
  small clumps found: 956
✅ Done → C:\Users\B464518\drainage\output\moede_mandag\basins_merged_arctic_200.tif
