In [1]:
!uv pip install -U "kelp-o-matic==0.14.0rc2" tqdm rasterio shapely fiona polars

[2mUsing Python 3.12.3 environment at: /home/taylor/PycharmProjects/hakai-ml-train/.venv[0m
[2K[2mResolved [1m50 packages[0m [2min 107ms[0m[0m                                        [0m
[2mAudited [1m50 packages[0m [2min 0.11ms[0m[0m


In [2]:
from pathlib import Path

import polars as pl
from kelp_o_matic import model_registry, __version__ as komversion
from tqdm.auto import tqdm
import rasterio
import rasterio.features
import fiona
from shapely.geometry import shape

In [4]:
# Use polars to read a published Google Sheet (CSV export) that lists the
# orthomosaics to process, then normalise the Windows-style paths in the
# "location for KoM" column into local mount points.

SHEET_CSV_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRsZ2sRPsuAcnWf-fErM5OYPH688QkLfRSkjnoARz0giaoIzx1wHmyR1nvWsvYjRw/pub?gid=1820174638&single=true&output=csv"

df = (
    pl
        .read_csv(SHEET_CSV_URL)
        # Only kelp rows whose orthomosaic is flagged as georeferenced
        .filter(pl.col("Type") == "kelp")
        .filter(pl.col("Ortho georeferenced") == "Y")
        .select(
            site=pl.col("Site name"),
            mobe_id=pl.col("MOBE"),
            img_type=pl.col("RGB/MS"),
            path=(
                pl.col("location for KoM")
                    # All patterns below are plain text, so match them literally.
                    # In regex mode the dots in the IP address match any character.
                    .str.replace_all("\\", "/", literal=True)
                    # After the slash swap a UNC prefix reads "//10.8.1.30/...";
                    # replacing "/10.8.1.30" with "mnt" leaves "/mnt/...".
                    .str.replace_all("/10.8.1.30", "mnt", literal=True)
                    # Drive letters -> mount points (first occurrence only)
                    .str.replace("H:", "/mnt/geospatial", literal=True)
                    .str.replace("U:", "/mnt/aco-uvic/", literal=True)
                    # Drop stray double quotes and surrounding whitespace
                    .str.replace_all("\"", "", literal=True)
                    .str.strip_chars()
            ),
            notes=pl.col("Notes")
        )
)
# Rows with mobe_id == "ACO" are split off into their own frame.
aco_df = df.filter(pl.col("mobe_id") == "ACO")
# `!=` yields null for a null mobe_id, so keep those rows explicitly.
df = df.filter((pl.col("mobe_id") != "ACO") | pl.col("mobe_id").is_null())
df

site,mobe_id,img_type,path,notes
str,str,str,str,str
"""Breaker""",,"""RGB ""","""/mnt/geospatial/Internal/RS/UA…",
"""Breaker""",,"""MS""","""/mnt/geospatial/Internal/RS/UA…",
"""Manley/Womanley""",,"""RGB ""","""/mnt/geospatial/Internal/RS/UA…",
"""Manley/Womanley""",,"""MS""","""/mnt/geospatial/Internal/RS/UA…",
"""Spider HIRMD""",,"""RGB ""","""/mnt/geospatial/Internal/RS/UA…",
…,…,…,…,…
"""West Beach/2nd Beach""","""U1413""","""RGB ""","""/mnt/geospatial/Internal/RS/UA…","""F6 2.1 m"""
"""West Beach/2nd Beach""","""U1437""","""RGB ""","""/mnt/geospatial/Internal/RS/UA…","""2.1 m July 28th"""
"""Dublin Bay""","""U1440""","""RGB ""","""/mnt/geospatial/Internal/RS/UA…","""H:\Internal\RS\Historical Air …"
"""NW Calvert - sunny""","""U1439""","""RGB ""","""/mnt/geospatial/Internal/RS/UA…",


In [5]:
def _paths_for(frame: pl.DataFrame, img_type: str) -> list[Path]:
    """Return the ``path`` column as ``Path`` objects for matching rows.

    Args:
        frame: Frame with ``img_type`` and ``path`` string columns.
        img_type: Pattern that ``img_type`` must contain. ``str.contains`` is
            used rather than equality, so a value such as ``"RGB "`` with a
            trailing space still matches ``"RGB"``.

    Returns:
        One ``Path`` per matching row, in frame order.
    """
    matching = frame.filter(pl.col("img_type").str.contains(img_type)).select("path")
    return [Path(path) for (path,) in matching.iter_rows()]


rgb_paths = _paths_for(df, "RGB")
print(f"{len(rgb_paths)} RGB images")

ms_paths = _paths_for(df, "MS")
print(f"{len(ms_paths)} MS images")

# Only the "MS" rows of the ACO frame are selected here.
aco_paths = _paths_for(aco_df, "MS")
print(f"{len(aco_paths)} ACO images")

42 RGB images
19 MS images
3 ACO images


In [38]:
def get_outpath(in_path: Path, crop_size=2048, version="0.14.0") -> Path:
    """Build the output raster path inside a sibling ``KoM_Outputs`` folder.

    The result lives two levels above the input file, in ``KoM_Outputs``, and
    is named ``<stem>_kelp_kom_<version>_cs<crop_size>.tif``, where
    ``<version>`` is the first three dot-separated parts of ``version`` joined
    with underscores.

    Args:
        in_path: Path of the input image.
        crop_size: Crop size recorded in the file name.
        version: Version string recorded in the file name.

    Returns:
        The output ``.tif`` path.
    """
    leading_parts = version.split(".")[:3]
    version_tag = "_".join(leading_parts)
    filename = f"{in_path.stem}_kelp_kom_{version_tag}_cs{crop_size}.tif"
    return in_path.parent.parent.joinpath("KoM_Outputs", filename)

def get_aco_outpath(in_path: Path, crop_size=2048, version="0.14.0") -> Path:
    """Build the output raster path next to the input file.

    The result sits in the same directory as ``in_path`` and is named
    ``<stem>_kelp_kom_<version>_cs<crop_size>.tif``, where ``<version>`` is the
    first three dot-separated parts of ``version`` joined with underscores.

    Args:
        in_path: Path of the input image.
        crop_size: Crop size recorded in the file name.
        version: Version string recorded in the file name.

    Returns:
        The output ``.tif`` path.
    """
    version_parts = version.split(".")
    version_tag = "_".join(version_parts[:3])
    new_name = f"{in_path.stem}_kelp_kom_{version_tag}_cs{crop_size}.tif"
    return in_path.with_name(new_name)

def _release_tuple(version: str) -> tuple:
    """Return the leading numeric (major, minor, patch) parts of ``version`` as ints.

    Only the leading digits of each of the first three dot-separated parts are
    used, so ``"0.14.0rc2"`` gives ``(0, 14, 0)``. Missing or non-numeric parts
    count as 0.
    """
    import re  # local import: the notebook's import cell does not provide `re`

    numbers = []
    for component in version.split(".")[:3]:
        digits = re.match(r"\d+", component)
        numbers.append(int(digits.group()) if digits else 0)
    numbers.extend([0] * (3 - len(numbers)))
    return tuple(numbers)


def convert_to_shapefile(inpath: Path, outpath: Path, version: str):
    """Polygonize a classified raster and write the polygons to an ESRI Shapefile.

    Band 1 of ``inpath`` is read and every region of equal, non-zero value
    becomes one polygon feature with two attributes: ``value`` (the raster
    value) and ``species`` (a label derived from that value).

    Args:
        inpath: Raster file to polygonize.
        outpath: Shapefile to create.
        version: Version string of the tool that produced the raster. It
            selects which raster value is labelled "Macrocystis pyrifera":
            1 for releases 0.14.0 and later (pre-releases such as
            ``0.14.0rc2`` included), 2 for earlier releases. Every other
            non-zero value is labelled "Nereocystis luetkeana".
    """
    schema={
      'geometry': 'Polygon',
      'properties': {
          'species': 'str:32',
          'value': 'int'
      }
    }

    # Compare versions numerically. A plain string comparison is lexicographic
    # and would rank e.g. "0.9.1" above "0.14.0".
    macro_id = 1 if _release_tuple(version) >= (0, 14, 0) else 2

    with rasterio.open(inpath) as ds:
        raster = ds.read(1)
        mask = raster != 0  # zero-valued pixels are not polygonized
        crs = ds.crs
        transform = ds.transform

    with fiona.open(str(outpath), 'w', crs=crs, driver='ESRI Shapefile', schema=schema) as output:
        shape_gen = rasterio.features.shapes(raster, mask=mask, transform=transform)
        for polygon, value in tqdm(shape_gen, desc="Converting to shapefile", leave=False):
            value = int(value)
            output.write({
                'geometry': shape(polygon),
                'properties': {
                    "species": "Macrocystis pyrifera" if value == macro_id else "Nereocystis luetkeana",
                    "value": value
                }
            })

## Classify all RGB Images

In [None]:
# Run the "kelp-rgb" model over every RGB image, then polygonize each result.
model = model_registry["kelp-rgb"]
crop_size = 3072

for p in tqdm(rgb_paths, desc="RGB images"):
    print(p.name)
    outpath = get_outpath(p, crop_size, komversion)
    shppath = outpath.with_suffix(".shp")

    if not shppath.exists():
        # Raster first, then the shapefile derived from it
        model.process(img_path=p, output_path=outpath, crop_size=crop_size)
        convert_to_shapefile(outpath, shppath, komversion)
    else:
        # The shapefile is written last, so its presence marks the image as done
        print(f"Output {outpath} already exists, skipping")

## Classify all MS Images

In [None]:
# Run the "kelp-rgbi" model over every MS image, then polygonize each result.
model = model_registry["kelp-rgbi"]
crop_size = 3072

for p in tqdm(ms_paths, desc="MS images"):
    print(p.name)
    outpath = get_outpath(p, crop_size, komversion)
    # Record the band order used in the file name
    outpath = outpath.with_name(f"{outpath.stem}_b3215.tif")
    shppath = outpath.with_suffix(".shp")

    if not shppath.exists():
        # Raster first, then the shapefile derived from it
        model.process(
            img_path=p,
            output_path=outpath,
            crop_size=crop_size,
            band_order=[3,2,1,5],
        )
        convert_to_shapefile(outpath, shppath, komversion)
    else:
        # The shapefile is written last, so its presence marks the image as done
        print(f"Output {outpath} already exists, skipping")

## Classify all ACO Images

In [None]:
# Run the "kelp-rgbi" model over every ACO image, then polygonize each result.
model = model_registry["kelp-rgbi"]
crop_size = 3072

for p in tqdm(aco_paths, desc="ACO images"):
    print(p.name)
    outpath = get_aco_outpath(p, crop_size, komversion)
    # Record the band order used in the file name
    outpath = outpath.with_name(f"{outpath.stem}_b3214.tif")
    shppath = outpath.with_suffix(".shp")

    if not shppath.exists():
        # Raster first, then the shapefile derived from it
        model.process(
            img_path=p,
            output_path=outpath,
            crop_size=crop_size,
            band_order=[3,2,1,4],
        )
        # shppath already ends in ".shp", so it is passed as-is
        convert_to_shapefile(outpath, shppath, komversion)
    else:
        # The shapefile is written last, so its presence marks the image as done
        print(f"Output {outpath} already exists, skipping")