In [1]:
import geopandas as gpd
import pandas as pd
import seabee_annotation as anno

## 1. Create class definition file

Create a class definition file compatible with ArcGIS Pro from an Excel table. The Excel file should be structured with four columns, as in the example below:

|    **A**    |      **B**      |      **C**      |                      **D**                     |
|:-----------:|:---------------:|:---------------:|:----------------------------------------------:|
| **Level_1** |   **Level_2**   |   **Level_3**   |                 **Level3_Desc**                |
|    ALGAE    |       RED       |      PALPA      |             Palmaria palmata (søl)             |
|    ALGAE    |       RED       |      VERLA      | Vertebrata lanosa (grisetangdokke/trøffeltang) |
|    ALGAE    |       TURF      |       TURF      |             Unspecified turf (lurv)            |
|    MAERL    |      MAERL      |      MAERL      |     Unspecified maerl (rugl, Lithothamnion)    |
|    URCHIN   |      URCHIN     |      ECHES      |       Echinus esculentus (rød kråkebolle)      |
|  BEACHCAST  | BEACHCAST_BROWN | BEACHCAST_BROWN |      Unspecified, dried seaweed (Tangvoll)     |
|  BEACHCAST  | BEACHCAST_ANGIO | BEACHCAST_ANGIO |   Unspecified, dried seagrass (sjøgressvoll)   |
|    ANGIO    |      ANGIO      |      ZOSMA      |            Zostera marina (ålegras)            |

The output is a `.ecs` file (JSON) that can be loaded into the Training Samples Manager.

In [2]:
# Excel workbook holding the class hierarchy; only columns A-D are read
xl_path = "../class_definitions/seabee_habitat_classes_v0-1.xlsx"
df = pd.read_excel(xl_path, usecols="A:D")

# Preview the first rows to check the table was parsed as expected
df.head()

Unnamed: 0,1st level,2nd level,3rd level,Species/habitat
0,ALGAE,BROWN,ALAES,Alaria esculenta (butare)
1,ALGAE,BROWN,CHOFI,Chorda filum (martaum)
2,ALGAE,BROWN,DESVI,Desmarestia viridis (mykt kjerringhår)
3,ALGAE,BROWN,LAMHY,Laminaria hyperborea (stortare)
4,ALGAE,BROWN,LAMDI,Laminaria digitata (fingertare)


In [3]:
# Name and free-text description passed to the class definition builder
name = "seabee_class_definitions_v0-1"
desc = "v0.1 of the SeaBee class definition file. Created 07.12.2022."

# Hex colours for the level 1 classes (stored under key 0 of 'colour_dict')
level_1_colours = {
    "ALGAE": "#0091ff",
    "MAERL": "#757472",
    "URCHIN": "#ff0000",
    "BEACHCAST": "#e67b09",
    "ANGIO": "#2da12b",
    "WOOD": "#a1952b",
    "GRASS": "#c1f507",
    "ROCK": "#b5b5b3",
    "SEDIMENT": "#ffff00",
    "ANTHRO": "#000000",
}

# Hex colours for the level 2 classes (stored under key 1 of 'colour_dict')
level_2_colours = {
    "BROWN": "#6e5400",
    "GREEN": "#19b01b",
    "RED": "#912727",
    "TURF": "#9e219a",
    "MAERL": "#757472",
    "URCHIN": "#ff0000",
    "BEACHCAST_BROWN": "#e67b09",
    "BEACHCAST_ANGIO": "#03fcba",
    "ANGIO": "#2da12b",
    "WOOD": "#a1952b",
    "GRASS": "#c1f507",
    "BOULDER": "#b5b5b3",
    "COBBLE": "#b5b5b3",
    "GRAVEL": "#b5b5b3",
    "SAND": "#ffff00",
    "MUD": "#ffb700",
    "ANTHRO": "#000000",
}

# Colours keyed by zero-based hierarchy level. The level 3 entry (key 2) is
# deliberately left empty: level 3 colours are assigned randomly
colour_dict = {0: level_1_colours, 1: level_2_colours, 2: {}}

# Folder passed to the builder as the output location
out_fold = r"../class_definitions"

# Keyword options for the builder, collected in one place for readability
ecs_options = {
    "out_fold": out_fold,
    "version": 1,
    "org": "NIVA",
    "desc": desc,
    "colour_dict": colour_dict,
}
anno.class_definition_from_df(df, name, **ecs_options)

## 2. Merge shapefiles

During annotation, **all users should work with the same class definition file** (i.e. the `.ecs` created above). Annotations from all users for the same area can then be exported as shapefiles and added to a single folder. As long as the same class definition file has been used by everyone, the shapefiles will have a consistent structure with the same classes. These can therefore be merged and "dissolved" to create a single annotation dataset for the whole area.

In [4]:
# Merge and dissolve all shapefiles in folder.
# 'shp_fold' is the single folder into which every user's exported annotation
# shapefile has been placed. The merge relies on all of them having been
# created with the same class definition (.ecs) file.
shp_fold = "../vector/annotation_by_subarea"
gdf = anno.merge_shapefiles(shp_fold)
# Preview the first rows of the merged dataset
gdf.head()

Unnamed: 0,Classcode,geometry,Classname,Classvalue,RED,GREEN,BLUE,Count
0,1010,MULTIPOLYGON Z (((327684.077 6919066.286 0.000...,BROWN,1010,110,84,0,140424
1,101015,"POLYGON Z ((327921.227 6919074.341 0.000, 3279...",PELCA,101015,94,185,211,148
2,101017,MULTIPOLYGON Z (((327788.619 6919071.256 0.000...,FUCSE,101017,193,161,100,4110
3,101018,MULTIPOLYGON Z (((327778.052 6919076.088 0.000...,ASCNO,101018,106,71,53,223666
4,101020,MULTIPOLYGON Z (((327883.738 6919069.758 0.000...,FUCVE,101020,194,36,204,1675


## 3. Rebuild the class hierarchy

ArcGIS Pro stores all annotations in a single field (`Classname`), regardless of their level in the `.ecs` hierarchy. For machine learning, it is better to have one column of class labels per level, as this makes it easier to generate raster training datasets using labels for any level. To get around this limitation, `class_definition_from_df` embeds the hierarchy in the `Classcode`, so that it can be reconstructed afterwards. This is done by `rebuild_class_hierarchy`, used in the cell below.

In [5]:
# Extract annotation levels to separate columns.
# The .ecs file name is built from 'name', which is defined in the cell that
# creates the class definition file, so that cell must have been run first.
# NOTE(review): only the bare file name is passed here, whereas the .ecs was
# written with out_fold="../class_definitions" - presumably
# 'rebuild_class_hierarchy' resolves the folder itself; confirm.
# NOTE(review): 'gdf' is overwritten with the rebuilt frame, so re-running this
# cell on its own passes the already-rebuilt frame back in - re-run the merge
# cell first.
gdf = anno.rebuild_class_hierarchy(gdf, f"{name}.ecs")

# Save
shp_path = "../vector/2022-08-31_Remoy_Annotation_Merged_v0-1.shp"
# index=False: do not write the dataframe index to the output file
gdf.to_file(shp_path, index=False)

# Preview the first rows of the saved dataset
gdf.head()

Unnamed: 0,lev1_code,lev2_code,lev3_code,lev1_name,lev2_name,lev3_name,lev3_desc,red,green,blue,count,geometry
0,10,1010,1010--,ALGAE,BROWN,,,110,84,0,140424,MULTIPOLYGON Z (((327684.077 6919066.286 0.000...
1,10,1010,101015,ALGAE,BROWN,PELCA,Pelevetia canaliculata (sauetang),94,185,211,148,"POLYGON Z ((327921.227 6919074.341 0.000, 3279..."
2,10,1010,101017,ALGAE,BROWN,FUCSE,Fucus serratus (sagtang),193,161,100,4110,MULTIPOLYGON Z (((327788.619 6919071.256 0.000...
3,10,1010,101018,ALGAE,BROWN,ASCNO,Ascophyllum nodosum (grisetang),106,71,53,223666,MULTIPOLYGON Z (((327778.052 6919076.088 0.000...
4,10,1010,101020,ALGAE,BROWN,FUCVE,Fucus vesiculosus (blæretang),194,36,204,1675,MULTIPOLYGON Z (((327883.738 6919069.758 0.000...
