In [1]:
from pathlib import Path
from typing import Optional, Dict, Any, Union, Tuple, List, Literal
import sys, re, json

import numpy as np
import pandas as pd
import h5py as h5
from imageio.v3 import imread

In [2]:
# Read the ROI annotation table (columns: ID, StructureID, Description).
# pandas shows the CSV's unnamed leading column as "Unnamed: 0".
anno = pd.read_csv(filepath_or_buffer=Path("Annotation.csv"))
anno  # last expression -> rendered as the cell output

Unnamed: 0,ID,StructureID,Description
0,OB,507,Main olfactory bulb
1,MOs,993,Secondary motor area
2,MOp,985,Primary motor area
3,SSp_m,345,"Primary somatosensory area, mouth"
4,SSp_ul,369,"Primary somatosensory area, upper limb"
5,SSp_ll,337,"Primary somatosensory area, lower limb"
6,SSp_n,353,"Primary somatosensory area, nose"
7,SSp_un,182305689,"Primary somatosensory area, unassigned"
8,SSp_tr,361,"Primary somatosensory area, trunk"
9,SSp_bfd,329,"Primary somatosensory area, barrel field"


In [13]:
# Names of the per-side sub-directories that hold the mask images.
SIDES = ('Left', 'Right')
Side = Literal['Left', 'Right']

def get_roi_info(row: pd.Series, side: Side = 'Left', root: Optional[Path] = None) -> Dict[str, Any]:
    """Collect the metadata and mask image of one ROI for one side.

    The mask is read from ``<root>/<side>/Mask_<row.ID>.png``.

    Parameters
    ----------
    row
        Annotation record; its ``ID``, ``StructureID`` and ``Description``
        fields are read.
    side
        Name of the sub-directory under ``root`` that holds the mask.
    root
        Directory containing the per-side folders.  ``None`` means the
        current working directory.  A plain string is accepted as well.

    Returns
    -------
    dict
        Keys ``name``, ``side`` (lower-cased), ``AllenID``, ``description``
        and ``mask``.

    Raises
    ------
    FileNotFoundError
        If the expected mask image does not exist.
    """
    root = Path() if root is None else Path(root)
    imgfile = root / side / f"Mask_{row.ID}.png"
    # Explicit check instead of `assert`: assertions are removed under
    # `python -O` and would not say which file is missing.
    if not imgfile.exists():
        raise FileNotFoundError(f"mask image not found: {imgfile}")
    img = imread(str(imgfile))
    if np.ndim(img) == 3:
        # Collapse the last axis (presumably the colour channels) into a
        # boolean mask: a pixel is set when its mean over that axis is > 0.
        # NOTE(review): an opaque alpha channel would set every pixel --
        # confirm the mask PNGs carry no alpha.
        img = img.mean(2) > 0
    # NOTE(review): a 2-D image is returned as read (not thresholded), so the
    # mask dtype depends on the file -- confirm this is intended.
    return {
        'name': row.ID,
        'side': side.lower(),
        'AllenID': row.StructureID,
        'description': row.Description,
        'mask': img,
    }

In [14]:
# One record per (annotation row, side) pair, ordered row by row with the
# sides in SIDES order.  Iterates the shared SIDES constant rather than a
# second copy of the tuple; the enumerate() counter was never read and is gone.
rois = [
    get_roi_info(row, side)
    for _, row in anno.iterrows()
    for side in SIDES
]

In [16]:
# Number of ROI records collected (one per annotation row and side).
len(rois)

44

In [21]:
# Write every ROI mask to one HDF5 file: /masks/<NNN> holds the mask array and
# the remaining ROI fields are stored as attributes of that dataset.
maskfile = "reference_masks.h5"
# gzip at level 9, written explicitly instead of the legacy integer shorthand
# `compression=9`; `chunks` is the upper bound for the chunk shape.
saveopts = dict(compression="gzip", compression_opts=9, chunks=(512, 512))
with h5.File(maskfile, 'w') as out:  # 'w' replaces any existing file
    masks = out.create_group('masks')
    for idx, roi in enumerate(rois, start=1):
        mask = roi['mask']
        # h5py rejects a chunk shape that exceeds the dataset shape, so clamp
        # the configured chunk size to this mask's extent along each axis.
        chunks = tuple(
            min(limit, extent)
            for limit, extent in zip(saveopts['chunks'], np.shape(mask))
        )
        entry = masks.create_dataset(
            f"{idx:03d}", data=mask, **{**saveopts, 'chunks': chunks}
        )
        # Everything except the pixel data becomes dataset metadata.
        for key, value in roi.items():
            if key != 'mask':
                entry.attrs[key] = value