In [None]:
# Create a new conda env from environment.yml and run notebook in it
!conda env create -f ./environment.yml
!conda activate rtdefects

In [23]:
!pip list

Package                     Version
--------------------------- -----------
absl-py                     1.4.0
aiofiles                    22.1.0
aiohttp                     3.8.3
aiosignal                   1.2.0
aiosqlite                   0.18.0
albumentations              1.3.1
alembic                     1.11.2
anyio                       3.5.0
appdirs                     1.4.4
appnope                     0.1.2
argon2-cffi                 21.3.0
argon2-cffi-bindings        21.2.0
asciitree                   0.3.3
asttokens                   2.0.5
astunparse                  1.6.3
async-timeout               4.0.2
attrs                       22.1.0
Babel                       2.11.0
backcall                    0.2.0
bcrypt                      4.0.1
beartype                    0.12.0
beautifulsoup4              4.12.2
bleach                      4.1.0
blinker                     1.6.2
bokeh                       3.2.1
boto3                       1.28.22
botocore                    1

tensorflow-estimator        2.12.0
termcolor                   2.1.0
terminado                   0.17.1
testpath                    0.6.0
texttable                   1.6.7
threadpoolctl               3.2.0
tifffile                    2023.7.18
timm                        0.4.12
tomli                       2.0.1
tomlkit                     0.12.1
toolz                       0.12.0
torch                       2.0.1
torchvision                 0.15.2
tornado                     6.3.2
tqdm                        4.66.0
trackpy                     0.5.0
traitlets                   5.7.1
traits                      6.4.2
trove-classifiers           2023.7.6
typeguard                   2.13.3
typer                       0.7.0
types-paramiko              3.3.0.0
types-requests              2.31.0.2
types-six                   1.16.21.9
types-tabulate              0.9.0.3
types-urllib3               1.26.25.14
typing_extensions           4.7.1
tzdata                  

In [1]:
# Python must be version 3.10.*
import sys
assert sys.version_info[0] == 3 and sys.version_info[1] == 10

In [2]:
import garden_ai
from garden_ai import step, GardenClient

import json
from typing import Optional, Tuple
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
from hashlib import md5
from skimage import color, measure, morphology
from io import BytesIO
from time import perf_counter
from hyperspy import io as hsio
from scipy.stats import siegelslopes
from scipy.interpolate import interp1d
import albumentations as albu
import imageio.v2 as imageio
import torch

In [3]:
client = GardenClient()

# First, we needed to register our pretrained ML model with the garden service.
# When we registered the pretrained model (model.pth copied in this directory) via `$ garden-ai model register ...`
# We were given this model name, which we can use to reference the model in a pipeline.
# All rtdefect models included in ./models directory
REGISTERED_MODEL_NAME_1 = "maxtuecke@gmail.com/rtdefect-torch-model-1-seedling" #small_voids_031023.pth
REGISTERED_MODEL_NAME_2 = "maxtuecke@gmail.com/rtdefect-torch-model-2-seedling" #voids_segmentation_091321.pth
REGISTERED_MODEL_NAME_3 = "maxtuecke@gmail.com/rtdefect-torch-model-3-seedling" #voids_segmentation_030323.pth

TEST_INPUT_PATH = "./data/input_image.tiff"
TEST_OUTPUT_MASK_PATH_1 = "./data/torch_1_output_mask.tiff"
TEST_OUTPUT_MASK_PATH_2 = "./data/torch_2_output_mask.tiff"
TEST_OUTPUT_MASK_PATH_3 = "./data/torch_3_output_mask.tiff"
TEST_OUTPUT_DEFECT_PATH_1 = "./data/torch_1_output_defect_results.json"
TEST_OUTPUT_DEFECT_PATH_2 = "./data/torch_2_output_defect_results.json"
TEST_OUTPUT_DEFECT_PATH_3 = "./data/torch_3_output_defect_results.json"
PIPELINE_DOI_1 = "10.23677/b246-hj14"
PIPELINE_DOI_2 = "10.23677/xn48-pr25"
PIPELINE_DOI_3 = "10.23677/5jzj-0j60"
GARDEN_DOI = "10.23677/nzhf-rq49"
PIP_REQUIREMENTS = ["torchvision==0.15.2", "torch==2.0.1", "segmentation_models.pytorch==0.2.*", "pandas==2.0.3", "scikit-image==0.21.0", "chardet==5.2.0", "hyperspy==1.7.5", "werkzeug==2.2.3", "albumentations==1.3.1"]
CONDA_REQUIREMENTS = ["tensorflow>2", "nomkl"]

In [4]:
# Next, we define a our pipelines steps
# Decorate it with `@step` so that we can use it to build up a pipeline

@step
def preprocessing_all(
    input_data: np.ndarray,
) -> np.ndarray:
    from typing import Optional, Tuple
    from io import BytesIO
    from skimage import color, measure, morphology
    from skimage.transform import resize
    import numpy as np
    import imageio.v2 as imageio

    def encode_as_tiff(data: np.ndarray, compress_type: int = 5) -> bytes:
        # Convert mask to a uint8-compatible image
        data = np.squeeze(data)
        assert data.ndim == 2, "Image must be grayscale"
        assert np.logical_and(data >= 0, data <= 1).all(), "Image values must be between 0 and 1"
        data = np.array(data * 255, dtype=np.uint8)

        # Convert mask to a TIFF-encoded image
        output_img = BytesIO()
        writer = imageio.get_writer(output_img, format='tiff', mode='i')
        writer.append_data(data, meta={'compression': compress_type})
        return output_img.getvalue()
    
    #Encode image data as tiff
    encoded_image_data = encode_as_tiff(input_data, compress_type=5)

    # Load the TIFF file into a numpy array
    image_gray = imageio.imread(BytesIO(encoded_image_data))

    # Convert to RGB
    image: np.ndarray = color.gray2rgb(image_gray)

    # Scale to 1024x1024
    if image.shape[:2] != (1024, 1024):
        image = resize(image, output_shape=(1024, 1024), anti_aliasing=True)

    return image

@step
def preprocessing_torch_1(
    input_data: np.ndarray,
) -> torch.Tensor:
    from typing import Optional, Tuple
    import numpy as np
    import segmentation_models_pytorch as smp
    import albumentations as albu
    import torch
    
    MODEL_NAME = "small_voids_031023.pth"
    
    # Define the conversion from image to inputs
    def to_tensor(x: np.ndarray, **kwargs):
        return x.transpose(2, 0, 1).astype('float32')

    _encoders = {
        'voids_segmentation_091321.pth': 'se_resnext50_32x4d',
        'voids_segmentation_030323.pth': 'efficientnet-b3',
        'small_voids_031023.pth': 'se_resnext50_32x4d',
    }
    preprocessing_fn = smp.encoders.get_preprocessing_fn(_encoders[MODEL_NAME])
    
    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor),
    ]
    preprocess = albu.Compose(_transform)

    # Perform the preprocessing
    image = preprocess(image=input_data)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.from_numpy(image['image']).to(device).unsqueeze(0)

@step
def preprocessing_torch_2(
    input_data: np.ndarray,
) -> torch.Tensor:
    from typing import Optional, Tuple
    import numpy as np
    import segmentation_models_pytorch as smp
    import albumentations as albu
    import torch
    
    MODEL_NAME = "voids_segmentation_091321.pth"
    
    # Define the conversion from image to inputs
    def to_tensor(x: np.ndarray, **kwargs):
        return x.transpose(2, 0, 1).astype('float32')

    _encoders = {
        'voids_segmentation_091321.pth': 'se_resnext50_32x4d',
        'voids_segmentation_030323.pth': 'efficientnet-b3',
        'small_voids_031023.pth': 'se_resnext50_32x4d',
    }
    preprocessing_fn = smp.encoders.get_preprocessing_fn(_encoders[MODEL_NAME])
    
    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor),
    ]
    preprocess = albu.Compose(_transform)

    # Perform the preprocessing
    image = preprocess(image=input_data)

    device = 'cuda' if torch.cuda.is_available() else 'cpu' 
    return torch.from_numpy(image['image']).to(device).unsqueeze(0)

@step
def preprocessing_torch_3(
    input_data: np.ndarray,
) -> torch.Tensor:
    from typing import Optional, Tuple
    import numpy as np
    import segmentation_models_pytorch as smp
    import albumentations as albu
    import torch
    
    MODEL_NAME = "voids_segmentation_030323.pth"
    
    # Define the conversion from image to inputs
    def to_tensor(x: np.ndarray, **kwargs):
        return x.transpose(2, 0, 1).astype('float32')

    _encoders = {
        'voids_segmentation_091321.pth': 'se_resnext50_32x4d',
        'voids_segmentation_030323.pth': 'efficientnet-b3',
        'small_voids_031023.pth': 'se_resnext50_32x4d',
    }
    preprocessing_fn = smp.encoders.get_preprocessing_fn(_encoders[MODEL_NAME])
    
    _transform = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=to_tensor),
    ]
    preprocess = albu.Compose(_transform)

    # Perform the preprocessing
    image = preprocess(image=input_data)

    device = 'cuda' if torch.cuda.is_available() else 'cpu' 
    return torch.from_numpy(image['image']).to(device).unsqueeze(0)

@step
def run_inference_model_1(
    input_data: torch.Tensor,
    model=garden_ai.Model(REGISTERED_MODEL_NAME_1),  # loads the registered model by name, with a `.predict()` method
) -> torch.Tensor:
    return model.predict(input_data)

@step
def run_inference_model_2(
    input_data: torch.Tensor,
    model=garden_ai.Model(REGISTERED_MODEL_NAME_2),  # loads the registered model by name, with a `.predict()` method
) -> torch.Tensor:
    return model.predict(input_data)

@step
def run_inference_model_3(
    input_data: torch.Tensor,
    model=garden_ai.Model(REGISTERED_MODEL_NAME_3),  # loads the registered model by name, with a `.predict()` method
) -> torch.Tensor:
    return model.predict(input_data)
    
@step
def postprocessing_all(input_data: torch.Tensor) -> np.ndarray:
    from typing import Optional, Tuple
    import numpy as np
    from io import BytesIO
    from skimage import color, measure, morphology
    import segmentation_models_pytorch as smp
    import albumentations as albu
    import imageio.v2 as imageio
    import torch

    def encode_as_tiff(data: np.ndarray, compress_type: int = 5) -> bytes:
        # Convert mask to a uint8-compatible image
        data = np.squeeze(data)
        assert data.ndim == 2, "Image must be grayscale"
        assert np.logical_and(data >= 0, data <= 1).all(), "Image values must be between 0 and 1"
        data = np.array(data * 255, dtype=np.uint8)

        # Convert mask to a TIFF-encoded image
        output_img = BytesIO()
        writer = imageio.get_writer(output_img, format='tiff', mode='i')
        writer.append_data(data, meta={'compression': compress_type})
        return output_img.getvalue()

    def analyze_defects(mask: np.ndarray, min_size: int = 50) -> Tuple[dict, np.ndarray]:
        mask = morphology.remove_small_objects(mask, min_size=min_size)
        mask = morphology.remove_small_holes(mask, min_size)
        mask = morphology.binary_erosion(mask, morphology.square(1))
        output = {'void_frac': mask.sum() / (mask.shape[0] * mask.shape[1])}

        # Assign labels to the labeled regions
        labels = measure.label(mask)
        output['void_count'] = int(labels.max())

        # Compute region properties
        props = measure.regionprops(labels, mask)
        radii = [p['equivalent_diameter'] / 2 for p in props]
        output['radii'] = radii
        output['radii_average'] = np.average(radii)
        output['positions'] = [p['centroid'] for p in props]
        return output, labels
    
    input_data_numpy = input_data.squeeze().cpu().detach().numpy()

    # Make it into a bool array
    segment = np.squeeze(input_data_numpy)
    mask = segment > 0.9

    # Generate the analysis results
    defect_results, _ = analyze_defects(mask)  # Discard the labeled output

    # Convert mask to a TIFF-encoded image
    mask_data = encode_as_tiff(mask)
    
    output = {"mask" : mask_data, "defect_results" : defect_results}
    
    return output

In [5]:
# make a simple Pipeline using the steps we just defined
rtdefect_pipeline_1 = client.create_pipeline(
    title="RT Defect Analysis Torch 1 Demo Pipeline",
    python_version=f"{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}",
    pip_dependencies=PIP_REQUIREMENTS,
    conda_dependencies=CONDA_REQUIREMENTS,
    steps=(preprocessing_all, preprocessing_torch_1, run_inference_model_1, postprocessing_all),  # steps run in order, passing output to subsequent steps
    authors=[
        "Ward, Logan",
    ],
    contributors=["Tuecke, Max"],
    version="0.0.1",
    year=2023,
    tags=[],
    short_name="rtdefect_torch_1",
    doi=PIPELINE_DOI_1,
)


rtdefect_pipeline_2 = client.create_pipeline(
    title="RT Defect Analysis Torch 2 Demo Pipeline",
    python_version=f"{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}",
    pip_dependencies=PIP_REQUIREMENTS,
    conda_dependencies=CONDA_REQUIREMENTS,
    steps=(preprocessing_all, preprocessing_torch_2, run_inference_model_2, postprocessing_all),  # steps run in order, passing output to subsequent steps
    authors=[
        "Ward, Logan",
    ],
    contributors=["Tuecke, Max"],
    version="0.0.1",
    year=2023,
    tags=[],
    short_name="rtdefect_torch_2",
    doi=PIPELINE_DOI_2,
)

rtdefect_pipeline_3 = client.create_pipeline(
    title="RT Defect Analysis Torch 3 Demo Pipeline",
    python_version=f"{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}",
    pip_dependencies=PIP_REQUIREMENTS,
    conda_dependencies=CONDA_REQUIREMENTS,
    steps=(preprocessing_all, preprocessing_torch_3, run_inference_model_3, postprocessing_all),  # steps run in order, passing output to subsequent steps
    authors=[
        "Ward, Logan",
    ],
    contributors=["Tuecke, Max"],
    version="0.0.1",
    year=2023,
    tags=[],
    short_name="rtdefect_torch_3",
    doi=PIPELINE_DOI_3,
)

print("Created pipelines")

Created pipelines


In [6]:
# now we need to register the pipeline for remote execution
# build a container with the pipeline's specific dependencies/python version
# then register the pipeline so that it will execute from that container
container_id = "d81c62ed-9df6-4ecf-a5a3-4e7e666c2917" # (reuse a known container uuid to skip slow build step)
#container_id = client.build_container(rtdefect_pipeline_1) # <-- to build a fresh container
print(f"Container ID: {container_id}")

client.register_pipeline(rtdefect_pipeline_1, container_id)
print(f"Registered pipeline '{rtdefect_pipeline_1.doi}'!")

client.register_pipeline(rtdefect_pipeline_2, container_id)
print(f"Registered pipeline '{rtdefect_pipeline_2.doi}'!")

client.register_pipeline(rtdefect_pipeline_3, container_id)
print(f"Registered pipeline '{rtdefect_pipeline_3.doi}'!")


Container ID: d81c62ed-9df6-4ecf-a5a3-4e7e666c2917
Registered pipeline '10.23677/b246-hj14'!
Registered pipeline '10.23677/xn48-pr25'!
Registered pipeline '10.23677/5jzj-0j60'!


In [7]:
# now that we've registered our pipeline, we can test its remote execution against some sample input
def load_rtdefects_input(path: Path) -> np.ndarray:
    # Step 1: attempt to read it with imageio
    load_functions = [
        imageio.imread,
        lambda x: hsio.load(x).data
    ]
    data = None
    for function in load_functions:
        try:
            data: np.ndarray = function(path)
        except Exception as e:
            continue
    if data is None:
        raise ValueError(f'Failed to load image from {path}')

    # Standardize the format
    data = np.array(data, dtype=np.float32)
    data = np.squeeze(data)
    if data.ndim == 3:
        data = color.rgb2gray(data)
    data = (data - data.min()) / (data.max() - data.min())
    return data

demo_input = load_rtdefects_input(TEST_INPUT_PATH)

ERROR:hyperspy.io:If this file format is supported, please report this error to the HyperSpy developers.


In [15]:
# results we want to reproduce:
with open(TEST_OUTPUT_MASK_PATH_1, "rb") as img:
	expected_mask_1 = img.read()
expected_defects_1 = json.load(open(TEST_OUTPUT_DEFECT_PATH_1))

with open(TEST_OUTPUT_MASK_PATH_2, "rb") as img:
	expected_mask_2 = img.read()
expected_defects_2 = json.load(open(TEST_OUTPUT_DEFECT_PATH_2))

with open(TEST_OUTPUT_MASK_PATH_3, "rb") as img:
	expected_mask_3 = img.read()
expected_defects_3 = json.load(open(TEST_OUTPUT_DEFECT_PATH_3))


# to run remotely, use the client to fetch our newly registered pipeline --
# note that our pipeline is only available to us at this point;
# it can't be discovered/used by others until it's part of a published Garden
print("Starting pipeline 1 remote execution.")
rtdefect_remote_1 = client.get_registered_pipeline(PIPELINE_DOI_1)
results_1 = rtdefect_remote_1(
    demo_input,
    endpoint="6d39d01e-2955-47b9-a1f6-50f147e650d6",  # execute on Globus Compute endpoint of choice
)
assert results_1["mask"] == expected_mask_1
assert json.loads(json.dumps(results_1["defect_results"])) == expected_defects_1


print("Starting pipeline 2 remote execution.")
rtdefect_remote_2 = client.get_registered_pipeline(PIPELINE_DOI_2)
results_2 = rtdefect_remote_2(
    demo_input,
    endpoint="6d39d01e-2955-47b9-a1f6-50f147e650d6",  # execute on Globus Compute endpoint of choice
)
assert results_2["mask"] == expected_mask_2
assert json.loads(json.dumps(results_2["defect_results"])) == expected_defects_2


print("Starting pipeline 3 remote execution.")
rtdefect_remote_3 = client.get_registered_pipeline(PIPELINE_DOI_3)
results_3 = rtdefect_remote_3(
    demo_input,
    endpoint="6d39d01e-2955-47b9-a1f6-50f147e650d6",  # execute on Globus Compute endpoint of choice
)
assert results_3["mask"] == expected_mask_3
assert json.loads(json.dumps(results_3["defect_results"])) == expected_defects_3


print("Done! All pipelines executed with correct results.")

Starting pipeline 1 remote execution.


Starting pipeline 2 remote execution.


Starting pipeline 3 remote execution.


Done! All pipelines executed with correct results.


In [16]:
# now that we've sanity-checked the pipeline's remote execution, we can publish it as part of a Garden:
rtdefect_garden_torch = client.create_garden(
    title="RT Defect Analysis Torch Demo Garden",
    authors=["Max Tuecke"],
    description="Recreates the RT Defect Analysis pytorch model from https://github.com/ivem-argonne/real-time-defect-analysis/tree/main",
    doi=GARDEN_DOI,
)
# include the pipeline by just its DOI:
rtdefect_garden_torch.pipeline_ids += [PIPELINE_DOI_1]
rtdefect_garden_torch.pipeline_ids += [PIPELINE_DOI_2]
rtdefect_garden_torch.pipeline_ids += [PIPELINE_DOI_3]

In [17]:
# Finally, publish our new garden, making it (and its pipeline) discoverable by other garden users
# (see example discovery/execution flow of this garden and pipeline in rtdefect_garden_remote_inference.ipynb)
client.publish_garden_metadata(rtdefect_garden_torch)

In [19]:
# Search for the new published garden
!garden-ai garden search --title="RT Defect Analysis Torch Demo Garden"

[1m{[0m
  [1;34m"gmeta"[0m: [1m[[0m
    [1m{[0m
      [1;34m"@datatype"[0m: [32m"GMetaResult"[0m,
      [1;34m"entries"[0m: [1m[[0m
        [1m{[0m
          [1;34m"content"[0m: [1m{[0m
            [1;34m"pipeline_aliases"[0m: [1m{[0m[1m}[0m,
            [1;34m"year"[0m: [32m"2023"[0m,
            [1;34m"description"[0m: [32m"Recreates the RT Defect Analysis pytorch model from https://github.com/ivem-argonne/real-time-defect-analysis/tree/main"[0m,
            [1;34m"language"[0m: [32m"en"[0m,
            [1;34m"title"[0m: [32m"RT Defect Analysis Torch Demo Garden"[0m,
            [1;34m"version"[0m: [32m"0.0.1"[0m,
            [1;34m"tags"[0m: [1m[[0m[1m][0m,
            [1;34m"pipelines"[0m: [1m[[0m
              [1m{[0m
                [1;34m"models"[0m: [1m[[0m
                  [1m{[0m
                    [1;34m"flavor"[0m: [32m"pytorch"[0m,
                    [1;34m"user_email"[0m: [32m"m

In [22]:
# Sanity check: get and run the new published garden
rtdefects_garden_published = client.get_published_garden(GARDEN_DOI)
results = rtdefects_garden_published.rtdefect_torch_1(demo_input, endpoint="6d39d01e-2955-47b9-a1f6-50f147e650d6")
print(results["defect_results"])

{'void_frac': 0.00034999847412109375, 'void_count': 6, 'radii': [5.322553886092406, 3.989422804014327, 4.1073621666150775, 4.029119531035698, 4.259537945889915, 4.61809077155419], 'radii_average': 4.387681184200269, 'positions': [(219.5505617977528, 711.0224719101124), (312.46, 249.16), (409.3207547169811, 441.64150943396226), (513.6862745098039, 205.58823529411765), (628.7719298245614, 150.03508771929825), (700.8955223880597, 1017.8955223880597)]}
