In [None]:
# Create a new conda env from environment.yml and run notebook in it
!conda env create -f ./environment.yml
!conda activate rtdefects

In [13]:
!pip list

Package                     Version
--------------------------- -----------
absl-py                     1.4.0
aiofiles                    22.1.0
aiohttp                     3.8.3
aiosignal                   1.2.0
aiosqlite                   0.18.0
albumentations              1.3.1
alembic                     1.11.2
anyio                       3.5.0
appdirs                     1.4.4
appnope                     0.1.2
argon2-cffi                 21.3.0
argon2-cffi-bindings        21.2.0
asciitree                   0.3.3
asttokens                   2.0.5
astunparse                  1.6.3
async-timeout               4.0.2
attrs                       22.1.0
Babel                       2.11.0
backcall                    0.2.0
bcrypt                      4.0.1
beartype                    0.12.0
beautifulsoup4              4.12.2
bleach                      4.1.0
blinker                     1.6.2
bokeh                       3.2.1
boto3                       1.28.22
b

In [1]:
# Python must be version 3.10.*
import sys
assert sys.version_info[0] == 3 and sys.version_info[1] == 10

In [2]:
import garden_ai
from garden_ai import step, GardenClient

import json
from typing import Optional, Tuple
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
from hashlib import md5
from skimage import color, measure, morphology
from io import BytesIO
from time import perf_counter
from hyperspy import io as hsio
from scipy.stats import siegelslopes
from scipy.interpolate import interp1d
import imageio
import tensorflow as tf

2023-08-11 13:15:06.426295: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
client = GardenClient()

# First, we needed to register our pretrained ML model with the garden service.
# When we registered the pretrained model (model.pth copied in this directory) via `$ garden-ai model register ...`
# We were given this model name, which we can use to reference the model in a pipeline.
# All rtdefect models included in ./models directory
REGISTERED_MODEL_NAME = "maxtuecke@gmail.com/rtdefect-tf-model-seedling"

TEST_INPUT_PATH = "./data/input_image.tiff"
TEST_OUTPUT_PATH = "./data/tensorflow_output_mask.tiff"
TEST_OUTPUT_DEFECT_PATH = "./data/tensorflow_output_defect_results.json"
PIPELINE_DOI = "10.23677/kd6n-fk59"
GARDEN_DOI = "10.23677/c66j-tb82"
PIP_REQUIREMENTS = ["torchvision==0.14.1", "torch==1.13.1", "segmentation_models.pytorch==0.2.*", "pandas==2.0.3", "scikit-image==0.21.0", "chardet==5.2.0", "hyperspy==1.7.5", "werkzeug==2.2.3"]
CONDA_REQUIREMENTS = ["tensorflow>2"]

In [4]:
# Next, we define a our pipelines steps
# Decorate it with `@step` so that we can use it to build up a pipeline

@step
def preprocessing(
    input_data: np.ndarray,
) -> np.ndarray:
    import numpy as np
    from io import BytesIO
    import imageio
    from skimage import color, measure, morphology
    from typing import Optional, Tuple
    
    def encode_as_tiff(data: np.ndarray, compress_type: int = 5) -> bytes:
        # Convert mask to a uint8-compatible image
        data = np.squeeze(data)
        assert data.ndim == 2, "Image must be grayscale"
        assert np.logical_and(data >= 0, data <= 1).all(), "Image values must be between 0 and 1"
        data = np.array(data * 255, dtype=np.uint8)

        # Convert mask to a TIFF-encoded image
        output_img = BytesIO()
        writer = imageio.get_writer(output_img, format='tiff', mode='i')
        writer.append_data(data, meta={'compression': compress_type})
        return output_img.getvalue()
    
    
    #Encode image data as tiff
    encoded_image_data = encode_as_tiff(input_data, compress_type=5)

    # Load the TIFF file into a numpy array
    image_gray = imageio.imread(BytesIO(encoded_image_data))

    # Preprocess the image data
    image = color.gray2rgb(image_gray)  # Convert to RGB
    image = np.array(image, dtype=np.float32) / 255  # Convert to float32
    image =  np.expand_dims(image, axis=0)

    # Check the shape
    assert image.ndim == 4, "Expects a stack of images"
    assert image.shape[-1] == 3, "Expects 3 output channels"
    assert image.dtype == np.float32, "Expects np.float32"
    assert 0 <= np.min(image) and np.max(image) <= 1, "Image values should be in [0, 1]"
    
    return image

@step
def run_inference(
    input_data: np.ndarray,
    model=garden_ai.Model(REGISTERED_MODEL_NAME),  # loads the registered model by name, with a `.predict()` method
) -> np.ndarray:
    return model.predict(input_data)
    
@step
def postprocessing(input_data: np.ndarray) -> np.ndarray:
    import numpy as np
    from io import BytesIO
    import imageio
    from skimage import color, measure, morphology
    from typing import Optional, Tuple
    
    def encode_as_tiff(data: np.ndarray, compress_type: int = 5) -> bytes:
        # Convert mask to a uint8-compatible image
        data = np.squeeze(data)
        assert data.ndim == 2, "Image must be grayscale"
        assert np.logical_and(data >= 0, data <= 1).all(), "Image values must be between 0 and 1"
        data = np.array(data * 255, dtype=np.uint8)

        # Convert mask to a TIFF-encoded image
        output_img = BytesIO()
        writer = imageio.get_writer(output_img, format='tiff', mode='i')
        writer.append_data(data, meta={'compression': compress_type})
        return output_img.getvalue()

    def analyze_defects(mask: np.ndarray, min_size: int = 50) -> Tuple[dict, np.ndarray]:
        mask = morphology.remove_small_objects(mask, min_size=min_size)
        mask = morphology.remove_small_holes(mask, min_size)
        mask = morphology.binary_erosion(mask, morphology.square(1))
        output = {'void_frac': mask.sum() / (mask.shape[0] * mask.shape[1])}

        # Assign labels to the labeled regions
        labels = measure.label(mask)
        output['void_count'] = int(labels.max())

        # Compute region properties
        props = measure.regionprops(labels, mask)
        radii = [p['equivalent_diameter'] / 2 for p in props]
        output['radii'] = radii
        output['radii_average'] = np.average(radii)
        output['positions'] = [p['centroid'] for p in props]
        return output, labels
    
    
    # Make it into a bool array
    segment = np.squeeze(input_data)
    mask = segment > 0.9

    # Generate the analysis results
    defect_results, _ = analyze_defects(mask)  # Discard the labeled output

    # Convert mask to a TIFF-encoded image
    mask_data = encode_as_tiff(mask)
    
    output = {"mask" : mask_data, "defect_results" : defect_results}
    
    return output

In [5]:
# make a simple Pipeline using the steps we just defined
rtdefect_pipeline = client.create_pipeline(
    title="RT Defect Analysis TF Demo Pipeline",
    python_version=f"{sys.version_info[0]}.{sys.version_info[1]}.{sys.version_info[2]}",
    pip_dependencies=PIP_REQUIREMENTS,
    conda_dependencies=CONDA_REQUIREMENTS,
    steps=(preprocessing, run_inference, postprocessing),  # steps run in order, passing output to subsequent steps
    authors=[
        "Ward, Logan",
    ],
    contributors=["Tuecke, Max"],
    version="0.0.1",
    year=2023,
    tags=[],
    short_name="rtdefect_tf",
    doi=PIPELINE_DOI,
)

In [6]:
# now we need to register the pipeline for remote execution
# build a container with the pipeline's specific dependencies/python version
# then register the pipeline so that it will execute from that container
container_id = "cb99321a-7e27-4d13-b2ac-1855ce28e90d" # (reuse a known container uuid to skip slow build step)
#container_id = client.build_container(rtdefect_pipeline) # <-- to build a fresh container

client.register_pipeline(rtdefect_pipeline, container_id)
print(f"Registered pipeline '{rtdefect_pipeline.doi}'!")

Registered pipeline '10.23677/kd6n-fk59'!


In [7]:
# now that we've registered our pipeline, we can test its remote execution against some sample input
def load_rtdefects_input(path: Path) -> np.ndarray:
    # Step 1: attempt to read it with imageio
    load_functions = [
        imageio.imread,
        lambda x: hsio.load(x).data
    ]
    data = None
    for function in load_functions:
        try:
            data: np.ndarray = function(path)
        except Exception as e:
            continue
    if data is None:
        raise ValueError(f'Failed to load image from {path}')

    # Standardize the format
    data = np.array(data, dtype=np.float32)
    data = np.squeeze(data)
    if data.ndim == 3:
        data = color.rgb2gray(data)
    data = (data - data.min()) / (data.max() - data.min())
    return data

demo_input = load_rtdefects_input(TEST_INPUT_PATH)

  data: np.ndarray = function(path)
ERROR:hyperspy.io:If this file format is supported, please report this error to the HyperSpy developers.


In [8]:
# results we want to reproduce:
with open(TEST_OUTPUT_PATH, "rb") as img:
	expected_mask = img.read()
expected_defects = json.load(open(TEST_OUTPUT_DEFECT_PATH))

# to run remotely, use the client to fetch our newly registered pipeline --
# note that our pipeline is only available to us at this point;
# it can't be discovered/used by others until it's part of a published Garden
rtdefect_remote = client.get_registered_pipeline(PIPELINE_DOI)

results = rtdefect_remote(
    demo_input,
    endpoint="6d39d01e-2955-47b9-a1f6-50f147e650d6",  # execute on Globus Compute endpoint of choice
)

assert results["mask"] == expected_mask
assert json.loads(json.dumps(results["defect_results"])) == expected_defects #json turns tuples to list, make result same format as expected

print("Done! Pipeline executed with correct results.")

Done! Pipeline executed with correct results.


In [9]:
# now that we've sanity-checked the pipeline's remote execution, we can publish it as part of a Garden:
rtdefect_garden_tf = client.create_garden(
    title="RT Defect Analysis TF Demo Garden",
    authors=["Max Tuecke"],
    description="Recreates the RT Defect Analysis tensorflow model from https://github.com/ivem-argonne/real-time-defect-analysis/tree/main",
    doi=GARDEN_DOI,
)
# include the pipeline by just its DOI:
rtdefect_garden_tf.pipeline_ids += [PIPELINE_DOI]

In [10]:
# Finally, publish our new garden, making it (and its pipeline) discoverable by other garden users
# (see example discovery/execution flow of this garden and pipeline in rtdefect_garden_remote_inference.ipynb)
client.publish_garden_metadata(rtdefect_garden_tf)

In [11]:
# Search for the new published garden
!garden-ai garden search --title="RT Defect Analysis TF Demo Garden"

[1m{[0m
  [1;34m"gmeta"[0m: [1m[[0m
    [1m{[0m
      [1;34m"@datatype"[0m: [32m"GMetaResult"[0m,
      [1;34m"entries"[0m: [1m[[0m
        [1m{[0m
          [1;34m"content"[0m: [1m{[0m
            [1;34m"pipeline_aliases"[0m: [1m{[0m[1m}[0m,
            [1;34m"year"[0m: [32m"2023"[0m,
            [1;34m"description"[0m: [32m"Recreates the RT Defect Analysis tensorflow model from https://github.com/ivem-argonne/real-time-defect-analysis/tree/main"[0m,
            [1;34m"language"[0m: [32m"en"[0m,
            [1;34m"title"[0m: [32m"RT Defect Analysis TF Demo Garden"[0m,
            [1;34m"version"[0m: [32m"0.0.1"[0m,
            [1;34m"tags"[0m: [1m[[0m[1m][0m,
            [1;34m"pipelines"[0m: [1m[[0m
              [1m{[0m
                [1;34m"models"[0m: [1m[[0m
                  [1m{[0m
                    [1;34m"flavor"[0m: [32m"tensorflow"[0m,
                    [1;34m"user_email"[0m: [32

In [12]:
# Sanity check: get and run the new published garden
rtdefects_garden_published = client.get_published_garden(GARDEN_DOI)
results = rtdefects_garden_published.rtdefect_tf(demo_input, endpoint="6d39d01e-2955-47b9-a1f6-50f147e650d6")
print(results["defect_results"])

{'void_frac': 0.0023212432861328125, 'void_count': 7, 'radii': [10.940041919714261, 4.442433223290478, 10.704744696916627, 7.998767850296815, 5.352372348458314, 14.439285835884782, 14.820047957642227], 'radii_average': 9.813956261743357, 'positions': [(120.55319148936171, 259.25531914893617), (312.98387096774195, 259.11290322580646), (589.8416666666667, 932.0722222222222), (661.0995024875622, 1017.3781094527363), (856.2444444444444, 865.1), (953.7862595419847, 682.2290076335878), (1002.4869565217391, 555.6579710144928)]}
