In [1]:
!pwd

/home/jupyter/vertexAI/2_mnist_project


In [90]:
!mkdir components/load_data/src
!mkdir components/train_linear/src
!mkdir components/train_cnn/src
!mkdir components/evaluate_models/src
!mkdir components/deploy_model/src

mkdir: cannot create directory ‘components/load_data/src’: File exists
mkdir: cannot create directory ‘components/train_linear/src’: File exists
mkdir: cannot create directory ‘components/train_cnn/src’: File exists
mkdir: cannot create directory ‘components/evaluate_models/src’: File exists
mkdir: cannot create directory ‘components/deploy_model/src’: File exists


<img src="demo.png" width="40%"/>

### 1. Component / load_data

In [57]:
%%writefile components/load_data/src/load_data.py
from kfp.v2.dsl import *

@component(
    packages_to_install=["tensorflow", "numpy"],
    output_component_file="load_data.yaml"
)
def load_data(
    dataset: Output[Dataset]
):
    import tensorflow as tf
    import numpy as np
    
    mnist = tf.keras.datasets.mnist
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    train_x = train_x / 255.0
    test_x = test_x / 255.0
    
    with open(dataset.path, "wb") as f:
        np.savez(
            f,
            train_x=train_x,
            train_y=train_y,
            test_x=test_x,
            test_y=test_y
        )
    print(f"Saved on : {dataset.path}")

Overwriting components/load_data/src/load_data.py


## 2. Component / train linear

In [58]:
%%writefile components/train_linear/src/train_linear.py
from kfp.v2.dsl import *

@component(
    packages_to_install=["tensorflow", "numpy"],
    output_component_file="train_linear.yaml"
)
def train_linear(
    dataset: Input[Dataset],
    output_model: Output[Model],
    metrics: Output[Metrics]
):
    import tensorflow as tf
    import numpy as np
    
    
    with open(dataset.path, "rb") as f:
        mnist = np.load(f)
        train_x, train_y = mnist["train_x"], mnist["train_y"]
        test_x, test_y = mnist["test_x"], mnist["test_y"]
    print(f"train x shape: {train_x.shape}")
    print(f"train y shape: {train_y.shape}")
    print(f"test x shape: {test_x.shape}")
    print(f"test y shape: {test_y.shape}")
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ]
    )
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['acc']
    )
    model.fit(train_x, train_y)
    loss, acc = model.evaluate(test_x, test_y)

    metrics.log_metric("accuracy",(acc * 100.0))
    metrics.log_metric("framework", "Tensorflow")
    metrics.log_metric("Model", "LinearModel")
    metrics.log_metric("dataset_size", len(train_x))
    
    model.save(output_model.path)

Overwriting components/train_linear/src/train_linear.py


## 3. Component / train cnn

In [59]:
%%writefile components/train_cnn/src/train_cnn.py
from kfp.v2.dsl import *

@component(
    packages_to_install=["tensorflow", "numpy"],
    output_component_file="train_cnn.yaml"
)
def train_cnn(
    dataset: Input[Dataset],
    output_model: Output[Model],
    metrics: Output[Metrics]
):
    import tensorflow as tf
    import numpy as np
    
    
    with open(dataset.path, "rb") as f:
        mnist = np.load(f)
        train_x, train_y = mnist["train_x"], mnist["train_y"]
        test_x, test_y = mnist["test_x"], mnist["test_y"]
    train_x = train_x.reshape(-1, 28, 28, 1)
    test_x = test_x.reshape(-1, 28, 28, 1)
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')            
        ]
    )
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['acc']
    )
    model.fit(train_x, train_y)
    loss, acc = model.evaluate(test_x, test_y)

    metrics.log_metric("accuracy",(acc * 100.0))
    metrics.log_metric("framework", "Tensorflow")
    metrics.log_metric("Model", "CNNModel")
    metrics.log_metric("dataset_size", len(train_x))
    
    model.save(output_model.path)

Overwriting components/train_cnn/src/train_cnn.py


## 4. Component / Evalute models

In [110]:
%%writefile components/evaluate_models/src/evaluate_models.py
from kfp.v2.dsl import *

@component(
    packages_to_install=["tensorflow", "numpy"],
    output_component_file="evaluate_models.yaml"
)
def evaluate_models(
    dataset: Input[Dataset],
    model_linear: Input[Model],
    model_cnn: Input[Model],
    model_best: Output[Model]
):
    import tensorflow as tf
    import numpy as np
    from tensorflow.keras.models import clone_model
    
    with open(dataset.path, "rb") as f:
        mnist = np.load(f)
        train_x, train_y = mnist["train_x"], mnist["train_y"]
        test_x, test_y = mnist["test_x"], mnist["test_y"]
    
    # evaluate Linear Model
    linear_model = tf.keras.models.load_model(model_linear.path)
    linear_loss, linear_acc = linear_model.evaluate(test_x, test_y)
    
    # evaluate CNN Model
    test_x = test_x.reshape(-1, 28, 28, 1)
    cnn_model = tf.keras.models.load_model(model_cnn.path)
    cnn_loss, cnn_acc = cnn_model.evaluate(test_x, test_y)
    
    best_model = cnn_model if cnn_acc > linear_acc else linear_model
    best_model.save(model_best.path)
    

Overwriting components/evaluate_models/src/evaluate_models.py


## 5. Deploy Model on Vertex AI

In [111]:
%%writefile components/deploy_model/src/deploy_model.py
from kfp.v2.dsl import *

@component(
    packages_to_install=["google-cloud-aiplatform"],
    output_component_file="deploy_model.yaml"
)
def deploy_model(
    model: Input[Model],
    project: str,
    region: str,
    vertex_endpoint: Output[Artifact],
    vertex_model: Output[Model]
):
    from google.cloud import aiplatform

    aiplatform.init(project=project, location=region) # 1

    deployed_model = aiplatform.Model.upload( # 2
        display_name="simple-mnist-pipeline",
        artifact_uri = model.uri,
        serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-6:latest"
    )
    endpoint = deployed_model.deploy(machine_type="n1-standard-4") # 3

    # Save data to the output params
    vertex_endpoint.uri = endpoint.resource_name # 4
    vertex_model.uri = deployed_model.resource_name 

Overwriting components/deploy_model/src/deploy_model.py


### 100. define Pipeline

In [112]:
%%writefile pipelines/pipeline.py
import kfp
from kfp.v2 import dsl
from kfp import components

PROJECT_ID = "abstract-flame-330901"
REGION = "us-central1"

@dsl.pipeline(
    name="mnist-pipeline",
    description="mnist pipeline tutorial",
    pipeline_root="gs://suwan"
)
def pipeline(
    project: str = PROJECT_ID,
    region: str = REGION
):
    load_data = components.load_component_from_file("load_data.yaml")
    train_linear = components.load_component_from_file("train_linear.yaml")
    train_cnn = components.load_component_from_file("train_cnn.yaml")
    evaluate_models = components.load_component_from_file("evaluate_models.yaml")
    deploy_model = components.load_component_from_file("deploy_model.yaml")
    load_data_task = load_data()
    train_linear_task = train_linear(
        dataset=load_data_task.outputs["dataset"]
    )
    train_cnn_task = train_cnn(
        dataset=load_data_task.outputs["dataset"]
    )
    evaluate_models_task = evaluate_models(
        dataset=load_data_task.outputs["dataset"],
        model_linear=train_linear_task.outputs["output_model"],
        model_cnn=train_cnn_task.outputs["output_model"]
    )
    deploy_task = deploy_model(
        model=evaluate_models_task.outputs["model_best"],
        project=project,
        region=region
    )

client = kfp.Client("https://22767e8e71dc72a3-dot-us-central1.pipelines.googleusercontent.com/")

client.create_run_from_pipeline_func(
    pipeline,
    arguments={},
    mode=kfp.dsl.PipelineExecutionMode.V2_COMPATIBLE,
)    


Overwriting pipelines/pipeline.py


## 100.1 Run pipeline

In [113]:
!python components/load_data/src/load_data.py
!python components/train_linear/src/train_linear.py
!python components/train_cnn/src/train_cnn.py
!python components/evaluate_models/src/evaluate_models.py
!python components/deploy_model/src/deploy_model.py

In [114]:
!python pipelines/pipeline.py

