# MNIST Training Pipeline Implementation

- 1. Load Data Component
- 2. Preprocess Data Component
- 3. Train Linear Component
- 3.1. Train CNN Component
- 4. Evaluate Component

<br/>

- without Metrics
- without Deploy

<img src="demo.png" width="50%"/>

## Version Check

In [1]:
import kfp
import google_cloud_pipeline_components
import tensorflow as tf

In [2]:
# Show the installed versions of kfp, google_cloud_pipeline_components and TensorFlow.
kfp.__version__, google_cloud_pipeline_components.__version__, tf.__version__

('1.8.7', '0.1.9', '2.6.0')

## Import Library

In [1]:
import kfp
import kfp.dsl as dsl
from kfp.v2.dsl import component, Input, Output, OutputPath, Dataset, Model, InputPath

from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient
from google.cloud.aiplatform import pipeline_jobs
from datetime import datetime

### 1. Load Data Component

In [111]:
@component(
    packages_to_install=["tensorflow", "numpy"]
)
def load_data(
    dataset: Output[Dataset]
):
    """Download MNIST and store all four splits in a single ``.npz`` archive.

    Args:
        dataset: Output artifact; the archive is written to ``dataset.path``.
    """
    import numpy as np
    import tensorflow as tf

    # Keras returns ((x_train, y_train), (x_test, y_test)).
    train_split, test_split = tf.keras.datasets.mnist.load_data()
    arrays = {
        "x_train": train_split[0],
        "y_train": train_split[1],
        "x_test": test_split[0],
        "y_test": test_split[1],
    }

    # The archive keys are the names the later components look up.
    with open(dataset.path, "wb") as archive:
        np.savez(archive, **arrays)

    print(f"Saved On : {dataset.path}")
    

### 2. Preprocess Data Component

In [112]:
@component(
    packages_to_install=["numpy"]
)
def proc_data(
    dataset_input: Input[Dataset],
    dataset_output: Output[Dataset]
):
    """Scale the MNIST images by 1/255 and save them as a new archive.

    Labels are copied through unchanged.

    Args:
        dataset_input: ``.npz`` archive with ``x_train``, ``y_train``,
            ``x_test`` and ``y_test`` arrays.
        dataset_output: Output artifact; the scaled archive is written to
            ``dataset_output.path`` using the same four keys.
    """
    # Only NumPy is used in this step, so TensorFlow is neither installed
    # nor imported (it would just slow down container start-up).
    import numpy as np

    print(f"Load on : {dataset_input.path}")
    with open(dataset_input.path, "rb") as f:
        dataset = np.load(f)
        x_train, y_train = dataset["x_train"], dataset["y_train"]
        x_test, y_test = dataset["x_test"], dataset["y_test"]

    # True division yields float64; store float32 instead to halve the
    # archive size. Keras casts floating inputs to float32 by default, so
    # the values the models see are the same.
    x_train = (x_train / 255.0).astype(np.float32)
    x_test = (x_test / 255.0).astype(np.float32)

    print("train x shape: ", x_train.shape)
    print("test x shape: ", x_test.shape)

    with open(dataset_output.path, "wb") as f:
        np.savez(
            f,
            x_train=x_train,
            y_train=y_train,
            x_test=x_test,
            y_test=y_test
        )
    print(f"Saved On : {dataset_output.path}")

### 3. Train Linear Model

In [113]:
@component(
    packages_to_install=["tensorflow", "numpy"]
)
def train_linear(
    dataset_input: Input[Dataset],
    mnist_model: Output[Model]
):
    """Train a small fully connected classifier on MNIST and save it.

    Args:
        dataset_input: ``.npz`` archive providing ``x_train``, ``y_train``
            and ``x_test``.
        mnist_model: Output artifact; the trained Keras model is saved to
            ``mnist_model.path`` and the last epoch's training accuracy is
            stored under the ``'train accuracy'`` metadata key.
    """
    import tensorflow as tf
    import numpy as np

    print(f"Load on : {dataset_input.path}")
    with open(dataset_input.path, "rb") as f:
        dataset = np.load(f)
        x_train, y_train = dataset["x_train"], dataset["y_train"]
        # The test images are read only to log their shape; the test labels
        # are not needed for training, so they are not loaded.
        x_test = dataset["x_test"]

    print("train x shape: ", x_train.shape)
    print("test x shape: ", x_test.shape)

    model = tf.keras.Sequential(
        [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ]
    )
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    history = model.fit(x_train, y_train, epochs=2)
    # Accuracy of the final epoch; float() keeps the metadata value a plain
    # Python number regardless of the scalar type Keras reports.
    train_acc = float(history.history['accuracy'][-1])
    mnist_model.metadata['train accuracy'] = train_acc

    model.save(mnist_model.path)

    print(f"Model saved On : {mnist_model.path}")

### 3.1 Train CNN Model

In [114]:
@component(
    packages_to_install=["tensorflow", "numpy"]
)
def train_cnn(
    dataset_input: Input[Dataset],
    mnist_model: Output[Model]
):
    """Train a small convolutional classifier on MNIST and save it.

    Args:
        dataset_input: ``.npz`` archive providing ``x_train``, ``y_train``
            and ``x_test``.
        mnist_model: Output artifact; the trained Keras model is saved to
            ``mnist_model.path`` and the last epoch's training accuracy is
            stored under the ``'train accuracy'`` metadata key.
    """
    import tensorflow as tf
    import numpy as np

    print(f"Load on : {dataset_input.path}")
    with open(dataset_input.path, "rb") as f:
        dataset = np.load(f)
        x_train, y_train = dataset["x_train"], dataset["y_train"]
        # The test images are read only to log their shape; the test labels
        # are not needed for training, so they are not loaded.
        x_test = dataset["x_test"]

    print("train x shape: ", x_train.shape)
    print("test x shape: ", x_test.shape)

    # Conv2D expects a trailing channel axis. Only the training images are
    # reshaped because the test images are not used after the log line above.
    x_train = x_train.reshape(-1, 28, 28, 1)

    model = tf.keras.Sequential(
        [
            tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.MaxPooling2D((2, 2)),
            tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ]
    )
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    history = model.fit(x_train, y_train, epochs=2)
    # Accuracy of the final epoch; float() keeps the metadata value a plain
    # Python number regardless of the scalar type Keras reports.
    train_acc = float(history.history['accuracy'][-1])
    mnist_model.metadata['train accuracy'] = train_acc

    model.save(mnist_model.path)

    print(f"Model saved On : {mnist_model.path}")

### 4. Evaluate Models

In [115]:
@component(
    packages_to_install=["tensorflow", "numpy"]
)
def evaluate_models(
    dataset_input: Input[Dataset],
    mnist_linear_model: Input[Model],
    mnist_cnn_model: Input[Model]
):
    """Evaluate the linear and the CNN model on the MNIST test split.

    Loss and accuracy of each model are printed to the component log and
    also assigned to the ``'loss'`` and ``'acc'`` metadata keys of the
    corresponding model artifact.

    Args:
        dataset_input: ``.npz`` archive providing ``x_test`` and ``y_test``.
        mnist_linear_model: Saved Keras model produced by the linear trainer.
        mnist_cnn_model: Saved Keras model produced by the CNN trainer.
    """
    import tensorflow as tf
    import numpy as np

    with open(dataset_input.path, "rb") as f:
        dataset = np.load(f)
        x_test, y_test = dataset["x_test"], dataset["y_test"]

    model_linear = tf.keras.models.load_model(mnist_linear_model.path)
    model_cnn = tf.keras.models.load_model(mnist_cnn_model.path)

    # NOTE(review): both models are *Input* artifacts. The KFP v2 executor
    # appears to write back metadata for output artifacts only, so the
    # metadata assignments below may not be persisted - confirm. The results
    # are therefore printed as well, so they are at least in the step log.

    # Evaluate the linear model
    loss, acc = model_linear.evaluate(x_test, y_test)
    print(f"Linear model - loss: {loss}, acc: {acc}")
    mnist_linear_model.metadata['loss'] = float(loss)
    mnist_linear_model.metadata['acc'] = float(acc)

    # Evaluate the CNN model, which needs an explicit channel axis
    x_test = x_test.reshape(-1, 28, 28, 1)
    loss, acc = model_cnn.evaluate(x_test, y_test)
    print(f"CNN model - loss: {loss}, acc: {acc}")
    mnist_cnn_model.metadata['loss'] = float(loss)
    mnist_cnn_model.metadata['acc'] = float(acc)

### 100. Define Pipeline

In [116]:
from google_cloud_pipeline_components import aiplatform as gcc_aip

@dsl.pipeline(
    name='mnist-pipeline',
    description='An example pipeline that performs load mnist data.',
    pipeline_root='gs://suwan_test/'
)
def mnist_pipeline():
    # Wiring: load -> preprocess -> (linear, cnn) -> evaluate.
    raw = load_data()

    print(raw.output)

    processed = proc_data(dataset_input=raw.output)

    # Both trainers consume the same preprocessed dataset.
    linear = train_linear(dataset_input=processed.output)
    cnn = train_cnn(dataset_input=processed.output)

    # Evaluation takes the preprocessed dataset plus both trained models.
    evaluate_models(
        dataset_input=processed.output,
        mnist_linear_model=linear.output,
        mnist_cnn_model=cnn.output,
    )

### 101. Run 

In [117]:
# Second-resolution timestamp, embedded in the job id below.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# One path shared by the compiler output and the job template.
PIPELINE_SPEC_PATH = "mnist_pipeline.json"

# Compile the pipeline function into a JSON spec on disk.
compiler.Compiler().compile(
    pipeline_func=mnist_pipeline,
    package_path=PIPELINE_SPEC_PATH,
)

# Build the job from the spec that was just written.
job = pipeline_jobs.PipelineJob(
    display_name="mnist_pipeline",
    template_path=PIPELINE_SPEC_PATH,
    job_id="mnist-pipeline-" + TIMESTAMP,
    enable_caching=True,
)

{{pipelineparam:op=load-data;name=dataset}}


In [118]:
# Run the pipeline job; the log output below shows it being created and its state reported.
job.run()

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/697793444829/locations/us-central1/pipelineJobs/mnist-pipeline-20211031081001
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/697793444829/locations/us-central1/pipelineJobs/mnist-pipeline-20211031081001')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/mnist-pipeline-20211031081001?project=697793444829
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/697793444829/locations/us-central1/pipelineJobs/mnist-pipeline-20211031081001 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/697793444829/locations/us-central1/pipelineJobs/mnist-pip