# MLOps Coloring Book

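This notebook demonstrates the 1-ClickMLOps tool (`OneClickMLOps`). It defines Kubeflow Pipelines (KFP) components that load data from BigQuery, train a scikit-learn model, and deploy the model to Vertex AI, and then uses the tool to run the resulting pipeline.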

In [1]:
# Check that the KFP SDK is installed correctly. The SDK version should be >= 1.6:
!python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"

KFP SDK version: 1.8.14


## Imports

In [2]:
import json
import pandas as pd
from kfp.v2 import compiler, dsl
from kfp.v2.dsl import pipeline, component, Artifact, Dataset, Input, Metrics, Model, Output, InputPath, OutputPath
from google.cloud import aiplatform
from google.cloud import aiplatform_v1
import datetime
from IPython.core.magic import register_cell_magic

from utils import OneClickMLOps

@register_cell_magic
def execute_and_save(file, cell):
    """Cell magic: save the cell's source to a file, then execute it.

    Usage (must be the first line of the cell)::

        %%execute_and_save path/to/file.py

    Args:
        file: The magic's argument line, interpreted as the destination file
            path. Surrounding whitespace is ignored.
        cell: The cell body. It is written to the file verbatim and then
            executed.

    Raises:
        ValueError: If no destination file name was given.
    """
    path = file.strip()
    if not path:
        raise ValueError('usage: %%execute_and_save <file>')

    # Explicit UTF-8 so the saved file does not depend on the locale's
    # default encoding.
    with open(path, 'wt', encoding='utf-8') as fd:
        fd.write(cell)

    # Compile against the saved file's name so tracebacks refer to that file.
    code = compile(cell, path, 'exec')
    # Run in the globals of the module that defines this magic, so names the
    # cell defines become visible there.
    exec(code, globals())

## Data Loading

In [3]:
@component(
    packages_to_install=[
        "google-cloud-bigquery",
        "pandas",
        "pyarrow",
        "db_dtypes"
    ],
    base_image="python:3.9",
    output_component_file="create_dataset.yaml"
)
def create_dataset(
    bq_table: str,
    output_data_path: OutputPath("Dataset"),
    project: str
):
    """Reads a BigQuery table and writes its rows to a CSV dataset.

    Args:
        bq_table: Full name of the BigQuery table to read
            (e.g. <project>.<dataset>.<table>).
        output_data_path: Path the CSV file is written to.
        project: Project the BigQuery client is created for.
    """
    from google.cloud import bigquery
    import pandas as pd

    bq_client = bigquery.Client(project=project)

    def get_query(bq_input_table: str) -> str:
        """Builds the query that selects every column of the input table.

        Args:
            bq_input_table: Full name of the BigQuery table to read
                (e.g. <project>.<dataset>.<table>).

        Returns:
            The query string.
        """
        return f"""
        SELECT *
        FROM `{bq_input_table}`
        """

    def load_bq_data(query: str, client: bigquery.Client) -> pd.DataFrame:
        """Runs a query and returns the result as a pandas DataFrame.

        Args:
            query: Query to execute.
            client: BigQuery client used to execute the query.

        Returns:
            A DataFrame holding the query result.
        """
        return client.query(query).to_dataframe()

    dataframe = load_bq_data(get_query(bq_table), bq_client)
    # index=False: by default pandas writes the row index as an extra unnamed
    # first column. A consumer that reads the file back with a plain
    # pd.read_csv would then see that index as a regular data column.
    dataframe.to_csv(output_data_path, index=False)

## Model Training

In [4]:
@component(
    packages_to_install=[
        "scikit-learn",
        "pandas",
        "joblib",
        "tensorflow"
    ],
    base_image="python:3.9",
    output_component_file="train_model.yaml",
)
def train_model(
    output_model_directory: str,
    dataset: Input[Dataset],
    metrics: Output[Metrics],
    model: Output[Model]
):
    """Trains a decision-tree classifier on the dataset and saves it as a pickle.

    Args:
        output_model_directory: Directory under which ``model.pkl`` is written.
        dataset: CSV dataset to train on. It must contain a ``Class`` column,
            which is used as the label; every other column is a feature.
        metrics: Receives ``accuracy`` (in percent, measured on the held-out
            split), ``framework`` and ``dataset_size``.
        model: Output model artifact; its URI is set to
            ``output_model_directory``.
    """
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    import pandas as pd
    import tensorflow as tf
    import pickle
    import os

    # Fixed seed so the train/test split and the fitted tree (and therefore
    # the logged accuracy) are reproducible between runs.
    random_state = 42

    def save_model(model, uri):
        """Pickles ``model`` to ``uri``."""
        # Pickle is a binary format, so the file is opened in binary mode.
        with tf.io.gfile.GFile(uri, 'wb') as f:
            pickle.dump(model, f)

    df = pd.read_csv(dataset.path)
    labels = df.pop("Class").tolist()
    data = df.values.tolist()
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, random_state=random_state
    )
    skmodel = DecisionTreeClassifier(random_state=random_state)
    skmodel.fit(x_train, y_train)
    score = skmodel.score(x_test, y_test)
    print('accuracy is:', score)
    metrics.log_metric("accuracy", (score * 100.0))
    metrics.log_metric("framework", "Scikit Learn")
    metrics.log_metric("dataset_size", len(df))

    output_uri = os.path.join(output_model_directory, 'model.pkl')
    save_model(skmodel, output_uri)
    # Record where the model was written. The URI is set directly (rather than
    # through .path) because the deploy step reads model.uri.
    model.uri = output_model_directory

## Uploading & Deploying the Model

In [5]:
@component(
    packages_to_install=["google-cloud-aiplatform"],
    base_image="python:3.9",
    output_component_file="deploy_model.yaml",
)
def deploy_model(
    model: Input[Model],
    project: str,
    region: str,
    vertex_endpoint: Output[Artifact],
    vertex_model: Output[Model]
):
    """Uploads the trained model to Vertex AI and deploys it to an endpoint.

    Args:
        model: Trained model artifact; its URI is passed as the artifact
            location of the uploaded model.
        project: Project used to initialise the Vertex AI SDK.
        region: Location used to initialise the Vertex AI SDK.
        vertex_endpoint: Output artifact whose URI is set to the resource
            name of the endpoint the model is deployed to.
        vertex_model: Output artifact whose URI is set to the resource name
            of the uploaded model.
    """
    from google.cloud import aiplatform

    aiplatform.init(location=region, project=project)

    # Upload the model, served with the scikit-learn prediction container.
    uploaded_model = aiplatform.Model.upload(
        serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest",
        artifact_uri=model.uri,
        display_name="beans-model-pipeline",
    )

    # Deploy the uploaded model to an endpoint.
    serving_endpoint = uploaded_model.deploy(machine_type="n1-standard-4")

    # Expose the created resources to the pipeline through the output artifacts.
    vertex_model.uri = uploaded_model.resource_name
    vertex_endpoint.uri = serving_endpoint.resource_name

## Define and Run the Pipeline

In [6]:
%%execute_and_save pipeline.py 
@dsl.pipeline(name='training-pipeline')
def pipeline(
    bq_table: str,
    output_model_directory: str,
    project: str,
    region: str,
):
    """Chains the three components: create dataset -> train model -> deploy model.

    Args:
        bq_table: BigQuery table passed to the dataset-creation step.
        output_model_directory: Directory passed to the training step.
        project: Project passed to the dataset-creation and deployment steps.
        region: Region passed to the deployment step.
    """
    # Step 1: export the BigQuery table as a dataset.
    load_step = create_dataset(project=project, bq_table=bq_table)

    # Step 2: train on the dataset produced by step 1.
    train_step = train_model(
        dataset=load_step.output,
        output_model_directory=output_model_directory,
    )

    # Step 3: upload and deploy the model produced by step 2.
    deploy_model(
        region=region,
        project=project,
        model=train_step.outputs["model"],
    )

In [7]:
# Timestamp that gives the model output directory a fresh name for this run.
# It is formatted explicitly because str(datetime.datetime.now()) contains a
# space, colons and a dot, which are awkward inside a gs:// URI.
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Runtime arguments for the pipeline; the keys match the pipeline function's
# parameter names.
pipeline_params = {
    "bq_table": "sandbox-srastatter.mlops_boxer_test.dry-beans",
    "output_model_directory": f"gs://mlops-boxer-test/trained_models/{run_timestamp}",
    "project": "sandbox-srastatter",
    "region": "us-central1"
}

In [8]:
# Hand the pipeline parameters to the tool. Judging by the captured output
# below, this builds the components and then creates a PipelineJob.
OneClickMLOps.go(
    project_id='sandbox-srastatter',
    pipeline_params=pipeline_params,
)

BUILDING COMPONENTS


Creating temporary tarball archive of 29 file(s) totalling 64.0 KiB before compression.
Uploading tarball of [..] to [gs://sandbox-srastatter_cloudbuild/source/1670946930.221125-7e83a6e9f43645f08e6436e8afa8ebf2.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/sandbox-srastatter/locations/global/builds/359701bb-4085-443f-bc3c-abb9cf53700f].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/359701bb-4085-443f-bc3c-abb9cf53700f?project=1006819402307 ].


----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "359701bb-4085-443f-bc3c-abb9cf53700f"

FETCHSOURCE
Fetching storage object: gs://sandbox-srastatter_cloudbuild/source/1670946930.221125-7e83a6e9f43645f08e6436e8afa8ebf2.tgz#1670946930584839
Copying gs://sandbox-srastatter_cloudbuild/source/1670946930.221125-7e83a6e9f43645f08e6436e8afa8ebf2.tgz#1670946930584839...
/ [1 files][ 14.6 KiB/ 14.6 KiB]                                                
Operation completed over 1 objects/14.6 KiB.
BUILD
Starting Step #0 - "Build component: deploy_model"
Step #0 - "Build component: deploy_model": Already have image (with digest): gcr.io/cloud-builders/docker
Step #0 - "Build component: deploy_model": Sending build context to Docker daemon  6.656kB
Step #0 - "Build component: deploy_model": Step 1/7 : FROM python:3.9
Step #0 - "Build component: deploy_model": 3.9: Pulling from library/python
Step #0 - "Build component: deploy_model": f2f58072e9ed: Pulli



RUN PIPELINE JOB
Creating PipelineJob


INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob


PipelineJob created. Resource name: projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221213110537
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221213110537')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/training-pipeline-20221213110537?project=1006819402307


INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221213110537
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221213110537')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/training-pipeline-20221213110537?project=1006819402307


In [9]:
# NOTE(review): presumably generates the MLOps resources for this pipeline
# (Artifact Registry repository, Cloud Storage bucket) without going through
# the full `go` flow. TODO: confirm against utils.OneClickMLOps.
OneClickMLOps.generate(
    project_id='sandbox-srastatter',
    pipeline_params=pipeline_params,
    # Cloud Storage bucket
    gs_bucket_name='mlops-boxer-test',
    gs_bucket_location='us-central1',
    # Artifact Registry repository
    af_registry_name='mlops-boxer-test',
    af_registry_location='us-central1',
)