# MLOps Coloring Book

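This notebook demonstrates the 1-ClickMLOps tool: it declares three pipeline components (dataset creation, model training, model deployment), wires them into a pipeline, and runs that pipeline.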

## Imports

In [1]:
from IPython.core.magic import register_cell_magic

from utils import OneClickMLOps
# TODO: consider writing these helper files to a temporary directory instead of the current working directory.
@register_cell_magic
def imports(_, cell):
    """Write the cell's source to ``.imports.py`` and then execute it.

    Args:
        _: Text following ``%%imports`` on the magic line; ignored.
        cell: Source code contained in the cell body.
    """
    target = '.imports.py'
    with open(target, 'wt') as handle:
        handle.write(cell)
    # Compiling against the saved file name makes tracebacks point at that file.
    exec(compile(cell, target, 'exec'), globals())

@register_cell_magic
def define(_, cell):
    """Save the cell to ``.cell.py`` and run only its ``makeComponent`` call.

    The full cell body is written to ``.cell.py``. Only the
    ``OneClickMLOps.makeComponent(...)`` call found in the cell is compiled
    and executed here; the remaining code in the cell is not run.

    Args:
        _: Text following ``%%define`` on the magic line; ignored.
        cell: Source code contained in the cell body.

    Raises:
        ValueError: If the cell has no ``OneClickMLOps.makeComponent(`` call
            or the call's parentheses are never closed.
    """
    import io
    import tokenize

    marker = "OneClickMLOps.makeComponent("
    file = '.cell.py'
    with open(file, 'wt') as fd:
        fd.write(cell)

    start = cell.find(marker)
    if start == -1:
        raise ValueError(f"%%define cell must contain a call to {marker}...)")

    # Tokenize from the call onwards so that parentheses inside string
    # literals or comments are not mistaken for the end of the call.
    snippet = cell[start:]
    depth = 0
    end = None
    try:
        for tok in tokenize.generate_tokens(io.StringIO(snippet).readline):
            if tok.type != tokenize.OP:
                continue
            if tok.string == '(':
                depth += 1
            elif tok.string == ')':
                depth -= 1
                if depth == 0:
                    end = tok.end
                    break
    except (tokenize.TokenError, SyntaxError):
        end = None
    if end is None:
        raise ValueError(f"Unbalanced parentheses in the {marker}...) call")

    # tok.end is a (row, column) pair with 1-based rows; convert it to an
    # offset into snippet. Lines are split on '\n' only, as StringIO does.
    row, col = end
    lines = snippet.split('\n')
    end_offset = sum(len(line) + 1 for line in lines[:row - 1]) + col
    code_to_exec = snippet[:end_offset]
    code = compile(code_to_exec, file, 'exec')
    exec(code, globals())

In [2]:
%%imports
import json
import pandas as pd
from google.cloud import aiplatform
from google.cloud import aiplatform_v1
from google.cloud import bigquery
from google.cloud import storage
import datetime
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
from joblib import dump
import pickle
import os

## Globals

In [3]:
# Project-level settings shared by the components and the pipeline run.
project_id = "sandbox-srastatter"
region = "us-central1"

# Fully qualified BigQuery source table: <project>.<dataset>.<table>.
bq_table = "sandbox-srastatter.mlops_boxer_test.dry-beans"

# GCS object the dataset step writes the training CSV to.
data_path = "gs://mlops-boxer-test/data"

# Each run saves its model under a fresh folder. A compact timestamp keeps
# spaces and colons (present in str(datetime.now())) out of the GCS path.
run_timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
model_directory = f"gs://mlops-boxer-test/trained_models/{run_timestamp}"

## Data Loading

In [4]:
%%define
OneClickMLOps.makeComponent(
    # Declares the "create_dataset" component. The code in the rest of this
    # cell refers to the params below by name: bq_table, data_path, project_id.
    name="create_dataset",
    description="Loads data from BQ and writes a dataframe as a csv to GCS.", # optional
    params=[
        {"name": "bq_table", "type": str, "description": "The bq input table."}, # descriptions are optional
        {"name": "data_path", "type": str, "description": "GS location where the training data is written."},
        {"name": "project_id", "type": str, "description": "Project_id."}
    ]
)

# BigQuery client bound to the project given by the project_id param.
bq_client = bigquery.Client(project=project_id)

def get_query(bq_input_table: str) -> str:
    """Build the SELECT-all statement for a BigQuery table.

    Args:
        bq_input_table: Fully qualified name of the table to read
            (e.g. <project>.<dataset>.<table>).

    Returns:
        The query text selecting every column of ``bq_input_table``.
    """
    query_template = "\n    SELECT *\n    FROM `{}`\n    "
    return query_template.format(bq_input_table)

def load_bq_data(query: str, client: bigquery.Client) -> pd.DataFrame:
    """Run a BigQuery query and return its result as a DataFrame.

    Args:
        query: SQL text to execute.
        client: BigQuery client used to submit the query.

    Returns:
        pd.DataFrame: The query result converted with ``to_dataframe()``.
    """
    query_job = client.query(query)
    return query_job.to_dataframe()

# Read the whole table and store it as CSV at data_path.
dataframe = load_bq_data(get_query(bq_table), bq_client)
# index=False: by default to_csv also writes the row index as an unnamed
# first column, which pd.read_csv would load back as an extra data column.
dataframe.to_csv(data_path, index=False)

## Model Training

In [5]:
%%define
OneClickMLOps.makeComponent(
    # Declares the "train_model" component. The code in the rest of this cell
    # refers to the params below by name: model_directory and data_path.
    name="train_model",
    description="Trains a decision tree on the training data.",
    params=[
        {"name": "model_directory", "type": str, "description": "GS location of saved model."},
        {"name": "data_path", "type": str, "description": "GS location where the training data is written."}
    ]
)

def save_model(model, model_directory):
    """Pickle ``model`` locally and upload it to ``model_directory`` on GCS.

    The model is written to ``model.pkl`` in the current working directory and
    then uploaded as ``<model_directory>/model.pkl``.

    Args:
        model: The picklable model object to store.
        model_directory: Destination folder as a ``gs://<bucket>/<prefix>`` URI.

    Raises:
        ValueError: If ``model_directory`` is not a ``gs://`` URI naming a bucket.
    """
    import posixpath  # GCS object names always use '/', whatever the host OS

    scheme = 'gs://'
    # Validate before touching the filesystem or the network.
    if not model_directory.startswith(scheme):
        raise ValueError(f"model_directory must start with {scheme!r}: {model_directory!r}")
    bucket_name, _, prefix = model_directory[len(scheme):].partition('/')
    if not bucket_name:
        raise ValueError(f"model_directory has no bucket name: {model_directory!r}")

    filename = 'model.pkl'
    with open(filename, 'wb') as f:
        pickle.dump(model, f)

    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(posixpath.join(prefix, filename))
    blob.upload_from_filename(filename)

# Load the training CSV; "Class" is the label column, the rest are features.
df = pd.read_csv(data_path)
# A CSV written with pandas' default index=True carries the row index as an
# unnamed first column; drop it so it is not treated as a feature.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
labels = df.pop("Class").tolist()
data = df.values.tolist()

# A fixed seed makes the split and the fitted tree reproducible across runs.
random_seed = 42
x_train, x_test, y_train, y_test = train_test_split(data, labels, random_state=random_seed)
skmodel = DecisionTreeClassifier(random_state=random_seed)
skmodel.fit(x_train, y_train)
score = skmodel.score(x_test, y_test)
print('accuracy is:', score)

# URI under which save_model uploads the pickled model.
output_uri = f"{model_directory.rstrip('/')}/model.pkl"
save_model(skmodel, model_directory)

## Uploading & Deploying the Model

In [6]:
%%define
OneClickMLOps.makeComponent(
    # Declares the "deploy_model" component. The code in the rest of this cell
    # refers to the params below by name: model_directory, project_id, region.
    name="deploy_model",
    description="Uploads the trained model to Vertex AI and deploys it to an endpoint.",
    params=[
        {"name": "model_directory", "type": str, "description": "GS location of saved model."},
        {"name": "project_id", "type": str, "description": "Project_id."},
        {"name": "region", "type": str, "description": "Region."}
    ]
)

# Point the Vertex AI SDK at the target project and region.
aiplatform.init(location=region, project=project_id)

# Register the artifacts stored under model_directory as a Vertex AI model,
# served by the scikit-learn CPU prediction image named below.
serving_image = "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.0-24:latest"
deployed_model = aiplatform.Model.upload(
    artifact_uri=model_directory,
    display_name="beans-model-pipeline",
    serving_container_image_uri=serving_image,
)

# Deploy the uploaded model and keep the handle that deploy() returns.
endpoint = deployed_model.deploy(machine_type="n1-standard-4")

## Define and Run the Pipeline

In [7]:
# Runtime values for the pipeline parameters, keyed by parameter name.
pipeline_params = dict(
    bq_table=bq_table,
    model_directory=model_directory,
    data_path=data_path,
    project_id=project_id,
    region=region,
)

In [8]:
# Declare the pipeline: its parameters and the ordered list of components.
# Each param_mapping entry pairs a component param with the pipeline param
# that feeds it.
OneClickMLOps.makePipeline(
    name="training-pipeline",
    description="Creates the dataset, trains a decision tree and deploys the model.", # optional
    params=[
        {"name": "bq_table", "type": str, "description": "The bq input table."}, # descriptions are optional
        {"name": "model_directory", "type": str, "description": "GS location of saved model."},
        {"name": "data_path", "type": str, "description": "GS location where the training data is written."},
        {"name": "project_id", "type": str, "description": "Project_id."},
        {"name": "region", "type": str, "description": "Region."}
    ],
    pipeline=[{
        "component_name": "create_dataset", "param_mapping": [
            ("bq_table", "bq_table"), # (component_param, pipeline_param)
            ("data_path", "data_path"),
            ("project_id", "project_id")
        ]
    },
    {
        "component_name": "train_model", "param_mapping": [
            ("model_directory", "model_directory"),
            ("data_path", "data_path")
        ]
    },
    {
        "component_name": "deploy_model", "param_mapping": [
            ("model_directory", "model_directory"),
            ("project_id", "project_id"),
            ("region", "region")
        ]
    }]
)

In [9]:
# Build the components and submit the pipeline run. Reuse the project_id
# global so the project is configured in exactly one place.
OneClickMLOps.go(project_id=project_id, pipeline_params=pipeline_params)

[0;32m BUILDING COMPONENTS [0m


Creating temporary tarball archive of 28 file(s) totalling 65.3 KiB before compression.
Some files were not included in the source upload.

Check the gcloud log [/Users/srastatter/.config/gcloud/logs/2022.12.16/16.14.56.972550.log] to see which files and the contents of the
default gcloudignore file used (see `$ gcloud topic gcloudignore` to learn
more).

Uploading tarball of [..] to [gs://sandbox-srastatter_cloudbuild/source/1671225296.999804-56e39c845845493da031a02e4840fb0b.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/sandbox-srastatter/locations/global/builds/85673889-b6e8-4e4e-bb8b-f15c9d62dbe7].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/85673889-b6e8-4e4e-bb8b-f15c9d62dbe7?project=1006819402307 ].


----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "85673889-b6e8-4e4e-bb8b-f15c9d62dbe7"

FETCHSOURCE
Fetching storage object: gs://sandbox-srastatter_cloudbuild/source/1671225296.999804-56e39c845845493da031a02e4840fb0b.tgz#1671225297378945
Copying gs://sandbox-srastatter_cloudbuild/source/1671225296.999804-56e39c845845493da031a02e4840fb0b.tgz#1671225297378945...
/ [1 files][ 17.6 KiB/ 17.6 KiB]                                                
Operation completed over 1 objects/17.6 KiB.
BUILD
Already have image (with digest): gcr.io/cloud-builders/docker
Sending build context to Docker daemon  17.92kB
Step 1/6 : FROM python:3.9
3.9: Pulling from library/python
f2f58072e9ed: Pulling fs layer
5c8cfbf51e6e: Pulling fs layer
aa3a609d1579: Pulling fs layer
094e7d9bb04e: Pulling fs layer
2cbfd734f382: Pulling fs layer
aa86ac293d0f: Pulling fs layer
ea442e3d4174: Pulling fs layer
c662908b49d7: Pulling fs layer
f7e80cce9b62: Pulling fs layer
094e7d



[0;32m RUNNING PIPELINE JOB [0m
Creating PipelineJob


INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob


PipelineJob created. Resource name: projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221216161726
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221216161726')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/training-pipeline-20221216161726?project=1006819402307


INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221216161726
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/1006819402307/locations/us-central1/pipelineJobs/training-pipeline-20221216161726')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/training-pipeline-20221216161726?project=1006819402307
