# BigQuery - Pipelines with KFP for BQML + Online Predictions
https://source.cloud.google.com/aburdenko-project/idexx/+/master:training_pipeline.py
This notebook uses a Kubeflow Pipeline (KFP) to create a BQML model, export it, and deploy it to a Vertex AI Endpoint for online predictions.

**Prerequisites**
- `00 - Initial Setup`
- `01 - BigQuery - Data`
- Review Only
    - `02 - BigQuery - BQML`
    - `03 - BigQuery - BQML Online Predictions`

**Overview**

<img src="architectures/statmike-mlops-08.png">

---
## Setup

Parameters:

Python Packages:

In [None]:
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component
from kfp.v2.google.client import AIPlatformClient

---
## Create Kubeflow Pipeline

---
### Train Model With BQML

In [None]:
@component(
    base_image="python:3.9",
    packages_to_install=["google-cloud-bigquery", "pandas", "pyarrow"],
)
def build_model(species: str, test_code: str):
    """Train a BQML autoencoder named after the given species and test code.

    Submits a ``CREATE OR REPLACE MODEL`` statement to BigQuery and blocks
    until the job has finished. The model is written to
    ``idexx.<species>_<test_code>_autoencoder`` and is trained on the
    ``testcode``, ``species`` and numeric-cast ``result`` columns of
    ``idexx.src_table``. The training query itself does not filter on the
    arguments; they only determine the model name.

    Args:
        species: Species code, used as the first part of the model name.
        test_code: Test code, used as the second part of the model name.
    """
    # Kept as one backslash-continued literal so the text printed below and
    # sent to BigQuery stays exactly as written.
    sql = f"CREATE OR REPLACE MODEL `idexx.{species}_{test_code}_autoencoder` \
            OPTIONS( \
                model_type='autoencoder', \
                activation_fn='relu', \
                batch_size=8, \
                dropout=0.2, \
                hidden_units=[32, 16, 4, 16, 32], \
                learn_rate=0.001, \
                l1_reg_activation=0.0001, \
                max_iterations=10, \
                optimizer='adam' \
            )  \
            AS \
            select testcode, species, cast ( result as numeric) result from idexx.src_table"

    # Echo the statement so it shows up in the component's log output.
    print(sql)

    # Imported inside the body so the function stays self-contained when it
    # is packaged and executed as a pipeline component.
    from google.cloud import bigquery

    project_id = "aburdenko-project"
    client = bigquery.Client(project=project_id)

    # result() waits for the training job to complete.
    client.query(sql).result()

---
### Evaluate Model With BQML

---
### Export BQML Model

---
### Upload BQML Model To Vertex AI

---
### Create Vertex AI Endpoint

---
### Deploy BQML Model to Vertex AI Endpoint

---
### Build KFP Pipeline 

In [None]:
@dsl.pipeline(
    name="bqml-machine-drift-pipeline",
    description="A simple intro pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def bqml_training_pipeline(
    query: str = "select Species, TestCode from `aburdenko-project.idexx.SOURCE` where species = 'C' group by Species, TestCode",
):
    """Run the load, train, export, deploy and predict steps one after another.

    The steps are ordered purely with ``.after()``; every step receives the
    same hard-coded species and test code rather than another step's output.

    Args:
        query: Not read by the current body. See the disabled sketch below.
    """
    # NOTE: `query` is not used yet. The disabled sketch below would execute it
    # and take species/test_code from each returned row; until it is enabled,
    # a single hard-coded pair is used instead.
    #
    # GCP_PROJECT_NAME = "aburdenko-project"
    # from google.cloud import bigquery
    # bq = bigquery.Client(project=GCP_PROJECT_NAME)
    # df = bq.query(query).to_dataframe()
    # for _, row in df.iterrows():
    #     species = row['Species']
    #     test_code = row['TestCode']

    species = "C"
    test_code = "HC02"

    # Stage the source table, then the test table.
    load_src_op = load_src_table(species, test_code)

    load_test_op = load_test_table(species, test_code)
    load_test_op.after(load_src_op)

    # Train the BQML model once both tables are in place.
    train_op = build_model(species, test_code)
    train_op.after(load_test_op)

    # Export the trained model and deploy it to a Vertex AI endpoint.
    export_op = export_model(species, test_code)
    export_op.after(train_op)

    deploy_op = deploy_model_to_vertex_endpoint()
    deploy_op.after(export_op)

    # Finally, request predictions once the deployment step has run.
    predict_op = predict(species, test_code)
    predict_op.after(deploy_op)

---
### Compile Pipeline

In [None]:
# Compile the pipeline function into a JSON job spec for submission.
compiler.Compiler().compile(
    pipeline_func=bqml_training_pipeline,
    package_path="bqml_training_pipeline_job.json",
)

---
## Run Pipeline Job

In [None]:
# Submit the compiled job spec as a pipeline run.
response = api_client.create_run_from_job_spec(
    # Change this path if you want to run a different compiled pipeline.
    job_spec_path="bqml_training_pipeline_job.json",
    pipeline_root=PIPELINE_ROOT,
    service_account=SERVICE_ACCOUNT,
)

---
## Predictions