In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Continuous Training

<table align="left">
  <td>
    <a href="https://console.cloud.google.com/ai-platform/notebooks/deploy-notebook?download_url=https://github.com/GoogleCloudPlatform/fraudfinder/raw/main/06_model_training_pipeline.ipynb">
       <img src="https://www.gstatic.com/cloud/images/navigation/vertex-ai.svg" alt="Google Cloud Notebooks">Open in Cloud Notebook
    </a>
  </td> 
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/fraudfinder/blob/main/06_model_training_pipeline.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> Open in Colab
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/fraudfinder/blob/main/06_model_training_pipeline.ipynb">
        <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      View on GitHub
    </a>
  </td>
</table>

## Overview

[Fraudfinder](https://github.com/googlecloudplatform/fraudfinder) is a series of labs on how to build a real-time fraud detection system on Google Cloud. Throughout the Fraudfinder labs, you will learn how continuous training works in MLOps. The goal of this notebook is to perform continuous training of the model by automating the ML pipeline; this lets you achieve continuous delivery of the model prediction service.

### Objective

This notebook shows how to trigger a deployed pipeline to execute and generate models. 

This lab uses the following Google Cloud services and resources:

- [Vertex AI](https://cloud.google.com/vertex-ai/)
- [BigQuery](https://cloud.google.com/bigquery/)

Steps performed in this notebook:

* Run the pipeline from within the notebook (on demand), using the pipeline generated by CI/CD
* Run the pipeline using a Cloud Scheduler job, which sends a message to a Pub/Sub topic, which in turn calls a Cloud Function to trigger the Vertex AI pipeline

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* BigQuery

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing), [BigQuery pricing](https://cloud.google.com/bigquery/pricing) and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

### Load configuration settings from the setup notebook

Set the constants used in this notebook and load the config settings from the `00_environment_setup.ipynb` notebook.

In [None]:
# Ask gcloud for the active project; `!cmd` assignment captures the command output as a list of lines.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]  # first captured output line is taken as the project ID
BUCKET_NAME = f"{PROJECT_ID}-fraudfinder"
# Read the shared settings file from the lab bucket.
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env.py
# `.n` is the captured output joined back into one newline-separated string.
print(config.n)
# NOTE(review): exec() runs whatever Python source is stored in that bucket object, so anyone
# with write access to the bucket can run code in this kernel. Names used later but not defined
# in this notebook (e.g. ID, REGION) presumably come from here - confirm against the setup notebook.
exec(config.n)

### Import libraries and define constants

#### Libraries

In [None]:
import os

In [None]:
# Names of the pipeline and of the training container image; ID and PROJECT_ID are
# expected to be defined already by the configuration loaded earlier.
PIPELINE_NAME = "fraud-finder-xgb-pipeline-{}".format(ID)
IMAGE_REPOSITORY = "fraudfinder-{}".format(ID)
IMAGE_NAME = "dask-xgb-classificator"
IMAGE_TAG = "v1"
# Full image reference: <registry host>/<project>/<repository>/<image>:<tag>
IMAGE_URI = "us-central1-docker.pkg.dev/{}/{}/{}:{}".format(
    PROJECT_ID, IMAGE_REPOSITORY, IMAGE_NAME, IMAGE_TAG
)

### A) Run the pipeline from within Notebook (on-demand), using pipeline generated by CI/CD

In [None]:
# Cloud Storage folder holding the pipeline specs, and the JSON spec for this pipeline.
PIPELINE_STORE_URI = "gs://{}/pipeline-store/".format(BUCKET_NAME)
pipelines_file_location = os.path.join(PIPELINE_STORE_URI, PIPELINE_NAME + ".json")
# Bare expression: displays the resolved URI as the cell output.
pipelines_file_location

In [None]:
!mkdir ./pipelines/gcs/

In [None]:
!gsutil cp $pipelines_file_location ./pipelines/gcs/

In [None]:
# Publish the project ID as an environment variable of the kernel process
# (presumably read by the pipeline run script launched below - confirm).
os.environ.update({"PROJECT_ID": PROJECT_ID})

In [None]:
# Turn on the `ubla` (uniform bucket-level access) setting for the lab bucket.
!gsutil ubla set on gs://{BUCKET_NAME}

In [None]:
!python ./deploy_kfp_pipeline/pipeline_run.py --pipelines-file-location=$pipelines_file_location

### B) Run using Cloud Scheduler job => Pub/Sub topic => Cloud Function

In [None]:
# URI of the pipeline spec JSON inside the bucket's pipeline-store folder.
pipeline_spec_uri = os.path.join(
    "gs://{}/pipeline-store/".format(BUCKET_NAME),
    PIPELINE_NAME + ".json",
)
# Bare expression: displays the URI as the cell output.
pipeline_spec_uri

#### 1. Create a Pub/Sub topic
The `my-scheduled-pipeline-topic` is the name of the new topic you are creating:

In [None]:
# Create the Pub/Sub topic that the function trigger and the scheduler job below both reference.
!gcloud pubsub topics create my-scheduled-pipeline-topic

#### 2. Deploy the subscribe function
This Cloud Function is invoked by Pub/Sub and triggers the Vertex AI pipeline.

In [None]:
!gcloud functions deploy ff-scheduled-pipeline-function \
--source=./trigger_cf \
--entry-point=subscribe \
--trigger-topic my-scheduled-pipeline-topic \
--runtime python37 \
--ingress-settings internal-only

#### 3. Create Cloud Scheduler
Create a Cloud Scheduler job that publishes a new message to the Pub/Sub topic.

Note:
- The App Engine and Cloud Scheduler APIs need to be enabled.
- Make sure the App Engine default service account (for example, `fraudfinderdemo@appspot.gserviceaccount.com`) has storage access.

In [None]:
import json
import shlex

# Payload published by the scheduler job: where the pipeline spec lives and the parameter
# values for the run. Building a dict and serialising it once keeps the JSON well-formed
# whatever characters the values contain.
parameters = json.dumps(
    {
        "pipeline_spec_uri": pipelines_file_location,
        "parameter_values": {"project_id": PROJECT_ID, "region": REGION},
    }
)
# `$message_body` is pasted verbatim into the gcloud command line in the next cell, so
# quote it for the shell; otherwise the spaces and double quotes in the JSON would be
# split or stripped before gcloud sees them.
message_body = shlex.quote(parameters)

In [None]:
!gcloud scheduler jobs create pubsub ff-training-pipleline \
--schedule "35 11 * * *" \
--topic=my-scheduled-pipeline-topic \
--location=us-central1 \
--message-body=$message_body