In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 02 Formalization: Build an end-to-end Churn Pipeline using Vertex AI

## Overview
This notebook is a follow-up to `01_feature_management`. In this notebook we show you how to do model experimentation using Vertex AI.

**IMPORTANT:** You can only run this notebook if you have first completed `01_feature_management`.

### Install additional packages

Install the following packages required to execute this notebook. 

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME") and not os.getenv("VIRTUAL_ENV")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade google-cloud-aiplatform {USER_FLAG} -q
! pip3 install {USER_FLAG} kfp --upgrade -q
! pip3 install {USER_FLAG} google-cloud-pipeline-components --upgrade -q

### Check installed package versions
Run the following cell to check the KFP version.

In [None]:
# List the installed packages and keep only the lines that mention kfp.
! pip freeze | grep kfp

### Restart the kernel

After you install the additional packages, you need to restart the notebook kernel so it can find the packages.

In [None]:
# Restart the kernel so the freshly installed packages are picked up.
# This might take a bit.
import os

# The restart is skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

## Before you begin

### Set up your Google Cloud project

**The following steps are required, regardless of your notebook environment.**

1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.

1. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).

1. [Enable the Vertex AI and BigQuery APIs](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,bigquery.googleapis.com).

1. If you are running this notebook locally, you will need to install the [Cloud SDK](https://cloud.google.com/sdk).

1. Enter your project ID in the cell below. Then run the cell to make sure the
Cloud SDK uses the right project for all the commands in this notebook.

**Note**: Jupyter runs lines prefixed with `!` as shell commands, and it interpolates Python variables prefixed with `$` into these commands.

#### Set your project ID

**If you don't know your project ID**, you may be able to get your project ID using `gcloud`.

In [None]:
# Replace the placeholder with your project ID. If it is left unchanged, the
# following cell looks the project up with gcloud instead.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

#### Region

You can also change the `REGION` variable, which is used for operations
throughout the rest of this notebook.  Below are regions supported for Vertex AI. We recommend that you choose the region closest to you.

- Americas: `us-central1`
- Europe: `europe-west4`
- Asia Pacific: `asia-east1`

You may not use a multi-regional bucket for training with Vertex AI. Not all regions provide support for all Vertex AI services.

Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [None]:
REGION = "[your-region]"  # @param {type: "string"} --> You can change the region if you want

# Use us-central1 when the placeholder above was left untouched.
REGION = "us-central1" if REGION == "[your-region]" else REGION

print(f"Region: {REGION}")

#### UUID

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a uuid for each instance session, and append it onto the name of resources you create in this tutorial.

In [None]:
import random
import string


def generate_uuid(length: int = 8) -> str:
    """Return a random string of `length` lowercase letters and digits (default 8)."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)


# Suffix appended to resource names created in this session.
UUID = generate_uuid()

### Authenticate your Google Cloud account

**If you are using Vertex AI Workbench Notebooks**, your environment is already
authenticated. 

**If you are using Colab**, run the cell below and follow the instructions
when prompted to authenticate your account via OAuth.

**Otherwise**, follow these steps:

1. In the Cloud Console, go to the [**Create service account key**
   page](https://console.cloud.google.com/apis/credentials/serviceaccountkey).

2. Click **Create service account**.

3. In the **Service account name** field, enter a name, and
   click **Create**.

4. In the **Grant this service account access to project** section, click the **Role** drop-down list. Type "Vertex AI"
into the filter box, and select
   **Vertex AI Administrator**. Type "Storage Object Admin" into the filter box, and select **Storage Object Admin**.

5. Click *Create*. A JSON file that contains your key downloads to your
local environment.

6. Enter the path to your service account key as the
`GOOGLE_APPLICATION_CREDENTIALS` variable in the cell below and run the cell.

In [None]:
# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

import os
import sys

# If on Vertex AI Workbench, then don't execute this code
IS_COLAB = "google.colab" in sys.modules
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"):
        %env GOOGLE_APPLICATION_CREDENTIALS ''

### Load configuration
Next, we load the constants and the config file from your Cloud Storage bucket.

In [None]:
# The bucket name is derived from the PROJECT_ID set earlier in the notebook.
BUCKET_NAME=f"{PROJECT_ID}-vertexai-churn"

# NOTE(review): PROJECT_ID is re-read from gcloud *after* BUCKET_NAME was built
# from the previous value; if the two differ, BUCKET_NAME will not match the
# new PROJECT_ID. Confirm this ordering is intended.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
# Read the config file stored in the bucket; the shell output is captured line by line.
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env.py
# Upload the local churn CSV to the bucket's data/ folder.
!gsutil cp ./telecom_churn_data.csv gs://{BUCKET_NAME}/data/
# config.n is the captured output joined with newlines.
print(config.n)
# NOTE(review): exec() runs whatever Python is stored in the bucket object, and
# it may overwrite variables defined above. Only run this against a bucket you trust.
exec(config.n)

In [None]:
PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

STAGING_URI = f"gs://{BUCKET_NAME}/staging"
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline_root/{UUID}"
MODEL_DIR = f"gs://{BUCKET_NAME}/model/{UUID}"
MODEL_DISPLAY_NAME = f"train_deploy_{UUID}"
TRAINING_DATA_TABLE = f"bq://{PROJECT_ID}.ml_sample.train" 
TABLE_ID = f"{PROJECT_ID}.ml_sample.train"
TENSORBOARD_LOG_DIR = f"{BUCKET_NAME}/tensorboard_log_dir/"
DISPLAY_NAME = "telecom_churn_pipeline"
TRAINING_DATA_SELECTOR_LOC = f"gs://{BUCKET_NAME}/dataset/query_instance_2.csv"

Finally, validate access to your Cloud Storage bucket by examining its contents:

In [None]:
! gsutil ls -al $BUCKET_URI

### Import libraries

In [None]:
# General
#import tensorflow as tf
import pandas as pd
from typing import NamedTuple

# Google Cloud
from google.cloud import aiplatform as vertex
from google.cloud import bigquery
from google.cloud import bigquery_storage
from google.cloud.aiplatform import datasets
from google_cloud_pipeline_components import aiplatform as vertex_ai_components
from google_cloud_pipeline_components.v1.model import ModelUploadOp
from google_cloud_pipeline_components.types import artifact_types
from google_cloud_pipeline_components.v1.endpoint import (EndpointCreateOp, ModelDeployOp)
from google.cloud.aiplatform_v1beta1 import (FeaturestoreOnlineServingServiceClient, FeaturestoreServiceClient)
from google.cloud.aiplatform import Feature, Featurestore

# KFP
import kfp
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import HTML, Artifact, Condition, Input, Output, component, Metrics, Dataset
from kfp.v2.components import importer_node

### Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and corresponding bucket.

In [None]:
# Initialize the Vertex AI SDK with the project, region and staging bucket defined above.
vertex.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_URI)

### Extract data from the Feature Store
The next step is for us to create a dataset in BigQuery that we can use for training. To read data from the feature store we need a `read-instance list` that contains information for each training example. It lists observations at a particular point in time. This can be either a CSV file or a BigQuery table. The list must include the following information:
* Timestamps: the times at which labels were observed or measured. The timestamps are required so that Vertex AI Feature Store can perform a point-in-time lookup.
* Entity IDs: one or more IDs of the entities that correspond to the label.

**Important** Please make sure that you first finish Notebook `01_feature_management`.

In [None]:
bqclient = bigquery.Client()
bqstorageclient = bigquery_storage.BigQueryReadClient()

# Only the last '/'-separated segment of BQ_RAW_DATA is used as the table reference.
raw_table = BQ_RAW_DATA.split('/')[-1]
query_string = f"""
SELECT
    mobile_number
FROM `{raw_table}`
"""

# Run the query and download the rows through the BigQuery Storage API.
query_rows = bqclient.query(query_string).result()
user_df = query_rows.to_dataframe(bqstorage_client=bqstorageclient)

# The mobile numbers are used as the entity IDs of the read-instance list below.
X_train = user_df["mobile_number"]

In [None]:
from datetime import datetime, timezone

# The trailing "Z" in the format marks the timestamp as UTC, so the current
# time must be taken in UTC rather than in the machine's local timezone.
now = datetime.now(timezone.utc)
current_time = now.strftime("%Y-%m-%dT%H:%M:%SZ")

# Read-instance list: one row per entity ID, all sharing the same lookup timestamp.
res = pd.DataFrame({"users": X_train, "timestamp": current_time})
res.to_csv(TRAINING_DATA_SELECTOR_LOC, index=False)

In [None]:
# Get a handle to the feature store identified by FEATURESTORE_ID.
# NOTE(review): FEATURESTORE_ID is not defined in the visible cells of this
# notebook; presumably it comes from the config loaded earlier — confirm.
fs = Featurestore(
    featurestore_name=FEATURESTORE_ID,
    project=PROJECT_ID,
    location=REGION,
)
# Print the underlying resource to check that the lookup succeeded.
print(fs.gca_resource)

In [None]:
# Features to serve, keyed by entity type ID.
# To choose all the features use 'entity_type_id: ['*']'
SERVING_FEATURE_IDS = dict(
    users=[
        "mou_m1",
        "mou_m2",
        "mou_m3",
        "arpu_m1",
        "arpu_m2",
        "arpu_m3",
        "is_churn",
    ],
)

In [None]:
# Batch-serve the selected features for the entities and timestamps listed in
# the read-instance CSV, writing the result to the BigQuery training table.
fs.batch_serve_to_bq(
    bq_destination_output_uri=TRAINING_DATA_TABLE,
    serving_feature_ids=SERVING_FEATURE_IDS,
    read_instances_uri=TRAINING_DATA_SELECTOR_LOC,
)

## Build a custom component

In [None]:
@component(base_image='gcr.io/deeplearning-platform-release/tf2-gpu.2-5',
           packages_to_install=[
                                # pandas is listed once, pinned below.
                                "google-cloud-bigquery-storage",
                                'google-cloud-bigquery',
                                'pyarrow',
                                'fsspec',
                                'gcsfs',
                                'google-cloud-aiplatform',
                                'kfp==1.8.13',
                                'kfp-pipeline-spec',
                                'kfp-server-api',
                                'pandas==1.2.4',
                                'pandas-profiling==3.0.0',
                                'tensorboard==2.5.0',
                                'tensorboard-data-server==0.6.1',
                                'tensorboard-plugin-wit==1.8.0',
                                'tensorflow==2.5.0',
                                'tensorflow-datasets==4.3.0',
                                'tensorflow-estimator==2.5.0',
                                'tensorflow-metadata==1.0.0',
                                'google-cloud-logging==2.4.0',
                                'numpy==1.19.5',
                                # 'sklearn' is a deprecated alias on PyPI; the real package is scikit-learn.
                                'scikit-learn'])
def custom_training(project_id: str,
                    table_id: str,
                    model_dir: str,
                    metrics: Output[Metrics],
                    tensorboard_log_dir: str,) -> NamedTuple("Ouptputs", [("output_uri", str),],):
    """Train a small Keras churn classifier on a BigQuery table and export it.

    The whole table is read into a DataFrame and missing values are filled with 0.
    The first 1000 rows are held out for evaluation; the remaining rows are used
    for training.

    Args:
        project_id: Project used to create the BigQuery client.
        table_id: BigQuery table to read; must contain the six feature columns
            and the `is_churn` label column.
        model_dir: Where to save the model; overridden by the AIP_MODEL_DIR
            environment variable when that is set.
        metrics: Output metrics artifact; receives the held-out AUC as "auROC".
        tensorboard_log_dir: TensorBoard log directory; overridden by the
            AIP_TENSORBOARD_LOG_DIR environment variable when that is set.

    Returns:
        A named tuple whose `output_uri` is the directory the model was saved to.
    """
    import logging
    import os
    from typing import NamedTuple

    import tensorflow as tf
    from google.cloud import bigquery
    from google.cloud import bigquery_storage

    feature_columns = ['mou_m1', 'mou_m2', 'mou_m3', 'arpu_m1', 'arpu_m2', 'arpu_m3']
    label_column = 'is_churn'
    holdout_rows = 1000

    bqclient = bigquery.Client(project_id)
    bqstorageclient = bigquery_storage.BigQueryReadClient()

    query_string = f"""
    SELECT
      *
    FROM `{table_id}`
    """

    df = (
        bqclient.query(query_string)
        .result()
        .to_dataframe(bqstorage_client=bqstorageclient)
    )

    # Environment-provided locations take precedence over the arguments.
    if os.environ.get('AIP_MODEL_DIR') is not None:
        model_dir = os.environ["AIP_MODEL_DIR"]

    log_dir = tensorboard_log_dir
    if os.environ.get('AIP_TENSORBOARD_LOG_DIR') is not None:
        log_dir = os.environ["AIP_TENSORBOARD_LOG_DIR"]

    logging.info('Creating and training model ...')

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dropout(.2),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dropout(.2),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
      log_dir=log_dir,
      histogram_freq=1)

    df = df.fillna(0)

    print(df.head(3))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, clipnorm=1.0),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[tf.keras.metrics.AUC()])

    # Rows after the hold-out slice are used for training.
    X_train = df[feature_columns][holdout_rows:]
    y_train = df[label_column][holdout_rows:]

    model.fit(X_train, y_train, epochs=2, verbose=1, batch_size=8, callbacks=[tensorboard_callback])
    logging.info(f'Exporting SavedModel to: {model_dir}')
    model.save(model_dir)

    # evaluate() returns [loss, auc] because exactly one metric was compiled in.
    result = model.evaluate(df[feature_columns][:holdout_rows], df[label_column][:holdout_rows])

    component_outputs = NamedTuple("Ouptputs",
                                [("output_uri", str),],)

    # Index 1 is the AUC; index 0 is the loss and must not be reported as auROC.
    metrics.log_metric("auROC", float(result[1]))

    return component_outputs(model_dir)

In [None]:
@kfp.dsl.pipeline(name="telecom-churn-model-pipeline")
def pipeline(project: str = PROJECT_ID,
             bq_training_data: str = TABLE_ID,
             bq_training_dat_uri: str = TRAINING_DATA_TABLE,
             model_display_name: str = MODEL_DISPLAY_NAME,
             region: str = REGION,
):
    """Create the dataset, train the model, upload it and deploy it to a new endpoint.

    Args:
        project: Project in which all resources are created.
        bq_training_data: BigQuery table ID read by the training component.
        bq_training_dat_uri: bq:// URI of the same table, used as the tabular dataset source.
        model_display_name: Display name for the uploaded and the deployed model.
        region: Region for the dataset, the uploaded model and the endpoint.
    """
    dataset_create_op = vertex_ai_components.TabularDatasetCreateOp(
        project=project, display_name='churn_datasets',
        location=region, bq_source=bq_training_dat_uri)

    # MODEL_DIR and TENSORBOARD_LOG_DIR are notebook globals fixed at compile time.
    custom_training_op = custom_training(
        project_id=project,
        table_id=bq_training_data,
        model_dir=MODEL_DIR,
        tensorboard_log_dir=TENSORBOARD_LOG_DIR,
    )

    # No data dependency links these two steps, so the ordering is made explicit.
    custom_training_op.after(dataset_create_op)

    # Wrap the saved model directory as an unmanaged container model artifact.
    # NOTE(review): training uses a TensorFlow 2.5 image while this serving image
    # is TF 2.3 — confirm compatibility or switch to a matching serving image.
    unmanaged_model_importer = importer_node.importer(
        artifact_uri=custom_training_op.outputs["output_uri"],
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={
            "containerSpec": {
                "imageUri": "us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-3:latest"
            }
        },
    )

    unmanaged_model_importer.after(custom_training_op)

    # Pass the region explicitly; otherwise the model is uploaded to the
    # component's default location regardless of the `region` parameter.
    model_upload_op = ModelUploadOp(
        project=project,
        location=region,
        display_name=model_display_name,
        unmanaged_container_model=unmanaged_model_importer.outputs["artifact"],
    )

    model_upload_op.after(unmanaged_model_importer)

    # The endpoint must be in the same region as the model it will serve.
    endpoint_create_op = EndpointCreateOp(
        project=project,
        location=region,
        display_name="pipelines-created-endpoint",
    )

    # Send all traffic to the newly deployed model on a single replica.
    custom_model_deploy_job = ModelDeployOp(
        endpoint=endpoint_create_op.outputs["endpoint"],
        model=model_upload_op.outputs["model"],
        traffic_split={"0": 100},
        deployed_model_display_name=model_display_name,
        dedicated_resources_machine_type="n1-standard-16",
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )

    custom_model_deploy_job.after(model_upload_op)

In [None]:
from kfp.v2 import compiler

# Compile the pipeline function into a JSON job spec in the working directory.
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="telecom_churn_model_pipeline.json"
)

In [None]:
job = vertex.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path="telecom_churn_model_pipeline.json",
    pipeline_root=PIPELINE_ROOT,
    enable_caching=True
)

print (job.run())

! rm telecom_churn_model_pipeline.json

In [None]:
import random

COUNTER = 25  # Number of prediction requests to send. You can change this

# Look the endpoint up once instead of on every iteration: take the last entry
# of the list ordered by update time.
endpoints = vertex.Endpoint.list(order_by="update_time")
if not endpoints:
    raise RuntimeError(
        "No Vertex AI endpoint found; run the pipeline above before sending predictions."
    )
endpoint = endpoints[-1]

for remaining in range(COUNTER, 0, -1):
    print(remaining)

    # Draw one random feature vector per request.
    mou_m1 = round(random.uniform(-200, 200000), 2)
    mou_m2 = round(random.uniform(-200, 200000), 2)
    mou_m3 = round(random.uniform(-20000, 200000), 2)
    arpu_m1 = round(random.uniform(-1500, 5000), 2)
    arpu_m2 = round(random.uniform(-15000, 50000), 2)
    arpu_m3 = round(random.uniform(-1500, 5000), 2)

    # Each request carries four copies of the same feature vector.
    features = [mou_m1, mou_m2, mou_m3, arpu_m1, arpu_m2, arpu_m3]
    instances = [list(features) for _ in range(4)]

    prediction = endpoint.predict(instances=instances)

    print(remaining - 1, prediction)

print('done')

# Deprecated Code