## Project Setup

In [1]:
project_id = "curious-skyline-360213"
dataset_id = "CX"
pipeline_root_path = "gs://curious-skyline/Projects/CX/CXpipeline"

In [2]:
import kfp
from google.cloud import aiplatform

from google_cloud_pipeline_components import aiplatform as gcc_aip
from google_cloud_pipeline_components.v1.endpoint import (EndpointCreateOp, ModelDeployOp)
from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
from kfp.v2 import compiler


## Create AutoML tabular models for each of the model selection possibilities

In [49]:
@kfp.dsl.pipeline(name="train-deploy-ltr-predict-models",
                  description="One model per defined type of available data",
                  pipeline_root=pipeline_root_path)

def pipeline(project: str = project_id):
    dataset_create_op = gcc_aip.TabularDatasetCreateOp(
        project=project_id, 
        location='us-west1',
        display_name="Comments_and_LTR_Doctor", 
        bq_source="bq://curious-skyline-360213.CX.CX_Stream_hour_partition_processed"
        ### TODO ###
        #This should be changed to output of a custom query with a predefined lookback
        #Possibly for a predefined enterprise granularity
    )
    
    ### LTR_Facility MODELS ###
    ### comments and ltr_doctor -> ltr_facility ###
    training_op_1 = gcc_aip.AutoMLTabularTrainingJobRunOp(
        project=project,
        location='us-west1',
        display_name="train-automl-Comments_and_LTR_Doctor",
        optimization_prediction_type="regression",
        optimization_objective="minimize-rmse",
        column_transformations=[
            {"numeric": {"column_name": "Anything_Outstanding_sentiment_score"}},
            {"numeric": {"column_name": "Anything_Outstanding_sentiment_magnitude"}},
            {"numeric": {"column_name": "Improve_Stay_sentiment_score"}},
            {"numeric": {"column_name": "Improve_Stay_sentiment_magnitude"}},
            {"numeric": {"column_name": "LTR_Doctor"}}
        ],
        dataset=dataset_create_op.outputs["dataset"],
        target_column="LTR_Facility",
    ).after(dataset_create_op)

    endpoint_op_1 = EndpointCreateOp(
        project=project,
        location='us-west1',
        display_name="endpoint-comments-and-ltr-doctor",
    )

    ModelDeployOp(
        model=training_op_1.outputs["model"],
        endpoint=endpoint_op_1.outputs["endpoint"],
        dedicated_resources_machine_type="n1-standard-4",
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    ).after(training_op_1)
    
    
    ### comments and ltr_facility -> ltr_doctor ###
    training_op_2 = gcc_aip.AutoMLTabularTrainingJobRunOp(
        project=project,
        location='us-west1',
        display_name="train-automl-Comments_and_LTR_Facility",
        optimization_prediction_type="regression",
        optimization_objective="minimize-rmse",
        column_transformations=[
            {"numeric": {"column_name": "Anything_Outstanding_sentiment_score"}},
            {"numeric": {"column_name": "Anything_Outstanding_sentiment_magnitude"}},
            {"numeric": {"column_name": "Improve_Stay_sentiment_score"}},
            {"numeric": {"column_name": "Improve_Stay_sentiment_magnitude"}},
            {"numeric": {"column_name": "LTR_Facility"}}
        ],
        dataset=dataset_create_op.outputs["dataset"],
        target_column="LTR_Doctor",
    ).after(dataset_create_op)

    endpoint_op_2 = EndpointCreateOp(
        project=project,
        location='us-west1',
        display_name="endpoint-comments-and-ltr-facility",
    )

    ModelDeployOp(
        model=training_op_2.outputs["model"],
        endpoint=endpoint_op_2.outputs["endpoint"],
        dedicated_resources_machine_type="n1-standard-4",
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    ).after(training_op_2)

In [50]:
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="automl-ltr-model-test.json".replace(" ", "_"),
)



In [51]:
DISPLAY_NAME = "automl-ltr-test"

job = aiplatform.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path="automl-ltr-model-test.json",
    pipeline_root=pipeline_root_path,
    enable_caching=False,
)

job.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/348611359036/locations/us-central1/pipelineJobs/train-deploy-ltr-predict-models-20220901203851
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/348611359036/locations/us-central1/pipelineJobs/train-deploy-ltr-predict-models-20220901203851')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/train-deploy-ltr-predict-models-20220901203851?project=348611359036


## Test endpoint for predictions -- ONLINE

In [99]:
def predict_tabular_regression_sample(
    endpoint_name: str,
    instances: List[Dict],
    location: str = 'us-west1',
    project: str = "348611359036"
):
    aiplatform.init(project=project, location=location)

    endpoint = aiplatform.Endpoint(endpoint_name)

    result = endpoint.predict(instances=instances)
    
    return result.predictions[0]['value']

    


In [237]:
predict_tabular_regression_sample(
    #project="348611359036",
    endpoint_name="1189530843765276672",
    #location='us-west1',
    instances=[{ "Anything_Outstanding_sentiment_score": 0.85,
            "Anything_Outstanding_sentiment_magnitude": 5.0,
            "Improve_Stay_sentiment_score": 0.24,
            "Improve_Stay_sentiment_magnitude": 1.1,
            "LTR_Facility": "4.0"}]
            
)

8.185041427612305

In [242]:
def load_data():

    query =f"""
    SELECT 
    *

    FROM curious-skyline-360213.CX.CX_Stream_hour_partition_processed
    """
    query_job = bq_client.query(query,
        location="us-west1",
    )

    return query_job.to_dataframe()

In [243]:
df_result = load_data()
df_result

Unnamed: 0,Market,Ministry,Department,Type_Of_Visit,Survey_Completion_Date,Survey_Type,Survey_Project,Survey_ID,Gender,Age_Group,...,Anything_Outstanding,Improve_Stay,publish_time,publish_hour_UTC,Anything_Outstanding_sentiment_score,Anything_Outstanding_sentiment_magnitude,Improve_Stay_sentiment_score,Improve_Stay_sentiment_magnitude,Facility_Model,Doctor_Model
0,MI,FLJAC,13354,Virtual,09-02-2022,OAS,18229,14040,Male,35-39,...,"The Oscars Ballast Point, Sulphur Springs, Mon...","Juneau, midtown several statues from antiquity...",2022-09-02 00:31:15.443000+00:00,0,0.0,0.0,0.2,0.2,Comments_and_LTR,Comments_and_LTR
1,FL,FLJAC,18699,Virtual,09-02-2022,HCAHPS,21718,12633,Male,18-24,...,"Distinction is as Volker Schlöndorff, Werner H...","Figures, only and Loken (2011) completed a stu...",2022-09-02 00:31:10.162000+00:00,0,-0.1,0.1,0.0,0.0,Comments_and_LTR,Comments_and_LTR
2,MI,FLJAC,24593,Virtual,09-02-2022,AMG,29088,29897,Male,18-24,...,"2.1%. The latitude as Salzburg, Austria. The t...",Or unpredictable (about 92,2022-09-02 00:31:10.162000+00:00,0,0.0,0.3,-0.4,0.4,Comments_and_LTR,Comments_and_LTR
3,FL,FLJAC,16755,Virtual,09-02-2022,HCAHPS,29831,17591,Male,25-29,...,"Autódromo José liberal, published since 1870),...",Union plans thrived for,2022-09-02 00:30:43.471000+00:00,0,0.0,0.0,0.4,0.4,Comments_and_LTR,Comments_and_LTR
4,TN,FLJAC,25275,Virtual,09-02-2022,HCAHPS,19871,14784,Female,35-39,...,Acquiring new arthropods or,Historians of are attached to native customs c...,2022-09-02 00:30:43.471000+00:00,0,0.0,0.0,0.0,0.0,Comments_and_LTR,Comments_and_LTR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,FL,MIDET,14048,In Person,09-01-2022,ED,29951,12023,Female,18-24,...,Priority. Argentina waves (wireless networking...,"Professional golf, hard-wired technology produ...",2022-09-01 00:31:40.221000+00:00,0,0.0,0.1,0.7,0.7,Comments_and_LTR,Comments_and_LTR
606,TX,MIDET,26781,In Person,09-01-2022,ED,11312,24891,Female,25-29,...,"Event, and about how the popularity of the Jap...","Finally, methods just over four million middle...",2022-09-01 00:31:40.221000+00:00,0,0.1,0.1,0.7,0.7,Comments_and_LTR,Comments_and_LTR
607,TN,MIDET,19465,Virtual,09-01-2022,HCAHPS,18137,26947,Female,18-24,...,And Jorge electricity carried by the Los Angel...,"Literature free multilateral aid, with the",2022-09-01 00:30:45.607000+00:00,0,-0.1,0.1,0.1,0.1,Comments_and_LTR,Comments_and_LTR
608,FL,TXAUS,17470,In Person,09-01-2022,HCAHPS,22508,10081,Female,30-34,...,Following C framework for understanding ethics...,"American Italian falls, by the",2022-09-01 00:30:45.607000+00:00,0,0.0,0.0,-0.1,0.1,Comments_and_LTR,Comments_and_LTR


In [244]:
df_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 610 entries, 0 to 609
Data columns (total 22 columns):
 #   Column                                    Non-Null Count  Dtype              
---  ------                                    --------------  -----              
 0   Market                                    610 non-null    object             
 1   Ministry                                  610 non-null    object             
 2   Department                                610 non-null    int64              
 3   Type_Of_Visit                             610 non-null    object             
 4   Survey_Completion_Date                    610 non-null    object             
 5   Survey_Type                               610 non-null    object             
 6   Survey_Project                            610 non-null    int64              
 7   Survey_ID                                 610 non-null    int64              
 8   Gender                                    610 non-null    ob

In [245]:
#Need a dictionary for endpoints and model_selection to select the right model endpoint based on available data.
Facility_Model_Endpoint_Select = {"Comments_and_LTR": "143006880355057664"}
Facility_Model_Columns_Select = {"Comments_and_LTR": ['Anything_Outstanding_sentiment_score', 
                                                             'Anything_Outstanding_sentiment_magnitude', 
                                                             'Improve_Stay_sentiment_score', 
                                                             'Improve_Stay_sentiment_magnitude', 
                                                             'LTR_Doctor']}


Doctor_Model_Endpoint_Select = {"Comments_and_LTR" : "1189530843765276672"}
Doctor_Model_Columns_Select = {"Comments_and_LTR": ['Anything_Outstanding_sentiment_score', 
                                                             'Anything_Outstanding_sentiment_magnitude', 
                                                             'Improve_Stay_sentiment_score', 
                                                             'Improve_Stay_sentiment_magnitude', 
                                                             'LTR_Facility']}


def generate_instance(model_select, df_row, target):
    if target == 'Doctor':
        return dict(zip(Doctor_Model_Columns_Select[model_select], [item for item in df_row]) )
    else:
        return dict(zip(Facility_Model_Columns_Select[model_select], [item for item in df_row]) )

In [250]:
## Serve prediction
df_result['Facility_Instance'] = df_result.apply(lambda z: generate_instance(z['Facility_Model'], z[Facility_Model_Columns_Select[z['Facility_Model']]], 'Facility'), axis=1)
df_result.iloc[0]['Facility_Instance']

{'Anything_Outstanding_sentiment_score': 0.0,
 'Anything_Outstanding_sentiment_magnitude': 0.0,
 'Improve_Stay_sentiment_score': 0.20000000298023224,
 'Improve_Stay_sentiment_magnitude': 0.20000000298023224,
 'LTR_Doctor': 4}

In [251]:
df_result['Predict_LTR_Facility'] = df_result.apply(lambda z: predict_tabular_regression_sample(endpoint_name=Facility_Model_Endpoint_Select[z['Facility_Model']], instances=[z['Facility_Instance']] ), axis=1)

InvalidArgument: 400 {"error": "Column: LTR_Doctor. Error: Expected string_value but got number_value. Type casting is not allowed."}

In [None]:
df_result['Doctor_Instance'] = df_result.apply(lambda z: generate_instance(z['Doctor_Model'], z, 'Doctor'), axis=1)
df_result['Predict_LTR_Doctor'] = df_result.apply(lambda z: predict_tabular_regression_sample(endpoint_name=Doctor_Model_Endpoint_Select[z['Doctor_Model']], instances=[z['Doctor_Instance']] ), axis=1)

df_result.drop(['Facility_Instance', 'Doctor_Instance'], axis=1, inplace=True)