# Google Cloud Platform Vertex AI - Model Building using the codelabs - https://codelabs.developers.google.com/vertex-cpr-sklearn?hl=en#0

Install the required dependencies that are needed to build the model

In [1]:
%%writefile requirements.txt
fastapi
uvicorn==0.17.6
joblib~=1.0
numpy~=1.20
scikit-learn~=0.24
pandas
google-cloud-storage>=1.26.0,<2.0.0dev
google-cloud-aiplatform[prediction]>=1.16.0

Overwriting requirements.txt


Pip install the dependencies in the notebook.

In [2]:
!pip install -U --user -r requirements.txt

Collecting fastapi (from -r requirements.txt (line 1))
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn==0.17.6 (from -r requirements.txt (line 2))
  Downloading uvicorn-0.17.6-py3-none-any.whl.metadata (6.2 kB)
Collecting joblib~=1.0 (from -r requirements.txt (line 3))
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting numpy~=1.20 (from -r requirements.txt (line 4))
  Downloading numpy-1.26.4-cp312-cp312-win_amd64.whl.metadata (61 kB)
Collecting scikit-learn~=0.24 (from -r requirements.txt (line 5))
  Downloading scikit-learn-0.24.2.tar.gz (7.5 MB)
     ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
     ---------------------------------------- 0.0/7.5 MB ? eta -:--:--
     - -------------------------------------- 0.3/7.5 MB ? eta -:--:--
     ---- ----------------------------------- 0.8/7.5 MB 2.4 MB/s eta 0:00:03
     --------- ------------------------------ 1.8/7.5 MB 3.6 MB/s eta 0:00:02
     -------------

  error: subprocess-exited-with-error
  
  × Preparing metadata (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [20 lines of output]
      Partial import of sklearn during the build process.
      Traceback (most recent call last):
        File "C:\Users\AnaCarolineFerreiraR\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 389, in <module>
          main()
        File "C:\Users\AnaCarolineFerreiraR\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 373, in main
          json_out["return_val"] = hook(**hook_input["kwargs"])
                                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        File "C:\Users\AnaCarolineFerreiraR\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8

Create the directories where the model artifacts are stored

In [None]:
USER_SRC_DIR = "src_dir"

In [3]:
!mkdir $USER_SRC_DIR

In [4]:
!mkdir model_artifacts

In [5]:
# copy the requirements to the source dir
!cp requirements.txt $USER_SRC_DIR/requirements.txt

'cp' is not recognized as an internal or external command,
operable program or batch file.


This would be a sklearn based model, import the necessary packages

In [None]:
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

import joblib
import logging

# set logging to see the docker container logs
logging.basicConfig(level=logging.INFO)

Identifiers to describe the model and the cloud storage bucket details.

In [None]:
REGION = "us-central1"
MODEL_ARTIFACT_DIR = "wos-sklearn-model-artifacts"
REPOSITORY = "wos-diamonds"
IMAGE = "wos-sklearn-image"
MODEL_DISPLAY_NAME = "wos-diamonds-cpr"

# Replace with your project
PROJECT_ID = "driven-density-377506"

# Replace with your bucket
BUCKET_NAME = "gs://driven-density-xxxxx-wos-cpr-bucket"

Load the data

In [None]:
data = sns.load_dataset('diamonds', cache=True, data_home=None)

label = 'price'

y_train = data['price']
x_train = data.drop(columns=['price'])

In [None]:
x_train.head()

In [None]:
y_train.head()

Column transformations - One hot encode the categorical features and scale the numerical features

In [None]:
column_transform = make_column_transformer(
    (preprocessing.OneHotEncoder(sparse=False), [1,2,3]),
    (preprocessing.StandardScaler(), [0,4,5,6,7,8]))

Create a RandomForestRegressor

In [None]:
regr = RandomForestRegressor(max_depth=10, random_state=0)

Create the model pipeline and fit it with the training data

In [None]:
my_pipeline = make_pipeline(column_transform, regr)

In [None]:
my_pipeline.fit(x_train, y_train)

Perform local predictions

In [None]:
my_pipeline.predict([[0.23, 'Ideal', 'E', 'SI2', 61.5, 55.0, 3.95, 3.98, 2.43]])

Export the model pipeline to the artifacts folder

In [None]:
joblib.dump(my_pipeline, 'model_artifacts/model.joblib')

Copy the model artifact the the cloud storage bucket

In [None]:
!gsutil cp model_artifacts/model.joblib {BUCKET_NAME}/{MODEL_ARTIFACT_DIR}/

Define the pre-processing map for model inference

In [None]:
clarity_dict={"Flawless": "FL",
              "Internally Flawless": "IF",
              "Very Very Slightly Included": "VVS1",
              "Very Slightly Included": "VS2",
              "Slightly Included": "S12",
              "Included": "I3"}

In [None]:
import json
with open("model_artifacts/preprocessor.json", "w") as f:
    json.dump(clarity_dict, f)

In [None]:
!gsutil cp model_artifacts/preprocessor.json {BUCKET_NAME}/{MODEL_ARTIFACT_DIR}/

Define the Custom Prediction Routine to load the model, pre-process the data and post-process the scoring response to what the wrapping WML scoring endpoint and thereby OpenScale expects

In [None]:
%%writefile $USER_SRC_DIR/predictor.py

import joblib
import numpy as np
import json

from google.cloud import storage
from google.cloud.aiplatform.prediction.sklearn.predictor import SklearnPredictor


class CprPredictor(SklearnPredictor):

    def __init__(self):
        return

    def load(self, artifacts_uri: str) -> None:
        """Loads the sklearn pipeline and preprocessing artifact."""

        super().load(artifacts_uri)

        # open preprocessing artifact
        with open("preprocessor.json", "rb") as f:
            self._preprocessor = json.load(f)


    def preprocess(self, prediction_input: np.ndarray) -> np.ndarray:
        """Performs preprocessing by checking if clarity feature is in abbreviated form."""

        inputs = super().preprocess(prediction_input)

        for sample in inputs:
            if sample[3] not in self._preprocessor.values():
                sample[3] = self._preprocessor[sample[3]]
        return inputs

    def postprocess(self, prediction_results: np.ndarray) -> dict:
        """Performs postprocessing by rounding predictions and converting to WML scoring format."""
        # return {"predictions": [f"${value}" for value in np.round(prediction_results)]}                                
        return {"predictions": [{"fields":["prediction"], "values":[[value] for value in np.round(prediction_results)]}]}

Build the Custom Routine Predictor docker image

In [None]:
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=REGION)

import os

from google.cloud.aiplatform.prediction import LocalModel

from src_dir.predictor import CprPredictor  # Should be path of variable $USER_SRC_DIR

local_model = LocalModel.build_cpr_model(
    USER_SRC_DIR,
    f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}",
    predictor=CprPredictor,
    requirements_path=os.path.join(USER_SRC_DIR, "requirements.txt"),
)

In [None]:
import json

sample = {"instances": [
  [0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43],
  [0.29, 'Premium', 'J', 'Internally Flawless', 52.5, 49.0, 4.00, 2.13, 3.11]]}

with open('instances.json', 'w') as fp:
    json.dump(sample, fp)

Make local predictions against the predictor routine

In [None]:
with local_model.deploy_to_local_endpoint(
    artifact_uri = 'model_artifacts/', # local path to artifacts
) as local_endpoint:
    predict_response = local_endpoint.predict(
        request_file='instances.json',
        headers={"Content-Type": "application/json"},
    )

    health_check_response = local_endpoint.run_health_check()

The scoring response..

In [None]:
predict_response.content

In [None]:
REPOSITORY

In [None]:
!gcloud artifacts repositories create {REPOSITORY} --repository-format=docker --location=us-central1 --description="Docker repository"

In [None]:
!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet

Push the custom routine predictor docker image

In [None]:
local_model.push_image()

In [None]:
MODEL_DISPLAY_NAME

In [None]:
BUCKET_NAME

In [None]:
MODEL_ARTIFACT_DIR

In [None]:
model = aiplatform.Model.upload(local_model = local_model,
                                display_name=MODEL_DISPLAY_NAME,
                                artifact_uri=f"{BUCKET_NAME}/{MODEL_ARTIFACT_DIR}",)

Create an endpoint to the Custom Routine Predictor Image

In [None]:
endpoint = model.deploy(machine_type="n1-standard-2")

Perform scoring against the endpoint

In [None]:
endpoint.predict(instances=[[0.23, 'Ideal', 'E', 'IF', 61.5, 55.0, 3.95, 3.98, 2.43], 
                            [0.32, 'Fair', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43],                     
                            [0.39, 'Ideal', 'E', 'Very Very Slightly Included', 90.5, 55.0, 3.95, 3.98, 2.43],
                            [0.39, 'Ideal', 'E', 'IF', 90.5, 55.0, 3.95, 3.98, 2.43]
                           ])

In [None]:
endpoint.predict(instances=[[0.23, 'Ideal', 'E', 'IF', 61.5, 55.0, 3.95, 3.98, 2.43]
                           ])