## Train model

In [1]:
%%writefile requirements.txt
fastapi
uvicorn==0.17.6
joblib~=1.1.1
numpy>=1.17.3, <1.24.0
scikit-learn
pandas
google-cloud-storage>=2.2.1,<3.0.0dev
google-cloud-aiplatform[prediction]>=1.18.2

Overwriting requirements.txt


In [2]:
!pip install -U --user -r requirements.txt

Collecting fastapi (from -r requirements.txt (line 1))
  Using cached fastapi-0.110.0-py3-none-any.whl.metadata (25 kB)
Collecting scikit-learn (from -r requirements.txt (line 5))
  Using cached scikit_learn-1.4.1.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting google-cloud-aiplatform>=1.18.2 (from google-cloud-aiplatform[prediction]>=1.18.2->-r requirements.txt (line 8))
  Downloading google_cloud_aiplatform-1.43.0-py2.py3-none-any.whl.metadata (27 kB)
Collecting starlette<0.37.0,>=0.36.3 (from fastapi->-r requirements.txt (line 1))
  Using cached starlette-0.36.3-py3-none-any.whl.metadata (5.9 kB)
INFO: pip is looking at multiple versions of scikit-learn to determine which version is compatible with other requirements. This could take a while.
Collecting scikit-learn (from -r requirements.txt (line 5))
  Using cached scikit_learn-1.4.0-1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading google_cloud_ai

In [3]:
USER_SRC_DIR = "src_dir"
!mkdir $USER_SRC_DIR
!mkdir model_artifacts

# copy the requirements to the source dir
!cp requirements.txt $USER_SRC_DIR/requirements.txt

mkdir: cannot create directory ‘src_dir’: File exists
mkdir: cannot create directory ‘model_artifacts’: File exists


In [4]:
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

import joblib
import logging

# set logging to see the docker container logs
logging.basicConfig(level=logging.INFO)

In [5]:
# Region used for aiplatform.init() and for the Artifact Registry host name.
REGION = "europe-west1"
# Folder under BUCKET_NAME that receives model.joblib and preprocessor.json.
MODEL_ARTIFACT_DIR = "sklearn-model-artifacts"
# Artifact Registry repository and image name of the serving container.
REPOSITORY = "diamonds"
IMAGE = "sklearn-image"
# Display name given to the model when it is uploaded to Vertex AI.
MODEL_DISPLAY_NAME = "diamonds-cpr"

# Replace with your project
PROJECT_ID = "project-4-vertexai"

# Replace with your bucket
# (the value already carries the gs:// scheme, so it is used as-is in gsutil paths)
BUCKET_NAME = "gs://project-4-vertexai-cpr-bucket"


In [6]:
# Load the seaborn "diamonds" dataset.
data = sns.load_dataset('diamonds', cache=True, data_home=None)

# Name of the target column; every other column is used as a feature.
label = 'price'

# Split target and features via `label` so the column name lives in one place.
y_train = data[label]
x_train = data.drop(columns=[label])

In [7]:
# Preview the feature frame; the column order matters because the column
# transformer in this notebook selects columns by position.
x_train.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,4.34,4.35,2.75


In [8]:
# Columns are selected by position, so the fitted pipeline also accepts plain
# lists/arrays at prediction time (no column names required):
#   1, 2, 3          -> cut, color, clarity (categorical, one-hot encoded)
#   0, 4, 5, 6, 7, 8 -> carat, depth, table, x, y, z (numeric, standardized)
# `sparse_output` replaces the `sparse` argument, which was deprecated in
# scikit-learn 1.2 and removed in 1.4; this spelling needs scikit-learn >= 1.2.
column_transform = make_column_transformer(
    (preprocessing.OneHotEncoder(sparse_output=False), [1, 2, 3]),
    (preprocessing.StandardScaler(), [0, 4, 5, 6, 7, 8]))

In [9]:
# Random forest with trees capped at depth 10; random_state is fixed so that
# repeated fits give the same model.
regr = RandomForestRegressor(max_depth=10, random_state=0)

In [10]:
# Chain preprocessing and the regressor so that one object is fit, saved and served.
my_pipeline = make_pipeline(column_transform, regr)

In [11]:
# Fit the transformers and the forest on the whole dataset loaded above.
my_pipeline.fit(x_train, y_train)



In [12]:
# Smoke test: predict one sample passed as a plain list in training column order
# (carat, cut, color, clarity, depth, table, x, y, z).
my_pipeline.predict([[0.23, 'Ideal', 'E', 'SI2', 61.5, 55.0, 3.95, 3.98, 2.43]])



array([497.61779835])

In [13]:
# Persist the fitted pipeline locally, then copy it to the bucket folder that is
# later passed to Vertex AI as the model's artifact_uri.
joblib.dump(my_pipeline, 'model_artifacts/model.joblib')
!gsutil cp model_artifacts/model.joblib {BUCKET_NAME}/{MODEL_ARTIFACT_DIR}/


Copying file://model_artifacts/model.joblib [Content-Type=application/octet-stream]...
- [1 files][ 11.4 MiB/ 11.4 MiB]                                                
Operation completed over 1 objects/11.4 MiB.                                     


## Save a preprocessing artifact

In [14]:
# Maps long-form clarity names to the abbreviated codes used as the clarity
# feature. "Slightly Included" maps to "SI2" (letter I), the code that appears
# in the training data; the previous "S12" (digit 1) is not a clarity code and
# would be rejected by the one-hot encoder as an unknown category.
# NOTE(review): "FL" and "I3" may not occur among the categories seen during
# training either — confirm against the dataset's clarity values.
clarity_dict={"Flawless": "FL",
              "Internally Flawless": "IF",
              "Very Very Slightly Included": "VVS1",
              "Very Slightly Included": "VS2",
              "Slightly Included": "SI2",
              "Included": "I3"}

In [15]:
import json
with open("model_artifacts/preprocessor.json", "w") as f:
    json.dump(clarity_dict, f)

!gsutil cp model_artifacts/preprocessor.json {BUCKET_NAME}/{MODEL_ARTIFACT_DIR}/

Copying file://model_artifacts/preprocessor.json [Content-Type=application/json]...
/ [1 files][  165.0 B/  165.0 B]                                                
Operation completed over 1 objects/165.0 B.                                      


## Build a custom serving container using the CPR model server

In [16]:
%%writefile $USER_SRC_DIR/predictor.py

import joblib
import numpy as np
import json

from google.cloud import storage
from google.cloud.aiplatform.prediction.sklearn.predictor import SklearnPredictor


class CprPredictor(SklearnPredictor):
    """Sklearn predictor that normalizes the clarity feature and formats prices.

    On top of the base SklearnPredictor behavior, it loads a JSON lookup of
    long-form clarity names to abbreviated codes, rewrites long-form names in
    incoming instances before prediction, and returns predictions as rounded,
    dollar-prefixed strings.
    """

    def __init__(self):
        # Nothing to set up here; all state is created in load().
        return

    def load(self, artifacts_uri: str) -> None:
        """Loads the sklearn pipeline and preprocessing artifact.

        Args:
            artifacts_uri: Location of the model artifacts; forwarded unchanged
                to SklearnPredictor.load().
        """

        super().load(artifacts_uri)

        # open preprocessing artifact
        # NOTE(review): the path is relative — presumably super().load() has
        # placed the artifacts in the working directory; confirm.
        with open("preprocessor.json", "rb") as f:
            self._preprocessor = json.load(f)


    def preprocess(self, prediction_input: np.ndarray) -> np.ndarray:
        """Replaces long-form clarity names with their abbreviated codes.

        Args:
            prediction_input: The request payload, forwarded unchanged to
                SklearnPredictor.preprocess().
                NOTE(review): presumably the parsed request body with an
                "instances" key rather than an array — confirm.

        Returns:
            The instances returned by the parent preprocess(), with the clarity
            entry (position 3 of each instance) abbreviated where a long-form
            name was given.
        """

        inputs = super().preprocess(prediction_input)

        for sample in inputs:
            # Map a known long-form name to its code; leave every other value
            # untouched. Indexing the dict directly raised KeyError for values
            # that are neither a key nor a value of the lookup (e.g. 'SI2',
            # 'VS1'), so otherwise valid requests failed during preprocessing.
            sample[3] = self._preprocessor.get(sample[3], sample[3])
        return inputs

    def postprocess(self, prediction_results: np.ndarray) -> dict:
        """Rounds the predictions and formats each one as a "$"-prefixed string.

        Args:
            prediction_results: Raw predictions produced by the model.

        Returns:
            A dict with a single "predictions" key holding one string per
            prediction, e.g. "$479.0".
        """

        return {"predictions": [f"${value}" for value in np.round(prediction_results)]}


Overwriting src_dir/predictor.py


In [17]:
# Build the container image
import importlib
import os

from google.cloud import aiplatform
from google.cloud.aiplatform.prediction import LocalModel

aiplatform.init(project=PROJECT_ID, location=REGION)

# Import the predictor from the package named by USER_SRC_DIR instead of a
# hard-coded `src_dir`, so changing the constant does not silently leave this
# import pointing at a stale directory. Assumes USER_SRC_DIR is a plain directory
# name that is importable from the notebook's working directory.
CprPredictor = importlib.import_module(f"{USER_SRC_DIR}.predictor").CprPredictor

# Build the serving image from USER_SRC_DIR, tagged with the Artifact Registry
# URI it is pushed to afterwards.
local_model = LocalModel.build_cpr_model(
    USER_SRC_DIR,
    f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}",
    predictor=CprPredictor,
    requirements_path=os.path.join(USER_SRC_DIR, "requirements.txt"),
)


INFO:google.cloud.aiplatform.docker_utils.build:Running command: docker build -t europe-west1-docker.pkg.dev/project-4-vertexai/diamonds/sklearn-image --rm -f- src_dir
  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)
INFO:google.cloud.aiplatform.docker_utils.local_util:Sending build context to Docker daemon  11.82kB
INFO:google.cloud.aiplatform.docker_utils.local_util:

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 1/14 : FROM python:3.10

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> 22546fe66182

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 2/14 : ENV PYTHONDONTWRITEBYTECODE=1

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> Using cache

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> 8ccd0fbe21f6

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 3/14 : EXPOSE 8080

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> Using cache

INFO:google.cloud.aiplatform.dock

In [18]:
# Create a test json file with samples
import json

# One instance carries an abbreviated clarity code, the other a long-form name.
short_code_row = [0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43]
long_name_row = [0.29, 'Premium', 'J', 'Internally Flawless', 52.5, 49.0, 4.00, 2.13, 3.11]
sample = {"instances": [short_code_row, long_name_row]}

# Write the request body that the local endpoint test sends.
with open('instances.json', 'w') as out_file:
    out_file.write(json.dumps(sample))


In [20]:
# Test the container locally by deploying a local model.
# NOTE(review): the endpoint is used as a context manager; presumably its
# container is stopped when the `with` block exits — confirm.
with local_model.deploy_to_local_endpoint(
    artifact_uri = 'model_artifacts/', # local path to artifacts
) as local_endpoint:
    # Send the sample instances stored in instances.json as a JSON request.
    predict_response = local_endpoint.predict(
        request_file='instances.json',
        headers={"Content-Type": "application/json"},
    )
    print("--- --- predict response start --- ---")    
    print(predict_response, predict_response.content)
    print("--- --- predict response end   --- ---")        
    # Also exercise the container's health check.
    health_check_response = local_endpoint.run_health_check()
    print("--- --- health check response start --- ---")    
    print(health_check_response, health_check_response.content)
    print("--- --- health check response end --- ---")    

    # Print the container logs while still inside the `with` block.
    local_endpoint.print_container_logs()



INFO:google.cloud.aiplatform.prediction.local_endpoint:Got the project id from the global config: project-4-vertexai.
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
INFO:google.cloud.aiplatform.docker_utils.run:INFO:uvicorn.error:Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit)
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Started parent process [1]
INFO:google.cloud.aiplatform.docker_utils.run:INFO:uvicorn.error:Started parent process [1]
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Started server process [10]
INFO:google.cloud.aiplatform.docker_utils.run:03/02/2024 09:41:03 AM: Started server process [10]
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Waiting for application startup.
INFO:google.cloud.aiplatform.docker_utils.run:03/02/2024 09:41:03 AM: Waiting for application startup.
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Application startup complete.
IN

--- --- predict response start --- ---
<Response [200]> b'{"predictions": ["$479.0", "$586.0"]}'
--- --- predict response end   --- ---
--- --- health check response start --- ---
<Response [200]> b'{}'
--- --- health check response end --- ---


In [21]:
# Print the container logs once more; by now the `with` block that created
# local_endpoint has already exited.
local_endpoint.print_container_logs()

INFO:google.cloud.aiplatform.docker_utils.run:03/02/2024 09:41:04 AM: Shutting down
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Shutting down
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Waiting for application shutdown.
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Application shutdown complete.
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Finished server process [9]
INFO:google.cloud.aiplatform.docker_utils.run:03/02/2024 09:41:04 AM: Waiting for application shutdown.
INFO:google.cloud.aiplatform.docker_utils.run:03/02/2024 09:41:04 AM: Application shutdown complete.
INFO:google.cloud.aiplatform.docker_utils.run:03/02/2024 09:41:04 AM: Finished server process [9]
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Shutting down
INFO:google.cloud.aiplatform.docker_utils.run:03/02/2024 09:41:05 AM: Shutting down
INFO:google.cloud.aiplatform.docker_utils.run:INFO:     Waiting for application shutdown.
INFO:google.cloud.aiplatform.docker_ut

In [22]:
# Raw body (bytes) of the prediction response captured during the local test.
predict_response.content

b'{"predictions": ["$479.0", "$586.0"]}'

## Deploy model to Vertex AI

In [23]:
# configure Docker to access Artifact Registry
!gcloud artifacts repositories create {REPOSITORY} --repository-format=docker \
--location=europe-west1 --description="Docker repository"

!gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet


[1;31mERROR:[0m (gcloud.artifacts.repositories.create) ALREADY_EXISTS: the repository already exists

{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "europe-west1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: europe-west1-docker.pkg.dev
gcloud credential helpers already registered correctly.


In [24]:
# push the image
# (uploads the locally built serving image to the Artifact Registry URI it was tagged with)
local_model.push_image()

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)
INFO:google.cloud.aiplatform.docker_utils.local_util:Using default tag: latest

INFO:google.cloud.aiplatform.docker_utils.local_util:The push refers to repository [europe-west1-docker.pkg.dev/project-4-vertexai/diamonds/sklearn-image]

INFO:google.cloud.aiplatform.docker_utils.local_util:5c4c83e9180f: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:fc5ed6eed84f: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:d8da26d4fe1b: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:6c89034236a7: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:13e9fcf92c67: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:5b8a506fb91c: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:a6267a497621: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:84f540ade319: Preparing

INFO:google.cloud.aiplatform.docker_utils.lo

In [25]:
# Upload the model
# Registers the serving container described by local_model together with the
# bucket folder that holds model.joblib and preprocessor.json.
model = aiplatform.Model.upload(
    local_model=local_model,
    display_name=MODEL_DISPLAY_NAME,
    artifact_uri=f"{BUCKET_NAME}/{MODEL_ARTIFACT_DIR}",
)


INFO:google.auth.compute_engine._metadata:Compute Engine Metadata server call to universe/universe_domain returned 404, reason: <!DOCTYPE html>
<html lang=en>
  <meta charset=utf-8>
  <meta name=viewport content="initial-scale=1, minimum-scale=1, width=device-width">
  <title>Error 404 (Not Found)!!1</title>
  <style>
    *{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{background:#fff;color:#222;padding:15px}body{margin:7% auto 0;max-width:390px;min-height:180px;padding:30px 0 15px}* > body{background:url(//www.google.com/images/errors/robot.png) 100% 5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflow:hidden}ins{color:#777;text-decoration:none}a img{border:0}@media screen and (max-width:772px){body{background:none;margin-top:0;max-width:none;padding-right:0}}#logo{background:url(//www.google.com/images/branding/googlelogo/1x/googlelogo_color_150x54dp.png) no-repeat;margin-left:-5px}@media only screen and (min-resolution:192dpi){#logo{background:url(//ww

In [26]:
# deploy the model
# No endpoint is passed, so deploy() creates one (see "Creating Endpoint" in the
# log) and serves the model on an n1-standard-2 machine.
endpoint = model.deploy(machine_type="n1-standard-2")

INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/17510995775/locations/europe-west1/endpoints/88092871617413120/operations/2284602093832503296
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/17510995775/locations/europe-west1/endpoints/88092871617413120
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/17510995775/locations/europe-west1/endpoints/88092871617413120')
INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/17510995775/locations/europe-west1/endpoints/88092871617413120
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/17510995775/locations/europe-west1/endpoints/88092871617413120/operations/1010083399286652928
INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/17510995775/locations

In [27]:
# Test the model
# This instance already uses an abbreviated clarity code ('VS2').
endpoint.predict(instances=[[0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43]])

Prediction(predictions=['$479.0'], deployed_model_id='2190618588669280256', metadata=None, model_version_id='1', model_resource_name='projects/17510995775/locations/europe-west1/models/214629067788386304', explanations=None)

In [29]:
# This instance uses a long-form clarity name, which exercises the predictor's
# clarity mapping in preprocess().
endpoint.predict(instances=[[0.29, 'Premium', 'J', 'Internally Flawless', 52.5, 49.0, 4.00, 2.13, 3.11]])

Prediction(predictions=['$586.0'], deployed_model_id='2190618588669280256', metadata=None, model_version_id='1', model_resource_name='projects/17510995775/locations/europe-west1/models/214629067788386304', explanations=None)