## 1 - Environment Variables

In [1]:
# Google Cloud project and region used by every resource in this notebook.
PROJECT_ID = "dt-tu-sandbox-dev"
REGION = "us-central1"

# Cloud Storage bucket (plain name and gs:// URI) that holds the model artifacts.
BUCKET_NAME = "-".join([PROJECT_ID, "cpr-bucket"])
BUCKET_URI = "gs://" + BUCKET_NAME

## 2 - CREATE BUCKET

In [2]:
from google.cloud import storage

In [3]:
# Bind the client to PROJECT_ID explicitly so the bucket is created in the same
# project that aiplatform.init() is configured with, instead of whichever
# default project the ambient credentials resolve to.
storage_client = storage.Client(project=PROJECT_ID)

In [None]:
# Keep the cell re-runnable: create_bucket() fails with a conflict error when
# the bucket already exists, so reuse the existing bucket when there is one.
bucket = storage_client.lookup_bucket(BUCKET_NAME)
if bucket is None:
    bucket = storage_client.create_bucket(BUCKET_NAME, location=REGION)
bucket

## 3 - Setup Directories

In [15]:
%%writefile requirements.txt
# Dependencies of the custom prediction routine. This file is installed in the
# notebook environment and also copied into scr_dir/ for the serving image build.
# NOTE(review): the scikit-learn pin has to stay in step with the training code,
# which uses OneHotEncoder(sparse=False).
fastapi
uvicorn==0.17.6
joblib~=1.1.1
numpy>=1.17.3, <1.24.0
scikit-learn~=1.0.0
pandas
google-cloud-storage>=2.2.1,<3.0.0dev
google-cloud-aiplatform[prediction]>=1.18.2

Writing requirements.txt


In [16]:
!pip install -U --user -r requirements.txt

Collecting fastapi (from -r requirements.txt (line 1))
  Downloading fastapi-0.104.1-py3-none-any.whl.metadata (24 kB)
Collecting uvicorn==0.17.6 (from -r requirements.txt (line 2))
  Downloading uvicorn-0.17.6-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.6/53.6 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25hCollecting joblib~=1.1.1 (from -r requirements.txt (line 3))
  Downloading joblib-1.1.1-py2.py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.8/309.8 kB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
Collecting scikit-learn~=1.0.0 (from -r requirements.txt (line 5))
  Downloading scikit_learn-1.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.5/26.5 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting pandas (from -r requirements.txt (line 6))
  Downloading pand

In [17]:
!mkdir scr_dir

In [18]:
# The image build is given scr_dir/requirements.txt, so the file must live
# inside the source directory.
!cp requirements.txt scr_dir/requirements.txt

## Training the model!

In [4]:
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

import joblib
import logging

# INFO level surfaces the google.cloud.aiplatform log lines (docker build and
# push, model upload, endpoint deployment) in the cell outputs.
logging.basicConfig(level=logging.INFO)

In [5]:
# Folder under BUCKET_URI that receives model.joblib and preprocessor.json.
MODEL_ARTIFACT_DIR = "sklearn-model-artifacts"
# Artifact Registry Docker repository (part of the image URI, created via gcloud).
REPOSITORY = "diamonds"
# Name of the serving image inside REPOSITORY.
IMAGE = "sklearn-image"
# Display name given to the model uploaded to Vertex AI.
MODEL_DISPLAY_NAME = "diamonds-cpr"

In [6]:
# Load the seaborn "diamonds" example dataset.
data = sns.load_dataset('diamonds', cache=True, data_home=None)

# Target column; every remaining column is used as a feature.
label = 'price'

# Derive both frames from `label` so the target and the dropped feature column
# cannot drift apart if the target is ever changed.
y_train = data[label]
x_train = data.drop(columns=[label])

In [7]:
# Columns are addressed by position because the pipeline is later called with
# plain lists that carry no column names.
# Positions 1-3 hold the string-valued features (position 3 is clarity); the
# remaining positions are numeric.
categorical_encoder = preprocessing.OneHotEncoder(sparse=False)
numeric_scaler = preprocessing.StandardScaler()

column_transform = make_column_transformer(
    (categorical_encoder, [1, 2, 3]),
    (numeric_scaler, [0, 4, 5, 6, 7, 8]),
)

In [8]:
# Depth-limited random forest; random_state is fixed so repeated runs fit the
# same model.
regr = RandomForestRegressor(max_depth=10, random_state=0)

In [9]:
# Chain preprocessing and the regressor so that one object is fitted, saved
# and served.
my_pipeline = make_pipeline(column_transform, regr)

In [10]:
# Fits the column transformer and the random forest in a single call.
my_pipeline.fit(x_train, y_train)

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('onehotencoder',
                                                  OneHotEncoder(sparse=False),
                                                  [1, 2, 3]),
                                                 ('standardscaler',
                                                  StandardScaler(),
                                                  [0, 4, 5, 6, 7, 8])])),
                ('randomforestregressor',
                 RandomForestRegressor(max_depth=10, random_state=0))])

In [11]:
# Smoke test on one raw row; values must follow the positional column layout
# the pipeline was fitted with.
my_pipeline.predict([[0.23, 'Ideal', 'E', 'SI2', 61.5, 55.0, 3.95, 3.98, 2.43]])



array([497.61779835])

In [12]:
!mkdir model_artifacts

mkdir: cannot create directory ‘model_artifacts’: File exists


In [13]:
# Serialize the fitted pipeline to the local artifact directory.
joblib.dump(my_pipeline, 'model_artifacts/model.joblib')

# Copy it to the bucket folder that is later passed to Model.upload as artifact_uri.
!gsutil cp model_artifacts/model.joblib $BUCKET_URI/$MODEL_ARTIFACT_DIR/

Copying file://model_artifacts/model.joblib [Content-Type=application/octet-stream]...
| [1 files][ 10.2 MiB/ 10.2 MiB]                                                
Operation completed over 1 objects/10.2 MiB.                                     


In [14]:
clarity_dict={"Flawless": "FL",
              "Internally Flawless": "IF",
              "Very Very Slightly Included": "VVS1",
              "Very Slightly Included": "VS2",
              "Slightly Included": "S12",
              "Included": "I3"}

import json
with open("model_artifacts/preprocessor.json", "w") as f:
    json.dump(clarity_dict, f)

!gsutil cp model_artifacts/preprocessor.json $BUCKET_URI/$MODEL_ARTIFACT_DIR/

Copying file://model_artifacts/preprocessor.json [Content-Type=application/json]...
- [1 files][  165.0 B/  165.0 B]                                                
Operation completed over 1 objects/165.0 B.                                      


In [15]:
%%writefile scr_dir/predictor.py

import joblib
import numpy as np
import json

from google.cloud import storage
from google.cloud.aiplatform.prediction.sklearn.predictor import SklearnPredictor


class CprPredictor(SklearnPredictor):
    """Custom prediction routine for the diamonds price pipeline.

    Extends ``SklearnPredictor`` with a clarity-name lookup applied before
    prediction and dollar-formatted strings produced after prediction.
    """

    def __init__(self):
        """Creates an empty predictor; all state is set up in ``load``."""
        return

    def load(self, artifacts_uri: str) -> None:
        """Loads the sklearn pipeline and preprocessing artifact.

        Args:
            artifacts_uri: Location of the model artifacts, forwarded to
                ``SklearnPredictor.load``.
        """

        super().load(artifacts_uri)

        # NOTE(review): relies on super().load() having placed the artifacts in
        # the current working directory — confirm for the SDK version in use.
        with open("preprocessor.json", "rb") as f:
            self._preprocessor = json.load(f)

    def preprocess(self, prediction_input: dict) -> np.ndarray:
        """Replaces spelled-out clarity names with their abbreviated form.

        Args:
            prediction_input: Request payload, forwarded unchanged to
                ``SklearnPredictor.preprocess``.

        Returns:
            The instances produced by the parent class, with the clarity
            feature (position 3 of every row) abbreviated where the lookup
            table has an entry for it.
        """

        inputs = super().preprocess(prediction_input)

        for sample in inputs:
            # Only values that are long-form keys of the lookup table are
            # rewritten. Anything else is passed through untouched, so an
            # abbreviation that is missing from the table no longer raises
            # KeyError here and is left for the model to accept or reject.
            sample[3] = self._preprocessor.get(sample[3], sample[3])
        return inputs

    def postprocess(self, prediction_results: np.ndarray) -> dict:
        """Rounds the predictions and formats each one as a dollar string.

        Args:
            prediction_results: Raw predictions returned by the model.

        Returns:
            A dict with a single ``"predictions"`` key holding one
            ``"$<value>"`` string per prediction.
        """

        return {"predictions": [f"${value}" for value in np.round(prediction_results)]}

Overwriting scr_dir/predictor.py


In [16]:
from google.cloud import aiplatform

# Point the Vertex AI SDK at the project and region used throughout the notebook.
aiplatform.init(project=PROJECT_ID, location=REGION)

import os

from google.cloud.aiplatform.prediction import LocalModel

from scr_dir.predictor import CprPredictor  # package name must match the source directory ("scr_dir")

# Build the serving image from the contents of scr_dir and tag it with its
# Artifact Registry URI so that push_image() can upload it later.
local_model = LocalModel.build_cpr_model(
    "scr_dir",
    f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}",
    predictor=CprPredictor,
    requirements_path=os.path.join("scr_dir", "requirements.txt"),
)

INFO:google.cloud.aiplatform.docker_utils.build:Running command: docker build -t us-central1-docker.pkg.dev/dt-tu-sandbox-dev/diamonds/sklearn-image --rm -f- scr_dir
  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)
INFO:google.cloud.aiplatform.docker_utils.local_util:Sending build context to Docker daemon  8.455kB
INFO:google.cloud.aiplatform.docker_utils.local_util:

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 1/14 : FROM python:3.10

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> c2fedeb2859f

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 2/14 : ENV PYTHONDONTWRITEBYTECODE=1

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> Using cache

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> 8c6c2f52aaab

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 3/14 : EXPOSE 8080

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> Using cache

INFO:google.cloud.aiplatform.docker

In [17]:
import json

# Two request rows: the first already uses an abbreviated clarity ('VS2'); the
# second uses a spelled-out clarity that CprPredictor.preprocess abbreviates.
sample = {"instances": [
  [0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43],
  [0.29, 'Premium', 'J', 'Internally Flawless', 52.5, 49.0, 4.00, 2.13, 3.11]]}

# Written to disk because the local endpoint is called with request_file=.
with open('instances.json', 'w') as fp:
    json.dump(sample, fp)

In [18]:
# Display the LocalModel object returned by build_cpr_model.
local_model

<google.cloud.aiplatform.prediction.local_model.LocalModel at 0x7f4dc0ff4f10>

In [19]:
# Serve the freshly built image locally, loading the artifacts from disk.
# The endpoint is only used inside the `with` block, so both requests are
# issued there; the responses stay available afterwards.
with local_model.deploy_to_local_endpoint(
    artifact_uri = 'model_artifacts/', # local path to artifacts
) as local_endpoint:
    predict_response = local_endpoint.predict(
        request_file='instances.json',
        headers={"Content-Type": "application/json"},
    )

    # Stored but not inspected in the visible cells.
    health_check_response = local_endpoint.run_health_check()

INFO:google.cloud.aiplatform.prediction.local_endpoint:Got the project id from the global config: dt-tu-sandbox-dev.


In [20]:
# Raw response body returned by the local endpoint.
predict_response.content

b'{"predictions": ["$479.0", "$586.0"]}'

In [21]:
# Create the Docker repository that will hold the serving image.
!gcloud artifacts repositories create $REPOSITORY --repository-format=docker \
--location=$REGION --description="Docker repository"

# Register gcloud as Docker credential helper for the regional registry host.
!gcloud auth configure-docker $REGION-docker.pkg.dev --quiet

Create request issued for: [diamonds]
Waiting for operation [projects/dt-tu-sandbox-dev/locations/us-central1/operati
ons/6d82573f-c53d-4963-919f-70c064b4e877] to complete...done.                  
Created repository [diamonds].

{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
Docker configuration file updated.


In [22]:
# Push the locally built serving image to the Artifact Registry repository.
local_model.push_image()

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)
INFO:google.cloud.aiplatform.docker_utils.local_util:Using default tag: latest

INFO:google.cloud.aiplatform.docker_utils.local_util:The push refers to repository [us-central1-docker.pkg.dev/dt-tu-sandbox-dev/diamonds/sklearn-image]

INFO:google.cloud.aiplatform.docker_utils.local_util:021c5079dd6b: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:05d3d3f62945: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:55a6d7583fcf: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:a409087c67a3: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:0b582dde5b32: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:0dec43bdcdbe: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:ab93482a9cdb: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:2b4cf8a5bd5e: Preparing

INFO:google.cloud.aiplatform.docker_utils.loca

In [24]:
# Register the model in Vertex AI: the serving container comes from local_model
# and the artifacts from the bucket folder populated earlier.
model = aiplatform.Model.upload(local_model = local_model,
                                display_name=MODEL_DISPLAY_NAME,
                                artifact_uri=f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}",)

INFO:google.cloud.aiplatform.models:Creating Model
INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/435046587974/locations/us-central1/models/3104965861258035200/operations/7247586406661357568
INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/435046587974/locations/us-central1/models/3104965861258035200@1
INFO:google.cloud.aiplatform.models:To use this Model in another session:
INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/435046587974/locations/us-central1/models/3104965861258035200@1')


In [25]:
# Creates an endpoint and deploys the model to it (see the log output below).
endpoint = model.deploy(machine_type="n1-standard-2")

INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/435046587974/locations/us-central1/endpoints/2902413829189140480/operations/1573050876174532608
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/435046587974/locations/us-central1/endpoints/2902413829189140480
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/435046587974/locations/us-central1/endpoints/2902413829189140480')
INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/435046587974/locations/us-central1/endpoints/2902413829189140480
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/435046587974/locations/us-central1/endpoints/2902413829189140480/operations/2671929185252933632
INFO:google.cloud.aiplatform.models:Endpoint model deployed. Resource name: projects/43504658797

In [26]:
# Online prediction against the deployed endpoint with one raw feature row.
endpoint.predict(instances=[[0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43]])

Prediction(predictions=['$479.0'], deployed_model_id='6016534224260038656', model_version_id='1', model_resource_name='projects/435046587974/locations/us-central1/models/3104965861258035200', explanations=None)

In [27]:
# Clean up: force=True undeploys the model before the endpoint is deleted.
# NOTE(review): the uploaded model, the image and the bucket are not removed
# by this cell.
endpoint.delete(force=True)

INFO:google.cloud.aiplatform.models:Undeploying Endpoint model: projects/435046587974/locations/us-central1/endpoints/2902413829189140480
INFO:google.cloud.aiplatform.models:Undeploy Endpoint model backing LRO: projects/435046587974/locations/us-central1/endpoints/2902413829189140480/operations/5238980972854116352
INFO:google.cloud.aiplatform.models:Endpoint model undeployed. Resource name: projects/435046587974/locations/us-central1/endpoints/2902413829189140480
INFO:google.cloud.aiplatform.base:Deleting Endpoint : projects/435046587974/locations/us-central1/endpoints/2902413829189140480
INFO:google.cloud.aiplatform.base:Delete Endpoint  backing LRO: projects/435046587974/locations/us-central1/operations/8210230827011801088
INFO:google.cloud.aiplatform.base:Endpoint deleted. . Resource name: projects/435046587974/locations/us-central1/endpoints/2902413829189140480
