## 1 - Enviroment Variables

In [1]:
PROJECT_ID = "dt-tu-sandbox-dev"
BUCKET_NAME = f"{PROJECT_ID}-cpr-bucket-eu"
BUCKET_URI = f"gs://{BUCKET_NAME}"
REGION = "europe-west2"

MODEL_ARTIFACT_DIR = "sklearn-model-artifacts"
REPOSITORY = "diamonds"
IMAGE = "sklearn-image"
MODEL_DISPLAY_NAME = "diamonds-cpr"

## 2 - CREATE BUCKET

In [2]:
from google.cloud import storage

In [3]:
storage_client = storage.Client()

In [6]:
storage_client.create_bucket(BUCKET_NAME,location=REGION)

<Bucket: dt-tu-sandbox-dev-cpr-bucket-eu>

## 3 - Setup Directories

In [34]:
%%writefile requirements.txt
fastapi
uvicorn==0.17.6
joblib~=1.1.1
numpy>=1.17.3, <1.24.0
scikit-learn~=1.2.0
pandas
google-cloud-storage>=2.2.1,<3.0.0dev
google-cloud-aiplatform[prediction]>=1.18.2

Overwriting requirements.txt


In [35]:
!pip install -U --user -r requirements.txt

Collecting fastapi (from -r requirements.txt (line 1))
  Using cached fastapi-0.108.0-py3-none-any.whl.metadata (24 kB)
Collecting scikit-learn~=1.2.0 (from -r requirements.txt (line 5))
  Downloading scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m68.0 MB/s[0m eta [36m0:00:00[0m00:01[0m:00:01[0m
Collecting starlette<0.33.0,>=0.29.0 (from fastapi->-r requirements.txt (line 1))
  Using cached starlette-0.32.0.post1-py3-none-any.whl.metadata (5.8 kB)
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.0.2
    Uninstalling scikit-learn-1.0.2:
      Successfully uninstalled scikit-learn-1.0.2
Successfully installed scikit-learn-1.2.2


In [9]:
!mkdir scr_dir

mkdir: cannot create directory ‘scr_dir’: File exists


In [4]:
!cp requirements.txt scr_dir/requirements.txt

## Training the model!

In [5]:
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

import joblib
import logging

# set logging to see the docker container logs
logging.basicConfig(level=logging.INFO)

In [16]:
!pip install --upgrade seaborn

Collecting seaborn
  Downloading seaborn-0.13.1-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.1-py3-none-any.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.8/294.8 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: seaborn
  Attempting uninstall: seaborn
    Found existing installation: seaborn 0.12.2
    Uninstalling seaborn-0.12.2:
      Successfully uninstalled seaborn-0.12.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ydata-profiling 4.6.0 requires pandas!=1.4.0,<2.1,>1.1, but you have pandas 2.1.4 which is incompatible.
ydata-profiling 4.6.0 requires seaborn<0.13,>=0.10.1, but you have seaborn 0.13.1 which is incompatible.[0m[31m
[0mSuccessfully installed seaborn-0.13.1


In [6]:
data = sns.load_dataset('diamonds', cache=True, data_home=None)

label = 'price'

y_train = data['price']
x_train = data.drop(columns=['price'])

In [7]:
data.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [8]:
column_transform = make_column_transformer(
    (preprocessing.OneHotEncoder(sparse=False), [1,2,3]),
    (preprocessing.StandardScaler(), [0,4,5,6,7,8]))

In [9]:
regr = RandomForestRegressor(max_depth=10, random_state=0)

In [10]:
my_pipeline = make_pipeline(column_transform, regr)

In [11]:
my_pipeline.fit(x_train, y_train)



In [12]:
my_pipeline.predict([[0.23, 'Ideal', 'E', 'SI2', 61.5, 55.0, 3.95, 3.98, 2.43]])



array([497.61779835])

In [13]:
!mkdir model_artifacts

mkdir: cannot create directory ‘model_artifacts’: File exists


In [14]:
joblib.dump(my_pipeline, 'model_artifacts/model.joblib')

!gsutil cp model_artifacts/model.joblib $BUCKET_URI/$MODEL_ARTIFACT_DIR/

Copying file://model_artifacts/model.joblib [Content-Type=application/octet-stream]...
/ [1 files][ 10.2 MiB/ 10.2 MiB]                                                
Operation completed over 1 objects/10.2 MiB.                                     


In [15]:
clarity_dict={"Flawless": "FL",
              "Internally Flawless": "IF",
              "Very Very Slightly Included": "VVS1",
              "Very Slightly Included": "VS2",
              "Slightly Included": "S12",
              "Included": "I3"}

import json
with open("model_artifacts/preprocessor.json", "w") as f:
    json.dump(clarity_dict, f)

!gsutil cp model_artifacts/preprocessor.json $BUCKET_URI/$MODEL_ARTIFACT_DIR/

Copying file://model_artifacts/preprocessor.json [Content-Type=application/json]...
/ [1 files][  165.0 B/  165.0 B]                                                
Operation completed over 1 objects/165.0 B.                                      


In [16]:
%%writefile scr_dir/predictor.py

import joblib
import numpy as np
import json

from google.cloud import storage
from google.cloud.aiplatform.prediction.sklearn.predictor import SklearnPredictor


class CprPredictor(SklearnPredictor):

    def __init__(self):
        return

    def load(self, artifacts_uri: str) -> None:
        """Loads the sklearn pipeline and preprocessing artifact."""

        super().load(artifacts_uri)

        # open preprocessing artifact
        with open("preprocessor.json", "rb") as f:
            self._preprocessor = json.load(f)


    def preprocess(self, prediction_input: np.ndarray) -> np.ndarray:
        """Performs preprocessing by checking if clarity feature is in abbreviated form."""

        inputs = super().preprocess(prediction_input)

        for sample in inputs:
            if sample[3] not in self._preprocessor.values():
                sample[3] = self._preprocessor[sample[3]]
        return inputs

    def postprocess(self, prediction_results: np.ndarray) -> dict:
        """Performs postprocessing by rounding predictions and converting to str."""

        return {"predictions": [f"${value}" for value in np.round(prediction_results)]}

Overwriting scr_dir/predictor.py


In [17]:
from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=REGION)

import os

from google.cloud.aiplatform.prediction import LocalModel

from scr_dir.predictor import CprPredictor  # Should be path of variable $USER_SRC_DIR

local_model = LocalModel.build_cpr_model(
    "scr_dir",
    f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}",
    predictor=CprPredictor,
    requirements_path=os.path.join("scr_dir", "requirements.txt"),
)

INFO:google.cloud.aiplatform.docker_utils.build:Running command: docker build -t europe-west2-docker.pkg.dev/dt-tu-sandbox-dev/diamonds/sklearn-image --rm -f- scr_dir
  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)
INFO:google.cloud.aiplatform.docker_utils.local_util:Sending build context to Docker daemon  8.239kB
INFO:google.cloud.aiplatform.docker_utils.local_util:

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 1/14 : FROM python:3.10

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> c9a36876f4e4

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 2/14 : ENV PYTHONDONTWRITEBYTECODE=1

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> Using cache

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> df3b77699c55

INFO:google.cloud.aiplatform.docker_utils.local_util:Step 3/14 : EXPOSE 8080

INFO:google.cloud.aiplatform.docker_utils.local_util: ---> Using cache

INFO:google.cloud.aiplatform.docke

In [18]:
import json

sample = {"instances": [
  [0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43],
  [0.29, 'Premium', 'J', 'Internally Flawless', 52.5, 49.0, 4.00, 2.13, 3.11]]}

with open('instances.json', 'w') as fp:
    json.dump(sample, fp)

In [19]:
local_model

<google.cloud.aiplatform.prediction.local_model.LocalModel at 0x7f05a3f758a0>

In [20]:
with local_model.deploy_to_local_endpoint(
    artifact_uri = 'model_artifacts/', # local path to artifacts
) as local_endpoint:
    predict_response = local_endpoint.predict(
        request_file='instances.json',
        headers={"Content-Type": "application/json"},
    )

    health_check_response = local_endpoint.run_health_check()

INFO:google.cloud.aiplatform.prediction.local_endpoint:Got the project id from the global config: dt-tu-sandbox-dev.


In [21]:
predict_response.content

b'{"predictions": ["$479.0", "$586.0"]}'

In [22]:
!gcloud artifacts repositories create $REPOSITORY --repository-format=docker \
--location=$REGION --description="Docker repository"

!gcloud auth configure-docker $REGION-docker.pkg.dev --quiet

Create request issued for: [diamonds]
Waiting for operation [projects/dt-tu-sandbox-dev/locations/europe-west2/operat
ions/1d89d4a0-ce32-4281-bb84-b280d61f5147] to complete...done.                 
Created repository [diamonds].

{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud"
  }
}
Adding credentials for: europe-west2-docker.pkg.dev
Docker configuration file updated.


In [23]:
local_model.push_image()

  self.stdin = io.open(p2cwrite, 'wb', bufsize)
  self.stdout = io.open(c2pread, 'rb', bufsize)
INFO:google.cloud.aiplatform.docker_utils.local_util:Using default tag: latest

INFO:google.cloud.aiplatform.docker_utils.local_util:The push refers to repository [europe-west2-docker.pkg.dev/dt-tu-sandbox-dev/diamonds/sklearn-image]

INFO:google.cloud.aiplatform.docker_utils.local_util:6b4411637a48: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:119130c3cd60: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:011033c8f486: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:46dbf565fde4: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:c5b80a51412c: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:9162bff0c68b: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:93cb76a64c1d: Preparing

INFO:google.cloud.aiplatform.docker_utils.local_util:a0814d1f5387: Preparing

INFO:google.cloud.aiplatform.docker_utils.loc

In [24]:
model = aiplatform.Model.upload(local_model = local_model,
                                display_name=MODEL_DISPLAY_NAME,
                                artifact_uri=f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}",)

INFO:google.cloud.aiplatform.models:Creating Model
INFO:google.cloud.aiplatform.models:Create Model backing LRO: projects/435046587974/locations/europe-west2/models/7949812116448346112/operations/2044001461884420096
INFO:google.cloud.aiplatform.models:Model created. Resource name: projects/435046587974/locations/europe-west2/models/7949812116448346112@1
INFO:google.cloud.aiplatform.models:To use this Model in another session:
INFO:google.cloud.aiplatform.models:model = aiplatform.Model('projects/435046587974/locations/europe-west2/models/7949812116448346112@1')


In [None]:
endpoint = model.deploy(machine_type="n1-standard-2")

INFO:google.cloud.aiplatform.models:Creating Endpoint
INFO:google.cloud.aiplatform.models:Create Endpoint backing LRO: projects/435046587974/locations/europe-west2/endpoints/1103197190752305152/operations/6254867113475833856
INFO:google.cloud.aiplatform.models:Endpoint created. Resource name: projects/435046587974/locations/europe-west2/endpoints/1103197190752305152
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
INFO:google.cloud.aiplatform.models:endpoint = aiplatform.Endpoint('projects/435046587974/locations/europe-west2/endpoints/1103197190752305152')
INFO:google.cloud.aiplatform.models:Deploying model to Endpoint : projects/435046587974/locations/europe-west2/endpoints/1103197190752305152
INFO:google.cloud.aiplatform.models:Deploy Endpoint model backing LRO: projects/435046587974/locations/europe-west2/endpoints/1103197190752305152/operations/6682709078076030976


In [None]:
endpoint.predict(instances=[[0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43]])

In [None]:
endpoint.delete(force=True)