## 1 - Environment Variables

In [None]:
# --- Google Cloud project and location ---
PROJECT_ID = "dt-tu-sandbox-dev"
REGION = "europe-west2"

# --- Cloud Storage bucket (its name is derived from the project ID) ---
BUCKET_NAME = f"{PROJECT_ID}-cpr-bucket-eu"
BUCKET_URI = f"gs://{BUCKET_NAME}"

# --- Model artifacts, container image and display name ---
MODEL_ARTIFACT_DIR = "sklearn-model-artifacts"  # folder under BUCKET_URI
REPOSITORY = "diamonds"  # Artifact Registry repository name
IMAGE = "sklearn-image"  # image name inside the repository
MODEL_DISPLAY_NAME = "diamonds-cpr"

## 2 - Create Bucket

In [None]:
from google.cloud import storage

In [None]:
# No project or credentials are passed explicitly, so the client relies on
# whatever the notebook environment provides.
storage_client = storage.Client()

In [None]:
# Reuse the bucket when it already exists so this cell can be re-run safely;
# calling create_bucket for an existing name fails with a 409 Conflict.
bucket = storage_client.lookup_bucket(BUCKET_NAME)  # None when not found
if bucket is None:
    bucket = storage_client.create_bucket(BUCKET_NAME, location=REGION)

# Trailing expression keeps the Bucket as the cell's displayed result.
bucket

## 3 - Setup Directories

In [None]:
%%writefile requirements.txt
fastapi
uvicorn==0.17.6
joblib~=1.1.1
numpy>=1.17.3, <1.24.0
scikit-learn~=1.2.0
pandas
google-cloud-storage>=2.2.1,<3.0.0dev
google-cloud-aiplatform[prediction]>=1.18.2

In [None]:
!pip install -U --user -r requirements.txt

In [None]:
# -p makes the cell re-runnable: plain mkdir errors out when the directory
# already exists.
!mkdir -p scr_dir

In [None]:
!cp requirements.txt scr_dir/requirements.txt

## Training the model!

In [None]:
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer

import joblib
import logging

# set logging to see the docker container logs
logging.basicConfig(level=logging.INFO)

In [None]:
# NOTE(review): seaborn is already imported by the cell above, so on a fresh
# environment this install has to run before that import cell.
!pip install --upgrade seaborn

In [None]:
# Name of the target column; every other column is used as a feature.
label = 'price'

# Load the diamonds example dataset through seaborn.
data = sns.load_dataset('diamonds', cache=True, data_home=None)

# Split the loaded data into features and target.
x_train = data.drop(columns=[label])
y_train = data[label]

In [None]:
data.head()

In [None]:
# Column positions refer to the feature rows passed to the pipeline:
#   1-3        -> string-valued features, one-hot encoded
#   0 and 4-8  -> numeric features, standardised
# `sparse_output` replaces the `sparse` argument, which scikit-learn deprecated
# in 1.2 (the release line pinned in requirements.txt) and removed in 1.4.
column_transform = make_column_transformer(
    (preprocessing.OneHotEncoder(sparse_output=False), [1, 2, 3]),
    (preprocessing.StandardScaler(), [0, 4, 5, 6, 7, 8]),
)

In [None]:
# Random forest regressor with tree depth capped at 10 and a fixed seed.
regr = RandomForestRegressor(max_depth=10, random_state=0)

In [None]:
# Chain preprocessing and the regressor so raw rows can be passed straight to
# fit/predict.
my_pipeline = make_pipeline(column_transform, regr)

In [None]:
# Fit on x_train/y_train as built above from the loaded dataset; no hold-out
# split is made in the cells shown here.
my_pipeline.fit(x_train, y_train)

In [None]:
# Smoke test with one raw row; positions 1-3 carry the string-valued features,
# matching the column indices given to the one-hot encoder.
my_pipeline.predict([[0.23, 'Ideal', 'E', 'SI2', 61.5, 55.0, 3.95, 3.98, 2.43]])

In [None]:
# -p makes the cell re-runnable: plain mkdir errors out when the directory
# already exists.
!mkdir -p model_artifacts

In [None]:
# Persist the fitted pipeline to the local artifact directory.
joblib.dump(my_pipeline, 'model_artifacts/model.joblib')

# Copy the serialized pipeline into the artifact folder of the bucket.
!gsutil cp model_artifacts/model.joblib $BUCKET_URI/$MODEL_ARTIFACT_DIR/

In [None]:
# Lookup from long-form clarity names to the abbreviated grades the model is
# trained on; the custom predictor applies it to incoming requests.
# "Slightly Included" must map to "SI2" (letter I), not "S12" (digit one):
# "SI2" is the spelling used for this feature elsewhere in the notebook.
# NOTE(review): "FL" and "I3" may not occur in the training data either -
# confirm against the dataset's clarity categories before relying on them.
clarity_dict = {
    "Flawless": "FL",
    "Internally Flawless": "IF",
    "Very Very Slightly Included": "VVS1",
    "Very Slightly Included": "VS2",
    "Slightly Included": "SI2",
    "Included": "I3",
}

import json
# Serialise the clarity lookup into the local artifact directory as JSON.
with open("model_artifacts/preprocessor.json", "w") as artifact_file:
    artifact_file.write(json.dumps(clarity_dict))

!gsutil cp model_artifacts/preprocessor.json $BUCKET_URI/$MODEL_ARTIFACT_DIR/

In [None]:
%%writefile scr_dir/predictor.py

import joblib
import numpy as np
import json

from google.cloud import storage
from google.cloud.aiplatform.prediction.sklearn.predictor import SklearnPredictor


class CprPredictor(SklearnPredictor):
    """Sklearn predictor that normalises the clarity feature and formats prices.

    On top of the parent predictor it:
      * translates long-form clarity names to their abbreviations before
        prediction, using the mapping stored in ``preprocessor.json``;
      * rounds each prediction and returns it as a ``"$<value>"`` string.
    """

    # Position of the clarity feature inside each instance row.
    _CLARITY_INDEX = 3

    def __init__(self):
        """No per-instance setup; all state is created in :meth:`load`."""
        return

    def load(self, artifacts_uri: str) -> None:
        """Load the sklearn pipeline and the clarity lookup table.

        Args:
            artifacts_uri: Location of the model artifacts, forwarded
                unchanged to the parent ``load``.
        """
        super().load(artifacts_uri)

        # The relative path means the file is read from the current working
        # directory. NOTE(review): this presumably relies on the parent load
        # having placed every artifact there - confirm.
        with open("preprocessor.json", "rb") as f:
            self._preprocessor = json.load(f)

    def preprocess(self, prediction_input: np.ndarray) -> np.ndarray:
        """Replace long-form clarity names with their abbreviated grade.

        Values that are already one of the known abbreviations, and values
        the lookup table has no entry for, are passed through unchanged so
        that the model (not a ``KeyError``) decides whether they are valid.

        Args:
            prediction_input: Raw request payload, handed to the parent
                ``preprocess`` first.

        Returns:
            The parent's preprocessed rows, with the clarity column
            translated in place where a translation exists.
        """
        inputs = super().preprocess(prediction_input)

        abbreviations = set(self._preprocessor.values())
        for sample in inputs:
            clarity = sample[self._CLARITY_INDEX]
            # Skip grades that are already abbreviated as well as anything the
            # lookup cannot translate (e.g. an abbreviation that is not listed
            # among the dict values); a direct dict lookup would raise
            # KeyError for the latter.
            if clarity in abbreviations or clarity not in self._preprocessor:
                continue
            sample[self._CLARITY_INDEX] = self._preprocessor[clarity]
        return inputs

    def postprocess(self, prediction_results: np.ndarray) -> dict:
        """Round the predictions and render each one as a ``$``-prefixed string.

        Args:
            prediction_results: Predictions produced by the model.

        Returns:
            ``{"predictions": [...]}`` with one formatted string per input row.
        """
        return {"predictions": [f"${value}" for value in np.round(prediction_results)]}

In [None]:
import os

from google.cloud import aiplatform
from google.cloud.aiplatform.prediction import LocalModel

# The predictor class is imported from the source directory written above;
# the same directory is handed to build_cpr_model below.
from scr_dir.predictor import CprPredictor

aiplatform.init(project=PROJECT_ID, location=REGION)

# Build the custom prediction routine (CPR) model from the contents of
# scr_dir, using an Artifact Registry image URI for the resulting image.
local_model = LocalModel.build_cpr_model(
    src_dir="scr_dir",
    output_image_uri=f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPOSITORY}/{IMAGE}",
    predictor=CprPredictor,
    requirements_path=os.path.join("scr_dir", "requirements.txt"),
)

In [None]:
import json

# Two request rows: the first already carries an abbreviated clarity grade,
# the second uses a long-form clarity name.
sample = {
    "instances": [
        [0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43],
        [0.29, 'Premium', 'J', 'Internally Flawless', 52.5, 49.0, 4.00, 2.13, 3.11],
    ]
}

# Write the request body to disk; the local endpoint call reads it from there.
with open('instances.json', 'w') as request_file:
    request_file.write(json.dumps(sample))

In [None]:
local_model

In [None]:
# Serve the built model locally with the artifacts from model_artifacts/, send
# the sample request file and then run a health check. Both responses are kept
# in notebook variables for inspection in later cells.
with local_model.deploy_to_local_endpoint(
    artifact_uri = 'model_artifacts/', # local path to artifacts
) as local_endpoint:
    # Send instances.json as a JSON request body.
    predict_response = local_endpoint.predict(
        request_file='instances.json',
        headers={"Content-Type": "application/json"},
    )

    # Issued while the local endpoint is still inside the `with` scope.
    health_check_response = local_endpoint.run_health_check()

In [None]:
predict_response.content

In [None]:
# Create the Artifact Registry Docker repository named by $REPOSITORY, which
# is the repository referenced in the image URI used for the CPR build.
!gcloud artifacts repositories create $REPOSITORY --repository-format=docker \
--location=$REGION --description="Docker repository"

# Configure Docker authentication for the regional registry host.
!gcloud auth configure-docker $REGION-docker.pkg.dev --quiet

In [None]:
# Push the image of the locally built model to its registry URI.
local_model.push_image()

In [None]:
# Upload the model to Vertex AI: the serving container comes from local_model,
# the artifacts from the bucket folder populated earlier.
model = aiplatform.Model.upload(
    local_model=local_model,
    display_name=MODEL_DISPLAY_NAME,
    artifact_uri=f"{BUCKET_URI}/{MODEL_ARTIFACT_DIR}",
)

In [None]:
# Deploy the uploaded model; no endpoint is passed in, and the call's return
# value is used as the endpoint in the cells below.
endpoint = model.deploy(machine_type="n1-standard-2")

In [None]:
# Online prediction against the deployed endpoint with a single instance row.
endpoint.predict(instances=[[0.23, 'Ideal', 'E', 'VS2', 61.5, 55.0, 3.95, 3.98, 2.43]])

In [None]:
# Clean up the endpoint.
# NOTE(review): the uploaded Model, the pushed image and the bucket are not
# removed by any cell shown here - delete them separately if no longer needed.
endpoint.delete(force=True)