Skip to content

Commit

Permalink
Merge pull request #473 from GoogleCloudPlatform/master
Browse files Browse the repository at this point in the history
Rebase on the current master
  • Loading branch information
takumiohym committed May 16, 2024
2 parents 6cf16a7 + 6216a5e commit 648b79a
Show file tree
Hide file tree
Showing 10 changed files with 1,718 additions and 38 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright 2021 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Extract BQ dataset function."""

# TODO 3: Import necessary modules


# TODO 3: add component decorator.
# Use "gcr.io/ml-pipeline/google-cloud-pipeline-components:2.8.0" for base image
def extract_bq_op(
    # TODO 3: Add inputs
):
    """Exercise stub: extract a BigQuery dataset (implementation is TODO 3)."""
    # pylint: disable=import-outside-toplevel

    # TODO 3: Define function
    pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Copyright 2021 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

"""Kubeflow Covertype Pipeline."""
import os

from google.cloud.aiplatform import hyperparameter_tuning as hpt
from google_cloud_pipeline_components.types import artifact_types

# TODO 2: Import a predefined component for Batch Prediction
# TODO 3: Import a predefined component for BigQuery query job
from google_cloud_pipeline_components.v1.custom_job import CustomTrainingJobOp
from google_cloud_pipeline_components.v1.endpoint import (
EndpointCreateOp,
ModelDeployOp,
)
from google_cloud_pipeline_components.v1.hyperparameter_tuning_job import (
HyperparameterTuningJobRunOp,
serialize_metrics,
serialize_parameters,
)
from google_cloud_pipeline_components.v1.model import ModelUploadOp
from kfp import dsl
from retrieve_best_hptune_component import retrieve_best_hptune_result

# TODO 3: Import extract bq_op

# All pipeline configuration comes from environment variables so the same
# pipeline definition can be compiled for any project/region.
PIPELINE_ROOT = os.getenv("PIPELINE_ROOT")
PROJECT_ID = os.getenv("PROJECT_ID")
REGION = os.getenv("REGION")

# Container images and serving hardware.
TRAINING_CONTAINER_IMAGE_URI = os.getenv("TRAINING_CONTAINER_IMAGE_URI")
SERVING_CONTAINER_IMAGE_URI = os.getenv("SERVING_CONTAINER_IMAGE_URI")
SERVING_MACHINE_TYPE = os.getenv("SERVING_MACHINE_TYPE", "n1-standard-16")

# Paths to the CSV splits passed to the training container as CLI args.
TRAINING_FILE_PATH = os.getenv("TRAINING_FILE_PATH")
VALIDATION_FILE_PATH = os.getenv("VALIDATION_FILE_PATH")

# Hyperparameter-tuning budget.
MAX_TRIAL_COUNT = int(os.getenv("MAX_TRIAL_COUNT", "5"))
PARALLEL_TRIAL_COUNT = int(os.getenv("PARALLEL_TRIAL_COUNT", "5"))
# NOTE(review): THRESHOLD is read but not used in the visible pipeline —
# presumably an accuracy gate for a TODO task; confirm before removing.
THRESHOLD = float(os.getenv("THRESHOLD", "0.6"))

PIPELINE_NAME = os.getenv("PIPELINE_NAME", "covertype")
BASE_OUTPUT_DIR = os.getenv("BASE_OUTPUT_DIR", PIPELINE_ROOT)
MODEL_DISPLAY_NAME = os.getenv("MODEL_DISPLAY_NAME", PIPELINE_NAME)
TIMESTAMP = os.getenv("TIMESTAMP")


@dsl.pipeline(
    name=f"{PIPELINE_NAME}-kfp-pipeline",
    description="Kubeflow pipeline that tunes, trains, and deploys on Vertex",
    pipeline_root=PIPELINE_ROOT,
)
def create_pipeline():
    """Tune hyperparameters, train the best model, upload, and deploy it."""

    # TODO 3: Insert Data tasks here

    # Single CPU worker spec reused by every tuning trial; the trainer is
    # run with --hptune so it reports accuracy instead of saving a model.
    worker_pool_specs = [
        {
            "machine_spec": {
                "machine_type": "n1-standard-4",
                # "accelerator_type": "NVIDIA_TESLA_T4",
                # "accelerator_count": 1,
            },
            "replica_count": 1,
            "container_spec": {
                "image_uri": TRAINING_CONTAINER_IMAGE_URI,
                "args": [
                    f"--training_dataset_path={TRAINING_FILE_PATH}",
                    f"--validation_dataset_path={VALIDATION_FILE_PATH}",
                    "--hptune",
                ],
            },
        }
    ]

    # The study maximizes the "accuracy" metric reported by the trainer.
    metric_spec = serialize_metrics({"accuracy": "maximize"})

    parameter_spec = serialize_parameters(
        {
            "alpha": hpt.DoubleParameterSpec(
                min=1.0e-4, max=1.0e-1, scale="log"
            ),
            "max_iter": hpt.DiscreteParameterSpec(
                values=[1, 2], scale="linear"
            ),
        }
    )

    hp_tuning_task = HyperparameterTuningJobRunOp(
        display_name=f"{PIPELINE_NAME}-kfp-tuning-job",
        project=PROJECT_ID,
        location=REGION,
        worker_pool_specs=worker_pool_specs,
        study_spec_metrics=metric_spec,
        study_spec_parameters=parameter_spec,
        max_trial_count=MAX_TRIAL_COUNT,
        parallel_trial_count=PARALLEL_TRIAL_COUNT,
        base_output_directory=PIPELINE_ROOT,
    ) # TODO 3: Define dependencies for preceding tasks.

    # Inspect the finished tuning job and build the worker pool spec that
    # retrains with the winning hyperparameters.
    best_retrieval_task = retrieve_best_hptune_result(
        project=PROJECT_ID,
        location=REGION,
        gcp_resources=hp_tuning_task.outputs["gcp_resources"],
        container_uri=TRAINING_CONTAINER_IMAGE_URI,
        training_file_path=TRAINING_FILE_PATH,
        validation_file_path=VALIDATION_FILE_PATH,
    )

    training_task = CustomTrainingJobOp(
        project=PROJECT_ID,
        location=REGION,
        display_name=f"{PIPELINE_NAME}-kfp-training-job",
        worker_pool_specs=best_retrieval_task.outputs["best_worker_pool_spec"],
        base_output_directory=BASE_OUTPUT_DIR,
    )

    # Import the trained artifact from GCS. `after` enforces ordering because
    # the importer has no data dependency on the training task.
    importer_spec = dsl.importer(
        artifact_uri=f"{BASE_OUTPUT_DIR}/model",
        artifact_class=artifact_types.UnmanagedContainerModel,
        metadata={"containerSpec": {"imageUri": SERVING_CONTAINER_IMAGE_URI}},
    )
    importer_spec.after(training_task)

    model_upload_task = ModelUploadOp(
        project=PROJECT_ID,
        display_name=f"{PIPELINE_NAME}-kfp-model-upload-job",
        unmanaged_container_model=importer_spec.output,
    )

    endpoint_create_task = EndpointCreateOp(
        project=PROJECT_ID,
        display_name=f"{PIPELINE_NAME}-kfp-create-endpoint-job",
    )
    endpoint_create_task.after(model_upload_task)

    # Deploy the uploaded model to the endpoint with a fixed single replica.
    model_deploy_op = ModelDeployOp( # pylint: disable=unused-variable
        model=model_upload_task.outputs["model"],
        endpoint=endpoint_create_task.outputs["endpoint"],
        deployed_model_display_name=MODEL_DISPLAY_NAME,
        dedicated_resources_machine_type=SERVING_MACHINE_TYPE,
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
    )

    # TODO 2: Add Batch Prediction task
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright 2021 Google LLC

# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at

# https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS"
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.
"""Lightweight component tuning function."""
from typing import Dict, List, NamedTuple

from kfp.dsl import component

# TODO 1: Import output and artifact object


@component(
    base_image="python:3.8",
    packages_to_install=["google-cloud-aiplatform"],
)
def retrieve_best_hptune_result(
    project: str,
    location: str,
    gcp_resources: str,
    container_uri: str,
    training_file_path: str,
    validation_file_path: str,
    # TODO 1: Add metrics output object.
    # Use `metrics_artifact` for the variable name.
) -> NamedTuple(
    "Outputs",
    [
        ("best_parameters", Dict),
        ("best_metrics", Dict),
        ("best_worker_pool_spec", List),
    ],
):
    """Select the best trial of a finished hyperparameter-tuning job.

    Args:
        project: GCP project that ran the tuning job.
        location: Region of the tuning job.
        gcp_resources: JSON string whose first resource URI ends with the
            tuning job ID (as emitted by HyperparameterTuningJobRunOp).
        container_uri: Training image used in the returned worker pool spec.
        training_file_path: Training CSV path forwarded to the trainer.
        validation_file_path: Validation CSV path forwarded to the trainer.

    Returns:
        The best trial's parameters, its final metrics, and a worker pool
        spec ready for a full (--nohptune) training run.
    """
    # pylint: disable=import-outside-toplevel
    import json

    from google.cloud import aiplatform

    aiplatform.init(project=project, location=location)

    # Retrieve the hyperparameter tuning job result
    gcp_resources = json.loads(gcp_resources)
    job_id = gcp_resources["resources"][0]["resourceUri"].split("/")[-1]
    hp_job = aiplatform.HyperparameterTuningJob.get(job_id)

    # Retrieve the best trial by comparing each trial's first (objective)
    # metric value.
    metrics = [
        trial.final_measurement.metrics[0].value for trial in hp_job.trials
    ]
    goal = hp_job.to_dict()["studySpec"]["metrics"][0]["goal"]
    goal_f = min if goal == "MINIMIZE" else max # MINIMIZE or MAXIMIZE
    best_result = goal_f(metrics)
    best_trial = hp_job.trials[metrics.index(best_result)]

    best_parameters = {
        param.parameter_id: param.value for param in best_trial.parameters
    }

    best_metrics = {
        m.metric_id: m.value for m in best_trial.final_measurement.metrics
    }

    # Construct worker_pool_spec
    best_worker_pool_spec = [
        {
            "machine_spec": {"machine_type": "n1-standard-4"},
            "replica_count": 1,
            "container_spec": {
                "image_uri": container_uri,
                "args": [
                    f"--training_dataset_path={training_file_path}",
                    f"--validation_dataset_path={validation_file_path}",
                    "--nohptune",
                ],
            },
        }
    ]

    # Forward each winning hyperparameter as a --name=value CLI flag.
    for k, v in best_parameters.items():
        best_worker_pool_spec[0]["container_spec"]["args"].append(f"--{k}={v}")

    # TODO 1: Add metrics reporting for Vertex AI Experiments

    return best_parameters, best_metrics, best_worker_pool_spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Training container for the Covertype trainer, built on Vertex AI's
# scikit-learn CPU base image.
FROM us-docker.pkg.dev/vertex-ai/training/sklearn-cpu.1-0
# fire: CLI generation for train.py; cloudml-hypertune: tuning-metric
# reporting; scikit-learn pinned for reproducible training behavior.
RUN pip install -U fire cloudml-hypertune scikit-learn==1.2.2
WORKDIR /app
COPY train.py .

ENTRYPOINT ["python", "train.py"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2021 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Covertype Classifier trainer script."""
import os
import pickle
import subprocess
import sys

import fire
import hypertune
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# GCS output directory for model artifacts; Vertex AI sets this env var
# for custom training jobs, so a missing value fails fast here.
AIP_MODEL_DIR = os.environ["AIP_MODEL_DIR"]
# Local filename the fitted pipeline is pickled to before the GCS copy.
MODEL_FILENAME = "model.pkl"


def train_evaluate(
    training_dataset_path, validation_dataset_path, alpha, max_iter, hptune
):
    """Trains the Covertype Classifier model.

    Args:
        training_dataset_path: CSV file with the training split.
        validation_dataset_path: CSV file with the validation split.
        alpha: Regularization strength for the SGD classifier.
        max_iter: Maximum number of training iterations.
        hptune: When truthy, score on the validation split and report the
            accuracy via hypertune; otherwise train on all data and save
            the model to AIP_MODEL_DIR.
    """
    train_df = pd.read_csv(training_dataset_path)
    validation_df = pd.read_csv(validation_dataset_path)

    # For the final (non-tuning) run, train on all available data.
    if not hptune:
        train_df = pd.concat([train_df, validation_df])

    # Columns 0-9 are numeric features, 10-11 are categorical.
    numeric_cols = slice(0, 10)
    categorical_cols = slice(10, 12)

    # Cast the numeric columns explicitly so scaling sees float64 input.
    float_casts = {col: "float64" for col in train_df.columns[numeric_cols]}
    train_df = train_df.astype(float_casts)
    validation_df = validation_df.astype(float_casts)

    model = Pipeline(
        [
            (
                "preprocessor",
                ColumnTransformer(
                    transformers=[
                        ("num", StandardScaler(), numeric_cols),
                        ("cat", OneHotEncoder(), categorical_cols),
                    ]
                ),
            ),
            ("classifier", SGDClassifier(loss="log_loss")),
        ]
    )

    print(f"Starting training: alpha={alpha}, max_iter={max_iter}")
    # pylint: disable-next=invalid-name
    X_train = train_df.drop("Cover_Type", axis=1)
    y_train = train_df["Cover_Type"]

    model.set_params(
        classifier__alpha=alpha, classifier__max_iter=int(max_iter)
    )
    model.fit(X_train, y_train)

    if hptune:
        # Score on the held-out split and report it to the tuning service.
        # pylint: disable-next=invalid-name
        X_validation = validation_df.drop("Cover_Type", axis=1)
        y_validation = validation_df["Cover_Type"]
        accuracy = model.score(X_validation, y_validation)
        print(f"Model accuracy: {accuracy}")
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag="accuracy", metric_value=accuracy
        )
    else:
        # Persist the fitted pipeline locally, then copy it to GCS.
        with open(MODEL_FILENAME, "wb") as model_file:
            pickle.dump(model, model_file)
        subprocess.check_call(
            ["gsutil", "cp", MODEL_FILENAME, AIP_MODEL_DIR], stderr=sys.stdout
        )
        print(f"Saved model in: {AIP_MODEL_DIR}")


# CLI entry point: fire exposes train_evaluate's parameters as flags
# (e.g. --alpha, --max_iter, --hptune/--nohptune).
if __name__ == "__main__":
    fire.Fire(train_evaluate)
Loading

0 comments on commit 648b79a

Please sign in to comment.