# Auto Audience Segmentation (Interest based)

### Install (and update) additional packages

Install the following packages required to execute this notebook.

In [None]:
# Print the version of the `python` executable found on the shell PATH.
!python --version

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
# DL_ANACONDA_HOME is used here as the Workbench marker; only its truthiness is tested below.
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME")
# NOTE(review): this flag is computed but never read in this cell - confirm whether
# user-managed notebooks were also meant to receive the '--user' flag.
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

# {USER_FLAG} is interpolated by IPython into each shell command below.
# NOTE(review): the first command installs unpinned packages, unlike the pinned ones after it.
! pip3 install --upgrade google-cloud-aiplatform {USER_FLAG} -q google-cloud-bigquery db-dtypes
! pip3 install -q --upgrade optuna==3.2.0 {USER_FLAG}
! pip3 install -q --upgrade scikit-learn==1.2.* {USER_FLAG}
! pip3 install -q --upgrade plotly==5.16.0 matplotlib==3.7.2 seaborn==0.12.2 {USER_FLAG}

### Restart the kernel

After you install the additional packages, you need to restart the notebook kernel so it can find the packages.

In [None]:
import os

# Restart the kernel so the packages installed above can be imported.
# The restart is skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

## Before you begin

#### Set your project ID

**If you don't know your project ID**, you may be able to get your project ID using `gcloud`.

In [None]:
PROJECT_ID = "[YOUR-PROJECT-ID]"

# Get your Google Cloud project ID from gcloud
import os

if not os.getenv("IS_TESTING"):
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

Otherwise, set your project ID here.

In [None]:
# Fall back to the manually entered project when gcloud did not provide one.
# The untouched "[YOUR-PROJECT-ID]" placeholder counts as "not set" too, the
# same way the service-account cell treats its own placeholder.
# NOTE(review): the default below looks like a developer's personal project -
# replace it with your own project ID.
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[YOUR-PROJECT-ID]":
    PROJECT_ID = "as-dev-anze"  # @param {type:"string"}

print ("Your set Project ID is:", PROJECT_ID)

In [None]:
REGION = "[your-region]"  # @param {type: "string"}

# An untouched placeholder means "use the default region".
REGION = "us-central1" if REGION == "[your-region]" else REGION

print('REGION:', REGION)

### Authenticate your Google Cloud account

**If you are using Vertex AI Workbench Notebooks**, your environment is already
authenticated. Skip this step.

**If you are using Colab**, run the cell below and follow the instructions
when prompted to authenticate your account via OAuth.

**Otherwise**, follow these steps:

1. In the Cloud Console, go to the <a href="https://console.cloud.google.com/apis/credentials/serviceaccountkey" target="_blank">**Create service account key** page</a>.

2. Click **Create service account**.

3. In the **Service account name** field, enter a name, and
   click **Create**.

4. In the **Grant this service account access to project** section, click the **Role** drop-down list. Type "Vertex AI"
into the filter box, and select
   **Vertex AI Administrator**. Type "Storage Object Admin" into the filter box, and select **Storage Object Admin**.

5. Click *Create*. A JSON file that contains your key downloads to your
local environment.

6. Enter the path to your service account key as the
`GOOGLE_APPLICATION_CREDENTIALS` variable in the cell below and run the cell.

In [None]:
# Colab only: run this cell and follow the prompts to authenticate your GCP
# account.

import os
import sys

IS_COLAB = "google.colab" in sys.modules

# Either marker is taken to mean "running on Vertex AI Workbench", where this
# authentication step is not executed.
_on_workbench = os.path.exists("/opt/deeplearning/metadata/env_version") or bool(
    os.getenv("DL_ANACONDA_HOME")
)

if not _on_workbench and IS_COLAB:
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

# If you are running this notebook locally, replace the string below with the
# path to your service account key and run this cell to authenticate your GCP
# account.
# elif not os.getenv("IS_TESTING"):
#     %env GOOGLE_APPLICATION_CREDENTIALS ''

In [None]:
# Service account to use; leave the placeholder to have the next cell look it up via gcloud.
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        shell_output = !gcloud config list account --format "value(core.account)"
        SERVICE_ACCOUNT = shell_output[0].strip()

    else:  # IS_COLAB:
        shell_output = ! gcloud projects describe  $PROJECT_ID
        project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)

### Import libraries

In [None]:
import numpy as np
import pandas as pd
pd.options.plotting.backend = "plotly"

import json
import math
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
import plotly

import sklearn
print('The scikit-learn version is {}.'.format(sklearn.__version__))
from google.cloud import bigquery
import jinja2
import re

import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)

from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics import silhouette_samples, silhouette_score

### Configuration

In [None]:
#@title Settings
# Source of the `events_*` tables that the queries below read from.
SRC_PROJECT_ID = 'analyticspros.com:spotted-cinnamon-834' #@param {type:"string"}
SRC_DATASET_ID = 'analytics_206551716' #@param {type:"string"}
# Dataset (inside PROJECT_ID) that receives the stored procedure and the training table.
DST_DATASET_ID = 'interest_segments' #@param {type:"string"}

# Inclusive date range, passed to the queries as DATE parameters.
DATE_START = "2023-01-01" #@param {type:"date"}
DATE_END = "2023-12-31" #@param {type:"date"}
# Number of days before each user's latest event date that page views are counted for.
LOOKBACK_DAYS = 28 #@param {type:"integer"}

##### Creating BigQuery home dataset

In [None]:
from google.cloud import bigquery
# Client bound to the working project; reused by the cells below.
client = bigquery.Client(project=PROJECT_ID)
dataset = bigquery.Dataset(f"{PROJECT_ID}.{DST_DATASET_ID}")
# NOTE(review): location is fixed to "US" regardless of REGION - confirm it matches the source data.
dataset.location = "US"
# exists_ok=True: an already existing dataset is not treated as an error,
# so the message below is printed in that case as well.
dataset = client.create_dataset(dataset, exists_ok=True, timeout=30)
print("Created dataset {}.{}".format(client.project, dataset.dataset_id))

## Creating Auto Generated Dataset

In [None]:
#@markdown RE_PAGE_PATH is the regular expression that tells the query which part of the page path to extract. Example: ^https://your-website.com(/[a-z-0-9]*/?).*

RE_PAGE_PATH = '^https://adswerve.com(/[a-z-0-9]*/?).*' #@param {type:"string"}

#@markdown PERC_KEEP is the percent of cumulative traffic you'd like to keep. (Give me all pages/folders which combine for up to X% of all traffic)
PERC_KEEP = 95 #@param {type:"slider", min:1, max:99, step:1}

#@markdown MIN_ENGAGEMENT is the number of pageviews (which are also features) a visitor needs to be in the visitor pool for the analysis.
MIN_ENGAGEMENT = 2 #@param {type:"slider", min:0, max:25, step:1}

In [None]:
# NOTE(review): an identical client is already created in the dataset-creation cell;
# presumably repeated so this section can be run independently.
client = bigquery.Client(project=PROJECT_ID)

In [None]:
# Feature-discovery query.
# Innermost SELECT: one row per page_view event with the user and its page_location.
# Middle SELECT: distinct users per feature extracted with @RE_PAGE_PATH.
# Outer SELECT: cumulative share of users (features ordered by users, descending);
# only features whose cumulative percent is <= @PERC_KEEP are returned.
# The source table is interpolated by the f-string; every @NAME is a query parameter.
sql = f"""
SELECT
    feature,
    ROUND(100 * SUM(users) OVER (ORDER BY users DESC) / SUM(users) OVER (), 2) as cumulative_traffic_percent,

FROM (
    SELECT
        REGEXP_EXTRACT(page_path, @RE_PAGE_PATH) as feature,
        COUNT(DISTINCT user_id) as users

    FROM (
        SELECT
            user_pseudo_id as user_id,
            (SELECT value.string_value FROM UNNEST(event_params) as ep WHERE ep.key = 'page_location') as page_path
        FROM `{SRC_PROJECT_ID}.{SRC_DATASET_ID}.events_*`
        WHERE
            event_name = 'page_view'
            AND SAFE.PARSE_DATE('%Y%m%d', _TABLE_SUFFIX) BETWEEN @DATE_START AND @DATE_END
    )
    GROUP BY 1
)
WHERE
    feature IS NOT NULL
QUALIFY
    cumulative_traffic_percent <= @PERC_KEEP
ORDER BY 2 ASC
"""

In [None]:
# Bind the notebook settings to the @-parameters of the feature-discovery query.
feature_query_params = [
    bigquery.ScalarQueryParameter("DATE_START", "DATE", DATE_START),
    bigquery.ScalarQueryParameter("DATE_END", "DATE", DATE_END),
    bigquery.ScalarQueryParameter("RE_PAGE_PATH", "STRING", RE_PAGE_PATH),
    bigquery.ScalarQueryParameter("PERC_KEEP", "FLOAT64", PERC_KEEP),
]
feature_job_config = bigquery.QueryJobConfig(query_parameters=feature_query_params)

# Run the query and load the kept features into a DataFrame.
df_features = client.query(query=sql, job_config=feature_job_config).to_dataframe()

In [None]:
# One row of df_features per kept feature.
print (f'Number of page path categories kept: {len(df_features)}')

In [None]:
def column_name_clean(f):
    """Turn an extracted page-path feature into a name usable as a SQL column.

    The result is used as an unquoted column alias in the generated SQL, so it
    must be non-empty and must not start with a digit.

    Args:
        f: Feature string (e.g. '/blog/'), or None.

    Returns:
        'homepage' for None, '', '/' or anything that is empty once one
        leading and one trailing slash are removed. Otherwise the feature with
        every run of non-alphanumeric characters replaced by '_', prefixed
        with '_' when it would start with a digit.

    Note:
        Distinct features can still map to the same name (e.g. '/a-b/' and
        '/a_b/'); such collisions are not resolved here.
    """
    if f is None or f in ('', '/'):
        return 'homepage'
    if f.startswith('/'):
        f = f[1:]
    if f.endswith('/'):
        f = f[:-1]
    name = re.sub('[^0-9a-zA-Z]+', '_', f)
    if not name:
        # e.g. '//' - nothing is left after stripping the slashes
        return 'homepage'
    if name[0].isdigit():
        # An unquoted SQL identifier cannot start with a digit (e.g. '/2023/')
        name = '_' + name
    return name

In [None]:
# Add the cleaned column name for each feature; the same function names the columns in the generated SQL.
df_features['feature_name'] = df_features.feature.apply(column_name_clean)
df_features

In [None]:
def _bq_string_literal(value):
    """Render `value` as a double-quoted BigQuery string literal.

    json.dumps wraps the text in double quotes and backslash-escapes
    backslashes, double quotes and control characters. ensure_ascii=False
    keeps non-ASCII characters as they are. This replaces Jinja's `e` filter,
    which performs HTML escaping and therefore rewrites characters such as
    '&' or '<' into entities instead of producing a valid SQL literal.
    """
    return json.dumps(str(value), ensure_ascii=False)


# Template of the `create_dataset` stored procedure.
# The procedure builds one row per user with a page-view count per feature
# (within LOOKBACK_DAYS of the user's latest event date), then writes the rows
# whose summed feature counts reach `min_engagement` into `table_name`, a table
# that expires after one day.
procedure_template = jinja2.Template("""
CREATE OR REPLACE PROCEDURE {{ DST_DATASET_ID }}.create_dataset(
  table_name STRING,
  date_start DATE,
  date_end DATE,
  mode STRING
)
BEGIN

    DECLARE RE_PAGE_PATH STRING DEFAULT {{ bq_string_literal(re_page_path) }};
    DECLARE LOOKBACK_DAYS INT64 DEFAULT {{ lookback_days }};

    CREATE OR REPLACE TEMP TABLE dataset
    AS
    WITH
        visitor_pool AS (
            SELECT
              user_pseudo_id,
              PARSE_DATE('%Y%m%d', MAX(event_date)) as date,
              PARSE_DATE('%Y%m%d', MAX(event_date)) - LOOKBACK_DAYS as date_lookback,
              TIMESTAMP_MICROS(MAX(event_timestamp)) as feature_timestamp,

            FROM `{{ PROJECT_ID }}.{{ DATASET_ID }}.events_*`
            WHERE SAFE.PARSE_DATE('%Y%m%d', _TABLE_SUFFIX) BETWEEN date_start AND date_end
            GROUP BY 1
    )

    SELECT
        user_id,
        date,
        feature_timestamp,
        {% for f in features %}COUNTIF( REGEXP_EXTRACT(page_path, RE_PAGE_PATH) = {{ bq_string_literal(f) }} ) as {{ column_name_clean(f) }},
        {% endfor %}
    FROM (
        SELECT
            ga.user_pseudo_id as user_id,
            vp.date,
            vp.feature_timestamp,
            (SELECT value.string_value FROM UNNEST(event_params) as ep WHERE ep.key = 'page_location') as page_path
        FROM `{{ PROJECT_ID }}.{{ DATASET_ID }}.events_*` as ga
        INNER JOIN visitor_pool as vp
            ON vp.user_pseudo_id = ga.user_pseudo_id
                AND PARSE_DATE('%Y%m%d', GA.event_date) >= vp.date_lookback
        WHERE
            event_name = 'page_view'
            AND SAFE.PARSE_DATE('%Y%m%d', _TABLE_SUFFIX) BETWEEN date_start - LOOKBACK_DAYS AND date_end
    )
    GROUP BY 1, 2, 3;

    EXECUTE IMMEDIATE FORMAT('''
        CREATE OR REPLACE TABLE %s
        OPTIONS(
          expiration_timestamp=TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL 1 DAY)
        )
        AS
        SELECT * FROM dataset
        WHERE
          (
            {% for f in features %}{{ column_name_clean(f) }} {% if not loop.last %}+{% endif %}
            {% endfor %}
          ) >= {{ min_engagement }};
    ''', table_name);

END
""")
# Helpers callable from inside the template.
procedure_template.globals.update({
    'column_name_clean': column_name_clean,
    'bq_string_literal': _bq_string_literal,
})

In [None]:
def create_procedure(features):
    """Render the stored-procedure template for `features` and run it.

    Args:
        features: Feature strings, each of which becomes one counted column
            of the procedure's output.
    """
    render_args = dict(
        PROJECT_ID=SRC_PROJECT_ID,
        DATASET_ID=SRC_DATASET_ID,
        DST_DATASET_ID=DST_DATASET_ID,
        re_page_path=RE_PAGE_PATH,
        lookback_days=LOOKBACK_DAYS,
        min_engagement=MIN_ENGAGEMENT,
        features=features,
    )
    procedure_sql = procedure_template.render(**render_args)
    # Block until the procedure has been created.
    client.query(query=procedure_sql).result()


# Create the procedure with every kept feature ...
create_procedure(df_features.feature.tolist())

# ... then call it to materialize the training table.
call_params = [
    bigquery.ScalarQueryParameter("table_name", "STRING", f"{DST_DATASET_ID}.dataset"),
    bigquery.ScalarQueryParameter("date_start", "DATE", DATE_START),
    bigquery.ScalarQueryParameter("date_end", "DATE", DATE_END),
    bigquery.ScalarQueryParameter("mode", "STRING", "NA"),
]
client.query(
    query=f"CALL `{PROJECT_ID}.{DST_DATASET_ID}.create_dataset`(@table_name, @date_start, @date_end, @mode);",
    job_config=bigquery.QueryJobConfig(query_parameters=call_params),
).result()

In [None]:
# Load the table written by the `create_dataset` call into memory.
df = client.query(query=f"SELECT * FROM `{PROJECT_ID}.{DST_DATASET_ID}.dataset`").to_dataframe()

In [None]:
# Display the loaded training table.
df

## Cluster Model [for Interests on Site]
Find the best set of clusters using as many features as possible, while keeping the clusters sound (Silhouette Score) and even in size (Gini coefficient).


In [None]:
#@markdown ####Optimization weights
#@markdown Put different discounts on the importance of optimization targets. 0 means the target will remain fully important (not discounted), 1 means it will be fully discounted (ignored)

#@markdown Silhouette score provides a metric about how compact and sound the clusters are.
#@markdown Typically discounting this metric is not advised as it would allow for the optimization process to favor worse set of clusters.
W_silhouette_score = 0 #@param {type:"slider", min:0, max:1, step:0.01}

#@markdown Gini coeff. provides a metric about the inequality of cluster sizes.
#@markdown Pushing the weight closer to 1 will likely produce more clusters with more imbalance, keeping it close to 0 will likely produce less clusters of more equal sizes.
W_gini = 0.7 #@param {type:"slider", min:0, max:1, step:0.01}

#@markdown 'Features used ratio' measures what % of features have been kept. Pushing the weight closer to 1 will likely produce clusters with less features, keeping it closer to 0 will likely produce clusters with more features.
#@markdown Typically we would want to use as many features as we can while just dropping those clearly irrelevant.
W_feat_used_ratio = 0 #@param {type:"slider", min:0, max:1, step:0.01}

#@markdown 'Max clusters ratio' helps calculate what is the max number of clusters for each trial run.
#@markdown 1 will make max clusters equal to the number of features selected at each trial run, while 0.5 will ensure that the max number of clusters will be half of the number of features selected on each trial run.
W_max_clusters_ratio = 0.8 #@param {type:"slider", min:0.2, max:1, step:0.01}

In [None]:
# Work on a copy so the loaded frame `df` stays untouched.
X = df.copy()
features = list(X.columns[3:])  # need to skip first three columns -> user_id, date, feature_timestamp
# Lower bound for the number of clusters tried in each optimization trial.
min_num_clusters = 3

In [None]:
def create_model(params, n_features):
    """Build the (unfitted) TF-IDF + KMeans clustering pipeline.

    Args:
        params: Extra keyword arguments forwarded to KMeans (e.g. n_clusters).
        n_features: Number of feature columns; they are expected at positions
            3 .. n_features + 2 of the input, after user_id, date and
            feature_timestamp.

    Returns:
        A Pipeline with the steps 'transform' and 'model'.
    """
    # Positions of the feature columns, skipping the three id columns
    feature_positions = list(range(3, n_features + 3))

    tfidf_step = ColumnTransformer(
        transformers=[('tfidf', TfidfTransformer(norm='l2'), feature_positions)]
    )
    kmeans_step = KMeans(init='k-means++', n_init='auto', random_state=42, **params)

    return Pipeline([('transform', tfidf_step), ('model', kmeans_step)])

def gini(x):
    """Gini coefficient of the values in the 1-D array `x`.

    Computed as the sum of absolute differences over all unordered pairs,
    divided by (n^2 * mean). 0 means all values are equal.
    """
    n = len(x)
    # The full pairwise matrix counts every unordered pair twice, hence the 2.
    pairwise_total = np.abs(x[:, None] - x[None, :]).sum()
    return pairwise_total / (2 * n**2 * np.mean(x))

def objective(trial):
    """Optuna objective: choose a feature subset and cluster count, then score it.

    Each trial decides per feature whether to include it, fits the TF-IDF +
    KMeans pipeline on the chosen columns and returns three values to
    maximize: silhouette score, inverse Gini of the cluster sizes and the
    ratio of features used. Each raw score `s` is returned as
    `s * (1 - W) + W` with its weight `W`, so W = 1 makes the target constant.

    Raises:
        optuna.TrialPruned: when the trial selects no feature at all.
    """
    # One boolean decision per feature; the order of `features` is preserved.
    f_list = [
        f for f in features
        if trial.suggest_categorical(f"f_{f}", [False, True])
    ]

    if len(f_list) == 0:
        raise optuna.TrialPruned()

    max_num_clusters = max(min_num_clusters, round(len(f_list) * W_max_clusters_ratio))

    params_model = {
      "n_clusters": trial.suggest_int("n_clusters", min_num_clusters, max_num_clusters),
      "max_iter": 500, # trial.suggest_int("max_iter", 10, 1000, step=10),
      "tol": 1e-4 # trial.suggest_float("tol", 1e-6, 1e-2, step=1e-6),
    }

    # Three id columns followed by the selected features - the layout create_model expects
    X_f = X[list(X.columns[:3]) + f_list]
    model = create_model(params_model, len(f_list))
    model.fit(X_f)
    labels = model.predict(X_f)

    # Score exactly the frame the pipeline was fitted on. Passing the full `X`
    # only works when the transformer can re-select the fitted columns by name.
    sil_score = silhouette_score(
        model.named_steps['transform'].transform(X_f),
        labels, metric='euclidean',
        sample_size=min(int(len(X_f) * 0.1), 10_000),
        random_state=42
    )

    # minlength keeps clusters that received no points in the size vector, so
    # an empty cluster with the highest index still counts as inequality.
    cluster_sizes = np.bincount(labels, minlength=params_model["n_clusters"])
    gini_inv = 1 - gini(cluster_sizes)  # 1 is perfectly equal sizes, 0 is perfectly inequal sizes

    f_used_ratio = (len(f_list) / len(features))

    # Stored so the chosen trial can be rebuilt later without re-deriving them
    trial.set_user_attr("params_model", params_model)
    trial.set_user_attr("features", f_list)
    trial.set_user_attr("features_all", list(X_f.columns))
    return (
        sil_score * (1 - W_silhouette_score) + W_silhouette_score,
        gini_inv * (1 - W_gini) + W_gini,
        f_used_ratio * (1 - W_feat_used_ratio) + W_feat_used_ratio
    )

In [None]:
# Multi-objective study: all three values returned by `objective` are maximized.
# NOTE(review): the sampler is seeded, but n_jobs=-1 runs trials in parallel,
# so results may still differ between runs - confirm if reproducibility matters.
study = optuna.create_study(
    directions=["maximize", "maximize", "maximize"],
    sampler=optuna.samplers.NSGAIISampler(population_size=len(features) * 5, seed=42)
)
study.optimize(objective,
               n_trials=max(len(features)**2 * 5, 500),  # ADJUST THIS ACCORDINGLY, BUT PROB. SHOULDN'T BE BELOW 500
               show_progress_bar=True,
               n_jobs=-1
)

### Optimization Results

In [None]:
# Plot only the non-dominated trials; the axis names follow the order of the objective's return values.
fig = optuna.visualization.plot_pareto_front(study, target_names=['Silhuette', 'Inv.Gini', "Features Used Ratio"], include_dominated_trials=False)
fig.layout.height = 600
fig.show()

#### Find a trial that has the best ratio of all three optimization targets.
That is, the trial whose objective values are closest to the point (1, 1, 1).

In [None]:
def _distance_to_ideal(trial):
    """Euclidean distance between a trial's objective values and (1, 1, 1)."""
    return math.dist(trial.values, [1, 1, 1])


# Best trials of the study, closest to the ideal point first.
best_trials = sorted(study.best_trials, key=_distance_to_ideal)

In [None]:
#@markdown Pick model by rank:
pick_best_n_ranked_model = 1 #@param {type:"integer", min:1}

#@markdown Pick model by number of clusters with best rank: (rank pick will be ignored if pick_num_clusters is >= 3)
pick_num_clusters = 0 #@param {type:"integer", min:3}

# Reject ranks outside 1..len(best_trials). Without this check a rank of 0
# would silently select the last (worst-ranked) trial through negative indexing.
if not 1 <= pick_best_n_ranked_model <= len(best_trials):
    raise ValueError(
        f"pick_best_n_ranked_model must be between 1 and {len(best_trials)}, "
        f"got {pick_best_n_ranked_model}"
    )

trial_chosen = best_trials[pick_best_n_ranked_model - 1]
if pick_num_clusters and pick_num_clusters >= 3:
    # Take the best-ranked trial with the requested number of clusters. The
    # rank variable is only updated on a match, so the rank printed below
    # always belongs to the trial that was actually chosen.
    for rank, t in enumerate(best_trials, start=1):
        if t.user_attrs['params_model']['n_clusters'] == pick_num_clusters:
            pick_best_n_ranked_model = rank
            trial_chosen = t
            break
    else:
        print (f"No trial with {pick_num_clusters} clusters among the best trials; keeping rank {pick_best_n_ranked_model}.")

# Keep the original feature order in the "dropped" report
features_used = set(trial_chosen.user_attrs['features'])
features_dropped = [f for f in features if f not in features_used]

print ("TRIAL CHOSEN:")
print (f" Rank: {pick_best_n_ranked_model} out of {len(best_trials)}")
print (f' Number: {trial_chosen.number}')
print (f" Num. clusters: {trial_chosen.user_attrs['params_model']['n_clusters']}")
print (f" Sil. Score: {round(trial_chosen.values[0], 4)} / Inv. Gini Coeff: {round(trial_chosen.values[1], 4)} / Features Used Ratio: {round(trial_chosen.values[2], 4)}")
print (f" Num. Features Total: {len(features)} / Num. Features Used: {len(trial_chosen.user_attrs['features'])}")
print (f" Features Used: {trial_chosen.user_attrs['features']}")
print (f" Features Dropped: {features_dropped}")
print (f" Params:")
print (json.dumps(trial_chosen.params, indent=2))

##### Build model

In [None]:
# Features picked by the chosen trial, and the same list preceded by the three id columns.
features_sel = trial_chosen.user_attrs['features']
features_sel_all = trial_chosen.user_attrs['features_all']

# Rebuild the pipeline with the trial's parameters and fit it on the selected columns.
X_selected = X[features_sel_all]
model = create_model(trial_chosen.user_attrs['params_model'], len(features_sel))
model.fit(X_selected)
labels = model.predict(X_selected)

## Visualization

### Silhouette Analysis

In [None]:
def silhouette_visualization(X, model):
    """Draw a silhouette plot for a fitted clustering pipeline on a sample of X.

    Args:
        X: Frame with the same columns the pipeline was fitted on.
        model: Fitted Pipeline with the steps 'transform' and 'model'; the
            'model' step must expose `cluster_centers_`.

    A sample of 10% of the rows (capped at 10,000) is drawn without
    replacement, so no row is counted twice in the silhouette values.
    """
    # Local generator: reproducible without reseeding NumPy's global state
    rng = np.random.RandomState(42)

    # Create a single subplot
    fig, ax1 = plt.subplots(1, 1)
    fig.set_size_inches(18, 7)

    sample_size = min(int(len(X) * 0.1), 10_000)
    sample = rng.choice(len(X), size=sample_size, replace=False)
    model_cluster_centers = model.named_steps['model'].cluster_centers_
    n_clusters = len(model_cluster_centers)
    X_tr = model.named_steps['transform'].transform(X)

    ax1.set_xlim([-0.1, 1])
    # Leave a gap of 10 between the silhouette bands of consecutive clusters
    ax1.set_ylim([0, len(sample) + (n_clusters + 1) * 10])

    cluster_labels = model.predict(X)
    print("Clustering done for n_clusters={}".format(n_clusters))

    cluster_labels = cluster_labels[sample]
    X_sample = X_tr[sample, :]

    # The silhouette_score gives the average value for all the samples.
    # This gives a perspective into the density and separation of the formed
    # clusters
    silhouette_avg = silhouette_score(X_sample, cluster_labels)
    print("For n_clusters =", n_clusters,
          "The average silhouette_score is :", silhouette_avg)

    # Compute the silhouette scores for each sample
    sample_silhouette_values = silhouette_samples(X_sample, cluster_labels)

    y_lower = 10
    for i in range(n_clusters):
        # Aggregate the silhouette scores for samples belonging to
        # cluster i, and sort them
        ith_cluster_silhouette_values = \
            sample_silhouette_values[cluster_labels == i]

        ith_cluster_silhouette_values.sort()

        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i

        color = cm.nipy_spectral(float(i) / n_clusters)
        ax1.fill_betweenx(np.arange(y_lower, y_upper),
                          0, ith_cluster_silhouette_values,
                          facecolor=color, edgecolor=color, alpha=0.7)

        # Label the silhouette plots with their cluster numbers at the middle
        ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

        # Compute the new y_lower for next plot
        y_lower = y_upper + 10  # 10 for the 0 samples

    ax1.set_title("The silhouette plot.")
    ax1.set_xlabel("The silhouette coefficient values")
    ax1.set_ylabel("Cluster label")

    # The vertical line for average silhouette score of all the values
    ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

    ax1.set_yticks([])  # Clear the yaxis labels / ticks
    ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

    plt.suptitle(f"Silhouette analysis for KMeans clustering on sample data with n_clusters = {n_clusters}",
                 fontsize=14, fontweight='bold')

    plt.show()

silhouette_visualization(X[features_sel_all], model)

### Heatmap

In [None]:
# Cluster centers as a frame: one row per cluster, one column per selected feature.
model_cluster_centers = pd.DataFrame(
    model.named_steps['model'].cluster_centers_,
    columns=features_sel
)

# Colour scale: each feature (row after transposing) divided by its total across clusters.
centers_by_feature = model_cluster_centers.T
mcc = centers_by_feature.div(centers_by_feature.sum(axis=1), axis=0)

sns.set(font_scale=0.75)
# NOTE(review): width is derived from the feature count and height from the
# cluster count, while the plotted matrix has features as rows - confirm intended.
fig_width = max(int(model_cluster_centers.shape[1]/2), 4)
fig_height = max(model_cluster_centers.shape[0]/2, 4)
fig, ax = plt.subplots(figsize=(fig_width, fig_height))

# Cells are coloured by the normalized values and annotated with the raw centers.
_ = sns.heatmap(mcc, ax=ax, cbar=False, annot=np.round(model_cluster_centers, 2).T)
ax.set_title('Heatmap')
ax.set(xlabel='Cluster')
plt.yticks(rotation=0)
plt.show()

### Cluster Sizes

In [None]:
# Number of rows per cluster label, converted to a percentage of all rows.
c = np.bincount(labels).astype(np.float64)
c = c / (c.sum() / 100)

cluster_names = [f"Cluster {n}" for n in range(c.size)]
df_c = pd.DataFrame(c, index=cluster_names, columns=['Size %'])
df_c.plot.bar()

## [Optional] Deployment to Vertex AI Model Registry

##### Update stored procedure with only the selected features

In [None]:
# Keep only the features whose cleaned name was selected by the chosen trial,
# then recreate the stored procedure with just those.
selected_rows = df_features.feature_name.isin(features_sel)
create_procedure(df_features.loc[selected_rows, 'feature'].tolist())

##### Deploy

In [None]:
import pickle
# Serialize the fitted pipeline to a local file; a later cell uploads and then removes it.
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [None]:
# The model's display name is the destination dataset name.
MODEL_NAME = DST_DATASET_ID
GCS_BUCKET=f'{PROJECT_ID}-models'
# Artifact location: a folder named after the model inside the bucket.
ARTIFACT_GCS_URI = f"gs://{GCS_BUCKET}/{MODEL_NAME}"
# NOTE(review): the image tag suggests scikit-learn 1.2, in line with the 1.2.* pin installed above - confirm.
PREBUILT_CONTAINER_URI = "us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-2:latest"

In [None]:
# Create the artifact bucket in the chosen project and region.
!gsutil mb -p $PROJECT_ID -l $REGION gs://$GCS_BUCKET

In [None]:
# Upload the pickled pipeline to the artifact location, then remove the local copy.
!gsutil cp model.pkl $ARTIFACT_GCS_URI/model.pkl
!rm model.pkl

In [None]:
# Upload the artifact as a Vertex AI model served by the prebuilt container image.
!gcloud ai models upload --region=$REGION --display-name=$MODEL_NAME --container-image-uri=$PREBUILT_CONTAINER_URI --artifact-uri=$ARTIFACT_GCS_URI

In [None]:
# List the models in the region whose display name matches MODEL_NAME.
!gcloud ai models list --region=$REGION --filter=display_name=$MODEL_NAME

## [Optional] Delete GCP Resources

In [None]:
import google.cloud.aiplatform as aiplatform
# Point the Vertex AI SDK at the working project and region.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [None]:
# Delete every model whose display name equals MODEL_NAME (listed newest first).
for m in aiplatform.Model.list(filter=f"display_name={MODEL_NAME}", order_by=f"create_time desc"):
    m.delete()