In [1]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME") and not os.getenv("VIRTUAL_ENV")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade google-cloud-aiplatform {USER_FLAG} -q
! pip3 install -U google-cloud-storage {USER_FLAG} -q
! pip3 install {USER_FLAG} kfp google-cloud-pipeline-components --upgrade -q

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
kfp 1.8.13 requires google-cloud-storage<2,>=1.20.0, but you have google-cloud-storage 2.4.0 which is incompatible.
google-cloud-pipeline-components 1.0.15 requires google-cloud-storage<2,>=1.20.0, but you have google-cloud-storage 2.4.0 which is incompatible.[0m[31m
[0m

In [2]:
! pip3 install --user pip pickle-mixin



In [3]:
import os

# Restart the kernel so the freshly installed packages become importable.
# The restart is skipped when the IS_TESTING environment variable is set.
if not os.getenv("IS_TESTING"):
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

In [4]:
# Print the installed versions of the KFP SDK and of
# google-cloud-pipeline-components (each in a separate python3 process).
! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
! python3 -c "import google_cloud_pipeline_components; print('google_cloud_pipeline_components version: {}'.format(google_cloud_pipeline_components.__version__))"

KFP SDK version: 1.8.13
google_cloud_pipeline_components version: 1.0.15


In [1]:
# Google Cloud project ID; passed to aip.init(project=...) further down.
PROJECT_ID = "***"  # @param {type:"string"}

In [2]:
# Must be a region name; "europe-west6-a" is a Compute Engine *zone* and is
# not a valid Vertex AI location.
REGION = "europe-west6"  # @param {type: "string"}

In [3]:
from datetime import datetime

# Run-unique suffix (YYYYMMDDHHMMSS, local time) for generated resource names.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

In [4]:
# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

import os
import sys

# If on Vertex AI Workbench, then don't execute this code
IS_COLAB = "google.colab" in sys.modules
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"):
        %env GOOGLE_APPLICATION_CREDENTIALS ''

In [5]:
# Cloud Storage bucket that backs the pipeline, and its gs:// URI.
BUCKET_NAME = "bucket_pipeline_previsionale"  # @param {type:"string"}
BUCKET_URI = "gs://" + BUCKET_NAME

In [6]:
# When no real bucket name was supplied, derive one from the project ID and
# the run timestamp.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = f"{PROJECT_ID}aip-{TIMESTAMP}"
    BUCKET_URI = f"gs://{BUCKET_NAME}"

In [7]:
# Service account to use; leaving the placeholder triggers the gcloud-based
# auto-detection in the following cell.
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [8]:
# Auto-detect the service account when none (or only the placeholder) was set.
if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        # Takes the third line of `gcloud auth list` and strips the "*" marker.
        # NOTE(review): relies on a fixed output layout of gcloud — confirm.
        shell_output = !gcloud auth list 2>/dev/null
        SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

    if IS_COLAB:
        # Parses the value after ":" on the last line of
        # `gcloud projects describe` and uses it as the project number of the
        # default compute service account.
        # NOTE(review): assumes projectNumber is the last output line — confirm.
        shell_output = ! gcloud projects describe  $PROJECT_ID
        project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)

Service Account: 896262813704-compute@developer.gserviceaccount.com


In [9]:
# Grant the service account the storage.objectCreator and storage.objectViewer
# roles on the pipeline bucket.
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

No changes made to gs://bucket_pipeline_previsionale/
No changes made to gs://bucket_pipeline_previsionale/


In [10]:
import google.cloud.aiplatform as aip

In [11]:
# Cloud Storage folder handed to the pipeline as its root.
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/bucket_pipeline_previsionale"

In [12]:
import kfp
from kfp.v2 import dsl
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, OutputPath, component)

In [13]:
# Initialise the Vertex AI SDK with the project and staging bucket.
# NOTE(review): no `location` is passed, so REGION is not applied here — confirm
# the SDK default location is the intended one.
aip.init(project=PROJECT_ID, staging_bucket=BUCKET_URI)

In [14]:
@component(
    # "scikit-learn" is the real PyPI distribution; the "sklearn" alias is
    # deprecated and its installation fails. `pickle` ships with Python, so no
    # extra package is needed for it.
    packages_to_install=["google-cloud-bigquery", "pandas", "db-dtypes", "numpy", "scikit-learn"],
    base_image="python:3.9"
)
def preprocess(
    message: str,
    output_dataset: Output[Dataset],
    scaler_out: Output[Model]
):
    """Load the half-day table from BigQuery, clean it and fit a MinMaxScaler.

    Args:
        message: Free text stored (with a suffix) in the dataset metadata
            under the "Message" key.
        output_dataset: Output artifact; the cleaned, UNSCALED frame is written
            as CSV to ``output_dataset.path + ".csv"`` with the
            (Data, Pranzo_Cena) index as the first two columns.
        scaler_out: Output artifact; the fitted scaler is pickled to
            ``scaler_out.path + ".pkl"``.
    """
    import pickle

    from google.cloud import bigquery
    from sklearn.preprocessing import MinMaxScaler

    output_dataset.metadata["Message"] = message + " preprocessed dataset"

    # SECURITY NOTE(review): service-account key material is embedded in the
    # component source. Prefer the pipeline's own service account or a secret
    # manager; kept as-is here so behavior does not change.
    client = bigquery.Client.from_service_account_info({
      "type": "service_account",
      "project_id": "***",
      "private_key_id": "***",
      "private_key": "***",
      "client_email": "***",
      "client_id": "***",
      "auth_uri": "***",
      "token_uri":"***",
      "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
      "client_x509_cert_url": "***"
    })

    # Always release the client, even when the download fails.
    try:
        df = client.list_rows("Acqua_Farina.mezze_giornate_base").to_dataframe(create_bqstorage_client=True)
    finally:
        client.close()

    # Rename "Pranzo" to "A_Pranzo" so it sorts before the other values of
    # the column within the same date.
    df["Pranzo_Cena"] = df["Pranzo_Cena"].apply(lambda x: "A_Pranzo" if x == "Pranzo" else x)

    dropped_columns = [
        'Importo_netto', 'IVA', 'ARTICOLO_DA_VENDITA', 'BANCHETTI', 'BIMBI', 'CENTRIFUGATI',
        'COCKTAIL', 'COLAZIONI', 'COMBO', 'CONDPRIMIBABY', 'CUCINA', 'DESSERT', 'FORMAGGI',
        'FORMATI', 'IMPASTPIZZA', 'INSEGNE', 'MESDI_', 'MESDI_KARNE', 'PRODOTTIVEN', 'PROMO',
        'SECONDI', 'SMARTBOX', 'Ospiti', 'Fascia_oraria',
    ]
    df = (
        df.sort_values(by=["Data", "Pranzo_Cena"])
        .drop(columns=dropped_columns)
        .set_index(["Data", "Pranzo_Cena"])
    )

    n_lag_predizione = 14  # trailing rows excluded from the scaler fit
    history_rows = 150     # only the most recent rows are kept
    df = df.iloc[-history_rows:]

    # Fit the scaler on everything except the last n_lag_predizione rows.
    train = df.iloc[:-n_lag_predizione].to_numpy()
    scaler = MinMaxScaler()
    scaler.fit(train)

    # The ".csv" / ".pkl" suffixes are part of the contract with the
    # downstream component, which reads exactly these paths.
    df.to_csv(output_dataset.path+".csv")

    with open(scaler_out.path+'.pkl','wb') as f:
        pickle.dump(scaler,f)

In [39]:
@component(
    # "scikit-learn" replaces the deprecated "sklearn" alias (needed to
    # unpickle the scaler); tensorflow brings its own matching Keras.
    packages_to_install=["google-cloud-bigquery", "pandas", "db-dtypes", "tensorflow", "numpy", "scikit-learn"],
    base_image="python:3.9",
)
def make_prediction(
    message: str,
    df_in: Input[Dataset],
    scaler_in: Input[Model],
):
    """Train a dense forecaster on the preprocessed data and upload a forecast.

    Reads ``df_in.path + ".csv"`` and ``scaler_in.path + ".pkl"``, scales the
    data with the loaded scaler, trains on sliding windows of ``n_input``
    rows, forecasts ``n_lag_predizione`` steps recursively, maps the forecast
    back to the original scale and loads it into the BigQuery table
    ``Acqua_Farina.previsioni_test``.

    Args:
        message: Accepted for interface compatibility; not used.
        df_in: Dataset artifact whose CSV has "Data" and "Pranzo_Cena" as
            index columns followed by the feature columns.
        scaler_in: Model artifact holding the pickled, fitted scaler.

    Raises:
        ValueError: If the dataset has no more than ``n_input`` rows, so no
            training window can be built.
    """
    import os
    import pickle

    import numpy as np
    import pandas as pd
    import tensorflow as tf
    from google.cloud import bigquery

    # Restore the (Data, Pranzo_Cena) index; otherwise those two columns would
    # be counted as features and no longer match what the scaler was fitted on.
    df = pd.read_csv(df_in.path+".csv", index_col=["Data", "Pranzo_Cena"])

    # pickle.load executes arbitrary code; acceptable only because the file is
    # produced by the upstream step of this same pipeline.
    with open(scaler_in.path+'.pkl', 'rb') as f:
        scaler = pickle.load(f)

    n_lag_predizione = 14
    n_input = 90
    n_features = len(df.columns)

    # Train in the scaled space: the forecast is passed through
    # scaler.inverse_transform below, so the model must see scaled inputs.
    scaled = scaler.transform(df.to_numpy())
    if len(scaled) <= n_input:
        raise ValueError(
            f"Need more than {n_input} rows to build training windows, got {len(scaled)}"
        )

    # Sliding windows: rows [i - n_input, i) predict row i. Built with numpy
    # because the data must be array-like, not a DataFrame.
    windows = np.stack([scaled[i - n_input:i] for i in range(n_input, len(scaled))])
    targets = scaled[n_input:]

    model = tf.keras.Sequential(name="model_1_dense_mse_adam")
    model.add(tf.keras.layers.Flatten())
    for _ in range(7):
        model.add(tf.keras.layers.Dense(500, activation="relu"))
        model.add(tf.keras.layers.Dropout(rate=0.2))
    model.add(tf.keras.layers.Dense(n_features))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0015), loss="mse")

    # Keep the weights of the epoch with the lowest training loss. Accuracy is
    # not meaningful for a regression, so the loss is monitored instead.
    save_path = "model_experiments"
    os.makedirs(save_path, exist_ok=True)
    best_weights_path = os.path.join(save_path, model.name + ".weights.h5")
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=best_weights_path,
        verbose=0,
        save_best_only=True,
        save_weights_only=True,
        monitor="loss",
        mode="min",
    )
    model.fit(windows, targets, batch_size=128, epochs=1000, callbacks=[checkpoint])
    model.load_weights(best_weights_path)

    # Recursive forecast: each prediction is appended to the window and the
    # oldest row is dropped.
    test_predictions = []
    current_batch = scaled[-n_input:].reshape((1, n_input, n_features))
    for _ in range(n_lag_predizione):
        current_pred = model.predict(current_batch, verbose=0)[0]
        test_predictions.append(current_pred)
        current_batch = np.append(current_batch[:, 1:, :], [[current_pred]], axis=1)

    true_predictions = scaler.inverse_transform(np.array(test_predictions))

    # One row per forecast step; reusing the history index would fail because
    # it has a different length than the forecast.
    # NOTE(review): steps are numbered 1..n_lag_predizione in "forecast_step";
    # confirm this matches the schema expected by the destination table.
    df_prediction = pd.DataFrame(
        data=true_predictions,
        index=pd.RangeIndex(1, n_lag_predizione + 1, name="forecast_step"),
        columns=df.columns,
    )
    df_prediction = df_prediction.round().reset_index()

    # SECURITY NOTE(review): service-account key material is embedded in the
    # component source. Prefer the pipeline's own service account or a secret
    # manager; kept as-is here so behavior does not change.
    client2 = bigquery.Client.from_service_account_info({
      "type": "service_account",
      "project_id": "***",
      "private_key_id": "***",
      "private_key": "***",
      "client_email":"***",
      "client_id": "***",
      "auth_uri": "***",
      "token_uri": "***",
      "auth_provider_x509_cert_url": "***",
      "client_x509_cert_url": "***"
    })

    try:
        job = client2.load_table_from_dataframe(
            df_prediction, "Acqua_Farina.previsioni_test", job_config=None
        )  # Make an API request.
        job.result()  # Wait for the job to complete.
    finally:
        client2.close()


In [40]:
@dsl.pipeline(
    name="test-pipeline",
    pipeline_root=PIPELINE_ROOT,
)
def pipeline(message: str):
    # Two-step pipeline: preprocessing feeds its dataset and scaler artifacts
    # into the forecasting step. The `message` pipeline parameter is declared
    # but both steps receive fixed literals.
    preprocessing_step = preprocess(message="INIZIO PREPROCESSING")
    forecasting_step = make_prediction(
        message="FORECASTING",
        df_in=preprocessing_step.outputs["output_dataset"],
        scaler_in=preprocessing_step.outputs["scaler_out"],
    )

In [41]:
from kfp.v2 import compiler  # noqa: F811

# Compile the pipeline function into a JSON spec in the working directory;
# the same file name is later passed as template_path to aip.PipelineJob.
compiler.Compiler().compile(
    pipeline_func=pipeline, package_path="pipeline_previsionale.json"
)

In [1]:
DISPLAY_NAME = "test_" + TIMESTAMP

job = aip.PipelineJob(
    display_name=DISPLAY_NAME,
    template_path="pipeline_previsionale.json",
    pipeline_root=PIPELINE_ROOT,
    parameter_values={"message": "INIZIO"},
)

job.run()

! rm pipeline_test.json