## Define environment variables

In [None]:
# Set `PATH` to include user python binary directory and a directory containing `skaffold`.
PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

In [None]:
# Read GCP project id from env.
shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
GOOGLE_CLOUD_PROJECT=shell_output[0]
%env GOOGLE_CLOUD_PROJECT={GOOGLE_CLOUD_PROJECT}
print("GCP project ID:" + GOOGLE_CLOUD_PROJECT)

## Create BigQuery Dataset

In [None]:
# Names of the dataset and table used for the logging data.
BQ_DATASET_NAME = 'data_validation'
BQ_TABLE_NAME = 'sentiment_analysis_logs'

from google.cloud import bigquery

# BigQuery client bound to the project id resolved earlier in the notebook.
client = bigquery.Client(GOOGLE_CLOUD_PROJECT)

# Collect the ids of every dataset that currently exists in the project.
dataset_names = []
for existing in client.list_datasets(GOOGLE_CLOUD_PROJECT):
    dataset_names.append(existing.dataset_id)

# Describe the target dataset; it is only sent to the API if it is missing.
dataset = bigquery.Dataset(f"{GOOGLE_CLOUD_PROJECT}.{BQ_DATASET_NAME}")
dataset.location = "US"

dataset_exists = BQ_DATASET_NAME in dataset_names
if not dataset_exists:
    dataset = client.create_dataset(dataset)
    print(f"Created dataset {client.project}.{dataset.dataset_id}")

print("BigQuery dataset is ready.")

## Create BigQuery Table

In [None]:
import json

# Column definitions for the logging table, written in the JSON schema
# layout (name / type / mode per column) and saved to 'table_schema.json'.
table_schema_json = [
 {"name":"model", "type": "STRING", "mode": "REQUIRED"},
 {"name":"model_version", "type": "STRING", "mode":"REQUIRED"},
 {"name":"time", "type": "TIMESTAMP", "mode": "REQUIRED"},
 {"name":"raw_data", "type": "STRING", "mode": "REQUIRED"},
 {"name":"raw_prediction", "type": "STRING", "mode": "NULLABLE"},
 {"name":"groundtruth", "type": "STRING", "mode": "NULLABLE"}]

# Use a context manager so the file is flushed and closed before any later
# cell reads it, rather than relying on garbage collection of the handle.
with open('table_schema.json', 'w') as schema_file:
    json.dump(table_schema_json, schema_file)

In [None]:
!bq mk --table \
 --project_id={GOOGLE_CLOUD_PROJECT} \
 {GOOGLE_CLOUD_PROJECT}:{BQ_DATASET_NAME}.{BQ_TABLE_NAME} \
 'table_schema.json'

## Activate logging

In [None]:
import googleapiclient.discovery
import re

PIPELINE_NAME = 'TFX_CICD_sentiment_analysis'
LOCATION = 'us-central1'
# Matches everything that follows "versions/" in a version resource name.
pattern = re.compile(r'(?<=versions/).+')
# Build a service object for the 'ml' (Google Cloud Machine Learning Engine)
# v1 API using the Google API Python client library. The service name and the
# version are passed positionally: `build` has no `api_name` / `api_version`
# keyword parameters, so the keyword form raises a TypeError.
service = googleapiclient.discovery.build('ml', 'v1')

# List every version of the model named after the pipeline.
parent = f"projects/{GOOGLE_CLOUD_PROJECT}/models/{PIPELINE_NAME}"
versions_list = service.projects().models().versions().list(parent=parent).execute()

# Get the resource name of the default model version and its short version id.
try:
    name = [i['name'] for i in versions_list['versions'] if i.get('isDefault')][0]
    VERSION_NAME = pattern.search(name).group()
    print(name)
except (KeyError, IndexError, AttributeError):
    # KeyError: the response has no 'versions' (or an entry has no 'name');
    # IndexError: no version is flagged as default;
    # AttributeError: the name contains no "versions/" segment.
    # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
    print('The request did not return a default version')

In [None]:
# Enable request logging on the model version selected above, writing the
# logged requests to the BigQuery table created earlier.
# NOTE(review): 1.0 presumably means "log every request" (a fraction, not a
# percent) — confirm against the API reference.
sampling_percentage = 1.0
bq_full_table_name = f'{GOOGLE_CLOUD_PROJECT}.{BQ_DATASET_NAME}.{BQ_TABLE_NAME}'

service = googleapiclient.discovery.build('ml', 'v1')

# Only the `requestLoggingConfig` field of the version is patched.
request_logging = {
    "samplingPercentage": sampling_percentage,
    "bigqueryTableName": bq_full_table_name,
}
logging_config = {"requestLoggingConfig": request_logging}

versions_api = service.projects().models().versions()
versions_api.patch(name=name, body=logging_config, updateMask="requestLoggingConfig").execute()