# Notes:

---
## Create json file for predictions:

In [None]:
%%bigquery pred
-- Score every row of the prepped digits table with the BigQuery ML model;
-- the magic stores the query result in the `pred` DataFrame.
SELECT *
FROM ML.PREDICT(MODEL `statmike-mlops.digits.digits_lr`,(
    SELECT *
    FROM `statmike-mlops.digits.digits_prepped`)
  )

In [None]:
# Build the prediction request payload: take the rows up to and including index
# label 0, keep only the feature columns p0 through p63, and wrap the resulting
# list of records under the 'instances' key.
newob = {'instances': pred.loc[:0, 'p0':'p63'].to_dict(orient='records')}
newob

In [None]:
import json

# Persist the request payload to a local JSON file
with open('newob.json', 'w') as payload_file:
    json.dump(newob, payload_file)

---
### KFP Example

https://cloud.google.com/vertex-ai/docs/pipelines/build-pipeline#google-cloud-components

https://github.com/kubeflow/pipelines/tree/master/components/google-cloud

https://codelabs.developers.google.com/vertex-pipelines-intro#4

In [1]:
from google.cloud import aiplatform
from datetime import datetime
import kfp
import kfp.v2.dsl as dsl
from google_cloud_pipeline_components import aiplatform as gcc_aip

In [43]:
# Locations: project, region, and the Cloud Storage paths used by this run
REGION = 'us-central1'
PROJECT_ID = 'statmike-mlops'
BUCKET_NAME = f'gs://{PROJECT_ID}/digits/model/02c_automl'
# Second-resolution timestamp that is embedded in the job name and output folder
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
EXPERIMENT_NAME = '02C_AUTOML'
JOB_NAME = f'{EXPERIMENT_NAME}_{TIMESTAMP}'
MODEL_DIR = f'{BUCKET_NAME}/{JOB_NAME}'
# Keeps its trailing slash: later cells append a file name directly to it
PIPELINE_ROOT = f"{MODEL_DIR}/pipeline_root/"
PARENT = f"projects/{PROJECT_ID}/locations/{REGION}"

# Files: name of the local directory that holds the compiled pipeline spec
PACKAGE = EXPERIMENT_NAME

Grant the service account the `roles/storage.objectAdmin` role: in the Cloud Console go to IAM, select the account `projectnumber-compute@developer.gserviceaccount.com`, click Edit, and add the role.

In [38]:
SERVICE_ACCOUNT = !gcloud config list --format='value(core.account)' 
SERVICE_ACCOUNT = SERVICE_ACCOUNT[0]
SERVICE_ACCOUNT

'691911073727-compute@developer.gserviceaccount.com'

---

In [9]:
# Set the default project and region for subsequent Vertex AI SDK calls
aiplatform.init(project=PROJECT_ID, location=REGION)

In [10]:
# Start from an empty local directory for the compiled pipeline spec
!rm -rf {PACKAGE}
!mkdir {PACKAGE}

In [11]:
@dsl.component(base_image="python:3.9")
def product_name(text: str) -> str:
    """Pass the input text through unchanged as this component's output."""
    return text

In [12]:
from typing import NamedTuple

# The emoji package is pinned so the component keeps working when the image is
# rebuilt: an unpinned install pulls emoji 2.x, which removed `use_aliases`.
@dsl.component(packages_to_install=["emoji==2.0.0"])
def emoji(text: str) -> NamedTuple("Outputs",
    [
        ("emoji_text", str),  # Return parameters
        ("emoji", str),
    ]
):
    """Convert an emoji short name into the corresponding emoji character.

    Args:
        text: Emoji name without the surrounding colons, e.g. "sparkles".

    Returns:
        A pair of outputs: ``emoji_text`` (the input name, unchanged) and
        ``emoji`` (the result of emojizing ``:<text>:``).
    """
    # Imported inside the body because the package is installed for this
    # component via packages_to_install. Within the function the name `emoji`
    # refers to the module, not to this component.
    import emoji
    emoji_text = text
    # language='alias' replaces the removed use_aliases=True flag (emoji >= 2.0)
    emoji_str = emoji.emojize(':' + emoji_text + ':', language='alias')
    print("output one: {}; output_two: {}".format(emoji_text, emoji_str))
    return (emoji_text, emoji_str)

In [13]:
@dsl.component
def build_sentence(product: str, emoji: str, emojitext: str) -> str:
    """Build the sentence "<product> is <emoji>".

    Falls back to ``emojitext`` when ``emoji`` is an empty string.
    """
    print("We completed the pipeline, hooray!")
    suffix = emoji if len(emoji) > 0 else emojitext
    return product + " is " + suffix

In [14]:
@kfp.dsl.pipeline(name="hello-world", description="An intro pipeline", pipeline_root=PIPELINE_ROOT)
def intro_pipeline(text: str = "Vertex Pipelines", emoji_str: str = "sparkles"):
    """Wire the three components into a pipeline.

    Args:
        text: Product name forwarded to ``product_name``.
        emoji_str: Emoji short name forwarded to ``emoji``.

    ``product_name`` and ``emoji`` take the pipeline parameters directly;
    ``build_sentence`` consumes the outputs of both.
    """
    # Components are called with keyword arguments: newer KFP SDK releases
    # reject positional arguments, and keywords make the wiring explicit.
    product_task = product_name(text=text)
    emoji_task = emoji(text=emoji_str)
    build_sentence(
        product=product_task.output,
        emoji=emoji_task.outputs["emoji"],
        emojitext=emoji_task.outputs["emoji_text"],
    )

In [15]:
# Compile the pipeline function into a JSON job spec in the local package directory
pipeline_compiler = kfp.v2.compiler.Compiler()
pipeline_compiler.compile(
    package_path=f"{PACKAGE}/intro_pipeline_job.json",
    pipeline_func=intro_pipeline,
)

In [16]:
# Copy the compiled pipeline spec(s) from the local package directory to the pipeline root in Cloud Storage
!gsutil cp {PACKAGE}/*.json $PIPELINE_ROOT

Copying file://automl_02c/intro_pipeline_job.json [Content-Type=application/json]...
/ [1 files][  8.0 KiB/  8.0 KiB]                                                
Operation completed over 1 objects/8.0 KiB.                                      


In [17]:
# Define the pipeline job from the compiled spec that was copied to Cloud Storage.
# PIPELINE_ROOT already ends with '/', so the file name is appended to it directly.
plJob = aiplatform.PipelineJob(
    display_name=JOB_NAME,
    template_path=f"{PIPELINE_ROOT}intro_pipeline_job.json", #can be gs:, move file there first
    pipeline_root=PIPELINE_ROOT
)

In [18]:
# Submit the job to run as SERVICE_ACCOUNT; the log output below reports the job state as it is polled
response = plJob.run(service_account = SERVICE_ACCOUNT)

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/691911073727/locations/us-central1/pipelineJobs/hello-world-20210907225736
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/691911073727/locations/us-central1/pipelineJobs/hello-world-20210907225736')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/hello-world-20210907225736?project=691911073727
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/691911073727/locations/us-central1/pipelineJobs/hello-world-20210907225736 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/691911073727/locations/us-central1/pipelineJobs/hello-world-202109072

---

# 01 non-SQL versions

---
## Create Dataset

List BigQuery datasets in the project:

In [8]:
# Collect the IDs of all datasets returned by the BigQuery client
datum = [ds.dataset_id for ds in bq.list_datasets()]
print(datum)

[]


Create the dataset if missing:

In [9]:
target_dataset_ref = bigquery.dataset.DatasetReference(PROJECT_ID, DATASET_ID)
if DATASET_ID not in datum:
    # Pass the configured Dataset object (not just the ID) so that the
    # requested location is actually applied when the dataset is created.
    dataset = bigquery.Dataset(target_dataset_ref)
    dataset.location = REGION
    dataset = bq.create_dataset(dataset)
else:
    # The dataset already exists: fetch it so `dataset` is defined on every run
    dataset = bq.get_dataset(target_dataset_ref)

In [10]:
# Show the dataset object assigned in the previous cell
print(dataset)

Dataset(DatasetReference('statmike-mlops', 'digits'))


---
## Create Table

Load data to a table in the dataset:
- define job inputs
- run load job
- review resulting table

In [12]:
# Reference the destination table. The DatasetReference is built directly
# because Client.dataset() is deprecated; bq.project is the same default
# project that Client.dataset() would have used.
dataset_ref = bigquery.dataset.DatasetReference(bq.project, DATASET_ID)
table_ref = dataset_ref.table(TABLE_ID)

# Define job inputs: overwrite any existing table, read CSV, auto-detect the schema
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",
    source_format=bigquery.SourceFormat.CSV,
    autodetect=True,
)

# Run the load job and wait for it to finish
job = bq.load_table_from_uri(f'{URI}/{DATANAME}.csv', table_ref, job_config=job_config)
print("Starting job {}".format(job.job_id))
job.result()

# Review the resulting table
bq_table = bq.get_table(table_ref)
print("Loaded {} rows and {} columns to {}.".format(bq_table.num_rows, len(bq_table.schema), bq_table))

Starting job e2a69d0a-bb03-4a88-b8f3-42d2c58a367c
Loaded 1797 rows and 66 columns to Table(TableReference(DatasetReference('statmike-mlops', 'digits'), 'digits')).


Use the BigQuery Jupyter Magic to review a few records:

In [14]:
%%bigquery
-- Preview a few rows of the loaded table
SELECT * FROM `digits.digits` LIMIT 5

Query complete after 0.00s: 100%|██████████| 1/1 [00:00<00:00, 511.06query/s]                          
Downloading: 100%|██████████| 5/5 [00:00<00:00,  6.23rows/s]


Unnamed: 0,p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,...,p56,p57,p58,p59,p60,p61,p62,p63,target,target_OE
0,0.0,5.0,16.0,15.0,5.0,0.0,0.0,0.0,0.0,2.0,...,0.0,6.0,16.0,16.0,16.0,16.0,7.0,0.0,2,Even
1,0.0,5.0,16.0,12.0,1.0,0.0,0.0,0.0,0.0,5.0,...,0.0,8.0,16.0,16.0,16.0,16.0,4.0,0.0,2,Even
2,0.0,5.0,15.0,16.0,6.0,0.0,0.0,0.0,0.0,11.0,...,0.0,6.0,16.0,16.0,16.0,13.0,3.0,0.0,2,Even
3,0.0,4.0,15.0,15.0,8.0,0.0,0.0,0.0,0.0,8.0,...,0.0,7.0,14.0,11.0,0.0,0.0,0.0,0.0,2,Even
4,0.0,6.0,16.0,16.0,16.0,15.0,10.0,0.0,0.0,9.0,...,0.0,9.0,16.0,11.0,0.0,0.0,0.0,0.0,5,Odd


---
# Getting AutoML prediction Schema:

### Get Predictions: Python Client

In [143]:
# Look up endpoints by display name and take the first match
endpoint = aiplatform.Endpoint.list(filter=f'display_name={NOTEBOOK}_{DATANAME}_{TIMESTAMP}_endpoint')[0]
endpoint.display_name

'02c_fraud_20210922094451_endpoint'

Prediction requests must match the input format the model expects, and AutoML may convert the types of some variables. The following cells retrieve the deployed model from the endpoint and inspect its prediction schemata:

In [144]:
# Resource name of the first model listed on the endpoint
endpoint.list_models()[0].model

'projects/691911073727/locations/us-central1/models/976639004348776448'

In [145]:
# Build a Model object from the resource name of the first model on the endpoint
model = aiplatform.Model(model_name = endpoint.list_models()[0].model)

In [146]:
# URI of the schema that describes a single prediction instance
url = model.predict_schemata.instance_schema_uri

In [147]:
# `import urllib` alone does not guarantee that the `request` submodule is
# loaded, so it is imported explicitly (this still binds the name `urllib`).
import urllib.request

import yaml

# Download the instance schema and parse it. BaseLoader keeps every scalar as a
# plain string (e.g. 'true') and does not construct arbitrary Python objects.
# The context manager closes the HTTP response once parsing is done.
with urllib.request.urlopen(url) as schema_response:
    predict_schema = yaml.load(schema_response, Loader=yaml.BaseLoader)

In [148]:
# Input fields the model expects, with the type recorded for each in the schema
predict_schema['properties']

{'Time': {'nullable': 'true', 'type': 'string'},
 'V1': {'nullable': 'true', 'type': 'number'},
 'V2': {'nullable': 'true', 'type': 'number'},
 'V3': {'nullable': 'true', 'type': 'number'},
 'V4': {'nullable': 'true', 'type': 'number'},
 'V5': {'nullable': 'true', 'type': 'number'},
 'V6': {'nullable': 'true', 'type': 'number'},
 'V7': {'nullable': 'true', 'type': 'number'},
 'V8': {'nullable': 'true', 'type': 'number'},
 'V9': {'nullable': 'true', 'type': 'number'},
 'V10': {'nullable': 'true', 'type': 'number'},
 'V11': {'nullable': 'true', 'type': 'number'},
 'V12': {'nullable': 'true', 'type': 'number'},
 'V13': {'nullable': 'true', 'type': 'number'},
 'V14': {'nullable': 'true', 'type': 'number'},
 'V15': {'nullable': 'true', 'type': 'number'},
 'V16': {'nullable': 'true', 'type': 'number'},
 'V17': {'nullable': 'true', 'type': 'number'},
 'V18': {'nullable': 'true', 'type': 'number'},
 'V19': {'nullable': 'true', 'type': 'number'},
 'V20': {'nullable': 'true', 'type': 'number'},
