# Labeled Patents - Vertex AI Pipeline

## Importing Auxiliary Libraries

In [1]:
import os

#!pip install --upgrade kfp
import kfp
from google.cloud import aiplatform
from kfp.v2 import compiler
from kfp.v2.dsl import component
from kfp.v2.google import experimental
from kfp.v2.google.client import AIPlatformClient

# Show the installed KFP SDK version so the run environment is visible in the output.
print(f'Kubeflow pipelines version: {kfp.__version__}')

Kubeflow pipelines version: 1.7.1


## Setting Notebook Inputs

In [2]:
# Google Cloud project, region and Cloud Storage bucket used by this notebook.
PROJECT = 'qwiklabs-gcp-00-373ac55d0e0a'
REGION = 'us-central1'
BUCKET = 'qwiklabs-gcp-00-373ac55d0e0a'

# Pipeline name, its root location in the bucket, and where the compiled
# job spec is written locally.
PIPELINE_NAME = 'process-pdf-patents'
PIPELINE_ROOT = 'gs://' + BUCKET + '/labeled_patents/pipeline_root'
LOCAL_PIPELINE_PATH = './vertex_pipelines'
LOCAL_PIPELINE_JSON = os.path.join(
    LOCAL_PIPELINE_PATH,
    'labeled_patents_pipeline2.json',
)

# BigQuery names for the results (not referenced by the cells visible here).
RESULTS_BQ_DATASET = 'demo_dataset'
RESULTS_OBJDET_TABLE = 'objdet'

# Model / compute settings (not referenced by the cells visible here).
MODEL_DISPLAY_NAME = 'labpat_model'
MACHINE_TYPE = 'n1-standard-16'
REPLICA_COUNT = 1
DOCKER_IMAGE_URI_CREATE_BQDATASET = 'us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-3:latest'

## Defining Vertex AI Components

### Component 1: Performing OCR on PDFs

In [3]:
@component()
def perform_ocr_on_pdfs():
    """Placeholder for the 'Performing OCR on PDFs' step; no logic is implemented yet."""

### Component 2: PDF to PNG conversion

In [4]:
@component()
def transform_pdfs_into_png():
    """Placeholder for the 'PDF to PNG conversion' step; no logic is implemented yet."""

### Component 3: Creating a BigQuery dataset to save results

In [5]:
@component()
def create_bq_results_dataset():
    """Placeholder for creating the BigQuery results dataset; no logic is implemented yet."""

### Component 4.1: Creating text classification results table

In [6]:
@component()
def create_text_class_results_table():
    """Placeholder for creating the text classification results table; no logic is implemented yet."""

### Component 4.2: Performing text classification

In [7]:
@component()
def text_class_predict():
    """Placeholder for the 'Performing text classification' step; no logic is implemented yet."""

### Component 4.3: Storing text classification results

In [8]:
@component()
def store_text_class_results():
    """Placeholder for the 'Storing text classification results' step; no logic is implemented yet."""

### Component 5.1: Creating image classification results table

In [9]:
@component()
def create_img_class_results_table():
    """Placeholder for creating the image classification results table; no logic is implemented yet."""

### Component 5.2: Performing image classification

In [10]:
@component()
def img_class_predict():
    """Placeholder for the 'Performing image classification' step; no logic is implemented yet."""

### Component 5.3: Storing image classification results

In [11]:
@component()
def store_img_class_results():
    """Placeholder for the 'Storing image classification results' step; no logic is implemented yet."""

### Component 6.1: Creating object detection results table

In [12]:
@component()
def create_obj_detection_results_table():
    """Placeholder for creating the object detection results table; no logic is implemented yet."""

### Component 6.2: Performing object detection

In [13]:
@component()
def obj_detection_predict():
    """Placeholder for the 'Performing object detection' step; no logic is implemented yet."""

### Component 6.3: Storing object detection results

In [14]:
@component()
def store_obj_detection_results():
    """Placeholder for the 'Storing object detection results' step; no logic is implemented yet."""

## Creating and Compiling the Vertex AI Pipeline

In [18]:
@kfp.dsl.pipeline(
    name=PIPELINE_NAME,
    description='Pipeline that process patents pdf files.',
    pipeline_root=PIPELINE_ROOT,
)
def pipeline():
    """Wire the notebook's components into a single ordered pipeline.

    Preprocessing runs as a chain: OCR, then PDF-to-PNG conversion, then
    BigQuery dataset creation. Three branches (text classification, image
    classification, object detection) each start after the dataset step and
    run create-table, predict, store-results in that order. No data is passed
    between tasks; ordering is declared with `.after()` only.
    """
    # Preprocessing chain.
    ocr_task = perform_ocr_on_pdfs()

    png_task = transform_pdfs_into_png()
    png_task.after(ocr_task)

    dataset_task = create_bq_results_dataset()
    dataset_task.after(png_task)

    # One (create table, predict, store results) triple per branch, listed in
    # the order the tasks are instantiated: text, image, object detection.
    branches = (
        (create_text_class_results_table,
         text_class_predict,
         store_text_class_results),
        (create_img_class_results_table,
         img_class_predict,
         store_img_class_results),
        (create_obj_detection_results_table,
         obj_detection_predict,
         store_obj_detection_results),
    )
    for make_table, predict, store in branches:
        table_task = make_table()
        table_task.after(dataset_task)

        predict_task = predict()
        predict_task.after(table_task)

        store_task = store()
        store_task.after(predict_task)
    

In [19]:
# Make sure the output directory exists. `exist_ok=True` makes this a no-op
# when the directory is already present, and `makedirs` also creates missing
# parent directories, so there is no check-then-create race.
os.makedirs(LOCAL_PIPELINE_PATH, exist_ok=True)

# Compile the pipeline function into the job spec file at LOCAL_PIPELINE_JSON.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path=LOCAL_PIPELINE_JSON,
)

## Launching the Vertex AI Pipeline

In [20]:
# API client bound to the project and region set in the notebook inputs.
# TODO: switch to the new Vertex AI client.
api_client = AIPlatformClient(project_id=PROJECT, region=REGION)

In [21]:
# Submit the compiled job spec; the client's return value is kept in `response`.
response = api_client.create_run_from_job_spec(
    LOCAL_PIPELINE_JSON, pipeline_root=PIPELINE_ROOT
)