# Labeled Patents - Vertex AI Pipeline

## Importing Auxiliary Libraries

In [1]:
import os
from datetime import datetime

#!pip install --upgrade kfp
import kfp
from google.cloud import aiplatform
from kfp.v2 import compiler
from kfp.v2.dsl import component
from kfp.v2.google import experimental
from kfp.v2.google.client import AIPlatformClient

# Show which Kubeflow Pipelines SDK version this notebook is running against.
print(f'Kubeflow pipelines version: {kfp.__version__}')

Kubeflow pipelines version: 1.7.1


## Setting Notebook Inputs

In [2]:
# Run identifier: the timestamp (yymmdd_HHMMSS) at which this cell is executed.
UUID = datetime.now().strftime('%y%m%d_%H%M%S')

# Google Cloud project and region used by the clients and the pipeline.
PROJECT = 'qwiklabs-gcp-00-373ac55d0e0a'
REGION = 'us-central1'

# Cloud Storage bucket and the folder inside it that holds the PDFs.
BUCKET = 'patents_pipetest'
PDF_BUCKET_PATH = 'pdf'

# BigQuery dataset for the results; the name is suffixed with the run identifier.
RES_DATASET_NAME = f'docprocessing_iaa_{UUID}'
RES_DATASET_ID = '.'.join([PROJECT, RES_DATASET_NAME])

# Text classification (TCN) settings.
# TCN_MODEL_NAME is not referenced by any cell in this notebook yet;
# presumably a Vertex AI model ID -- TODO confirm.
TCN_MODEL_NAME = '2393478483993952256'
# Fully-qualified results table: <project>.<dataset>.tcn
TCN_RESTABLE_NAME = f'{RES_DATASET_ID}.tcn'
# Table schema as text. The table-creation component parses it with
# ast.literal_eval and expands every entry into bigquery.SchemaField(**entry).
# NOTE(review): "score" is typed STRING here, whereas the object detection
# schema types its "score" as FLOAT -- confirm which one is intended.
TCN_RESTABLE_SCHEMA = """
[
{"name": "file", "field_type": "STRING", "mode": "REQUIRED", "description": "File path."},
 {"name": "subject", "field_type": "STRING", "mode": "REQUIRED", "description": "Predicted class."},
 {"name": "score", "field_type": "STRING", "mode": "REQUIRED", "description": "Confidence of the prediction."}
]
"""

# Image classification (ICN) settings.
# ICN_MODEL_NAME / ICN_ENDPT_NAME are not referenced by any cell in this
# notebook yet; presumably Vertex AI model / endpoint IDs -- TODO confirm.
ICN_MODEL_NAME = '8925034949820547072'
# Left empty: no endpoint is configured here.
ICN_ENDPT_NAME = ''
# Fully-qualified results table: <project>.<dataset>.icn
ICN_RESTABLE_NAME = f'{RES_DATASET_ID}.icn'
# Table schema as text. The table-creation component parses it with
# ast.literal_eval and expands every entry into bigquery.SchemaField(**entry).
# NOTE(review): "score" is typed STRING here, whereas the object detection
# schema types its "score" as FLOAT -- confirm which one is intended.
ICN_RESTABLE_SCHEMA = """
[
 {"name":  "file", "field_type": "STRING", "mode": "REQUIRED", "description": "File path."},
 {"name": "label", "field_type": "STRING", "mode": "REQUIRED", "description": "Predicted class."},
 {"name": "score", "field_type": "STRING", "mode": "REQUIRED", "description": "Confidence of the prediction."}
]
"""

# Object detection (ODM) settings.
# ODM_MODEL_NAME / ODM_ENDPT_NAME are not referenced by any cell in this
# notebook yet; presumably Vertex AI model / endpoint IDs -- TODO confirm.
ODM_MODEL_NAME = '3409814256151953408'
ODM_ENDPT_NAME = '2074030773706424320'
# Fully-qualified results table: <project>.<dataset>.odm
ODM_RESTABLE_NAME = f'{RES_DATASET_ID}.odm'
# Schema of the object detection results table, as text. The table-creation
# component parses it with ast.literal_eval and expands every entry into
# bigquery.SchemaField(**entry), so the descriptions end up as column metadata.
# The bounding box is described by its top-left corner (xmin, ymin) and its
# bottom-right corner (xmax, ymax); the descriptions of "xmax" and "ymin" used
# to name the wrong axis/corner and have been corrected.
ODM_RESTABLE_SCHEMA = """
[
 {"name": "file",  "field_type": "STRING", "mode": "REQUIRED", "description": "File path."},
 {"name": "label", "field_type": "STRING", "mode": "REQUIRED", "description": "Predicted class."},
 {"name": "score", "field_type":  "FLOAT", "mode": "REQUIRED", "description": "Confidence of the prediction."},
 {"name": "xmin",  "field_type":  "FLOAT", "mode": "REQUIRED", "description": "X coordinate of the top left corner."},
 {"name": "xmax",  "field_type":  "FLOAT", "mode": "REQUIRED", "description": "X coordinate of the bottom right corner."},
 {"name": "ymin",  "field_type":  "FLOAT", "mode": "REQUIRED", "description": "Y coordinate of the top left corner."},
 {"name": "ymax",  "field_type":  "FLOAT", "mode": "REQUIRED", "description": "Y coordinate of the bottom right corner."}
]
"""

# Bucket holding the demo PDFs (source and destination are the same location).
src_path = dst_path = "gs://2021_08_16_tcn_dev"

# Pipeline identity and the locations of its artifacts.
PIPELINE_NAME = 'process-pdf-patents-iago'
PIPELINE_ROOT = f"gs://{BUCKET}/labeled_patents/pipeline_root"
LOCAL_PIPELINE_PATH = './vertex_pipelines'
LOCAL_PIPELINE_JSON = os.path.join(LOCAL_PIPELINE_PATH, 'labeled_patents_pipeline2.json')

# BigQuery dataset / table names for the object detection results.
RESULTS_BQ_DATASET = 'demo_dataset'
RESULTS_OBJDET_TABLE = 'objdet'

# Model and compute settings.
MODEL_DISPLAY_NAME = "labpat_model"
MACHINE_TYPE = "n1-standard-16"
REPLICA_COUNT = 1
DOCKER_IMAGE_URI_CREATE_BQDATASET = "us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-3:latest"

# Export the settings that the shell cells (`!gsutil ...`) read as $VARIABLES.
os.environ.update({
    'PROJECT': PROJECT,
    'REGION': REGION,
    'BUCKET': BUCKET,
    'PDF_BUCKET_PATH': PDF_BUCKET_PATH,
})

**Copying some demo files into the Bucket**

In [3]:
# Copy every PDF from the demo bucket into gs://$BUCKET/$PDF_BUCKET_PATH.
# Both variables are exported to the environment by the configuration cell;
# -m lets gsutil run the copies in parallel.
!gsutil -m cp gs://2021_08_16_tcn_dev/*.pdf gs://$BUCKET/$PDF_BUCKET_PATH

Copying gs://2021_08_16_tcn_dev/computer_vision_1.pdf [Content-Type=application/pdf]...
Copying gs://2021_08_16_tcn_dev/med_tech_8.pdf [Content-Type=application/pdf]...
Copying gs://2021_08_16_tcn_dev/us_076.pdf [Content-Type=application/pdf]...    
/ [3/3 files][168.9 KiB/168.9 KiB] 100% Done                                    
Operation completed over 3 objects/168.9 KiB.                                    


## Defining Vertex AI Components

### Component 1: Performing OCR on PDFs

In [4]:
@component(packages_to_install=['google-cloud-storage',  'google-cloud-vision'])
def perform_ocr_on_pdfs(src_path: str,
                        dst_path: str,
                        uuid: str,
                        project: str):
    """Pipeline component intended to OCR the PDF files stored in Cloud Storage.

    The nested helpers implement the whole flow (asynchronous Vision API
    annotation, extraction of plain-text files, creation of a JSONL index),
    but the component body itself is still a no-op: it ends in ``pass`` and
    does not call any of them.

    Args:
        src_path: ``gs://bucket[/directory]`` location of the PDFs.
        dst_path: ``gs://bucket[/directory]`` location for the OCR output.
        uuid: Run identifier; not used by the current (no-op) body.
        project: Project handed to the Cloud Storage client.
    """

    # IMPORTS:
    import json
    import logging
    import time
    import traceback as tb
    from pathlib import PurePosixPath
    from google.cloud import storage
    from google.cloud import vision


    # AUXILIARY FUNCTIONS:
    def to_trace_str(e):
        """Render an exception together with its traceback as one string."""
        return ''.join(tb.format_exception(None, e, e.__traceback__))


    def dismantle_path(gcs_path):
        """Split a GCS path into ``(bucket_name, directory, filename)``.

        The ``gs://`` scheme is optional. The last component is treated as a
        filename only when it contains a dot and is not the bucket itself.
        ``directory`` never includes the bucket name and is ``""`` when the
        path points at the bucket root.
        """
        parts = PurePosixPath(gcs_path).parts
        # PurePosixPath collapses "gs://bucket" into ("gs:", "bucket", ...).
        bucket_idx = 1 if parts[0] == "gs:" else 0
        bucket_name = parts[bucket_idx]

        tail = parts[bucket_idx + 1:]
        has_filename = bool(tail) and "." in tail[-1]
        filename = tail[-1] if has_filename else ""
        directory = "/".join(tail[:-1] if has_filename else tail)
        return bucket_name, directory, filename


    def gcs_uri(bucket_name, *segments):
        """Build a ``gs://`` URI from a bucket name and object-name segments.

        Empty segments are skipped so that a missing directory does not
        produce a double slash.
        """
        return "/".join(["gs://" + bucket_name] + [s for s in segments if s])


    def list_blobs_with_suffix(bucket, directory, suffix):
        """Return the blobs under ``directory`` whose name ends with ``suffix``.

        The directory is applied as an object-name prefix, so only blobs that
        really live under it are returned (the whole bucket when it is empty).
        """
        prefix = f"{directory}/" if directory else None
        return [blob for blob in bucket.list_blobs(prefix=prefix)
                if blob.name.endswith(suffix)]


    def ocr(src_path, dst_path, project):
        """Start optical character recognition for the pdf files in src_path.

        Args
            src_path: gs:// location whose .pdf blobs are sent to the Vision API.
            dst_path: gs:// location under which the JSON results are written;
                each result keeps the PDF's name relative to src_path.
            project: project used for the Cloud Storage client.

        Returns
            google.api_core.operation.Operation, or None when no pdf was found.
            To check if done use method .done()

        Raises
            Exception: any failure is logged with its traceback and re-raised,
            so the caller never receives a silent None for a failed request.

        Link to documentation:
            https://googleapis.dev/python/vision/latest/vision_v1/types.html#google.cloud.vision_v1.types.OutputConfig
            https://cloud.google.com/vision/docs/pdf

        """
        try:
            logging.info("started optical character recognition")

            src_bucket_name, src_directory, _ = dismantle_path(src_path)
            dst_bucket_name, dst_directory, _ = dismantle_path(dst_path)

            storage_client = storage.Client(project=project)
            src_bucket = storage_client.bucket(src_bucket_name)

            logging.info(f"src_bucket_name {src_bucket_name}, src_directory {src_directory}")

            blob_list = list_blobs_with_suffix(src_bucket, src_directory, ".pdf")
            logging.info(f"found {len(blob_list)} pdf files in bucket {src_bucket_name}")
            if not blob_list:
                # Nothing to annotate: do not send an empty batch request.
                return None

            client = vision.ImageAnnotatorClient()
            feature = vision.Feature(type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)
            src_prefix = f"{src_directory}/" if src_directory else ""

            async_requests = []
            for b_idx, blob in enumerate(blob_list):
                # blob.name already contains the directory, so the URIs are
                # built from the bucket name instead of re-joining src_path.
                relative_name = blob.name[len(src_prefix):]
                gcs_source_uri = gcs_uri(src_bucket_name, blob.name)
                gcs_destination_uri = gcs_uri(dst_bucket_name, dst_directory, relative_name)

                # source
                gcs_source = vision.GcsSource(uri=gcs_source_uri)
                input_config = vision.InputConfig(gcs_source=gcs_source, mime_type='application/pdf')

                # destination
                gcs_destination = vision.GcsDestination(uri=gcs_destination_uri)
                output_config = vision.OutputConfig(gcs_destination=gcs_destination, batch_size=1)

                logging.info(f"started ocr for {b_idx+1} of {len(blob_list)} files")
                async_requests.append(
                    vision.AsyncAnnotateFileRequest(
                        features=[feature],
                        input_config=input_config,
                        output_config=output_config
                    )
                )

            return client.async_batch_annotate_files(requests=async_requests)

        except Exception as e:
            logging.error(f"Error in method ocr: {to_trace_str(e)}")
            raise


    def create_text_files(gcs_path, project):
        """Write one .txt blob next to every OCR result found under gcs_path.

        Only blobs ending in "output-1-to-1.json" are read.
        NOTE(review): with batch_size=1 this presumably is the first page of
        each PDF only -- confirm how multi-page documents should be handled.

        Raises
            Exception: any failure is logged with its traceback and re-raised.
        """
        try:
            # init bucket
            bucket_name, directory, _ = dismantle_path(gcs_path)
            storage_client = storage.Client(project=project)
            bucket = storage_client.bucket(bucket_name)
            blob_list = list_blobs_with_suffix(bucket, directory, "output-1-to-1.json")

            for b_idx, blob in enumerate(blob_list):
                logging.info(f"creating {b_idx+1} of {len(blob_list)} text files")
                response = json.loads(blob.download_as_string())
                # A page without recognised text has no 'fullTextAnnotation';
                # write an empty file instead of aborting the remaining ones.
                text = response['responses'][0].get('fullTextAnnotation', {}).get('text', '')
                txt_path = blob.name.replace("output-1-to-1.json", ".txt")
                bucket.blob(txt_path).upload_from_string(text)

            logging.info("finished creating text files")

        except Exception as e:
            logging.error(f"Error in method create_text_files: {to_trace_str(e)}")
            raise


    def get_extension(mime_type):
        """Map a mime type to a file extension; anything unknown maps to .txt."""
        if mime_type == "image/png":
            return ".png"
        return ".txt"


    def create_jsonl(gcs_path, mime_type, filename, project):
        """create jsonl out of files in bucket

        Args
            gcs_path (str): bucket or dir where files are located
            mime_type (str): the files mimetype
            filename (str): the jsonl filename
            project (str): project used for the Cloud Storage client

        Returns
            full path of jsonl; the file is uploaded to exactly that location
            (inside the directory of gcs_path, not at the bucket root).

        Raises
            Exception: any failure is logged with its traceback and re-raised.
        """
        try:
            bucket_name, directory, _ = dismantle_path(gcs_path)
            storage_client = storage.Client(project=project)
            bucket = storage_client.bucket(bucket_name)
            extension = get_extension(mime_type)

            blob_list = list_blobs_with_suffix(bucket, directory, extension)

            # One JSON object per line, each pointing at one file.
            jsonl_content = "".join(
                json.dumps(
                    {
                    "content": gcs_uri(bucket_name, blob.name),
                    "mimeType": mime_type
                    }
                ) + "\n"
                for blob in blob_list
            )

            jsonl_blob_name = f"{directory}/{filename}" if directory else filename
            bucket.blob(jsonl_blob_name).upload_from_string(jsonl_content)
            logging.info(f"uploaded jsonl {filename} to bucket {bucket_name}")

            return gcs_uri(bucket_name, jsonl_blob_name)

        except Exception as e:
            logging.error(f"Error in jsonl creation: {to_trace_str(e)}")
            raise


    def preprocess_ocr(src_path, dst_path, jsonl_filename, project):
        """Run OCR, wait for it, create the text files and return the jsonl path."""
        ocr_operation = ocr(src_path, dst_path, project)

        # ocr() returns None when there was nothing to annotate.
        while ocr_operation is not None and not ocr_operation.done():
            logging.info("wait for ocr to finish")
            time.sleep(5)

        create_text_files(dst_path, project)
        return create_jsonl(gcs_path=dst_path, mime_type="text/plain", filename=jsonl_filename, project=project)



    # PIPELINE COMPONENT MAIN CODE:
    # Intentionally a no-op for now: none of the helpers above is invoked.
    pass
#    logging.basicConfig(level=logging.INFO)
#    logger = logging.getLogger(__name__)
#     logging.info(f"Starting the processing of pdfs with the OCR functionality of Google Vision API.")
#     
#         
#     # save everything in the same bucket
#     dst_path = src_path
#     jsonl_filename_tcn = f"tcn_{uuid}.jsonl"
#         
#     # create ocr
#     jsonl_path_tcn = preprocess_ocr(src_path, src_path, jsonl_filename_tcn, project)
#     
#     # return path where jsonl with .txt files is saved
#     return jsonl_path_tcn 

### Component 2: PDF to PNG conversion

In [5]:
@component()
def transform_pdfs_into_png():
    """Placeholder for the PDF to PNG conversion step; the body is a no-op."""
    pass

### Component 3: Creating a BigQuery dataset to save results

In [6]:
@component(packages_to_install=['google-cloud-bigquery'])
def create_bq_results_dataset(project: str,
                              dataset_id: str):
    """Make sure the BigQuery dataset that stores the results exists.

    The dataset is looked up first and created in the "US" location only when
    the lookup reports that it does not exist. Any other lookup failure
    (permissions, malformed ID, ...) is propagated instead of being mistaken
    for a missing dataset.

    Args:
        project: Project used to build the BigQuery client.
        dataset_id: Dataset ID; the pipeline passes ``<project>.<dataset>``.
    """
    import logging
    from google.api_core.exceptions import NotFound
    from google.cloud import bigquery

    logging.basicConfig(level=logging.INFO)
    logging.info("Starting the creation of a BigQuery dataset to store analyses results.")

    bq = bigquery.Client(project=project)
    try:
        bq.get_dataset(dataset_id)  # Make an API request.
        logging.info("Dataset {} already exists".format(dataset_id))
    except NotFound:
        logging.info("Dataset {} is not found".format(dataset_id))
        dataset = bigquery.Dataset(dataset_id)
        # The location has to be set before the dataset is created.
        dataset.location = "US"
        # exists_ok covers a concurrent creation between lookup and create.
        dataset = bq.create_dataset(dataset, exists_ok=True, timeout=30)  # Make an API request.
        logging.info("Created dataset {}.{}".format(bq.project, dataset.dataset_id))

    # Only reached when the dataset really exists (no longer logged on failure).
    logging.info(f"Finished creating or loading dataset {dataset_id}")

### Component 4.1: Creating text classification results table

In [7]:
@component(packages_to_install=['google-cloud-bigquery'])
def create_text_class_results_table(project:str,
                                    dataset_id:str,
                                    table_id:str,
                                    schema:str):
    """Create the BigQuery table that stores text classification results.

    The dataset is created first (location "US") when it does not exist yet.
    The table is created only after the dataset step succeeded, so a dataset
    failure is reported as such instead of being masked by a table error.

    Args:
        project: Project used to build the BigQuery client.
        dataset_id: Dataset that must exist before the table is created.
        table_id: ID of the table to create; the pipeline passes
            ``<project>.<dataset>.<table>``.
        schema: Text holding a list of dicts; each dict is expanded into
            ``bigquery.SchemaField(**dict)``.

    Raises:
        google.api_core.exceptions.Conflict: if the table already exists
            (raised by ``create_table``, per the BigQuery client docs).
    """
    import ast
    import logging
    from google.api_core.exceptions import NotFound
    from google.cloud import bigquery

    logging.basicConfig(level=logging.INFO)
    logging.info("Starting the creation of a BQ table to store text classification results.")

    bq = bigquery.Client(project=project)

    try:
        bq.get_dataset(dataset_id)  # Make an API request.
        logging.info("Dataset {} already exists".format(dataset_id))
    except NotFound:
        # Only a missing dataset triggers creation; other errors propagate.
        logging.info("Dataset {} is not found".format(dataset_id))
        dataset = bigquery.Dataset(dataset_id)
        dataset.location = "US"
        dataset = bq.create_dataset(dataset, exists_ok=True, timeout=30)  # Make an API request.
        logging.info("Created dataset {}.{}".format(bq.project, dataset.dataset_id))

    # create table
    fields = [bigquery.SchemaField(**dct) for dct in ast.literal_eval(schema)]
    bq.create_table(bigquery.Table(table_id, schema=fields))
    logging.info(f"Created table {table_id}")
    

### Component 4.2: Performing text classification

In [8]:
@component()
def text_class_predict():
    """Placeholder for the text classification prediction step; the body is a no-op."""
    pass

### Component 4.3: Storing text classification results

In [9]:
@component()
def store_text_class_results():
    """Placeholder for storing the text classification results; the body is a no-op."""
    pass

### Component 5.1: Creating image classification results table

In [10]:
@component(packages_to_install=['google-cloud-bigquery'])
def create_img_class_results_table(project:str,
                                   dataset_id:str,
                                   table_id:str,
                                   schema:str):
    """Create the BigQuery table that stores image classification results.

    The dataset is created first (location "US") when it does not exist yet.
    The table is created only after the dataset step succeeded, so a dataset
    failure is reported as such instead of being masked by a table error.

    Args:
        project: Project used to build the BigQuery client.
        dataset_id: Dataset that must exist before the table is created.
        table_id: ID of the table to create; the pipeline passes
            ``<project>.<dataset>.<table>``.
        schema: Text holding a list of dicts; each dict is expanded into
            ``bigquery.SchemaField(**dict)``.

    Raises:
        google.api_core.exceptions.Conflict: if the table already exists
            (raised by ``create_table``, per the BigQuery client docs).
    """
    import ast
    import logging
    from google.api_core.exceptions import NotFound
    from google.cloud import bigquery

    logging.basicConfig(level=logging.INFO)
    logging.info("Starting the creation of a BQ table to store image classification results.")

    bq = bigquery.Client(project=project)

    try:
        bq.get_dataset(dataset_id)  # Make an API request.
        logging.info("Dataset {} already exists".format(dataset_id))
    except NotFound:
        # Only a missing dataset triggers creation; other errors propagate.
        logging.info("Dataset {} is not found".format(dataset_id))
        dataset = bigquery.Dataset(dataset_id)
        dataset.location = "US"
        dataset = bq.create_dataset(dataset, exists_ok=True, timeout=30)  # Make an API request.
        logging.info("Created dataset {}.{}".format(bq.project, dataset.dataset_id))

    # create table
    fields = [bigquery.SchemaField(**dct) for dct in ast.literal_eval(schema)]
    bq.create_table(bigquery.Table(table_id, schema=fields))
    logging.info(f"Created table {table_id}")


### Component 5.2: Performing image classification

In [11]:
@component()
def img_class_predict():
    """Placeholder for the image classification prediction step; the body is a no-op."""
    pass

### Component 5.3: Storing image classification results

In [12]:
@component()
def store_img_class_results():
    """Placeholder for storing the image classification results; the body is a no-op."""
    pass

### Component 6.1: Creating object detection results table

In [13]:
@component(packages_to_install=['google-cloud-bigquery'])
def create_obj_detection_results_table(project:str,
                                       dataset_id:str,
                                       table_id:str,
                                       schema:str):
    """Create the BigQuery table that stores object detection results.

    The dataset is created first (location "US") when it does not exist yet.
    The table is created only after the dataset step succeeded, so a dataset
    failure is reported as such instead of being masked by a table error.

    Args:
        project: Project used to build the BigQuery client.
        dataset_id: Dataset that must exist before the table is created.
        table_id: ID of the table to create; the pipeline passes
            ``<project>.<dataset>.<table>``.
        schema: Text holding a list of dicts; each dict is expanded into
            ``bigquery.SchemaField(**dict)``.

    Raises:
        google.api_core.exceptions.Conflict: if the table already exists
            (raised by ``create_table``, per the BigQuery client docs).
    """
    import ast
    import logging
    from google.api_core.exceptions import NotFound
    from google.cloud import bigquery

    logging.basicConfig(level=logging.INFO)
    # The original message read "t store"; the typo is fixed here.
    logging.info("Starting the creation of a BQ table to store object detection results.")

    bq = bigquery.Client(project=project)

    try:
        bq.get_dataset(dataset_id)  # Make an API request.
        logging.info("Dataset {} already exists".format(dataset_id))
    except NotFound:
        # Only a missing dataset triggers creation; other errors propagate.
        logging.info("Dataset {} is not found".format(dataset_id))
        dataset = bigquery.Dataset(dataset_id)
        dataset.location = "US"
        dataset = bq.create_dataset(dataset, exists_ok=True, timeout=30)  # Make an API request.
        logging.info("Created dataset {}.{}".format(bq.project, dataset.dataset_id))

    # create table
    fields = [bigquery.SchemaField(**dct) for dct in ast.literal_eval(schema)]
    bq.create_table(bigquery.Table(table_id, schema=fields))
    logging.info(f"Created table {table_id}")
    

### Component 6.2: Performing object detection

In [14]:
@component()
def obj_detection_predict():
    """Placeholder for the object detection prediction step; the body is a no-op."""
    pass


@component(packages_to_install=['google-cloud-bigquery', 'google-cloud-storage',  'google-cloud-aiplatform'])
def perform_object_detection(project: str,
                             region: str,
                             bucket: str,
                             img_blob: str,
                             objdet_endpoint: str,
                             dataset_name: str,
                             table_name: str):
    """Run online object detection on the images stored in a bucket.

    Every blob of ``bucket`` whose name matches ``img_blob`` is downloaded to
    a temporary directory and sent to the Vertex AI endpoint; the first
    detection of each image is collected in a list of row dicts.

    NOTE(review): the collected rows are neither returned nor written
    anywhere, and ``dataset_name`` / ``table_name`` are accepted but unused --
    the storing step still has to be implemented.

    Args:
        project: Project that owns the prediction endpoint.
        region: Location of the endpoint; also selects the regional API host.
        bucket: Name of the bucket that holds the images.
        img_blob: Unix shell-style pattern (fnmatch) selecting the blobs.
        objdet_endpoint: ID of the object detection endpoint.
        dataset_name: Currently unused.
        table_name: Currently unused.
    """

    # IMPORTS
    import base64
    import logging
    import os
    import tempfile
    from fnmatch import fnmatch
    from google.cloud import aiplatform
    from google.cloud import storage
    from google.cloud.aiplatform.gapic.schema import predict


    # AUXILIARY FUNCTIONS
    def get_bucket_file_list(storage_client, bucket_name, fname_template='*'):
        '''!@brief Function that returns the list of files in a bucket.
        @param storage_client (storage.Client) Client used to list the blobs.
        @param bucket_name (string) Bucket name.
        @param fname_template (string) Template for filtering blob names
        that supports Unix shell-style wildcards. For more info:
        https://docs.python.org/3/library/fnmatch.html

        @return (list of strings) List of blob names in the bucket which
        fulfil the template structure.
        '''
        blob_names = (blob.name for blob in storage_client.list_blobs(bucket_name))
        return [name for name in blob_names if fnmatch(name, fname_template)]


    def predict_object_detection_sample(
        project: str,
        endpoint_id: str,
        filename: str,
        location: str = "us-central1",
        api_endpoint: str = None):
        """Send one local image to the endpoint and return its predictions.

        Returns a list with one dict per prediction in the response. When
        ``api_endpoint`` is not given, the regional host is derived from
        ``location``.
        """
        # The AI Platform services require regional API endpoints.
        if api_endpoint is None:
            api_endpoint = f"{location}-aiplatform.googleapis.com"
        client_options = {"api_endpoint": api_endpoint}
        client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
        with open(filename, "rb") as f:
            file_content = f.read()

        # The format of each instance should conform to the deployed model's prediction input schema.
        encoded_content = base64.b64encode(file_content).decode("utf-8")
        instance = predict.instance.ImageObjectDetectionPredictionInstance(content=encoded_content).to_value()
        parameters = predict.params.ImageObjectDetectionPredictionParams(confidence_threshold=0.5, max_predictions=5).to_value()
        endpoint = client.endpoint_path(project=project, location=location, endpoint=endpoint_id)
        response = client.predict(endpoint=endpoint, instances=[instance], parameters=parameters)
        return [dict(prediction) for prediction in response.predictions]


    # MAIN BODY
    storage_client = storage.Client()
    # The images live in `bucket` (the original opened a bucket named after
    # the project and overwrote the `bucket` argument while doing so).
    source_bucket = storage_client.bucket(bucket)
    files = get_bucket_file_list(storage_client,
                                 bucket_name=bucket,
                                 fname_template=img_blob)

    predictions = []
    # The temporary directory is removed on exit, so no downloaded image or
    # file descriptor is left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = os.path.join(tmp_dir, 'image.png')
        for file in files:
            # Downloading the file as a temporary file:
            source_bucket.blob(file).download_to_filename(local_path)

            # Obtaining online prediction:
            preds = predict_object_detection_sample(
                project=project,
                endpoint_id=objdet_endpoint,
                filename=local_path,
                location=region)

            # Nothing detected above the confidence threshold: skip the image
            # instead of failing with an IndexError.
            if not preds or not preds[0].get('displayNames'):
                logging.warning(f"no object detected in {file}")
                continue

            # Parsing prediction (first detection only):
            objdet_pred = preds[0]['displayNames'][0]
            objdet_confidence = preds[0]['confidences'][0]
            bbox = preds[0]['bboxes'][0]
            objdet_xmin, objdet_xmax = bbox[0], bbox[1]
            objdet_ymin, objdet_ymax = bbox[2], bbox[3]

            # Collecting the row for the results table:
            predictions.append(
                {'file_name': f'{file}'.split('/')[-1],
                 'objdet_pred': f'{objdet_pred}',
                 'objdet_confidence': f'{objdet_confidence}',
                 'objdet_xmin': f'{objdet_xmin}',
                 'objdet_xmax': f'{objdet_xmax}',
                 'objdet_ymin': f'{objdet_ymin}',
                 'objdet_ymax': f'{objdet_ymax}'}
            )


        


### Component 6.3: Storing object detection results

In [15]:
@component()
def store_obj_detection_results():
    """Placeholder for storing the object detection results; the body is a no-op."""
    pass

#         table_id = f'{project}.{dataset_name}.{table_name}'

#         errors = client.insert_rows_json(table_id, rows_to_insert)  # Make an API request.
#         if errors == []:
#             print("New rows have been added.")
#         else:
#             print("Encountered errors while inserting rows: {}".format(errors))
#         os.remove(f'{path}.png')
#         os.remove(path)    

## Creating and Compiling the Vertex AI Pipeline

In [20]:
@kfp.dsl.pipeline(
    name=PIPELINE_NAME,
    description='Pipeline that process patents pdf files.',
    pipeline_root=PIPELINE_ROOT,
)
def pipeline():
    """Wire the components into the patent-processing DAG.

    Preprocessing runs sequentially (OCR, PNG conversion, results dataset).
    Three branches then hang off the dataset step: text classification, image
    classification and object detection. Each branch creates its results
    table, predicts and stores, in that order.
    """
    pdf_location = f"gs://{BUCKET}/{PDF_BUCKET_PATH}"

    # Preprocessing pipeline:
    ocr_step = perform_ocr_on_pdfs(
        src_path=pdf_location,
        dst_path=pdf_location,
        uuid=UUID,
        project=PROJECT,
    )

    png_step = transform_pdfs_into_png()
    png_step.after(ocr_step)

    dataset_step = create_bq_results_dataset(project=PROJECT, dataset_id=RES_DATASET_ID)
    dataset_step.after(png_step)

    # One entry per branch: (table component, predict component,
    # store component, results table, table schema).
    branches = (
        # Text classification pipeline:
        (create_text_class_results_table, text_class_predict,
         store_text_class_results, TCN_RESTABLE_NAME, TCN_RESTABLE_SCHEMA),
        # Image classification pipeline:
        (create_img_class_results_table, img_class_predict,
         store_img_class_results, ICN_RESTABLE_NAME, ICN_RESTABLE_SCHEMA),
        # Object detection pipeline:
        (create_obj_detection_results_table, obj_detection_predict,
         store_obj_detection_results, ODM_RESTABLE_NAME, ODM_RESTABLE_SCHEMA),
    )

    for make_table, predict, store, table_id, schema in branches:
        table_step = make_table(project=PROJECT,
                                dataset_id=RES_DATASET_ID,
                                table_id=table_id,
                                schema=schema)
        table_step.after(dataset_step)

        predict_step = predict()
        predict_step.after(table_step)

        store_step = store()
        store_step.after(predict_step)
    
    

In [21]:
# Make sure the output folder exists, then compile the pipeline into its JSON job spec.
os.makedirs(LOCAL_PIPELINE_PATH, exist_ok=True)

compiler.Compiler().compile(pipeline_func=pipeline, package_path=LOCAL_PIPELINE_JSON)

## Launching the Vertex AI Pipeline

In [22]:
# Client used to submit the compiled pipeline to the project/region set above.
# TODO: use the new Vertex AI.
api_client = AIPlatformClient(project_id=PROJECT, region=REGION)

In [23]:
# Submit the compiled job spec; artifacts are written under PIPELINE_ROOT.
response = api_client.create_run_from_job_spec(LOCAL_PIPELINE_JSON, pipeline_root=PIPELINE_ROOT)