#### This is an tutorial on using the bodo platform sdk. It will also guide you on how to trigger/ schedule Bodo jobs on the platform, from either your laptop, Lambda, cron etc.

#### Step 1
Install Bodo SDK , you can installing simply by using pip

In [2]:
!pip install bodosdk

Collecting bodosdk
  Downloading bodosdk-1.2.0-py3-none-any.whl (34 kB)
Collecting pyjwt~=2.3.0
  Using cached PyJWT-2.3.0-py3-none-any.whl (16 kB)
Collecting pydantic~=1.9
  Downloading pydantic-1.10.2-cp39-cp39-macosx_10_9_x86_64.whl (3.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
Collecting requests~=2.27.1
  Using cached requests-2.27.1-py2.py3-none-any.whl (63 kB)
Collecting charset-normalizer~=2.0.0
  Using cached charset_normalizer-2.0.12-py3-none-any.whl (39 kB)
Installing collected packages: pyjwt, pydantic, charset-normalizer, requests, bodosdk
  Attempting uninstall: charset-normalizer
    Found existing installation: charset-normalizer 2.1.0
    Uninstalling charset-normalizer-2.1.0:
      Successfully uninstalled charset-normalizer-2.1.0
  Attempting uninstall: requests
    Found existing installation: requests 2.28.1
    Uninstalling requests-2.28.1:
      Successfully uninst

#### Step 2
Create an access token for your workspace and reference it here. check the short video below 

In [17]:
%%HTML
<div align="center">
<iframe align="middle" width="700" height="400" src="https://www.youtube.com/embed/qAPZba0cQQg" frameborder="0" allowfullscreen></iframe>
</div>

#### Generate an SDK client using the API token, as below

In [36]:
from bodosdk.models import WorkspaceKeys, JobResponse,WorkspaceSource,JobCluster,ClusterStatus
from bodosdk.client import get_bodo_client

from bodosdk.models.job import JobClusterDefinition, JobDefinition

import os

with open("../credentials/bodo.creds","r") as f:
    for line in f:
        if(line.startswith("#")):
            continue
        else:
            args=line.strip().split("=")
            os.environ[args[0]]=args[1]

def getBodoClient():
    keys = WorkspaceKeys(
        client_id=os.environ["key"],
        secret_key=os.environ["secret"]
    )
    return get_bodo_client(keys, print_logs=False)

client = getBodoClient()

#### Once you have an instance of the Bodo SDK Client, you can use this client to platform operations.
#### Lets get a list of clusters from the platform and their status.

In [37]:
import pandas as pd
def listClusters(client):
    resp=client.cluster.list()
    clusters={"name":[],"status":[],"uuid":[],"instance_type":[],"workers_quantity":[]}
    for cluster in resp:
        clusters["uuid"].append(cluster.uuid)
        clusters["name"].append(cluster.name)
        clusters["status"].append(cluster.status)
        clusters["instance_type"].append(cluster.instance_type)
        clusters["workers_quantity"].append(cluster.workers_quantity)
    clusterdf=pd.DataFrame(clusters)
    return clusterdf
clusterdf = listClusters(client)
clusterdf

Unnamed: 0,name,status,uuid,instance_type,workers_quantity
0,cluster-exampleJob,ClusterStatus.NEW,917fdccc-9c6f-4acf-839f-14d9e6ece98c,c5.2xlarge,1
1,Snowflake-demo-v1,ClusterStatus.PAUSED,98b2e2d0-75c1-4bf9-9830-1ffbe52fe657,c5.2xlarge,2
2,clustersmall,ClusterStatus.PAUSED,1a90d8ec-b452-47fe-9ad1-85e59c439cab,c5.2xlarge,2
3,cluster20225,ClusterStatus.RUNNING,b2460a46-44ae-46ed-9bbe-511543b66979,c5.2xlarge,2


#### Lets Start a cluster that is paused currently
#### copy the uuid of one of the clusters from the list above and paste in the code below

In [38]:
clusterId="b2460a46-44ae-46ed-9bbe-511543b66979"

In [8]:
cluster=client.cluster.resume(clusterId)

In [9]:
client.cluster.get(clusterId).status

<ClusterStatus.RESUMING: 'RESUMING'>

In [10]:
### check the status in a loop till the cluster is running. In the future, the api will have a blocking method for resume.
import time
def checkStatus(client,clusterId):
    while(True):
        clusterstatus = client.cluster.get(clusterId).status
        print(clusterstatus)
        if(clusterstatus==ClusterStatus.RUNNING or clusterstatus==ClusterStatus.PAUSING or clusterstatus==ClusterStatus.PAUSED):
            break
        time.sleep(2)
checkStatus(client,clusterId)


ClusterStatus.RESUMING
ClusterStatus.RUNNING


### Pause a cluster

In [11]:
client.cluster.pause(clusterId)
checkStatus(client,clusterId)

ClusterStatus.INPROGRESS
ClusterStatus.PAUSING


#### Get list of available images ami, for creating clusters using SDK
#### call the get_available_images method of cluster SDK , pass the region where you want the cluster to be created, and you will get a list of available images.
#### the image will correspond to a bodo version, this can be used to launch a bodo cluster programmatically with a particular bodo version

In [13]:
import pandas as pd
def listImages(client,region):
    
    resp = client.cluster.get_available_images("us-east-2")
    images={"ami":[],"bodo_version":[]}
    for image in resp.keys():
        images["ami"].append(resp[image].image_id)
        images["bodo_version"].append(resp[image].bodo_version)
    df=pd.DataFrame(images)
    return df
images = listImages(client,"us-east-2")
images
    

Unnamed: 0,ami,bodo_version
0,ami-02e3a743ce609ad86,2022.8
1,ami-0d790d4cf8f8dac5b,2022.7
2,ami-0b49e47df3cefe51e,2022.6.2
3,ami-0fc5e82c9f7c572f9,2022.6
4,ami-03d1638bccc8c357b,2022.05.7 (Iceberg Alpha)
5,ami-02e0871982f31ed86,2022.05.8
6,ami-077ffa6e2fb413c7e,2022.05.7
7,ami-07a2e9276a352e411,2022.05.6
8,ami-0e6823b4142b81319,2022.05.4 SQL
9,ami-05af5bb246896bf9e,2022.05.4


#### Lets use the bodo sdk to run a job.
## There are two ways to run jobs
## 1. you can, submit the job with a cluster definition. Bodo platfrom will use the cluster definition to start a new cluster and run the job on this cluster. After the job finishes, cluster will be removed
## 2. Submit job to an existing cluster by passing the uuid of the cluster in cluster definition. 

## we will use both approaches to run a job here.


In [4]:
### Run a job with a new cluster Definition 
from datetime import date
def runJob(client):
    today = date.today()
    job_definition = JobDefinition(
        name=f'exampleJob-{today}-2', ### name of the job run instance , from a use case perspective ex. "daily eod report job" .
        args='example.py', ### the python script to run,
        source_config=WorkspaceSource(
            path='/shared/11-BodoSDK/' ### absolute path for the python script, this folder will be added to PYTHONPATH
        ),
    cluster_object=JobClusterDefinition(
        instance_type='c5.2xlarge',
        accelerated_networking=False,
        image_id='ami-0d790d4cf8f8dac5b', ### the image to use, this is for helping pick a bodo version, look at the image list section to see the versions and ami-ids
        workers_quantity=2
    ),
        variables=[],
        timeout=10,
        retries=3,
        retries_delay=0,
        retry_on_timeout=False
    )
    resp = client.job.create(job_definition)
    print(resp)
    uuid=str(resp.uuid)
    result = waitforJobFinish(uuid,client)
    #return result


def success_callback(job):
    print('Job has finished')
    return job


def waitforJobFinish(uuid,client):
    waiter = client.job.get_waiter()
    return waiter.wait(uuid, on_success=success_callback)

if __name__ == "__main__":


    
    result = runJob(client)
    print(result)

uuid=UUID('d95702a1-e5af-4d35-8c42-8c4498001de3') status=<JobStatus.NEW: 'NEW'> name='exampleJob-2022-09-14-2' args='example.py' variables={} source_config=WorkspaceSource(type=<JobSourceType.WORKSPACE: 'WORKSPACE'>, path='/shared/11-BodoSDK/') cluster_config=JobClusterDefinition(instance_type='c5.2xlarge', workers_quantity=2, accelerated_networking=False, image_id='ami-0d790d4cf8f8dac5b', bodo_version=None) cluster=JobClusterResponse(uuid='f93e6c2f-717e-4d0f-9c2b-2495302d5c92', name='cluster-exampleJob-2022-09-14-2', instance_type='c5.2xlarge', workers_quantity=2, accelerated_networking=False, bodo_version='2022.7', image_id='ami-0d790d4cf8f8dac5b') timeout=10 retries=3 retries_delay=0 retry_on_timeout=False
None


In [43]:
### Run a job with a new cluster Definition 
from datetime import date
from uuid import UUID
def resumeCluster(clusterid):
    resp=client.cluster.get(clusterid)
    if(resp.status==ClusterStatus.PAUSED):
        client.cluster.resume(clusterid)
        while(resp.status!=ClusterStatus.RUNNING):
            time.sleep(10)
            resp=client.cluster.get(clusterid)
    return True

def pauseCluster(clusterid):
    resp=client.cluster.get(clusterid)
    if(resp.status!=ClusterStatus.PAUSED):
        client.cluster.pause(clusterid)
        while(resp.status!=ClusterStatus.PAUSED):
            time.sleep(10)
            resp=client.cluster.get(clusterid)
    return True

def success_callback(job):
    print('Job has finished')
    return job

today = date.today()

job_definition = JobDefinition(
        name=f'exampleJob-{today}-2', ### name of the job run instance , from a use case perspective ex. "daily eod report job" .
        args='example.py', ### the python script to run,
        source_config=WorkspaceSource(
            path='/shared/11-BodoSDK/' ### absolute path for the python script, this folder will be added to PYTHONPATH
        ),
    cluster_object=JobCluster(
        uuid=UUID(clusterId)
    ),
        variables=[],
        timeout=60,
        retries=1,
        retries_delay=1,
        retry_on_timeout=False
    )

resumeCluster(clusterId)
resp = client.job.create(job_definition)
uuid=str(resp.uuid)
print(f"job created {uuid}")
waiter = client.job.get_waiter()
result=waiter.wait(uuid, on_success=success_callback)
print(result)
pauseCluster(clusterId)


job created 76400e94-48c8-40cf-bb2d-8b7afb601532
Job has finished
uuid=UUID('76400e94-48c8-40cf-bb2d-8b7afb601532') name='exampleJob-2022-09-15-2' status=<JobStatus.FINISHED: 'FINISHED'> schedule=datetime.datetime(2022, 9, 15, 16, 4) command='example.py' variables={} workspace_path='/shared/11-BodoSDK/' workspace_reference='' cluster=JobClusterResponse(uuid='b2460a46-44ae-46ed-9bbe-511543b66979', name='cluster20225', instance_type='c5.2xlarge', workers_quantity=2, accelerated_networking=False, bodo_version='2022.05.8', image_id='ami-02e0871982f31ed86')


True