#### This is a tutorial on using the Bodo Platform SDK. It also shows you how to trigger or schedule Bodo jobs on the platform from your laptop, AWS Lambda, cron, etc.

#### Step 1
Install the Bodo SDK. You can install it simply by using pip.

In [1]:
!pip install bodosdk

Collecting requests~=2.27.1
  Using cached requests-2.27.1-py2.py3-none-any.whl (63 kB)
Installing collected packages: requests
  Attempting uninstall: requests
    Found existing installation: requests 2.28.1
    Uninstalling requests-2.28.1:
      Successfully uninstalled requests-2.28.1
Successfully installed requests-2.27.1


#### Step 2
Create an access token for your workspace and reference it here. Check the short video below.

In [17]:
%%HTML
<div align="center">
<iframe align="middle" width="700" height="400" src="https://www.youtube.com/embed/qAPZba0cQQg" frameborder="0" allowfullscreen></iframe>
</div>

#### Generate an SDK client using the API token, as below

In [4]:
from bodosdk.models import WorkspaceKeys, JobResponse,WorkspaceSource,JobCluster,ClusterStatus
from bodosdk.client import get_bodo_client

from bodosdk.models.job import JobClusterDefinition, JobDefinition

def getBodoClient(client_id='', secret_key=''):
    """Build a Bodo SDK client from workspace API credentials.

    Args:
        client_id: Client id of the workspace access token. When empty, the
            ``BODO_CLIENT_ID`` environment variable is used instead.
        secret_key: Secret key of the workspace access token. When empty, the
            ``BODO_SECRET_KEY`` environment variable is used instead.

    Returns:
        The object returned by ``get_bodo_client`` for these keys, created
        with ``print_logs=False``.
    """
    # Local import keeps this notebook cell self-contained.
    import os

    # Prefer explicit arguments, then the environment, so that real secrets
    # never have to be pasted into (and saved with) the notebook.
    keys = WorkspaceKeys(
        client_id=client_id or os.environ.get('BODO_CLIENT_ID', ''),
        secret_key=secret_key or os.environ.get('BODO_SECRET_KEY', '')
    )
    return get_bodo_client(keys, print_logs=False)

# Shared SDK client; the cells below pass this object to every helper.
client = getBodoClient()

#### Once you have an instance of the Bodo SDK client, you can use it to perform platform operations.
#### Let's get a list of clusters from the platform and their status.

In [5]:
import pandas as pd
def listClusters(client):
    """Return a pandas DataFrame with one row per cluster known to ``client``.

    The clusters come from ``client.cluster.list()``. For each one the
    ``name``, ``status``, ``uuid``, ``instance_type`` and
    ``workers_quantity`` attributes are copied into columns of the same
    names, in that order.
    """
    columns = ("name", "status", "uuid", "instance_type", "workers_quantity")
    # Materialise the response once so it can be scanned once per column.
    found = list(client.cluster.list())
    table = {
        column: [getattr(entry, column) for entry in found]
        for column in columns
    }
    return pd.DataFrame(table)
# Build the cluster table; the bare name on the last line lets the notebook render it.
clusterdf = listClusters(client)
clusterdf

Unnamed: 0,name,status,uuid,instance_type,workers_quantity
0,hosted-trial-size,ClusterStatus.PAUSED,25ff2ce3-308d-47c6-8c18-5720774c53c3,c5.2xlarge,2
1,cluster-exampleJob,ClusterStatus.NEW,917fdccc-9c6f-4acf-839f-14d9e6ece98c,c5.2xlarge,1
2,Ali-prod-cluster,ClusterStatus.PAUSED,e7ccaebb-3698-4888-b36a-dfd5e3217b53,c5n.18xlarge,1
3,2022.6-test,ClusterStatus.PAUSED,d9e4d04a-eb20-4147-9420-ee62ebd3ff1d,c5.2xlarge,2
4,vignesh-test,ClusterStatus.PAUSED,bf8e0bd7-7e78-483d-aa03-16bfe85dd645,c5.xlarge,2
5,Nick-Test-BodoSQL,ClusterStatus.PAUSED,4e95cd56-9ff3-4f3c-acc5-f6d769ccca0c,c5.2xlarge,1
6,Ali-test,ClusterStatus.RUNNING,ea23a4f2-4a85-42b6-9e9c-652dbeac751d,c5.2xlarge,2


#### Let's start a cluster that is currently paused
#### Copy the uuid of one of the clusters from the list above and paste it into the code below

In [34]:
# uuid of the 'hosted-trial-size' cluster from the table above; replace with your own
clusterId="25ff2ce3-308d-47c6-8c18-5720774c53c3"

In [31]:
# Request a resume of the cluster and keep the response object the SDK returns
cluster=client.cluster.resume(clusterId)

ClusterResponse(name='hosted-trial-size', uuid='25ff2ce3-308d-47c6-8c18-5720774c53c3', status=<ClusterStatus.INPROGRESS: 'INPROGRESS'>, description='', instance_type='c5.2xlarge', workers_quantity=2, auto_shutdown=0, auto_pause=60, nodes_ip=['10.30.2.147', '10.30.2.121'], bodo_version='2022.05.4', image_id='ami-05af5bb246896bf9e', cores_per_worker=4, accelerated_networking=False, autoscaling_identifier='arn:aws:autoscaling:us-east-2:427443013497:autoScalingGroup:5789134c-3e3d-481d-b3eb-decacea44088:autoScalingGroupName/BodoASG-25ff2ce3-308d-47c6-8c18-5720774c53c3', last_asg_activity_id='9ab605c9-0f29-e1c6-d5e6-332ecb6fa779', created_at='2022-06-17 15:18:30.392512+00', is_job_dedicated=False, last_known_activity='2022-06-17T16:00:33.433Z', workspace={'id': 21, 'uuid': 'de215a95-0f9c-4493-8310-87b12878644f', 'name': 'DataCouncil-Demo-Workspace', 'status': 'READY', 'type': 'ENTERPRISE', 'organization_uuid': 'eab58676-5fe6-4b95-9f19-5bcaa856bc33', 'region': 'us-east-2', 'data': {'policies_

In [41]:
# One-off check: fetch the cluster again and read its current status
client.cluster.get(clusterId).status

<ClusterStatus.RUNNING: 'RUNNING'>

In [48]:
### Check the status in a loop until the cluster is RUNNING, PAUSING or PAUSED. In the future, the API will have a blocking method for resume.
import time
def checkStatus(client,clusterId, poll_interval=2, timeout=None):
    """Poll a cluster's status until it settles, printing every status seen.

    Polling stops as soon as the status is ``ClusterStatus.RUNNING``,
    ``ClusterStatus.PAUSING`` or ``ClusterStatus.PAUSED``.

    Args:
        client: Bodo SDK client; ``client.cluster.get(clusterId).status`` is
            read on every iteration.
        clusterId: uuid of the cluster to watch.
        poll_interval: Seconds to sleep between two status reads.
        timeout: Maximum number of seconds to keep polling. ``None`` (the
            default) polls indefinitely.

    Raises:
        TimeoutError: If ``timeout`` is set and elapses before the cluster
            reaches one of the statuses listed above.
    """
    settled = (ClusterStatus.RUNNING, ClusterStatus.PAUSING, ClusterStatus.PAUSED)
    # A monotonic clock is immune to wall-clock adjustments while waiting.
    deadline = None if timeout is None else time.monotonic() + timeout
    while True:
        clusterstatus = client.cluster.get(clusterId).status
        print(clusterstatus)
        if clusterstatus in settled:
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"cluster {clusterId} still in status {clusterstatus} after {timeout} seconds"
            )
        time.sleep(poll_interval)
# Block this cell until the cluster reports a status that checkStatus stops on
checkStatus(client,clusterId)


ClusterStatus.PAUSED


### Pause a cluster

In [49]:
# Request a pause, then poll until checkStatus sees a status it stops on
client.cluster.pause(clusterId)
checkStatus(client,clusterId)

ClusterStatus.INPROGRESS
ClusterStatus.PAUSED


#### Get the list of available images (AMIs) for creating clusters using the SDK
#### Call the get_available_images method of the cluster SDK and pass the region where you want the cluster to be created; you will get a list of available images.
#### Each image corresponds to a Bodo version, so it can be used to launch a Bodo cluster programmatically with a particular Bodo version.

In [7]:
import pandas as pd
def listImages(client,region):
    """Return a pandas DataFrame of the cluster images available in ``region``.

    Args:
        client: Bodo SDK client; ``client.cluster.get_available_images`` is
            called with ``region``.
        region: Region name to query, e.g. ``"us-east-2"``.

    Returns:
        A DataFrame with an ``ami`` column (each image's ``image_id``) and a
        ``bodo_version`` column, one row per entry of the SDK response.
    """
    # Query the region the caller asked for instead of a fixed one.
    resp = client.cluster.get_available_images(region)
    entries = [resp[key] for key in resp.keys()]
    images = {
        "ami": [entry.image_id for entry in entries],
        "bodo_version": [entry.bodo_version for entry in entries],
    }
    return pd.DataFrame(images)
# us-east-2 is the workspace region shown in the cluster response earlier in this notebook
images = listImages(client,"us-east-2")
images
    

Unnamed: 0,ami,bodo_version
0,ami-0d790d4cf8f8dac5b,2022.7
1,ami-0b49e47df3cefe51e,2022.6.2
2,ami-0fc5e82c9f7c572f9,2022.6
3,ami-03d1638bccc8c357b,2022.05.7 (Iceberg Alpha)
4,ami-02e0871982f31ed86,2022.05.8
5,ami-077ffa6e2fb413c7e,2022.05.7
6,ami-07a2e9276a352e411,2022.05.6
7,ami-0e6823b4142b81319,2022.05.4 SQL
8,ami-05af5bb246896bf9e,2022.05.4
9,ami-0e00b3234e13295db,2022.05.3 SQL


#### Let's use the Bodo SDK to run a job.
## There are two ways to run jobs:
## 1. Submit the job with a cluster definition. The Bodo platform will use the cluster definition to start a new cluster and run the job on it. After the job finishes, the cluster will be removed.
## 2. Submit the job to an existing cluster by passing the uuid of the cluster in the cluster definition.

## We will use both approaches to run a job here.


In [8]:
### Run a job with a new cluster Definition 
from datetime import date
def runJob(client):
    """Submit the example job with a new cluster definition and wait for it.

    A ``JobDefinition`` named ``exampleJob-<today's date>`` is created through
    ``client.job.create``, the creation response is printed, and the call
    then blocks in ``waitforJobFinish`` on the new job's uuid.

    Args:
        client: Bodo SDK client used to create the job and to wait for it.

    Returns:
        Whatever ``waitforJobFinish`` returns for the submitted job.
    """
    today = date.today()
    job_definition = JobDefinition(
        name=f'exampleJob-{today}', ### name of the job run instance , from a use case perspective ex. "daily eod report job" .
        args='example.py', ### the python script to run,
        source_config=WorkspaceSource(
            path='/shared/bodo-examples/11-BodoSDK/' ### absolute path for the python script, this folder will be added to PYTHONPATH
        ),
        cluster_object=JobClusterDefinition(
            instance_type='c5.2xlarge',
            accelerated_networking=False,
            image_id='ami-0d790d4cf8f8dac5b', ### the image to use, this is for helping pick a bodo version, look at the image list section to see the versions and ami-ids
            workers_quantity=1
        ),
        variables=[],
        timeout=10,
        retries=3,
        retries_delay=0,
        retry_on_timeout=False
    )
    resp = client.job.create(job_definition)
    print(resp)
    job_uuid = str(resp.uuid)
    # Hand the waiter's result back so the caller has something to print or inspect.
    return waitforJobFinish(job_uuid,client)


def success_callback(job):
    """Announce on stdout that the job is done and hand ``job`` back unchanged."""
    print("Job has finished")
    return job


def waitforJobFinish(uuid,client):
    """Wait on the job identified by ``uuid`` using the client's job waiter.

    Obtains a waiter from ``client.job.get_waiter()`` and returns the result
    of its ``wait`` call, with ``success_callback`` registered as
    ``on_success``.
    """
    return client.job.get_waiter().wait(uuid, on_success=success_callback)

if __name__ == "__main__":
    # Submit the example job, then print whatever runJob hands back.
    result = runJob(client)
    print(result)

uuid=UUID('10fda43d-3878-4796-bb2b-a56ef343db1d') status=<JobStatus.NEW: 'NEW'> name='exampleJob-2022-08-30' args='example.py' variables=[] source_config=WorkspaceSource(type=<JobSourceType.WORKSPACE: 'WORKSPACE'>, path='/shared/bodo-examples/Jobs/') cluster_config=JobClusterDefinition(instance_type='c5.2xlarge', workers_quantity=1, accelerated_networking=False, image_id='ami-0d790d4cf8f8dac5b') cluster=JobClusterResponse(uuid='319c52aa-74e7-4ef4-89bb-0e41e53b0966', name='cluster-exampleJob-2022-08-30', instance_type='c5.2xlarge', workers_quantity=1, accelerated_networking=False, bodo_version='2022.7', image_id='ami-0d790d4cf8f8dac5b') timeout=10 retries=3 retries_delay=0 retry_on_timeout=False
Job has finished
None
