# ibm_sql_query

Execute arbitrary SQL queries against CSV and Parquet files using IBM Cloud SQL Query and IBM Cloud Object Storage.

In [None]:
!pip install ibmcloudsql==0.4.29

In [None]:
import glob
import logging
import ibmcloudsql
from ibmcloudsql import SQLQuery
import os
import shutil
import sys
import re

In [None]:
def env_flag(name, default=False):
    """Read a boolean switch from the environment variable ``name``.

    Returns ``default`` when the variable is not set. Otherwise the value is
    compared case-insensitively against the usual "off" spellings; anything
    else counts as True. A plain ``bool()`` on the raw string would treat
    'False' and '0' as True because every non-empty string is truthy.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in ('', '0', 'false', 'no', 'n', 'off', 'none')


# IBM Cloud API key (alternative to token)
api_key = os.environ.get('api_key')

# IBM Cloud token (alternative to API key)
token = os.environ.get('token')

# (unique) Cloud Resource Name (CRN) of the IBM SQL Query service instance
sql_query_crn = os.environ.get('sql_query_crn')

# URI of the resulting file
# (example: cos://s3.eu-de.cloud-object-storage.appdomain.cloud/cos-rkie-sqlquery-test/result)
output_uri = os.environ.get('output_uri')

# default: CSV - generated into the STORED AS <format> part of the INTO clause
out_format = os.environ.get('out_format', 'CSV')

# if set - generated into a PARTITIONED BY (<columns>) clause in the INTO clause
out_partition_columns = os.environ.get('out_partition_columns')

# if set - generated into a PARTITIONED INTO <num> OBJECTS clause in the INTO clause
out_number_of_objects = os.environ.get('out_number_of_objects')

# if set - generated into a PARTITIONED EVERY <num> ROWS clause in the INTO clause
out_rows_per_object = os.environ.get('out_rows_per_object')

# default: False - only valid when no partitioning option is specified.
# Causes sqlClient.rename_exact_result(job_id) to be called after the SQL has run.
out_exact_name = env_flag('out_exact_name')

# default: False - generated into JOBPREFIX NONE in the INTO clause. Results of
# previous runs with the same output_uri are overwritten, because no unique
# sub folder is created for the result.
out_no_jobid_folder = env_flag('out_no_jobid_folder')

# SQL statement to execute; the INTO clause is appended automatically from the
# out_* settings above, so the statement itself should not contain one
sql = os.environ.get('sql')

In [None]:
# Command-line arguments of the form NAME=VALUE override the variables set from
# the environment. NAME must look like a Python identifier; VALUE is taken
# verbatim (it may contain '=', quotes, spaces, ...).
_PARAMETER_PATTERN = re.compile(r'([A-Za-z_][A-Za-z0-9_]*)=(.*)', re.DOTALL)

# Parameters that are boolean switches and therefore need string -> bool parsing.
_FLAG_PARAMETERS = ('out_exact_name', 'out_no_jobid_folder')

# Parameters whose values are credentials and must not end up in the log.
_SECRET_PARAMETERS = ('api_key', 'token')

_FALSE_STRINGS = ('', '0', 'false', 'no', 'n', 'off', 'none')


def parse_parameters(argv):
    """Collect NAME=VALUE overrides from an argument list.

    Arguments that do not have the NAME=VALUE shape (options such as
    ``--ip=127.0.0.1``, file names, ...) are ignored. When a name occurs more
    than once the last occurrence wins.

    Returns a dict mapping each name to its raw string value.
    """
    overrides = {}
    for argument in argv:
        matched = _PARAMETER_PATTERN.fullmatch(argument)
        if matched:
            name, value = matched.groups()
            overrides[name] = value
    return overrides


def to_flag(value):
    """Convert a switch value to bool; 'False', '0', 'no', ... count as False."""
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


for name, value in parse_parameters(sys.argv).items():
    # Mask credentials: the original value must never be written to the log.
    shown = '***' if name in _SECRET_PARAMETERS else value
    logging.warning('Parameter: %s="%s"', name, shown)
    # Assign directly instead of exec()-ing generated source code: exec breaks
    # on values containing quotes and lets a crafted value run arbitrary code.
    globals()[name] = to_flag(value) if name in _FLAG_PARAMETERS else value

In [None]:
def build_partition_clause(partition_columns, number_of_objects, rows_per_object):
    """Return the PARTITIONED ... part of the INTO clause.

    Each argument that is set (neither None nor empty) contributes one option,
    in the order BY (<columns>), INTO <num> OBJECTS, EVERY <num> ROWS. The
    keyword PARTITIONED is emitted once in front of them. Returns an empty
    string when no option is set.
    """
    options = []
    if partition_columns:
        options.append(' BY ({})'.format(partition_columns))
    if number_of_objects:
        options.append(' INTO {} OBJECTS'.format(number_of_objects))
    if rows_per_object:
        options.append(' EVERY {} ROWS'.format(rows_per_object))
    if not options:
        return ''
    return ' PARTITIONED' + ''.join(options)


# Fail early with a clear message instead of a TypeError on string
# concatenation or a statement ending in "INTO None".
if not sql:
    raise ValueError('No SQL statement given: set the "sql" parameter')
if not output_uri:
    raise ValueError('No result location given: set the "output_uri" parameter')

# Prefer the API key when one is given; fall back to the token otherwise.
if api_key:
    sqlClient = SQLQuery(api_key, sql_query_crn)
elif token:
    sqlClient = SQLQuery(api_key=None, token=token, instance_crn=sql_query_crn)
else:
    raise ValueError('No credentials given: set either "api_key" or "token"')

sql = sql + ' INTO {} STORED AS {}'.format(output_uri, out_format)
sql = sql + build_partition_clause(
    out_partition_columns, out_number_of_objects, out_rows_per_object
)

# NOTE(review): JOBPREFIX NONE is appended after STORED AS / PARTITIONED here;
# confirm against the SQL Query grammar that it is accepted in this position.
if out_no_jobid_folder:
    sql = sql + ' JOBPREFIX NONE'

if out_exact_name:
    # The job id is needed for the rename, so submit and wait explicitly
    # instead of using run_sql().
    job_id = sqlClient.submit_sql(sql)
    job_status = sqlClient.wait_for_job(job_id)
    print("Job " + job_id + " terminated with status: " + job_status)
    sqlClient.rename_exact_result(job_id)
else:
    sqlClient.run_sql(sql)

In [None]:
# Show the final statement, including the generated INTO clause, for traceability.
print(sql)