# ibm_sql_query

Execute arbitrary SQL queries against CSV and Parquet files using IBM Cloud SQL Query and IBM Cloud Object Storage.

In [None]:
!pip install ibmcloudsql==0.4.29

In [None]:
import glob
import logging
import ibmcloudsql
from ibmcloudsql import SQLQuery
import os
import shutil
import sys
import re

In [None]:
def parse_bool(value):
    """Interpret a flag value as a real boolean.

    Environment variables are always strings, and ``bool('False')`` is ``True``,
    so a plain ``bool()`` cast cannot be used to switch a flag off.

    Args:
        value: A string such as ``'true'``/``'False'``/``'1'``, a bool, or None.

    Returns:
        True if ``value`` is a string equal (case-insensitively, ignoring
        surrounding whitespace) to one of '1', 'true', 'yes', 'y', 'on';
        for non-string values, ``bool(value)``.
    """
    if isinstance(value, str):
        return value.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
    return bool(value)


# IBM Cloud API key (alternative to token)
api_key = os.environ.get('api_key')

# IBM Cloud Token (alternative to API key)
token = os.environ.get('token')

# (unique) Cloud Resource Name (CRN) of the IBM SQL Query service instance
sql_query_crn = os.environ.get('sql_query_crn')

# URI of resulting file (example: cos://s3.eu-de.cloud-object-storage.appdomain.cloud/cos-rkie-sqlquery-test/result)
output_uri = os.environ.get('output_uri')

# default: CSV - will be generated into the corresponding STORED AS ... clause in the INTO clause
output_format = os.environ.get('output_format', 'CSV')

# if set - will be generated into the corresponding PARTITIONED BY (<columns>) clause in the INTO clause
output_partition_columns = os.environ.get('output_partition_columns')

# if set - will be generated into the corresponding PARTITIONED INTO <num> OBJECTS clause in the INTO clause
output_number_of_objects = os.environ.get('output_number_of_objects')

# if set - will be generated into the corresponding PARTITIONED EVERY <num> ROWS clause in the INTO clause
output_rows_per_object = os.environ.get('output_rows_per_object')

# default: False - only valid when no partitioning option is specified. Will cause sqlClient.rename_exact_result(job_id) to be called after the SQL has run.
output_exact_name = parse_bool(os.environ.get('output_exact_name', False))

# default: False - will be generated into JOBPREFIX NONE in the INTO clause. Will cause results of previous runs with the same output_uri to be overwritten, because no unique sub folder will be created for the result.
output_no_jobid_folder = parse_bool(os.environ.get('output_no_jobid_folder', False))

# SQL statement to execute (without the INTO clause, which is generated from the output_* parameters)
sql = os.environ.get('sql')

In [None]:
def parse_cli_parameters(argv):
    """Extract ``name=value`` overrides from a list of command-line arguments.

    An argument is accepted when the text before its first ``=`` is a valid
    Python identifier made of ASCII letters, digits and underscores. Everything
    after that first ``=`` is the value, so values may themselves contain
    ``=``, quotes, spaces or any other character.

    Args:
        argv: Iterable of argument strings (typically ``sys.argv``).

    Returns:
        List of ``(name, value)`` tuples in the order they appeared.
    """
    pairs = []
    for argument in argv:
        name, separator, value = argument.partition('=')
        if separator and re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', name):
            pairs.append((name, value))
    return pairs


def flag_value(value):
    """Turn a flag given as text ('true', 'False', '1', ...) into a bool.

    Strings count as true only when they equal '1', 'true', 'yes', 'y' or 'on'
    (case-insensitive); any other type falls back to ``bool(value)``.
    """
    if isinstance(value, str):
        return value.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
    return bool(value)


# Parameters whose values must never be written to the log.
SECRET_PARAMETERS = ('api_key', 'token')

parameter_pairs = parse_cli_parameters(sys.argv)

# Kept in the historical 'name="value"' form for anything that inspects it.
parameters = ['{}="{}"'.format(name, value) for name, value in parameter_pairs]

for name, value in parameter_pairs:
    shown = '***' if name in SECRET_PARAMETERS else value
    logging.warning('Parameter: ' + name + '="' + shown + '"')
    # Assign directly instead of exec()-ing text built from argv: no code
    # injection, and values containing '=' or '"' are handled correctly.
    globals()[name] = value

# Normalise the flags only after the overrides were applied; a command-line
# value arrives as a string and 'False' must not be treated as true.
output_exact_name = flag_value(output_exact_name)
output_no_jobid_folder = flag_value(output_no_jobid_folder)

In [None]:
# Authenticate with the API key when one is given, otherwise fall back to the
# token. Truthiness checks cover both None (not configured) and '' (passed
# empty on the command line), so len() is never called on None.
if api_key:
    sqlClient = SQLQuery(api_key, sql_query_crn)
elif token:
    sqlClient = SQLQuery(api_key=None, token=token, instance_crn=sql_query_crn)
else:
    raise ValueError('Either api_key or token must be provided')

# Fail early with a clear message instead of submitting "... INTO None ...".
if not sql:
    raise ValueError('sql must be provided')
if not output_uri:
    raise ValueError('output_uri must be provided')

# Append the generated INTO clause to the user-supplied statement.
# NOTE(review): IBM's examples place JOBPREFIX directly after the target URI;
# the original clause order is kept here - confirm the service accepts it.
sql = sql + ' INTO {} STORED AS {}'.format(output_uri, output_format)

if output_partition_columns:
    sql = sql + ' PARTITIONED BY ({})'.format(output_partition_columns)

if output_number_of_objects:
    sql = sql + ' PARTITIONED INTO {} OBJECTS'.format(output_number_of_objects)

if output_rows_per_object:
    sql = sql + ' PARTITIONED EVERY {} ROWS'.format(output_rows_per_object)

if output_no_jobid_folder:
    sql = sql + ' JOBPREFIX NONE'

if output_exact_name:
    # Submit and wait explicitly because the job id is needed for the rename.
    job_id = sqlClient.submit_sql(sql)
    job_status = sqlClient.wait_for_job(job_id)
    print("Job " + job_id + " terminated with status: " + job_status)
    sqlClient.rename_exact_result(job_id)
else:
    sqlClient.run_sql(sql)

In [None]:
# Show the statement held in `sql` (the user query plus the appended INTO clause).
print(sql)

In [None]:
#!ipython ibm-sql-query.ipynb output_uri="cos://s3.eu-de.cloud-object-storage.appdomain.cloud/cos-rkie-sqlquery-test/result" api_key="" token="<IAM bearer token>" sql_query_crn="crn:v1:bluemix:public:sql-query:us-south:a/9b13b857a32341b7167255de717172f5:f9dd6c9e-b24b-4506-819e-e038c92339e4::" sql="SELECT * FROM cos://eu-de/cos-rkie-sqlquery-test/12613-0004_flat-2.csv STORED AS CSV"