# BlazingSQL Cheat Sheets sample code

(c) 2020 NVIDIA, Blazing SQL

Distributed under Apache License 2.0

### Imports

In [1]:
import cudf
import numpy as np
from blazingsql import BlazingContext

### Sample DataFrame

In [2]:
# Ten sample rows of seven fields each. None marks a missing value
# (it appears in the 'datetime', 'word' and 'string' columns).
# The two multi-line 'string' values contain an embedded newline followed by
# 17 spaces of indentation; that whitespace is part of the data.
df = cudf.DataFrame(
    [
        (
            39, 6.88, np.datetime64("2020-10-08T12:12:01"), "C", "D", "data",
            "RAPIDS.ai is a suite of open-source libraries that allow you to run your end to end data science and analytics pipelines on GPUs.",
        ),
        (
            11, 4.21, None, "A", "D", "cuDF",
            "cuDF is a Python GPU DataFrame (built on the Apache Arrow columnar memory format)",
        ),
        (
            31, 4.71, np.datetime64("2020-10-10T09:26:43"), "U", "D", "memory",
            "cuDF allows for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.",
        ),
        (
            40, 0.93, np.datetime64("2020-10-11T17:10:00"), "P", "B", "tabular",
            "If your workflow is fast enough on a single GPU or your data comfortably fits in memory on \n"
            "                 a single GPU, you would want to use cuDF.",
        ),
        (
            33, 9.26, np.datetime64("2020-10-15T10:58:02"), "O", "D", "parallel",
            "If you want to distribute your workflow across multiple GPUs or have more data than you can fit \n"
            "                 in memory on a single GPU you would want to use Dask-cuDF",
        ),
        (
            42, 4.21, np.datetime64("2020-10-01T10:02:23"), "U", "C", "GPUs",
            "BlazingSQL provides a high-performance distributed SQL engine in Python",
        ),
        (
            36, 3.01, np.datetime64("2020-09-30T14:36:26"), "T", "D", None,
            "BlazingSQL is built on the RAPIDS GPU data science ecosystem",
        ),
        (
            38, 6.44, np.datetime64("2020-10-10T08:34:36"), "X", "B", "csv",
            "BlazingSQL lets you ETL raw data directly into GPU memory as a GPU DataFrame (GDF)",
        ),
        (
            17, 5.28, np.datetime64("2020-10-09T08:34:40"), "P", "D", "dataframes",
            "Dask is a flexible library for parallel computing in Python",
        ),
        (
            10, 8.28, np.datetime64("2020-10-03T03:31:21"), "W", "B", "python",
            None,
        ),
    ],
    columns=["number", "float_number", "datetime", "letter", "category", "word", "string"],
)

---

# BlazingContext

---

#### blazingsql.BlazingContext()

In [3]:
# Create a BlazingContext with no arguments (all options left at their defaults).
bc = BlazingContext()

BlazingContext ready


In [4]:
# Rebind `bc` to a new context with explicit options: no Dask client,
# memory pool enabled, and an explicit network interface name.
bc = BlazingContext(
    dask_client=None,
    pool=True,
    initial_pool_size=4 * 1024**3,  # 4 GB
    network_interface="eth0",
)

BlazingContext ready


#### blazingsql.BlazingContext.s3()

In [5]:
# Register the 'bsql' bucket under the prefix 'bsql'; no credentials are passed.
# Assigning to `_` keeps the return value from being echoed as cell output.
_ = bc.s3("bsql", bucket_name="bsql")

S3 Storage Plugin Registered Successfully


In [None]:
# Template: replace every <...> placeholder before running, and do not commit
# real keys to the notebook. endpoint_override is set to a non-AWS URL here.
_ = bc.s3(
    "<dir_name>",
    bucket_name="<bucket_name>",
    access_key_id="<ACCESS_KEY>",
    secret_key="<SECRET_KEY>",
    region="<region>",
    endpoint_override="https://s3.filebase.com",
)

#### blazingsql.BlazingContext.gs()

In [None]:
# Template: replace the <...> placeholders before running.
# use_default_adc_json_file=False is paired with an explicit credentials
# file path passed through adc_json_file.
bc.gs(
    "dir_name",
    project_id="<project_id>",
    bucket_name="<bucket_name>",
    use_default_adc_json_file=False,
    adc_json_file="<../file_path_to/credentials.json>",
)

#### blazingsql.BlazingContext.create_table()

In [6]:
# Create a table named 'df' from the in-memory sample DataFrame.
bc.create_table("df", df)

In [7]:
# Create a table named 'apache' from a Parquet file at a relative local path.
bc.create_table("apache", "../data/apache_sample_head.parquet")

In [8]:
# Same table name 'apache', this time pointing at the CSV file.
# NOTE(review): header=1 is passed through as a keyword option; presumably it
# selects the header row for the CSV reader. Confirm against the data file.
bc.create_table(
    "apache",
    "../data/apache_sample_head.csv",
    header=1,
)

In [9]:
# Create a table from an s3:// path that uses the 'bsql' prefix.
bc.create_table(
    "parking_locations",
    "s3://bsql/data/seattle_parking/parking_locations.parquet/",
)

#### blazingsql.BlazingContext.drop_table()

In [10]:
# Drop the 'apache' table from the context.
bc.drop_table("apache")

#### blazingsql.BlazingContext.list_tables()

In [11]:
# List the table names currently registered in the context.
bc.list_tables()

['df', 'parking_locations']

#### blazingsql.BlazingContext.describe_table()

In [12]:
# Show the column-name -> dtype mapping of the 'parking_locations' table.
bc.describe_table("parking_locations")

{'SourceElementKey': 'int64',
 'BlockfaceName': 'str',
 'SideOfStreet': 'str',
 'ParkingTimeLimitCategory': 'int64',
 'ParkingSpaceCount': 'int64',
 'PaidParkingArea': 'str',
 'PaidParkingSubArea': 'str',
 'ParkingCategory': 'str',
 'Location': 'str'}

#### blazingsql.BlazingContext.sql()

In [13]:
# Run a query against the registered table; the result is the cell's output.
bc.sql("SELECT * FROM parking_locations LIMIT 3")

Unnamed: 0,SourceElementKey,BlockfaceName,SideOfStreet,ParkingTimeLimitCategory,ParkingSpaceCount,PaidParkingArea,PaidParkingSubArea,ParkingCategory,Location
0,1009,1ST AVE BETWEEN MADISON ST AND SPRING ST,SW,30,5,Commercial Core,Waterfront,Paid Parking,POINT (-122.3366575 47.60501765)
1,1018,1ST AVE BETWEEN STEWART ST AND VIRGINIA ST,NE,120,12,Belltown,South,Paid Parking,POINT (-122.34188878 47.61073498)
2,1045,1ST AVE N BETWEEN REPUBLICAN ST AND MERCER ST,W,120,11,Uptown,Core,Paid Parking,POINT (-122.35549857 47.62391565)


#### blazingsql.BlazingContext.explain()

In [14]:
# Return the logical plan for the query as a string.
bc.explain("SELECT * FROM parking_locations LIMIT 3")

'LogicalSort(fetch=[3])\n  LogicalTableScan(table=[[main, parking_locations]])\n'

#### blazingsql.BlazingContext.log()

In [15]:
# Query the bsql_logs table for 'Query Execution Done' entries, newest first.
bc.log("""
    SELECT log_time
        , query_id
        , duration 
    FROM bsql_logs 
    WHERE info = 'Query Execution Done' 
    ORDER BY log_time DESC
""")

Unnamed: 0,log_time,query_id,duration
0,2021-02-22 19:32:54.380,1354187413,0.0
1,2021-02-22 19:32:06.398,1221019996,0.0
2,2021-02-22 19:32:06.165,903985917,0.0
3,2021-02-22 19:31:30.310,42308095,0.0
4,2021-02-22 19:31:30.087,1190851056,0.0
...,...,...,...
125,2020-12-07 05:42:53.225,350886341,28.0
126,2020-12-05 00:07:49.350,896922691,60.0
127,2020-12-05 00:07:12.421,984749867,781.0
128,2020-12-05 00:07:07.608,1292265483,1156.0
