# BlazingSQL Cheat Sheets sample code

(c) 2020 NVIDIA, BlazingSQL

Distributed under Apache License 2.0

### Imports

In [1]:
import cudf
import numpy as np
from blazingsql import BlazingContext

### Sample Data Table

In [2]:
# Sample table used by every query in this sheet: ten records, seven columns.
# Exactly one record is missing its datetime, one its word, and one its
# string (given as None) so that NULL handling shows up in query results.
df = cudf.DataFrame(
    [
        (
            39, -6.88, np.datetime64('2020-10-08T12:12:01'), 'C', 'D', 'data',
            'RAPIDS.ai is a suite of open-source libraries that allow you to run your end to end data science and analytics pipelines on GPUs.',
        ),
        (
            # No timestamp for this record.
            11, 4.21, None, 'A', 'D', 'cuDF',
            'cuDF is a Python GPU DataFrame (built on the Apache Arrow columnar memory format)',
        ),
        (
            31, 4.71, np.datetime64('2020-10-10T09:26:43'), 'U', 'D', 'memory',
            'cuDF allows for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.',
        ),
        (
            40, 0.93, np.datetime64('2020-10-11T17:10:00'), 'P', 'B', 'tabular',
            # The line break and the indentation inside this literal are part
            # of the stored text; do not re-indent the continuation line.
            '''If your workflow is fast enough on a single GPU or your data comfortably fits in memory on 
                 a single GPU, you would want to use cuDF.''',
        ),
        (
            33, 9.26, np.datetime64('2020-10-15T10:58:02'), 'O', 'D', 'parallel',
            # Same as above: the embedded newline and spaces are data.
            '''If you want to distribute your workflow across multiple GPUs or have more data than you can fit 
                 in memory on a single GPU you would want to use Dask-cuDF''',
        ),
        (
            42, 4.21, np.datetime64('2020-10-01T10:02:23'), 'U', 'C', 'GPUs',
            'BlazingSQL provides a high-performance distributed SQL engine in Python',
        ),
        (
            # No word for this record.
            36, 3.01, np.datetime64('2020-09-30T14:36:26'), 'T', 'D', None,
            'BlazingSQL is built on the RAPIDS GPU data science ecosystem',
        ),
        (
            38, 6.44, np.datetime64('2020-10-10T08:34:36'), 'X', 'B', 'csv',
            'BlazingSQL lets you ETL raw data directly into GPU memory as a GPU DataFrame (GDF)',
        ),
        (
            17, -5.28, np.datetime64('2020-10-09T08:34:40'), 'P', 'D', 'dataframes',
            'Dask is a flexible library for parallel computing in Python',
        ),
        (
            # No string for this record.
            10, 8.28, np.datetime64('2020-10-03T03:31:21'), 'W', 'B', 'python',
            None,
        ),
    ],
    columns=[
        'number',
        'float_number',
        'datetime',
        'letter',
        'category',
        'word',
        'string',
    ],
)

In [3]:
# Create the BlazingContext; the cells below use it to register the sample
# table (bc.create_table) and to run every SQL query (bc.sql).
bc = BlazingContext()

BlazingContext ready


In [4]:
# Register the cuDF DataFrame `df` under the SQL table name 'df' so that the
# queries below can refer to it with `FROM df`.
bc.create_table('df', df)

# SQL DateTime Functions

#### YEAR

In [5]:
# YEAR(datetime): select each timestamp next to its calendar-year component,
# returned as column r. In the recorded output the row whose datetime is
# NULL also has a NULL r.
query = '''
    SELECT datetime
        , YEAR(datetime) AS r
    FROM df
'''

# Last expression of the cell, so the notebook displays the query result.
bc.sql(query)

Unnamed: 0,datetime,r
0,2020-10-08 12:12:01,2020.0
1,,
2,2020-10-10 09:26:43,2020.0
3,2020-10-11 17:10:00,2020.0
4,2020-10-15 10:58:02,2020.0
5,2020-10-01 10:02:23,2020.0
6,2020-09-30 14:36:26,2020.0
7,2020-10-10 08:34:36,2020.0
8,2020-10-09 08:34:40,2020.0
9,2020-10-03 03:31:21,2020.0


#### MONTH

In [6]:
# MONTH(datetime): select each timestamp next to its month number, returned
# as column r (9 for the September row, 10 for the October rows in the
# recorded output). The row whose datetime is NULL also has a NULL r.
query = '''
    SELECT datetime
        , MONTH(datetime) AS r
    FROM df
'''

# Last expression of the cell, so the notebook displays the query result.
bc.sql(query)

Unnamed: 0,datetime,r
0,2020-10-08 12:12:01,10.0
1,,
2,2020-10-10 09:26:43,10.0
3,2020-10-11 17:10:00,10.0
4,2020-10-15 10:58:02,10.0
5,2020-10-01 10:02:23,10.0
6,2020-09-30 14:36:26,9.0
7,2020-10-10 08:34:36,10.0
8,2020-10-09 08:34:40,10.0
9,2020-10-03 03:31:21,10.0


#### DAYOFMONTH

In [7]:
# DAYOFMONTH(datetime): select each timestamp next to its day-of-month,
# returned as column r (e.g. 8 for 2020-10-08 in the recorded output).
# The row whose datetime is NULL also has a NULL r.
query = '''
    SELECT datetime
        , DAYOFMONTH(datetime) AS r
    FROM df
'''

# Last expression of the cell, so the notebook displays the query result.
bc.sql(query)

Unnamed: 0,datetime,r
0,2020-10-08 12:12:01,8.0
1,,
2,2020-10-10 09:26:43,10.0
3,2020-10-11 17:10:00,11.0
4,2020-10-15 10:58:02,15.0
5,2020-10-01 10:02:23,1.0
6,2020-09-30 14:36:26,30.0
7,2020-10-10 08:34:36,10.0
8,2020-10-09 08:34:40,9.0
9,2020-10-03 03:31:21,3.0


#### HOUR

In [8]:
# HOUR(datetime): select each timestamp next to its hour component, returned
# as column r. The recorded output uses a 24-hour value (17 for 17:10:00).
# The row whose datetime is NULL also has a NULL r.
query = '''
    SELECT datetime
        , HOUR(datetime) AS r
    FROM df
'''

# Last expression of the cell, so the notebook displays the query result.
bc.sql(query)

Unnamed: 0,datetime,r
0,2020-10-08 12:12:01,12.0
1,,
2,2020-10-10 09:26:43,9.0
3,2020-10-11 17:10:00,17.0
4,2020-10-15 10:58:02,10.0
5,2020-10-01 10:02:23,10.0
6,2020-09-30 14:36:26,14.0
7,2020-10-10 08:34:36,8.0
8,2020-10-09 08:34:40,8.0
9,2020-10-03 03:31:21,3.0


#### MINUTE

In [9]:
# MINUTE(datetime): select each timestamp next to its minute component,
# returned as column r (e.g. 26 for 09:26:43 in the recorded output).
# The row whose datetime is NULL also has a NULL r.
query = '''
    SELECT datetime
        , MINUTE(datetime) AS r
    FROM df
'''

# Last expression of the cell, so the notebook displays the query result.
bc.sql(query)

Unnamed: 0,datetime,r
0,2020-10-08 12:12:01,12.0
1,,
2,2020-10-10 09:26:43,26.0
3,2020-10-11 17:10:00,10.0
4,2020-10-15 10:58:02,58.0
5,2020-10-01 10:02:23,2.0
6,2020-09-30 14:36:26,36.0
7,2020-10-10 08:34:36,34.0
8,2020-10-09 08:34:40,34.0
9,2020-10-03 03:31:21,31.0


#### SECOND

In [10]:
# SECOND(datetime): select each timestamp next to its seconds component,
# returned as column r (e.g. 43 for 09:26:43 in the recorded output).
# The row whose datetime is NULL also has a NULL r.
query = '''
    SELECT datetime
        , SECOND(datetime) AS r
    FROM df
'''

# Last expression of the cell, so the notebook displays the query result.
bc.sql(query)

Unnamed: 0,datetime,r
0,2020-10-08 12:12:01,1.0
1,,
2,2020-10-10 09:26:43,43.0
3,2020-10-11 17:10:00,0.0
4,2020-10-15 10:58:02,2.0
5,2020-10-01 10:02:23,23.0
6,2020-09-30 14:36:26,26.0
7,2020-10-10 08:34:36,36.0
8,2020-10-09 08:34:40,40.0
9,2020-10-03 03:31:21,21.0


#### DAYOFWEEK

In [11]:
# DAYOFWEEK(datetime): select each timestamp next to its weekday number,
# returned as column r. The row whose datetime is NULL also has a NULL r.
# NOTE(review): the recorded output maps Thursday 2020-10-08 to 4 and
# Sunday 2020-10-11 to 7, which suggests Monday=1 ... Sunday=7 numbering;
# confirm against the documentation for the BlazingSQL version in use.
query = '''
    SELECT datetime
        , DAYOFWEEK(datetime) AS r
    FROM df
'''

# Last expression of the cell, so the notebook displays the query result.
bc.sql(query)

Unnamed: 0,datetime,r
0,2020-10-08 12:12:01,4.0
1,,
2,2020-10-10 09:26:43,6.0
3,2020-10-11 17:10:00,7.0
4,2020-10-15 10:58:02,4.0
5,2020-10-01 10:02:23,4.0
6,2020-09-30 14:36:26,3.0
7,2020-10-10 08:34:36,6.0
8,2020-10-09 08:34:40,5.0
9,2020-10-03 03:31:21,6.0
