# BlazingSQL Cheat Sheets sample code

(c) 2020 NVIDIA, Blazing SQL

Distributed under Apache License 2.0

### Imports

In [1]:
import cudf
import numpy as np
from blazingsql import BlazingContext

### Sample Data Table

In [2]:
# Sample table queried by every example in this notebook: ten rows with an
# integer, a float, a timestamp, two single-letter columns, a short word and
# a longer sentence.  The None entries in 'datetime', 'word' and 'string'
# give the table missing values; the COALESCE examples rely on the ones in
# 'word' and 'string'.
#
# The two longest sentences are assembled from adjacent string literals
# rather than a triple-quoted string, so the line wrap in this source file is
# not stored in the data as a newline followed by indentation.
df = cudf.DataFrame(
    [
        (39, -6.88, np.datetime64('2020-10-08T12:12:01'), 'C', 'D', 'data',
         'RAPIDS.ai is a suite of open-source libraries that allow you to run your end to end data science and analytics pipelines on GPUs.'),
        (11, 4.21, None, 'A', 'D', 'cuDF',
         'cuDF is a Python GPU DataFrame (built on the Apache Arrow columnar memory format)'),
        (31, 4.71, np.datetime64('2020-10-10T09:26:43'), 'U', 'D', 'memory',
         'cuDF allows for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.'),
        (40, 0.93, np.datetime64('2020-10-11T17:10:00'), 'P', 'B', 'tabular',
         'If your workflow is fast enough on a single GPU or your data '
         'comfortably fits in memory on a single GPU, you would want to '
         'use cuDF.'),
        (33, 9.26, np.datetime64('2020-10-15T10:58:02'), 'O', 'D', 'parallel',
         'If you want to distribute your workflow across multiple GPUs or '
         'have more data than you can fit in memory on a single GPU you '
         'would want to use Dask-cuDF'),
        (42, 4.21, np.datetime64('2020-10-01T10:02:23'), 'U', 'C', 'GPUs',
         'BlazingSQL provides a high-performance distributed SQL engine in Python'),
        (36, 3.01, np.datetime64('2020-09-30T14:36:26'), 'T', 'D', None,
         'BlazingSQL is built on the RAPIDS GPU data science ecosystem'),
        (38, 6.44, np.datetime64('2020-10-10T08:34:36'), 'X', 'B', 'csv',
         'BlazingSQL lets you ETL raw data directly into GPU memory as a GPU DataFrame (GDF)'),
        (17, -5.28, np.datetime64('2020-10-09T08:34:40'), 'P', 'D', 'dataframes',
         'Dask is a flexible library for parallel computing in Python'),
        (10, 8.28, np.datetime64('2020-10-03T03:31:21'), 'W', 'B', 'python',
         None),
    ],
    columns=['number', 'float_number', 'datetime', 'letter', 'category', 'word', 'string'],
)

In [3]:
# Create the BlazingContext; the table registration and every bc.sql(...) call
# in this notebook go through this object.
bc = BlazingContext()

BlazingContext ready


In [4]:
# Register the cuDF DataFrame under the table name 'df', which is the name the
# queries in this notebook use in their FROM clause.
bc.create_table('df', df)

---

# SQL Binary Functions

---

#### Addition

In [5]:
# Addition: r is the per-row sum of the integer column and the float column.
query = '''
    SELECT number
        , float_number
        , number + float_number AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,32.12
1,11,4.21,15.21
2,31,4.71,35.71
3,40,0.93,40.93
4,33,9.26,42.26
5,42,4.21,46.21
6,36,3.01,39.01
7,38,6.44,44.44
8,17,-5.28,11.72
9,10,8.28,18.28


#### Subtraction

In [6]:
# Subtraction: r is number minus float_number for each row.
query = '''
    SELECT number
        , float_number
        , number - float_number AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,45.88
1,11,4.21,6.79
2,31,4.71,26.29
3,40,0.93,39.07
4,33,9.26,23.74
5,42,4.21,37.79
6,36,3.01,32.99
7,38,6.44,31.56
8,17,-5.28,22.28
9,10,8.28,1.72


#### Multiplication

In [7]:
# Multiplication: r is the per-row product of number and float_number.
query = '''
    SELECT number
        , float_number
        , number * float_number AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,-268.32
1,11,4.21,46.31
2,31,4.71,146.01
3,40,0.93,37.2
4,33,9.26,305.58
5,42,4.21,176.82
6,36,3.01,108.36
7,38,6.44,244.72
8,17,-5.28,-89.76
9,10,8.28,82.8


#### Division

In [8]:
# Division: r is number divided by float_number for each row.
query = '''
    SELECT number
        , float_number
        , number / float_number AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,-5.668605
1,11,4.21,2.612827
2,31,4.71,6.581741
3,40,0.93,43.010753
4,33,9.26,3.563715
5,42,4.21,9.976247
6,36,3.01,11.960133
7,38,6.44,5.900621
8,17,-5.28,-3.219697
9,10,8.28,1.207729


#### MOD

In [9]:
# MOD: r is the remainder of number divided by 5.
# float_number is selected for display only; it is not used in the expression.
query = '''
    SELECT number
        , float_number
        , MOD(number, 5) AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,4
1,11,4.21,1
2,31,4.71,1
3,40,0.93,0
4,33,9.26,3
5,42,4.21,2
6,36,3.01,1
7,38,6.44,3
8,17,-5.28,2
9,10,8.28,0


#### POWER

In [10]:
# POWER: r is number raised to the power 2.  The displayed result shows r as
# a float (e.g. 1521.0) even though number is an integer column.
# float_number is selected for display only; it is not used in the expression.
query = '''
    SELECT number
        , float_number
        , POWER(number, 2) AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,1521.0
1,11,4.21,121.0
2,31,4.71,961.0
3,40,0.93,1600.0
4,33,9.26,1089.0
5,42,4.21,1764.0
6,36,3.01,1296.0
7,38,6.44,1444.0
8,17,-5.28,289.0
9,10,8.28,100.0


#### EQUAL TO

In [11]:
# EQUAL TO: r is a boolean column that is True only where number equals 33.
query = '''
    SELECT number
        , float_number
        , number = 33 AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,False
1,11,4.21,False
2,31,4.71,False
3,40,0.93,False
4,33,9.26,True
5,42,4.21,False
6,36,3.01,False
7,38,6.44,False
8,17,-5.28,False
9,10,8.28,False


#### NOT EQUAL TO

In [12]:
# NOT EQUAL TO: r is a boolean column that is True where number differs from 33.
query = '''
    SELECT number
        , float_number
        , number <> 33 AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,True
1,11,4.21,True
2,31,4.71,True
3,40,0.93,True
4,33,9.26,False
5,42,4.21,True
6,36,3.01,True
7,38,6.44,True
8,17,-5.28,True
9,10,8.28,True


#### LESS THAN

In [13]:
# LESS THAN: r is a boolean column that is True where number is below 32.
query = '''
    SELECT number
        , float_number
        , number < 32 AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,False
1,11,4.21,True
2,31,4.71,True
3,40,0.93,False
4,33,9.26,False
5,42,4.21,False
6,36,3.01,False
7,38,6.44,False
8,17,-5.28,True
9,10,8.28,True


#### GREATER THAN

In [14]:
# GREATER THAN: r is a boolean column that is True where number is above 32.
query = '''
    SELECT number
        , float_number
        , number > 32 AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,True
1,11,4.21,False
2,31,4.71,False
3,40,0.93,True
4,33,9.26,True
5,42,4.21,True
6,36,3.01,True
7,38,6.44,True
8,17,-5.28,False
9,10,8.28,False


#### LESS THAN OR EQUAL TO

In [15]:
# LESS THAN OR EQUAL TO: r is True where number is 32 or less.
# No row of the sample table has number = 32, so the displayed result is the
# same as for the strict "number < 32" comparison.
query = '''
    SELECT number
        , float_number
        , number <= 32 AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,False
1,11,4.21,True
2,31,4.71,True
3,40,0.93,False
4,33,9.26,False
5,42,4.21,False
6,36,3.01,False
7,38,6.44,False
8,17,-5.28,True
9,10,8.28,True


#### GREATER THAN OR EQUAL TO

In [16]:
# GREATER THAN OR EQUAL TO: r is True where number is 32 or more.
# No row of the sample table has number = 32, so the displayed result is the
# same as for the strict "number > 32" comparison.
query = '''
    SELECT number
        , float_number
        , number >= 32 AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,number,float_number,r
0,39,-6.88,True
1,11,4.21,False
2,31,4.71,False
3,40,0.93,True
4,33,9.26,True
5,42,4.21,True
6,36,3.01,True
7,38,6.44,True
8,17,-5.28,False
9,10,8.28,False


#### COALESCE

In [17]:
# COALESCE(word, string): r takes word when it is present and falls back to
# string when word is missing (row 6 of the sample table).
query = '''
    SELECT word
        , string
        , COALESCE(word, string) AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,word,string,r
0,data,RAPIDS.ai is a suite of open-source libraries ...,data
1,cuDF,cuDF is a Python GPU DataFrame (built on the A...,cuDF
2,memory,"cuDF allows for loading, joining, aggregating,...",memory
3,tabular,If your workflow is fast enough on a single GP...,tabular
4,parallel,If you want to distribute your workflow across...,parallel
5,GPUs,BlazingSQL provides a high-performance distrib...,GPUs
6,,BlazingSQL is built on the RAPIDS GPU data sci...,BlazingSQL is built on the RAPIDS GPU data sci...
7,csv,BlazingSQL lets you ETL raw data directly into...,csv
8,dataframes,Dask is a flexible library for parallel comput...,dataframes
9,python,,python


In [18]:
# COALESCE(string, word): same as the previous example with the arguments
# swapped, so r takes string when it is present and falls back to word when
# string is missing (row 9 of the sample table).
query = '''
    SELECT word
        , string
        , COALESCE(string, word) AS r
    FROM df
'''

# Last expression of the cell, so the query result is what the notebook displays.
bc.sql(query)

Unnamed: 0,word,string,r
0,data,RAPIDS.ai is a suite of open-source libraries ...,RAPIDS.ai is a suite of open-source libraries ...
1,cuDF,cuDF is a Python GPU DataFrame (built on the A...,cuDF is a Python GPU DataFrame (built on the A...
2,memory,"cuDF allows for loading, joining, aggregating,...","cuDF allows for loading, joining, aggregating,..."
3,tabular,If your workflow is fast enough on a single GP...,If your workflow is fast enough on a single GP...
4,parallel,If you want to distribute your workflow across...,If you want to distribute your workflow across...
5,GPUs,BlazingSQL provides a high-performance distrib...,BlazingSQL provides a high-performance distrib...
6,,BlazingSQL is built on the RAPIDS GPU data sci...,BlazingSQL is built on the RAPIDS GPU data sci...
7,csv,BlazingSQL lets you ETL raw data directly into...,BlazingSQL lets you ETL raw data directly into...
8,dataframes,Dask is a flexible library for parallel comput...,Dask is a flexible library for parallel comput...
9,python,,python
