# BlazingSQL Cheat Sheets sample code

(c) 2020 NVIDIA, Blazing SQL

Distributed under Apache License 2.0

### Imports

In [1]:
import cudf
import numpy as np
from blazingsql import BlazingContext

### Sample Data Table

In [2]:
# Ten-row sample table queried by every cell in this notebook.
# It contains None entries: row 1 has no datetime, row 6 has no word, row 9 has no string.
df = cudf.DataFrame(
    [
        (
            39, -6.88, np.datetime64('2020-10-08T12:12:01'), 'C', 'D', 'data',
            'RAPIDS.ai is a suite of open-source libraries that allow you to run your end to end data science and analytics pipelines on GPUs.',
        ),
        (
            11, 4.21, None, 'A', 'D', 'cuDF',
            'cuDF is a Python GPU DataFrame (built on the Apache Arrow columnar memory format)',
        ),
        (
            31, 4.71, np.datetime64('2020-10-10T09:26:43'), 'U', 'D', 'memory',
            'cuDF allows for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.',
        ),
        (
            40, 0.93, np.datetime64('2020-10-11T17:10:00'), 'P', 'B', 'tabular',
            # This text contains an embedded line break followed by 17 spaces of indentation.
            'If your workflow is fast enough on a single GPU or your data comfortably fits in memory on \n'
            '                 a single GPU, you would want to use cuDF.',
        ),
        (
            33, 9.26, np.datetime64('2020-10-15T10:58:02'), 'O', 'D', 'parallel',
            # This text contains an embedded line break followed by 17 spaces of indentation.
            'If you want to distribute your workflow across multiple GPUs or have more data than you can fit \n'
            '                 in memory on a single GPU you would want to use Dask-cuDF',
        ),
        (
            42, 4.21, np.datetime64('2020-10-01T10:02:23'), 'U', 'C', 'GPUs',
            'BlazingSQL provides a high-performance distributed SQL engine in Python',
        ),
        (
            36, 3.01, np.datetime64('2020-09-30T14:36:26'), 'T', 'D', None,
            'BlazingSQL is built on the RAPIDS GPU data science ecosystem',
        ),
        (
            38, 6.44, np.datetime64('2020-10-10T08:34:36'), 'X', 'B', 'csv',
            'BlazingSQL lets you ETL raw data directly into GPU memory as a GPU DataFrame (GDF)',
        ),
        (
            17, -5.28, np.datetime64('2020-10-09T08:34:40'), 'P', 'D', 'dataframes',
            'Dask is a flexible library for parallel computing in Python',
        ),
        (
            10, 8.28, np.datetime64('2020-10-03T03:31:21'), 'W', 'B', 'python',
            None,
        ),
    ],
    columns=['number', 'float_number', 'datetime', 'letter', 'category', 'word', 'string'],
)

In [3]:
# Create the BlazingContext instance; every later cell issues its SQL through `bc`.
bc = BlazingContext()

BlazingContext ready


In [4]:
# Hand the sample DataFrame to the context under the table name 'df',
# the name every query below uses in its FROM clause.
bc.create_table('df', df)

# SQL Unary Functions

#### FLOOR

In [5]:
# FLOOR applied to each float_number, returned next to the input as column f.
# In the recorded output below, negatives move away from zero: -6.88 -> -7.0, -5.28 -> -6.0.
query = '''
    SELECT float_number
        , FLOOR(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,-7.0
1,4.21,4.0
2,4.71,4.0
3,0.93,0.0
4,9.26,9.0
5,4.21,4.0
6,3.01,3.0
7,6.44,6.0
8,-5.28,-6.0
9,8.28,8.0


#### CEILING

In [6]:
# CEILING applied to each float_number, returned next to the input as column f.
# In the recorded output below, each value maps to the next whole number up: 4.21 -> 5.0, -6.88 -> -6.0.
query = '''
    SELECT float_number
        , CEILING(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,-6.0
1,4.21,5.0
2,4.71,5.0
3,0.93,1.0
4,9.26,10.0
5,4.21,5.0
6,3.01,4.0
7,6.44,7.0
8,-5.28,-5.0
9,8.28,9.0


#### SIN

In [7]:
# SIN applied to each float_number, returned next to the input as column f.
# NOTE(review): the recorded output (e.g. 4.71 -> -0.999997) is consistent with the
# argument being read as radians - confirm against the BlazingSQL docs.
query = '''
    SELECT float_number
        , SIN(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,-0.562011
1,4.21,-0.876435
2,4.71,-0.999997
3,0.93,0.80162
4,9.26,0.164033
5,4.21,-0.876435
6,3.01,0.131213
7,6.44,0.156173
8,-5.28,0.843188
9,8.28,0.910618


#### COS

In [8]:
# COS applied to each float_number, returned next to the input as column f.
# NOTE(review): the recorded output (e.g. 4.71 -> -0.002389) is consistent with the
# argument being read as radians - confirm against the BlazingSQL docs.
query = '''
    SELECT float_number 
        , COS(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,0.82713
1,4.21,-0.481521
2,4.71,-0.002389
3,0.93,0.597834
4,9.26,-0.986455
5,4.21,-0.481521
6,3.01,-0.991354
7,6.44,0.98773
8,-5.28,0.537619
9,8.28,-0.413248


#### ASIN

In [9]:
# ASIN applied to each float_number, returned next to the input as column f.
# In the recorded output below only 0.93 produces a value (1.194413);
# f is empty for every other row.
query = '''
    SELECT float_number
        , ASIN(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,
1,4.21,
2,4.71,
3,0.93,1.194413
4,9.26,
5,4.21,
6,3.01,
7,6.44,
8,-5.28,
9,8.28,


#### ACOS

In [10]:
# ACOS applied to each float_number, returned next to the input as column f.
# In the recorded output below only 0.93 produces a value (0.376383);
# f is empty for every other row.
query = '''
    SELECT float_number
        , ACOS(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,
1,4.21,
2,4.71,
3,0.93,0.376383
4,9.26,
5,4.21,
6,3.01,
7,6.44,
8,-5.28,
9,8.28,


#### TAN

In [11]:
# TAN applied to each float_number, returned next to the input as column f.
# Every row yields a value in the recorded output; note the large result at 4.71 (418.587823).
query = '''
    SELECT float_number
        , TAN(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,-0.679471
1,4.21,1.820139
2,4.71,418.587823
3,0.93,1.340874
4,9.26,-0.166286
5,4.21,1.820139
6,3.01,-0.132358
7,6.44,0.158113
8,-5.28,1.568373
9,8.28,-2.203562


#### ATAN

In [12]:
# ATAN applied to each float_number, returned next to the input as column f.
# Every row yields a value in the recorded output; the two negative inputs give negative results.
query = '''
    SELECT float_number
        , ATAN(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,-1.426458
1,4.21,1.337588
2,4.71,1.361589
3,0.93,0.749145
4,9.26,1.463222
5,4.21,1.337588
6,3.01,1.250043
7,6.44,1.416747
8,-5.28,-1.383619
9,8.28,1.450605


#### SQRT

In [13]:
# SQRT applied to each float_number, returned next to the input as column f.
# In the recorded output below f is empty for the two negative inputs (-6.88 and -5.28).
query = '''
    SELECT float_number
        , SQRT(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,
1,4.21,2.051828
2,4.71,2.170253
3,0.93,0.964365
4,9.26,3.043025
5,4.21,2.051828
6,3.01,1.734935
7,6.44,2.537716
8,-5.28,
9,8.28,2.877499


#### ABS

In [14]:
# ABS applied to each float_number, returned next to the input as column f.
# In the recorded output below the sign is dropped: -6.88 -> 6.88, -5.28 -> 5.28.
query = '''
    SELECT float_number
        , ABS(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,6.88
1,4.21,4.21
2,4.71,4.71
3,0.93,0.93
4,9.26,9.26
5,4.21,4.21
6,3.01,3.01
7,6.44,6.44
8,-5.28,5.28
9,8.28,8.28


#### NOT

In [15]:
# NOT negates the comparison `float_number > 0`; the boolean result is returned as column f.
# In the recorded output below f is True only for the two negative rows.
query = '''
    SELECT float_number
        , NOT(float_number > 0) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,True
1,4.21,False
2,4.71,False
3,0.93,False
4,9.26,False
5,4.21,False
6,3.01,False
7,6.44,False
8,-5.28,True
9,8.28,False


#### LN

In [16]:
# LN applied to each float_number, returned next to the input as column f.
# In the recorded output below f is empty for the two negative inputs, and 0.93 gives a negative result.
query = '''
    SELECT float_number
        , LN(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,
1,4.21,1.437463
2,4.71,1.549688
3,0.93,-0.072571
4,9.26,2.225704
5,4.21,1.437463
6,3.01,1.10194
7,6.44,1.862529
8,-5.28,
9,8.28,2.113843


#### LOG10

In [17]:
# LOG10 applied to each float_number, returned next to the input as column f.
# In the recorded output below f is empty for the two negative inputs (-6.88 and -5.28).
query = '''
    SELECT float_number
        , LOG10(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,
1,4.21,0.624282
2,4.71,0.673021
3,0.93,-0.031517
4,9.26,0.966611
5,4.21,0.624282
6,3.01,0.478566
7,6.44,0.808886
8,-5.28,
9,8.28,0.91803


#### RAND

In [18]:
# RAND() is called with no argument and returned as column r next to float_number.
# NOTE(review): the r values shown below come from one recorded run and presumably
# change on re-execution - confirm.
query = '''
    SELECT float_number
        , RAND() AS r
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,r
0,-6.88,0.904132
1,4.21,0.145137
2,4.71,0.333669
3,0.93,0.759544
4,9.26,0.063746
5,4.21,0.276569
6,3.01,0.877131
7,6.44,0.203729
8,-5.28,0.654254
9,8.28,0.20353


#### ROUND

In [19]:
# ROUND called with a single argument, returned next to the input as column f.
# In the recorded output below results are whole numbers: 4.71 -> 5.0, -5.28 -> -5.0.
query = '''
    SELECT float_number
        , ROUND(float_number) AS f
    FROM df
'''

bc.sql(query)

Unnamed: 0,float_number,f
0,-6.88,-7.0
1,4.21,4.0
2,4.71,5.0
3,0.93,1.0
4,9.26,9.0
5,4.21,4.0
6,3.01,3.0
7,6.44,6.0
8,-5.28,-5.0
9,8.28,8.0


#### STDDEV

In [20]:
# STDDEV of float_number computed once per category (GROUP BY), returned as column agg.
# Category C has a single row in df and its agg is empty in the recorded output below.
# The recorded values are the same as those of the STDDEV_SAMP cell.
query = '''
    SELECT category
        , STDDEV(float_number) AS agg
    FROM df
    GROUP BY category
'''

bc.sql(query)

Unnamed: 0,category,agg
0,C,
1,B,3.824661205
2,D,6.266395296


#### STDDEV_POP

In [21]:
# STDDEV_POP of float_number computed once per category (GROUP BY), returned as column agg.
# Category C has a single row in df and its agg is 0.0 in the recorded output below.
query = '''
    SELECT category
        , STDDEV_POP(float_number) AS agg
    FROM df
    GROUP BY category
'''

bc.sql(query)

Unnamed: 0,category,agg
0,C,0.0
1,B,3.122823
2,D,5.72041


#### STDDEV_SAMP

In [22]:
# STDDEV_SAMP of float_number computed once per category (GROUP BY), returned as column agg.
# Category C has a single row in df and its agg is empty in the recorded output below.
# The recorded values are the same as those of the STDDEV cell.
query = '''
    SELECT category
        , STDDEV_SAMP(float_number) AS agg
    FROM df
    GROUP BY category
'''

bc.sql(query)

Unnamed: 0,category,agg
0,C,
1,B,3.824661205
2,D,6.266395296


#### VARIANCE

In [23]:
# VARIANCE of float_number computed once per category (GROUP BY), returned as column agg.
# Category C has a single row in df and its agg is empty in the recorded output below.
# The recorded values are the same as those of the VAR_SAMP cell.
query = '''
    SELECT category
        , VARIANCE(float_number) AS agg
    FROM df
    GROUP BY category
'''

bc.sql(query)

Unnamed: 0,category,agg
0,C,
1,B,14.62803333
2,D,39.26771


#### VAR_SAMP

In [24]:
# VAR_SAMP of float_number computed once per category (GROUP BY), returned as column agg.
# Category C has a single row in df and its agg is empty in the recorded output below.
# The recorded values are the same as those of the VARIANCE cell.
query = '''
    SELECT category
        , VAR_SAMP(float_number) AS agg
    FROM df
    GROUP BY category
'''

bc.sql(query)

Unnamed: 0,category,agg
0,C,
1,B,14.62803333
2,D,39.26771


#### VAR_POP

In [25]:
# VAR_POP of float_number computed once per category (GROUP BY), returned as column agg.
# Category C has a single row in df and its agg is 0.0 in the recorded output below.
query = '''
    SELECT category
        , VAR_POP(float_number) AS agg
    FROM df
    GROUP BY category
'''

bc.sql(query)

Unnamed: 0,category,agg
0,C,0.0
1,B,9.752022
2,D,32.723092
