# BlazingSQL Cheat Sheets sample code

(c) 2020 NVIDIA, Blazing SQL

Distributed under Apache License 2.0

### Imports

In [1]:
import cudf
import numpy as np
from blazingsql import BlazingContext

### Sample Data Table

In [2]:
# Ten-row sample table used by every query below.
# Tuple field order follows `columns`: number, float_number, datetime,
# letter, category, word, string. The 'datetime', 'word' and 'string'
# columns each contain exactly one None so null handling shows up in results.
df = cudf.DataFrame(
    [
        (
            39, -6.88, np.datetime64('2020-10-08T12:12:01'), 'C', 'D', 'data',
            'RAPIDS.ai is a suite of open-source libraries that allow you to run your end to end data science and analytics pipelines on GPUs.',
        ),
        (
            11, 4.21, None, 'A', 'D', 'cuDF',
            'cuDF is a Python GPU DataFrame (built on the Apache Arrow columnar memory format)',
        ),
        (
            31, 4.71, np.datetime64('2020-10-10T09:26:43'), 'U', 'D', 'memory',
            'cuDF allows for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.',
        ),
        (
            40, 0.93, np.datetime64('2020-10-11T17:10:00'), 'P', 'B', 'tabular',
            # The embedded line break and the 17-space indent after it are part of the value.
            (
                'If your workflow is fast enough on a single GPU or your data comfortably fits in memory on \n'
                '                 a single GPU, you would want to use cuDF.'
            ),
        ),
        (
            33, 9.26, np.datetime64('2020-10-15T10:58:02'), 'O', 'D', 'parallel',
            # The embedded line break and the 17-space indent after it are part of the value.
            (
                'If you want to distribute your workflow across multiple GPUs or have more data than you can fit \n'
                '                 in memory on a single GPU you would want to use Dask-cuDF'
            ),
        ),
        (
            42, 4.21, np.datetime64('2020-10-01T10:02:23'), 'U', 'C', 'GPUs',
            'BlazingSQL provides a high-performance distributed SQL engine in Python',
        ),
        (
            36, 3.01, np.datetime64('2020-09-30T14:36:26'), 'T', 'D', None,
            'BlazingSQL is built on the RAPIDS GPU data science ecosystem',
        ),
        (
            38, 6.44, np.datetime64('2020-10-10T08:34:36'), 'X', 'B', 'csv',
            'BlazingSQL lets you ETL raw data directly into GPU memory as a GPU DataFrame (GDF)',
        ),
        (
            17, -5.28, np.datetime64('2020-10-09T08:34:40'), 'P', 'D', 'dataframes',
            'Dask is a flexible library for parallel computing in Python',
        ),
        (
            10, 8.28, np.datetime64('2020-10-03T03:31:21'), 'W', 'B', 'python',
            None,
        ),
    ],
    columns=[
        'number',
        'float_number',
        'datetime',
        'letter',
        'category',
        'word',
        'string',
    ],
)

In [3]:
# Create the BlazingSQL context that every later cell uses to register tables
# and run SQL; the recorded output prints "BlazingContext ready".
bc = BlazingContext()

BlazingContext ready


In [4]:
# Register the cuDF DataFrame `df` under the SQL table name 'df', which is the
# name the queries below reference in their FROM clause.
bc.create_table('df', df)

# SQL String Functions

#### CONCAT

In [5]:
# CONCAT: the SQL `||` operator prefixes every `string` value with the literal
# 'INFO: '. In the recorded output the row whose `string` is None has an empty
# result as well.
query = '''
    SELECT string
        , 'INFO: ' || string AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,INFO: RAPIDS.ai is a suite of open-source libr...
1,cuDF is a Python GPU DataFrame (built on the A...,INFO: cuDF is a Python GPU DataFrame (built on...
2,"cuDF allows for loading, joining, aggregating,...","INFO: cuDF allows for loading, joining, aggreg..."
3,If your workflow is fast enough on a single GP...,INFO: If your workflow is fast enough on a sin...
4,If you want to distribute your workflow across...,INFO: If you want to distribute your workflow ...
5,BlazingSQL provides a high-performance distrib...,INFO: BlazingSQL provides a high-performance d...
6,BlazingSQL is built on the RAPIDS GPU data sci...,INFO: BlazingSQL is built on the RAPIDS GPU da...
7,BlazingSQL lets you ETL raw data directly into...,INFO: BlazingSQL lets you ETL raw data directl...
8,Dask is a flexible library for parallel comput...,INFO: Dask is a flexible library for parallel ...
9,,


#### SUBSTRING

In [6]:
# SUBSTRING(string, 0, 2): the recorded output shows the first two characters
# of each string.
# NOTE(review): the start position here is 0; many SQL dialects count
# SUBSTRING positions from 1 - confirm BlazingSQL's indexing before reusing
# this pattern with other offsets.
query = '''
    SELECT string
        , SUBSTRING(string, 0, 2) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,RA
1,cuDF is a Python GPU DataFrame (built on the A...,cu
2,"cuDF allows for loading, joining, aggregating,...",cu
3,If your workflow is fast enough on a single GP...,If
4,If you want to distribute your workflow across...,If
5,BlazingSQL provides a high-performance distrib...,Bl
6,BlazingSQL is built on the RAPIDS GPU data sci...,Bl
7,BlazingSQL lets you ETL raw data directly into...,Bl
8,Dask is a flexible library for parallel comput...,Da
9,,


#### CHAR_LENGTH

In [7]:
# CHAR_LENGTH: number of characters in each `string` value. The counts include
# the embedded line break and indentation inside the two multi-line sample
# strings (150 and 171 in the recorded output).
# The recorded result is displayed as floats (e.g. 129.0) - presumably because
# the row with a None string makes the column nullable; TODO confirm.
query = '''
    SELECT string
        , CHAR_LENGTH(string) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,129.0
1,cuDF is a Python GPU DataFrame (built on the A...,81.0
2,"cuDF allows for loading, joining, aggregating,...",126.0
3,If your workflow is fast enough on a single GP...,150.0
4,If you want to distribute your workflow across...,171.0
5,BlazingSQL provides a high-performance distrib...,71.0
6,BlazingSQL is built on the RAPIDS GPU data sci...,60.0
7,BlazingSQL lets you ETL raw data directly into...,82.0
8,Dask is a flexible library for parallel comput...,59.0
9,,


#### LEFT

In [8]:
# LEFT(string, 3): the recorded output shows the first three characters of
# each string (e.g. 'RAP', 'cuD'); the None row stays empty.
query = '''
    SELECT string
        , LEFT(string, 3) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,RAP
1,cuDF is a Python GPU DataFrame (built on the A...,cuD
2,"cuDF allows for loading, joining, aggregating,...",cuD
3,If your workflow is fast enough on a single GP...,If
4,If you want to distribute your workflow across...,If
5,BlazingSQL provides a high-performance distrib...,Bla
6,BlazingSQL is built on the RAPIDS GPU data sci...,Bla
7,BlazingSQL lets you ETL raw data directly into...,Bla
8,Dask is a flexible library for parallel comput...,Das
9,,


#### RIGHT

In [9]:
# RIGHT(string, 3): the recorded output shows the last three characters of
# each string (e.g. 'Us.', 'at)'); the None row stays empty.
query = '''
    SELECT string
        , RIGHT(string, 3) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,Us.
1,cuDF is a Python GPU DataFrame (built on the A...,at)
2,"cuDF allows for loading, joining, aggregating,...",PI.
3,If your workflow is fast enough on a single GP...,DF.
4,If you want to distribute your workflow across...,uDF
5,BlazingSQL provides a high-performance distrib...,hon
6,BlazingSQL is built on the RAPIDS GPU data sci...,tem
7,BlazingSQL lets you ETL raw data directly into...,DF)
8,Dask is a flexible library for parallel comput...,hon
9,,


#### LTRIM

In [10]:
# LTRIM: expected to strip leading whitespace from `string`. None of the
# sample strings starts with whitespace, so in the recorded output `r` looks
# identical to `string`.
query = '''
    SELECT string
        , LTRIM(string) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,RAPIDS.ai is a suite of open-source libraries ...
1,cuDF is a Python GPU DataFrame (built on the A...,cuDF is a Python GPU DataFrame (built on the A...
2,"cuDF allows for loading, joining, aggregating,...","cuDF allows for loading, joining, aggregating,..."
3,If your workflow is fast enough on a single GP...,If your workflow is fast enough on a single GP...
4,If you want to distribute your workflow across...,If you want to distribute your workflow across...
5,BlazingSQL provides a high-performance distrib...,BlazingSQL provides a high-performance distrib...
6,BlazingSQL is built on the RAPIDS GPU data sci...,BlazingSQL is built on the RAPIDS GPU data sci...
7,BlazingSQL lets you ETL raw data directly into...,BlazingSQL lets you ETL raw data directly into...
8,Dask is a flexible library for parallel comput...,Dask is a flexible library for parallel comput...
9,,


#### RTRIM

In [11]:
# RTRIM: expected to strip trailing whitespace from `string`. None of the
# sample strings ends with whitespace, so in the recorded output `r` looks
# identical to `string`.
query = '''
    SELECT string
        , RTRIM(string) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,RAPIDS.ai is a suite of open-source libraries ...
1,cuDF is a Python GPU DataFrame (built on the A...,cuDF is a Python GPU DataFrame (built on the A...
2,"cuDF allows for loading, joining, aggregating,...","cuDF allows for loading, joining, aggregating,..."
3,If your workflow is fast enough on a single GP...,If your workflow is fast enough on a single GP...
4,If you want to distribute your workflow across...,If you want to distribute your workflow across...
5,BlazingSQL provides a high-performance distrib...,BlazingSQL provides a high-performance distrib...
6,BlazingSQL is built on the RAPIDS GPU data sci...,BlazingSQL is built on the RAPIDS GPU data sci...
7,BlazingSQL lets you ETL raw data directly into...,BlazingSQL lets you ETL raw data directly into...
8,Dask is a flexible library for parallel comput...,Dask is a flexible library for parallel comput...
9,,


#### REPLACE

In [12]:
# REPLACE: substitutes 'NVIDIA RAPIDS' for each occurrence of 'RAPIDS'.
# In the recorded output only rows 0 and 6 change, as they are the only
# sample strings containing 'RAPIDS'.
query = '''
    SELECT string
        , REPLACE(string, 'RAPIDS', 'NVIDIA RAPIDS') AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,NVIDIA RAPIDS.ai is a suite of open-source lib...
1,cuDF is a Python GPU DataFrame (built on the A...,cuDF is a Python GPU DataFrame (built on the A...
2,"cuDF allows for loading, joining, aggregating,...","cuDF allows for loading, joining, aggregating,..."
3,If your workflow is fast enough on a single GP...,If your workflow is fast enough on a single GP...
4,If you want to distribute your workflow across...,If you want to distribute your workflow across...
5,BlazingSQL provides a high-performance distrib...,BlazingSQL provides a high-performance distrib...
6,BlazingSQL is built on the RAPIDS GPU data sci...,BlazingSQL is built on the NVIDIA RAPIDS GPU d...
7,BlazingSQL lets you ETL raw data directly into...,BlazingSQL lets you ETL raw data directly into...
8,Dask is a flexible library for parallel comput...,Dask is a flexible library for parallel comput...
9,,


#### UPPER

In [13]:
# UPPER: returns `string` converted to upper case; the None row stays empty
# in the recorded output.
query = '''
    SELECT string
        , UPPER(string) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,RAPIDS.AI IS A SUITE OF OPEN-SOURCE LIBRARIES ...
1,cuDF is a Python GPU DataFrame (built on the A...,CUDF IS A PYTHON GPU DATAFRAME (BUILT ON THE A...
2,"cuDF allows for loading, joining, aggregating,...","CUDF ALLOWS FOR LOADING, JOINING, AGGREGATING,..."
3,If your workflow is fast enough on a single GP...,IF YOUR WORKFLOW IS FAST ENOUGH ON A SINGLE GP...
4,If you want to distribute your workflow across...,IF YOU WANT TO DISTRIBUTE YOUR WORKFLOW ACROSS...
5,BlazingSQL provides a high-performance distrib...,BLAZINGSQL PROVIDES A HIGH-PERFORMANCE DISTRIB...
6,BlazingSQL is built on the RAPIDS GPU data sci...,BLAZINGSQL IS BUILT ON THE RAPIDS GPU DATA SCI...
7,BlazingSQL lets you ETL raw data directly into...,BLAZINGSQL LETS YOU ETL RAW DATA DIRECTLY INTO...
8,Dask is a flexible library for parallel comput...,DASK IS A FLEXIBLE LIBRARY FOR PARALLEL COMPUT...
9,,


#### LOWER

In [14]:
# LOWER: returns `string` converted to lower case; the None row stays empty
# in the recorded output.
query = '''
    SELECT string
        , LOWER(string) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,rapids.ai is a suite of open-source libraries ...
1,cuDF is a Python GPU DataFrame (built on the A...,cudf is a python gpu dataframe (built on the a...
2,"cuDF allows for loading, joining, aggregating,...","cudf allows for loading, joining, aggregating,..."
3,If your workflow is fast enough on a single GP...,if your workflow is fast enough on a single gp...
4,If you want to distribute your workflow across...,if you want to distribute your workflow across...
5,BlazingSQL provides a high-performance distrib...,blazingsql provides a high-performance distrib...
6,BlazingSQL is built on the RAPIDS GPU data sci...,blazingsql is built on the rapids gpu data sci...
7,BlazingSQL lets you ETL raw data directly into...,blazingsql lets you etl raw data directly into...
8,Dask is a flexible library for parallel comput...,dask is a flexible library for parallel comput...
9,,


#### REVERSE

In [15]:
# REVERSE: returns the characters of `string` in reverse order; the None row
# stays empty in the recorded output.
query = '''
    SELECT string
        , REVERSE(string) AS r
    FROM df
'''

# Last expression of the cell, so the query result is displayed.
bc.sql(query)

Unnamed: 0,string,r
0,RAPIDS.ai is a suite of open-source libraries ...,.sUPG no senilepip scitylana dna ecneics atad ...
1,cuDF is a Python GPU DataFrame (built on the A...,)tamrof yromem ranmuloc worrA ehcapA eht no tl...
2,"cuDF allows for loading, joining, aggregating,...",.IPA elyts emarFataD a gnisu atad ralubat gnit...
3,If your workflow is fast enough on a single GP...,".FDuc esu ot tnaw dluow uoy ,UPG elgnis a ..."
4,If you want to distribute your workflow across...,FDuc-ksaD esu ot tnaw dluow uoy UPG elgnis a n...
5,BlazingSQL provides a high-performance distrib...,nohtyP ni enigne LQS detubirtsid ecnamrofrep-h...
6,BlazingSQL is built on the RAPIDS GPU data sci...,metsysoce ecneics atad UPG SDIPAR eht no tliub...
7,BlazingSQL lets you ETL raw data directly into...,)FDG( emarFataD UPG a sa yromem UPG otni yltce...
8,Dask is a flexible library for parallel comput...,nohtyP ni gnitupmoc lellarap rof yrarbil elbix...
9,,
