# CSV

In [1]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call in this cell
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context before using s3://bsql/ paths
bc.s3("bsql", bucket_name="bsql")

# Expose the sample CSV file as the SQL table 'taxi'
bc.create_table("taxi", "s3://bsql/data/samples/nytaxi.csv")

# Count rows per payment_type, largest count first
# (bc.sql hands the result back as a cuDF DataFrame)
df = bc.sql("""
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
""")

# Print the aggregated result
print(df)

BlazingContext ready
   payment_type     cnt
0             1  642282
1             2  352976
2             3    3536
3             4    1206


# ORC

In [2]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call in this cell
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context before using s3://bsql/ paths
bc.s3("bsql", bucket_name="bsql")

# Expose the sample ORC file as the SQL table 'taxi'
bc.create_table("taxi", "s3://bsql/data/samples/nytaxi.orc")

# Count rows per payment_type, largest count first
# (bc.sql hands the result back as a cuDF DataFrame)
df = bc.sql("""
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
""")

# Print the aggregated result
print(df)

BlazingContext ready
   payment_type     cnt
0             1  642282
1             2  352976
2             3    3536
3             4    1206


# Parquet

In [3]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call in this cell
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context before using s3://bsql/ paths
bc.s3("bsql", bucket_name="bsql")

# Expose the sample Parquet file as the SQL table 'taxi'
bc.create_table("taxi", "s3://bsql/data/samples/nytaxi.parquet")

# Count rows per payment_type, largest count first
# (bc.sql hands the result back as a cuDF DataFrame)
df = bc.sql("""
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
""")

# Print the aggregated result
print(df)

BlazingContext ready
   payment_type     cnt
0             1  642282
1             2  352976
2             3    3536
3             4    1206


# cuDF DataFrame

In [4]:
from blazingsql import BlazingContext
import cudf

# Create the BlazingSQL context used by every call in this cell
bc = BlazingContext()

# Read the sample Parquet file from S3 straight into a cuDF DataFrame.
# No bucket is registered with the context here: cuDF does the read itself,
# with the 'anon' storage option set to True.
df_read = cudf.read_parquet(
    "s3://bsql/data/samples/nytaxi.parquet",
    storage_options={"anon": True},
)

# Expose the in-memory cuDF DataFrame as the SQL table 'taxi'
bc.create_table("taxi", df_read)

# Count rows per payment_type, largest count first
# (bc.sql hands the result back as a cuDF DataFrame)
df = bc.sql("""
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
""")

# Print the aggregated result
print(df)

BlazingContext ready
   payment_type     cnt
0             1  642282
1             2  352976
2             3    3536
3             4    1206


# pandas DataFrame

In [5]:
from blazingsql import BlazingContext
import pandas as pd

# Create the BlazingSQL context used by every call in this cell
bc = BlazingContext()

# Load the sample CSV from S3 into a pandas DataFrame
df_read = pd.read_csv("s3://bsql/data/samples/nytaxi.csv")

# Expose the in-memory pandas DataFrame as the SQL table 'taxi'
bc.create_table("taxi", df_read)

# Count rows per payment_type, largest count first
# (bc.sql hands the result back as a cuDF DataFrame)
df = bc.sql("""
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
""")

# Print the aggregated result
print(df)

BlazingContext ready
   payment_type     cnt
0             1  642282
1             2  352976
2             3    3536
3             4    1206


# Multiple formats

In [7]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call in this cell
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context before using s3://bsql/ paths
bc.s3("bsql", bucket_name="bsql")

# Two tables backed by different file formats: Parquet trips and CSV vendors
bc.create_table("taxi", "s3://bsql/data/samples/nytaxi.parquet")
bc.create_table("vendors", "s3://bsql/data/samples/vendors.csv")

# Join trips to vendors on VendorID, then count rows per
# (VendorID, VendorType, payment_type), largest count first
# (bc.sql hands the result back as a cuDF DataFrame)
df = bc.sql("""
    SELECT A.VendorID
        , B.VendorType
        , A.payment_type
        , COUNT(*) AS cnt 
    FROM taxi AS A
    LEFT OUTER JOIN vendors AS B
        ON A.VendorID = B.VendorID
    GROUP BY A.VendorID
        , B.VendorType
        , A.payment_type
    ORDER BY COUNT(*) DESC
""")

# Print the aggregated result
print(df)

BlazingContext ready
   VendorID VendorType  payment_type     cnt
0         2   Vendor 2             1  362171
1         1   Vendor 1             1  280111
2         2   Vendor 2             2  199006
3         1   Vendor 1             2  153970
4         1   Vendor 1             3    3346
5         1   Vendor 1             4    1061
6         2   Vendor 2             3     190
7         2   Vendor 2             4     145
