<a id="CSV"></a>
# CSV

In [None]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call below
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context
# (the CSV file referenced below lives in it)
bc.s3(
    'bsql',
    bucket_name='bsql',
)

# Expose the sample CSV file as a SQL table named 'taxi'
bc.create_table(
    'taxi',
    's3://bsql/data/samples/nytaxi.csv',
)

# Count rows per payment_type, largest count first
# (the query result is returned as a cuDF DataFrame)
df = bc.sql(
    '''
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
'''
)

# Show the aggregated result
print(df)

<a id="ORC"></a>
# ORC

In [None]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call below
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context
# (the ORC file referenced below lives in it)
bc.s3(
    'bsql',
    bucket_name='bsql',
)

# Expose the sample ORC file as a SQL table named 'taxi'
bc.create_table(
    'taxi',
    's3://bsql/data/samples/nytaxi.orc',
)

# Count rows per payment_type, largest count first
# (the query result is returned as a cuDF DataFrame)
df = bc.sql(
    '''
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
'''
)

# Show the aggregated result
print(df)

<a id="Parquet"></a>
# Parquet

In [None]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call below
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context
# (the Parquet file referenced below lives in it)
bc.s3(
    'bsql',
    bucket_name='bsql',
)

# Expose the sample Parquet file as a SQL table named 'taxi'
bc.create_table(
    'taxi',
    's3://bsql/data/samples/nytaxi.parquet',
)

# Count rows per payment_type, largest count first
# (the query result is returned as a cuDF DataFrame)
df = bc.sql(
    '''
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
'''
)

# Show the aggregated result
print(df)

<a id="cudf"></a>
# cuDF DataFrame

In [None]:
from blazingsql import BlazingContext
import cudf

# Start up BlazingSQL
bc = BlazingContext()

# Read the sample Parquet file from S3 directly into a cuDF DataFrame.
# No bucket is registered with BlazingSQL in this example; the
# storage_options entry {'anon': True} asks for anonymous S3 access.
df_read = cudf.read_parquet(
    's3://bsql/data/samples/nytaxi.parquet',
    storage_options={'anon': True},
)

# Create table 'taxi' from the in-memory cuDF DataFrame
bc.create_table('taxi', df_read)

# Count rows per payment_type, largest count first
# (Results return as cuDF DataFrame)
df = bc.sql('''
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
''')

# Display query results
print(df)

<a id="pandas"></a>
# pandas DataFrame

In [None]:
from blazingsql import BlazingContext
import pandas as pd

# Create the BlazingSQL context used by every call below
bc = BlazingContext()

# Load the sample CSV file from S3 into a pandas DataFrame
df_read = pd.read_csv(
    's3://bsql/data/samples/nytaxi.csv',
)

# Expose the pandas DataFrame as a SQL table named 'taxi'
bc.create_table(
    'taxi',
    df_read,
)

# Count rows per payment_type, largest count first
# (the query result is returned as a cuDF DataFrame)
df = bc.sql(
    '''
    SELECT payment_type
        , COUNT(*) AS cnt 
    FROM taxi 
    GROUP BY payment_type
    ORDER BY COUNT(*) DESC
'''
)

# Show the aggregated result
print(df)

<a id="multiple"></a>
# Multiple formats

In [None]:
from blazingsql import BlazingContext

# Create the BlazingSQL context used by every call below
bc = BlazingContext()

# Register the 'bsql' S3 bucket with the context
# (both files referenced below live in it)
bc.s3(
    'bsql',
    bucket_name='bsql',
)

# Create one table per source file: 'taxi' from Parquet, 'vendors' from CSV
bc.create_table(
    'taxi',
    's3://bsql/data/samples/nytaxi.parquet',
)
bc.create_table(
    'vendors',
    's3://bsql/data/samples/vendors.csv',
)

# Left-join taxi to vendors on VendorID, then count rows per
# (VendorID, VendorType, payment_type), largest count first
# (the query result is returned as a cuDF DataFrame)
df = bc.sql(
    '''
    SELECT A.VendorID
        , B.VendorType
        , A.payment_type
        , COUNT(*) AS cnt 
    FROM taxi AS A
    LEFT OUTER JOIN vendors AS B
        ON A.VendorID = B.VendorID
    GROUP BY A.VendorID
        , B.VendorType
        , A.payment_type
    ORDER BY COUNT(*) DESC
'''
)

# Show the aggregated result
print(df)