# SQL Reference

## General SQL
[Docs](https://docs.blazingdb.com/docs/queries)

### CASE
[Docs](https://docs.blazingdb.com/docs/queries#case)

```sql
SELECT CASE WHEN column_A > 5 THEN 1 ELSE 0 END FROM table_A
```

#### CASE -- Single Filter

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select 
            CASE 
                WHEN fare_amount > 20 THEN 1 
                ELSE 0 END
        from 
            taxi
            '''
bc.sql(query)

#### CASE -- Multiple Filters

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select 
            CASE 
                WHEN fare_amount > 20 THEN 1 
                ELSE 0 END, 
            CASE 
                WHEN tpep_pickup_datetime like '%-01-01%' THEN 1 
                ELSE 0 END
        from
            taxi
            '''
bc.sql(query)

### CAST
[Docs](https://docs.blazingdb.com/docs/queries#cast)

```sql
SELECT CAST(column_A AS type) FROM table_A
```

#### CAST -- Single Filter

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select 
            CAST(fare_amount AS int) 
        from 
            taxi
            '''
bc.sql(query)

#### CAST -- Multiple Filters

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select 
            CAST(passenger_count AS float) passenger_count_float, 
            CAST(fare_amount AS int) fare_amount_int
        from
            taxi
            '''
bc.sql(query)

#### CAST after WHERE

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select
            *
        from
            taxi
            WHERE
                DAYOFMONTH(tpep_pickup_datetime) <= 7
                '''
bc.sql(query)

### LIKE
[Docs](https://docs.blazingdb.com/docs/queries#like)

```sql
SELECT * FROM table_A WHERE column_A LIKE '%foo%'
```

#### LIKE -- Single Filter

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql("select * from taxi WHERE tpep_pickup_datetime LIKE '%15-01%'")

#### LIKE -- Multiple Filters (AND)

In [2]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

BlazingContext ready


In [3]:
query = '''
        select    
            * 
        from 
            taxi 
            WHERE 
                tpep_pickup_datetime LIKE '%01-01%' 
                AND tpep_dropoff_datetime LIKE '%01-01%'
                '''
bc.sql(query)

Unnamed: 0,VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID,store_and_fwd_flag,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount,improvement_surcharge,total_amount,pickup_x,pickup_y,dropoff_x,dropoff_y
0,2,2015-01-01 01:08:55,2015-01-01 01:19:20,5,0.96,1,N,2,8.0,0.5,0.5,0.00,0.0,0.3,9.30,-8237097.917,66.731119,-8235826.514,66.725753
1,2,2015-01-01 01:08:55,2015-01-01 01:20:15,1,1.71,1,N,1,9.5,0.5,0.5,2.50,0.0,0.3,13.30,-8232881.990,66.720635,-8235900.404,66.741845
2,2,2015-01-01 01:08:55,2015-01-01 01:28:44,6,5.53,1,N,1,19.5,0.5,0.5,4.00,0.0,0.3,24.80,-8239467.465,66.606994,-8235059.596,66.687985
3,2,2015-01-01 01:08:55,2015-01-01 01:50:14,2,7.07,1,N,2,30.0,0.5,0.5,0.00,0.0,0.3,31.30,-8234884.641,66.772064,-8238804.161,66.652565
4,1,2015-01-01 01:08:56,2015-01-01 01:19:50,5,2.30,1,N,1,10.0,0.5,0.5,3.00,0.0,0.0,14.30,-8231657.300,66.775891,-8233149.520,66.724310
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21023,1,2015-01-01 06:13:41,2015-01-01 06:43:37,1,8.50,1,N,1,28.2,0.0,0.5,7.99,0.0,0.0,36.99,-8238221.542,66.674477,-8231559.630,66.819520
21024,1,2015-01-01 06:13:41,2015-01-01 06:24:25,1,2.70,1,N,2,11.0,0.0,0.5,0.00,0.0,0.0,11.80,-8236566.256,66.568525,-8232161.784,66.562752
21025,1,2015-01-01 06:13:41,2015-01-01 06:17:48,1,1.00,1,N,2,5.5,0.0,0.5,0.00,0.0,0.0,6.30,-8236344.588,66.720838,-8236265.603,66.739447
21026,2,2015-01-01 06:13:41,2015-01-01 06:24:22,1,2.09,1,N,1,9.5,0.0,0.5,1.90,0.0,0.3,12.20,-8236768.388,66.636942,-8238228.336,66.674485


#### LIKE -- Multiple Filters (OR)

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select 
            * 
        from 
            taxi 
        WHERE 
            tpep_pickup_datetime LIKE '%01-01%' 
            OR tpep_dropoff_datetime LIKE '%01-01%'
            '''
bc.sql(query)

#### LIKE -- Multiple Filters (AND/OR)

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select 
            * 
        from 
            taxi 
        WHERE 
            (tpep_pickup_datetime LIKE '%01-01%' OR tpep_dropoff_datetime LIKE '%01-01%') 
            AND dropoff_x LIKE '-822%'
            '''
bc.sql(query)

### GROUP BY
[Docs](https://docs.blazingdb.com/docs/queries#group-by)

Groups result set of a query by one or more columns.

```sql
SELECT * FROM table_a GROUP BY column_a
```

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('select passenger_count from taxi GROUP BY passenger_count')

In [None]:
query = '''
        select
            passenger_count,
            VendorID
        from
            taxi
        GROUP BY
            passenger_count, 
            VendorID
            '''
bc.sql(query)

#### SUM

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
# --SUM without GROUP BY
bc.sql('select SUM(fare_amount) from taxi')

In [None]:
# --SUM with GROUP BY
bc.sql('select SUM(fare_amount) from taxi GROUP BY passenger_count')

#### AVG

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
# --AVG without GROUP BY
bc.sql('select AVG(fare_amount) from taxi')

In [None]:
# --AVG with GROUP BY
bc.sql('select AVG(fare_amount) from taxi GROUP BY passenger_count')

#### MIN

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
# --MIN without GROUP BY
bc.sql('select MIN(fare_amount) FROM taxi')

In [None]:
# --MIN with GROUP BY
bc.sql('select MIN(fare_amount) from taxi GROUP BY passenger_count')

#### MAX

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
# --MAX without GROUP BY
bc.sql('select MAX(fare_amount) FROM taxi')

In [None]:
# --MAX with GROUP BY
bc.sql('select MAX(fare_amount) from taxi GROUP BY passenger_count')

#### COUNT

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
# --COUNT without GROUP BY
bc.sql('select COUNT(fare_amount) from taxi')

In [None]:
# --COUNT with GROUP BY
bc.sql('select COUNT(fare_amount) FROM taxi GROUP BY passenger_count')

#### DISTINCT

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
# --DISTINCT without GROUP BY
bc.sql('select DISTINCT(passenger_count) from taxi')

In [None]:
# --DISTINCT with GROUP BY
bc.sql('select DISTINCT(passenger_count) from taxi GROUP BY passenger_count')

### JOIN
[Docs](https://docs.blazingdb.com/docs/queries#join)

Combine rows from two or more tables, based on a related column or columns between them.

#### INNER JOIN
[Docs](https://docs.blazingdb.com/docs/queries#inner)

```sql
SELECT table_A.column_A, table_B.column_A, table_B.column_C 
FROM table_A 
INNER JOIN table_B 
ON (table_A.column_A = table_B.column_B)
```

#### INNER JOIN SINGLE COLUMN

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi_a', '../../../data/sample_taxi.parquet')
bc.create_table('taxi_b', '../../../data/sample_taxi.csv', header=0)

In [None]:
query = '''
        select 
            A.tpep_pickup_datetime, A.VendorID,
            B.passenger_count, B.trip_distance, B.fare_amount
        from 
            taxi_a as A
        INNER JOIN 
            taxi_b as B
            ON A.tpep_pickup_datetime = B.tpep_pickup_datetime
            '''
bc.sql(query)

#### INNER JOIN MULTI-COLUMN

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi_a', '../../../data/sample_taxi.parquet')
bc.create_table('taxi_b', '../../../data/sample_taxi.csv', header=0)

In [None]:
query = '''
        select 
            A.tpep_pickup_datetime, A.VendorID,
            B.passenger_count, B.trip_distance, B.fare_amount
        from 
            taxi_a as A
        INNER JOIN 
            taxi_b as B
            ON A.tpep_pickup_datetime = B.tpep_pickup_datetime
            AND A.VendorID = B.VendorID
            '''
bc.sql(query)

#### FULL OUTER JOIN
[Docs](https://docs.blazingdb.com/docs/queries#full-outer)

```sql
SELECT table_A.column_A, table_B.column_A, table_B.column_C 
FROM table_A 
FULL OUTER JOIN table_B 
ON (table_A.column_A = table_B.column_B)
```

#### FULL OUTER JOIN SINGLE COLUMN

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi_a', '../../../data/sample_taxi.parquet')
bc.create_table('taxi_b', '../../../data/sample_taxi.csv', header=0)

In [None]:
query = '''
        select 
            taxi_a.tpep_pickup_datetime, 
            taxi_b.passenger_count, 
            taxi_b.fare_amount 
        from 
            taxi_a 
        FULL OUTER JOIN 
            taxi_b 
            ON taxi_a.tpep_dropoff_datetime = taxi_b.tpep_dropoff_datetime
            '''
bc.sql(query)

#### FULL OUTER JOIN MULTI-COLUMN

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi_a', '../../../data/sample_taxi.parquet')
bc.create_table('taxi_b', '../../../data/sample_taxi.csv', header=0)

In [None]:
query = '''
        select 
            taxi_a.tpep_pickup_datetime, 
            taxi_b.passenger_count, 
            taxi_b.fare_amount 
        from 
            taxi_a 
        FULL OUTER JOIN 
            taxi_b 
            ON taxi_a.tpep_dropoff_datetime = taxi_b.tpep_dropoff_datetime AND taxi_a.tpep_pickup_datetime = taxi_b.tpep_pickup_datetime
            '''
bc.sql(query)

#### LEFT OUTER JOIN
[Docs](https://docs.blazingdb.com/docs/queries#left-outer)

```sql
SELECT table_A.column_A, table_B.column_A, table_B.column_C 
FROM table_A 
LEFT JOIN table_B 
ON (table_A.column_A = table_B.column_B)
```

#### LEFT JOIN SINGLE COLUMN

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi_a', '../../../data/sample_taxi.parquet')
bc.create_table('taxi_b', '../../../data/sample_taxi.csv', header=0)

In [None]:
query = '''
        select 
            taxi_a.tpep_pickup_datetime, 
            taxi_b.passenger_count, 
            taxi_b.fare_amount 
        from 
            taxi_a 
        LEFT JOIN 
            taxi_b 
            ON taxi_a.tpep_dropoff_datetime = taxi_b.tpep_dropoff_datetime
            '''
bc.sql(query)

#### LEFT JOIN MULTI-COLUMN

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi_a', '../../../data/sample_taxi.parquet')
bc.create_table('taxi_b', '../../../data/sample_taxi.csv', header=0)

In [None]:
query = '''
        select 
            taxi_a.tpep_pickup_datetime, 
            taxi_b.passenger_count, 
            taxi_b.fare_amount 
        from 
            taxi_a 
        LEFT JOIN 
            taxi_b 
            ON taxi_a.tpep_dropoff_datetime = taxi_b.tpep_dropoff_datetime AND taxi_a.tpep_pickup_datetime = taxi_b.tpep_pickup_datetime
            '''
bc.sql(query)

#### CROSS JOIN
[Docs](https://docs.blazingdb.com/docs/queries#cross-join)

```sql
SELECT table_A.column_A, table_B.column_A, table_B.column_C 
FROM table_A 
CROSS JOIN table_B
```

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('iris_a', '../../../data/iris.orc')
bc.create_table('iris_b', '../../../data/iris.csv', header=0)

In [None]:
query = '''
        select 
            iris_a.sepal_length, 
            iris_b.sepal_length
        from 
            iris_a 
        CROSS JOIN 
            iris_b             
            '''
bc.sql(query)

### ORDER BY
[Docs](https://docs.blazingdb.com/docs/queries#order-by)

```sql
SELECT column_A, column_B FROM table_A ORDER BY column_A
```

#### ORDER BY without Specifying (defaults to Ascending)

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('select passenger_count, fare_amount FROM taxi ORDER BY trip_distance')

#### ORDER BY Specifying Ascending

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('select passenger_count, fare_amount from taxi ORDER BY trip_distance ASC')

#### ORDER BY Specifying Descending

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('select passenger_count, fare_amount from taxi ORDER BY trip_distance DESC')

### Nested Queries
[Docs](https://docs.blazingdb.com/docs/queries#nested-queries)

A query nested inside another query that uses values from the outer query may appear elsewhere. 

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = '''
        select 
            pickup_time, passenger_count, trip_distance, cost 
        from
            (
            SELECT 
                *,
                tpep_pickup_datetime AS pickup_time, 
                total_amount - tip_amount AS cost 
            FROM 
                taxi 
                WHERE
                    passenger_count <> 1
            )
            '''
bc.sql(query)

In [None]:
query = '''
        select 
            year(pickup_time) as pickup_year, month(pickup_time) as pickup_month, dayofmonth(pickup_time) as pickup_day,
            passenger_count, trip_distance, cost 
        from
            (
            SELECT 
                *,
                tpep_pickup_datetime AS pickup_time, 
                total_amount - tip_amount AS cost 
            FROM 
                taxi 
                WHERE
                    passenger_count <> 1
            )
            '''
bc.sql(query)

### SELECT
[Docs](https://docs.blazingdb.com/docs/queries#select)

```sql
SELECT * FROM table_a
```

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('SELECT * from taxi')

In [None]:
bc.sql('SELECT passenger_count, trip_distance, fare_amount from taxi')

### WHERE
[Docs](https://docs.blazingdb.com/docs/queries#where) | [BlazingSQL Notebooks](https://app.blazingsql.com/jupyter/user-redirect/lab/workspaces/auto-b/tree/Welcome_to_BlazingSQL_Notebooks/docs/blazingsql.ipynb#WHERE)

Query data and filter rows with a condition(s).

```sql
SELECT * FROM table_a WHERE condition_a
```

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('select * from taxi WHERE passenger_count = 2')

#### WHERE -- Multiple Filters (AND)

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('select * from taxi WHERE passenger_count > 2 AND trip_distance < 10')

#### WHERE -- Multiple Filters (OR)

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
bc.sql('select * from taxi WHERE passenger_count >= 2 OR trip_distance <= 10')

#### WHERE -- Multiple Filters (AND/OR)

In [None]:
from blazingsql import BlazingContext
bc = BlazingContext()
bc.create_table('taxi', '../../../data/sample_taxi.parquet')

In [None]:
query = ''' 
        select 
            * 
        from 
            taxi 
                WHERE 
                    (passenger_count <> 2 OR fare_amount > 100) 
                    AND trip_distance >= 10
                    '''
bc.sql(query)

# BlazingSQL Docs
**[Table of Contents](../TABLE_OF_CONTENTS.ipynb) | [Issues (GitHub)](https://github.com/BlazingDB/Welcome_to_BlazingSQL_Notebooks/issues)**