# Demo: Writing SQL in a Jupyter Notebook and Running It with DuckDB

## Import Needed Packages and Configure the Notebook to allow for SQL Magic (%%sql)

In [None]:
import duckdb
import pandas as pd

# Load the SQL magic extension; the SqlMagic options and the %sql / %%sql
# magics used below are only available once it is loaded.
%load_ext sql

# Return query results as pandas DataFrames.
%config SqlMagic.autopandas = True
# NOTE(review): presumably suppresses the status message printed after each
# statement -- confirm against the installed extension's documentation.
%config SqlMagic.feedback = False
# NOTE(review): presumably hides the connection string echoed with each
# query -- confirm against the installed extension's documentation.
%config SqlMagic.displaycon = False

# Connect to DuckDB's default connection. Later cells query the Python
# variable `census` by name through this connection.
%sql duckdb:///:default:
# Alternative connection targets, left disabled:
# %sql duckdb:///:memory:
# %sql duckdb:///path/to/file.db

## Read the CSV File into a DataFrame

In [None]:
%%sql

census <<
-- The file name is a quoted string literal so DuckDB reads it as a path and not as a schema.table identifier
SELECT *
FROM 'us2021census.csv'

## Verify the Object Type and the Column Data Types of the Loaded DataFrame

In [None]:
# Show the Python type of the result bound to `census` by the `census <<` cell;
# with SqlMagic.autopandas = True this is expected to be a pandas DataFrame.
type(census)

In [None]:
# Show the dtype of each column (assumes `census` is a pandas DataFrame, as checked in the previous cell).
census.dtypes

## Inspect First Ten Rows of Dataframe

In [None]:
%%sql

-- Preview of census, capped at ten rows
SELECT * FROM census LIMIT 10

## Use of EXCLUDE With SELECT * (DuckDB feature)

In [None]:
%%sql

-- All census columns apart from Latitude and Longitude, capped at ten rows
SELECT * EXCLUDE (Latitude, Longitude)
FROM census
LIMIT 10

## Use of WHERE clause

In [None]:
%%sql

-- City and Population for rows whose Population is above 1000000
SELECT City, Population
FROM census
WHERE Population > 1000000

## Use of GROUP BY and ORDER BY

In [None]:
%%sql

-- Ten states with the highest average Population, with the average cast to INTEGER
SELECT State, CAST(AVG(Population) AS INTEGER) AS AVG_Pop
FROM census
GROUP BY State
ORDER BY AVG_Pop DESC
LIMIT 10

## Use of GROUP BY ALL (DuckDB feature)

In [None]:
%%sql

-- GROUP BY ALL groups on every non-aggregated SELECT column, here State and Type
SELECT State, Type, CAST(AVG(Population) AS INTEGER) AS AVG_Pop_By_State_Type
FROM census
GROUP BY ALL
ORDER BY AVG_Pop_By_State_Type DESC
LIMIT 10

## Random Sampling From Dataframe

In [None]:
%%sql

-- Draw 10 random rows from census, no seed is given so reruns can return different rows
SELECT * FROM census USING SAMPLE 10

### Random Sampling with Reproducible Seed

In [None]:
%%sql

-- 10 percent sample of census using the system method with seed 422
SELECT * FROM census USING SAMPLE 10 PERCENT (system, 422)

## String Slicing (DuckDB feature)

In [None]:
%%sql

-- Bracket slice applied directly to a string literal
SELECT 'BD-STEP FELLOWS ARE THE BEST OF HUMANITY'[:-21] AS sliced_string