<a href="https://colab.research.google.com/github/JonasWetzel94/google_collab_sql/blob/main/SQL_date_time_functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# SQL Date & Time Functions

#### Import Libraries & Database

In [None]:
# Environment setup for the notebook: when running in Google Colab, install PostgreSQL
# and restore the contoso_100k database; then load the SQL magic, connect, and set
# display options.
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
# (Colab is detected by the presence of the 'google.colab' module in sys.modules)
if 'google.colab' in sys.modules:
    # Install PostgreSQL (quiet mode, output suppressed)
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    # The password set here must match the one in the connection string further down.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    # NOTE(review): stdout and stderr are both discarded, so a failed restore is not reported here.
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Swap ipython-sql for jupysql (uninstall the former, install the latter; output suppressed)
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database (local server, port 5432, database contoso_100k)
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas floats with two decimal places
pd.options.display.float_format = '{:.2f}'.format

In [None]:
%%sql
-- Enumerate the tables that live in the public schema.
SELECT t.table_name
FROM information_schema.tables AS t
WHERE t.table_schema = 'public';


Unnamed: 0,table_name
0,currencyexchange
1,customer
2,sales
3,date
4,product
5,store


In [None]:
%%sql
-- Peek at two rows of the sales table to see its columns.
SELECT *
FROM sales
LIMIT 2;

Unnamed: 0,orderkey,linenumber,orderdate,deliverydate,customerkey,storekey,productkey,quantity,unitprice,netprice,unitcost,currencycode,exchangerate
0,1000,0,2015-01-01,2015-01-01,947009,400,48,1,112.46,98.97,57.34,GBP,0.64
1,1000,1,2015-01-01,2015-01-01,947009,400,460,1,749.75,659.78,382.25,GBP,0.64


In [None]:
%%sql
-- Units sold per calendar quarter, earliest quarters first.
-- ORDER BY is needed because LIMIT on an unordered result keeps an arbitrary five quarters.
SELECT
    CAST(DATE_TRUNC('quarter', orderdate) AS date) AS quarter,
    SUM(quantity) AS total_quantity
FROM sales
GROUP BY DATE_TRUNC('quarter', orderdate)
ORDER BY quarter
LIMIT 5;


Unnamed: 0,quarter,total_quantity
0,2015-01-01,4493
1,2015-04-01,4071
2,2015-07-01,5766
3,2015-10-01,7261
4,2016-01-01,7158


In [None]:
# NOTE(review): the setup cell sets SqlMagic.named_parameters to "disabled"; this line flips it
# to "enabled" for every cell that runs afterwards — confirm that is intended.
%config SqlMagic.named_parameters="enabled"

In [None]:
%%sql
-- Net revenue per week for orders dated within 2023, first five weeks.
-- The week label is formatted as 'WW-YYYY', so sorting it as text follows week order inside the single year.
SELECT
    TO_CHAR(s.orderdate, 'WW-YYYY') AS week,
    SUM(s.netprice * s.quantity * s.exchangerate) AS net_revenue
FROM sales AS s
WHERE s.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY 1
ORDER BY 1
LIMIT 5;

Unnamed: 0,week,net_revenue
0,01-2023,1118860.15
1,02-2023,773467.25
2,03-2023,797088.74
3,04-2023,782617.25
4,05-2023,717966.27


In [None]:
%%sql
-- Median quantity per sales row for each week of 2023, first five weeks.
SELECT
    TO_CHAR(orderdate, 'WW-YYYY') AS week,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY quantity) AS median_quantity
FROM sales
-- A half-open date range selects the same 2023 rows as comparing the formatted year text,
-- without formatting every row first.
WHERE orderdate >= DATE '2023-01-01'
  AND orderdate <  DATE '2024-01-01'
GROUP BY TO_CHAR(orderdate, 'WW-YYYY')
-- Without ORDER BY, LIMIT would keep an arbitrary five weeks.
ORDER BY week
LIMIT 5;

Unnamed: 0,week,median_quantity
0,01-2023,3.0
1,02-2023,2.0
2,03-2023,3.0
3,04-2023,3.0
4,05-2023,2.0


In [None]:
%%sql
-- Split each order date into calendar parts for a five-row sample of sales.
SELECT
    s.orderkey,
    EXTRACT(decade  FROM s.orderdate) AS order_decade,
    EXTRACT(year    FROM s.orderdate) AS order_year,
    EXTRACT(quarter FROM s.orderdate) AS order_quarter,
    EXTRACT(month   FROM s.orderdate) AS order_month,
    EXTRACT(isoyear FROM s.orderdate) AS order_isoyear
FROM sales AS s
LIMIT 5;

Unnamed: 0,orderkey,order_decade,order_year,order_quarter,order_month,order_isoyear
0,1000,201,2015,1,1,2015
1,1000,201,2015,1,1,2015
2,1001,201,2015,1,1,2015
3,1002,201,2015,1,1,2015
4,1002,201,2015,1,1,2015


In [None]:
%%sql
-- Net revenue by day of year for orders dated within 2022, first five days.
SELECT
    EXTRACT(DOY FROM s.orderdate) AS order_day,
    SUM(s.quantity * s.netprice * s.exchangerate) AS net_revenue
FROM sales AS s
WHERE s.orderdate BETWEEN '2022-01-01' AND '2022-12-31'
GROUP BY 1
ORDER BY 1
LIMIT 5;

Unnamed: 0,order_day,net_revenue
0,1,255185.54
1,2,30229.29
2,3,141615.78
3,4,129968.6
4,5,171813.44


In [None]:
%%sql
-- Sales row counts per day of week for the calendar year five years before today.
-- EXTRACT(DOW ...) numbers the days 0 (Sunday) through 6, so LIMIT 5 shows at most five of the seven.
-- NOTE(review) COUNT(orderkey) counts sales rows, and one orderkey can span several line numbers,
-- so total_orders is a line-item count rather than distinct orders. Confirm which is wanted.
SELECT
    EXTRACT(DOW FROM s.orderdate) AS order_week,
    COUNT(s.orderkey) AS total_orders
FROM sales AS s
WHERE EXTRACT(YEAR FROM s.orderdate) = EXTRACT(YEAR FROM CURRENT_DATE) - 5
GROUP BY 1
ORDER BY 1
LIMIT 5;

Unnamed: 0,order_week,total_orders
0,0,162
1,1,1154
2,2,1458
3,3,2080
4,4,2173


In [None]:
%%sql
-- Per year and month, count sales rows and distinct customers,
-- limited to order years no older than six years before the current year. First five months shown.
SELECT
    EXTRACT(YEAR  FROM s.orderdate) AS order_year,
    EXTRACT(MONTH FROM s.orderdate) AS order_month,
    COUNT(s.orderkey) AS total_orders,
    COUNT(DISTINCT s.customerkey) AS total_customers
FROM sales AS s
WHERE EXTRACT(YEAR FROM s.orderdate) >= EXTRACT(YEAR FROM NOW()) - 6
GROUP BY 1, 2
ORDER BY 1, 2
LIMIT 5;

Unnamed: 0,order_year,order_month,total_orders,total_customers
0,2019,1,2571,1093
1,2019,2,3283,1341
2,2019,3,1836,732
3,2019,4,992,393
4,2019,5,2563,1032


In [None]:
%%sql
-- Peek at two rows of the store table to see its columns.
SELECT *
FROM store
LIMIT 2;

Unnamed: 0,storekey,storecode,geoareakey,countrycode,countryname,state,opendate,closedate,description,squaremeters,status
0,10,1,1,AU,Australia,Australian Capital Territory,2008-01-01,,Contoso Store Australian Capital Territory,595.0,
1,20,2,3,AU,Australia,Northern Territory,2008-01-12,2016-07-07,Contoso Store Northern Territory,665.0,Closed


In [None]:
%%sql
SELECT description,
       EXTRACT(month FROM closedate) - EXTRACT(month FROM opendate) closure_time
FROM store
WHERE closedate IS NOT NULL
LIMIT 5;

Unnamed: 0,description,closure_time
0,Contoso Store Northern Territory,6
1,Contoso Store South Australia,7
2,Contoso Store New Brunswick,-2
3,Contoso Store New Brunswick,1
4,Contoso Store Yukon,-1


In [None]:
%%sql

-- Customer age as of today, in whole months and in days, plus an age bracket. Five-row sample.
SELECT
    customerkey,
    EXTRACT(YEAR FROM AGE(CURRENT_DATE, birthday)) * 12
        + EXTRACT(MONTH FROM AGE(CURRENT_DATE, birthday)) AS age_in_months,
    -- Date minus date gives an exact day count. AGE() returns a years/months/days interval,
    -- which is not a number of days.
    CURRENT_DATE - CAST(birthday AS date) AS age_in_days,
    CASE
        -- Strict comparisons, so someone turning 25 or 50 today moves into the older bracket.
        WHEN birthday > CURRENT_DATE - INTERVAL '25 years' THEN 'Under 25'
        -- The cutoff is 50 years to match the labels. The earlier 49-year cutoff put 49-year-olds in '50+'.
        WHEN birthday > CURRENT_DATE - INTERVAL '50 years' THEN '25-50'
        ELSE '50+'
    END AS age_group
FROM customer
LIMIT 5;

Unnamed: 0,customerkey,age_in_months,age_in_days,age_group
0,15,721,21958 days,50+
1,23,420,12786 days,25-50
2,36,730,22205 days,50+
3,120,941,28630 days,50+
4,180,840,25577 days,50+


In [None]:
%%sql
-- Names of all tables in the public schema.
SELECT tbl.table_name
FROM information_schema.tables AS tbl
WHERE tbl.table_schema = 'public';


Unnamed: 0,table_name
0,currencyexchange
1,customer
2,sales
3,date
4,product
5,store


In [None]:
%%sql
-- Two sample rows from store, showing opendate and closedate.
SELECT *
FROM store
LIMIT 2;

Unnamed: 0,storekey,storecode,geoareakey,countrycode,countryname,state,opendate,closedate,description,squaremeters,status
0,10,1,1,AU,Australia,Australian Capital Territory,2008-01-01,,Contoso Store Australian Capital Territory,595.0,
1,20,2,3,AU,Australia,Northern Territory,2008-01-12,2016-07-07,Contoso Store Northern Territory,665.0,Closed


In [None]:
%%sql
-- Bucket closed stores by how many whole months passed between opendate and closedate.
-- First five stores by storekey.
WITH store_spans AS (
    -- One AGE() interval per closed store.
    SELECT
        storekey,
        AGE(closedate, opendate) AS open_span
    FROM store
    WHERE closedate IS NOT NULL
),
store_durations AS (
    -- Interval converted to whole months, as elapsed years times 12 plus leftover months.
    SELECT
        storekey,
        EXTRACT(YEAR FROM open_span) * 12 + EXTRACT(MONTH FROM open_span) AS closure_time_months
    FROM store_spans
)
SELECT
    d.storekey,
    d.closure_time_months,
    CASE
        WHEN d.closure_time_months < 12 THEN 'Less than 1 year'
        WHEN d.closure_time_months BETWEEN 12 AND 36 THEN '1-3 years'
        WHEN d.closure_time_months BETWEEN 37 AND 60 THEN '3-5 years'
        ELSE '5+ years'
    END AS closure_duration
FROM store_durations AS d
ORDER BY d.storekey
LIMIT 5;

Unnamed: 0,storekey,closure_time_months,closure_duration
0,20,101,5+ years
1,30,43,3-5 years
2,70,82,5+ years
3,72,36,1-3 years
4,110,71,5+ years


In [None]:
%%sql
-- Distribution of customer tenure (startdt to enddt, in whole months) for customers
-- whose enddt is at least seven years before today. The numeric prefix on each label keeps the
-- text sort in bucket order.
WITH customer_spans AS (
    -- One AGE() interval per qualifying customer.
    SELECT
        customerkey,
        AGE(enddt, startdt) AS active_span
    FROM customer
    WHERE enddt IS NOT NULL
      AND enddt <= CURRENT_DATE - INTERVAL '7 years'
),
customer_durations AS (
    -- Interval converted to whole months, as elapsed years times 12 plus leftover months.
    SELECT
        customerkey,
        EXTRACT(YEAR FROM active_span) * 12 + EXTRACT(MONTH FROM active_span) AS months_active
    FROM customer_spans
)
SELECT
    CASE
        WHEN months_active < 36 THEN '1 - Less than 3 years'
        WHEN months_active <= 60 THEN '2 - 3-5 years'
        WHEN months_active <= 84 THEN '3 - 5-7 years'
        ELSE '4 - 7+ years'
    END AS active_range,
    COUNT(*) AS customer_count,
    -- Share of all qualifying customers, as a percentage with two decimals.
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM customer_durations), 2) AS percentage_of_customers,
    ROUND(AVG(months_active), 1) AS avg_months_active
FROM customer_durations
GROUP BY 1
ORDER BY 1
LIMIT 5;

Unnamed: 0,active_range,customer_count,percentage_of_customers,avg_months_active
0,1 - Less than 3 years,779,5.12,17.3
1,2 - 3-5 years,534,3.51,47.7
2,3 - 5-7 years,568,3.73,72.8
3,4 - 7+ years,13342,87.64,258.7
