<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Blank SQL Notebook

#### Import Libraries & Database

In [1]:
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
if 'google.colab' in sys.modules:
    # Update package installer
    !sudo apt-get update -qq > /dev/null 2>&1

    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'colab_db' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Shift libraries from ipython-sql to jupysql
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas number to two decimal places
pd.options.display.float_format = '{:.2f}'.format

## ***PIVOTING WITH CASE BASIC AGGREGATION***
      Using case statement and aggregation

      We will use count to analyze the number of customer per region.
      We will use sum to calculate net revenue based on different categories in different year.




**TOTAL CUSTOMER PER DAY IN 2023**
---#### (Count Distinct) Review.

In [5]:
%%sql

SELECT
  orderdate,
  COUNT(DISTINCT customerkey) AS total_customers
FROM
  sales
WHERE
  orderdate BETWEEN '2023-01-01' AND '2023-12-31'
 GROUP BY
  orderdate
 ORDER BY
  orderdate

Unnamed: 0,orderdate,total_customers
0,2023-01-01,12
1,2023-01-02,49
2,2023-01-03,64
3,2023-01-04,78
4,2023-01-05,87
...,...,...
359,2023-12-27,73
360,2023-12-28,75
361,2023-12-29,55
362,2023-12-30,91


**DAILY CUSTOMERS BY REGION**
----#### Pivot with Count()

In [11]:
%%sql

SELECT
  s.orderdate,
  COUNT(DISTINCT s.customerkey) AS total_customers,
  COUNT(DISTINCT CASE WHEN c.continent = 'Europe' THEN s.customerkey END) AS eu_customers,
  COUNT(DISTINCT CASE WHEN c.continent = 'North America' THEN s.customerkey END) AS ns_customers,
  COUNT(DISTINCT CASE WHEN c.continent = 'Australia' THEN s.customerkey END) AS au_customers
FROM
  sales s
LEFT JOIN customer c ON s.customerkey = c.customerkey
WHERE
  s.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
 GROUP BY
  s.orderdate
 ORDER BY
  s.orderdate

Unnamed: 0,orderdate,total_customers,eu_customers,ns_customers,au_customers
0,2023-01-01,12,6,5,1
1,2023-01-02,49,15,31,3
2,2023-01-03,64,17,44,3
3,2023-01-04,78,28,46,4
4,2023-01-05,87,22,57,8
...,...,...,...,...,...
359,2023-12-27,73,26,41,6
360,2023-12-28,75,24,44,7
361,2023-12-29,55,19,32,4
362,2023-12-30,91,25,50,16


**TOTAL REVENUE BY CATEGORY IN 2022 AND 2023**
-----#### Pivot with Sum

In [21]:
%%sql

SELECT
  p.categoryname,
  SUM(CASE WHEN s.orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN (s.quantity * s.netprice * s.exchangerate) ELSE 0 END) AS total_net_revenue_2022,
  SUM(CASE WHEN s.orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN (s.quantity * s.netprice * s.exchangerate) ELSE 0 END) AS total_net_revenue_2023
FROM
  sales s
LEFT JOIN product p ON s.productkey = p.productkey
 GROUP BY
  p.categoryname
 ORDER BY
  p.categoryname

Unnamed: 0,categoryname,total_net_revenue_2022,total_net_revenue_2023
0,Audio,766938.21,688690.18
1,Cameras and camcorders,2382532.56,1983546.29
2,Cell phones,8119665.07,6002147.63
3,Computers,17862213.49,11650867.21
4,Games and Toys,316127.3,270374.96
5,Home Appliances,6612446.68,5919992.87
6,"Music, Movies and Audio Books",2989297.28,2180768.13
7,TV and Video,5815336.61,4412178.23


**AVG,MIX,MIN REVENUE BY CATEGORY IN 2022 AND 2023**
-----#### Pivot with Avg,Max,Min

In [23]:
%%sql

SELECT
  p.categoryname,
  AVG(CASE WHEN s.orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END) AS avg_net_revenue_2022,
  AVG(CASE WHEN s.orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END) AS avg_net_revenue_2023,
  MAX(CASE WHEN s.orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END) AS max_net_revenue_2022,
  MAX(CASE WHEN s.orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END) AS max_net_revenue_2023,
  MIN(CASE WHEN s.orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END) AS min_net_revenue_2022,
  MIN(CASE WHEN s.orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END) AS min_net_revenue_2023
FROM
  sales s
LEFT JOIN product p ON s.productkey = p.productkey
 GROUP BY
  p.categoryname
 ORDER BY
  p.categoryname



Unnamed: 0,categoryname,avg_net_revenue_2022,avg_net_revenue_2023,max_net_revenue_2022,max_net_revenue_2023,min_net_revenue_2022,min_net_revenue_2023
0,Audio,392.3,425.38,3473.36,2730.87,9.31,10.85
1,Cameras and camcorders,1210.02,1210.96,15008.39,13572.0,6.74,5.98
2,Cell phones,722.2,623.28,7692.37,8912.22,2.53,2.28
3,Computers,1565.62,1292.39,38082.66,27611.6,0.83,0.75
4,Games and Toys,81.29,80.83,5202.01,3357.3,2.83,3.49
5,Home Appliances,1755.36,1886.55,31654.55,32915.59,4.04,4.54
6,"Music, Movies and Audio Books",386.61,334.58,5415.19,3804.91,7.29,6.91
7,TV and Video,1535.61,1687.9,30259.41,27503.12,41.3,42.3


**MEDIAN REVENUE BY CATEGORY IN 2022 AND 2023**
-----#### Pivot with Perccentile_Cont(Median)

In [24]:
%%sql

SELECT
  p.categoryname,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (CASE WHEN s.orderdate BETWEEN '2022-01-01' AND '2022-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END)) AS median_net_revenue_2022,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (CASE WHEN s.orderdate BETWEEN '2023-01-01' AND '2023-12-31' THEN (s.quantity * s.netprice * s.exchangerate) END)) AS median_net_revenue_2023
FROM
  sales s
LEFT JOIN product p ON s.productkey = p.productkey
 GROUP BY
  p.categoryname
 ORDER BY
  p.categoryname

Unnamed: 0,categoryname,median_net_revenue_2022,median_net_revenue_2023
0,Audio,257.21,266.59
1,Cameras and camcorders,651.46,672.6
2,Cell phones,418.6,375.88
3,Computers,809.7,657.18
4,Games and Toys,33.78,32.62
5,Home Appliances,791.0,825.25
6,"Music, Movies and Audio Books",186.58,159.63
7,TV and Video,730.46,790.79


**PIVOTING WITH CASE** ------
*ADVANCE SEGEMENT*

**USING AND & MULTIPLE WHEN CLAUSES**

  Segement Orders




In [33]:
%%sql

SELECT
  orderdate,
  quantity,
  netprice,
  CASE
   WHEN quantity >= 2 AND netprice >= 100 THEN 'Multiple High Value Items'
   WHEN netprice >= 100 THEN 'Single High Value Item'
   WHEN quantity >= 2 THEN 'Multiple Standard Items' ELSE 'Single Standard Items' END AS order_type
FROM
 sales
LIMIT 10

Unnamed: 0,orderdate,quantity,netprice,order_type
0,2015-01-01,1,98.97,Single Standard Items
1,2015-01-01,1,659.78,Single High Value Item
2,2015-01-01,2,54.38,Multiple Standard Items
3,2015-01-01,4,286.69,Multiple High Value Items
4,2015-01-01,7,135.75,Multiple High Value Items
5,2015-01-01,3,434.3,Multiple High Value Items
6,2015-01-01,1,58.73,Single Standard Items
7,2015-01-01,3,74.99,Multiple Standard Items
8,2015-01-01,2,113.57,Multiple High Value Items
9,2015-01-01,1,499.45,Single High Value Item


**USING AND FOR MULTIPLE WHEN CONDITIONS**

  Segement Orders By Median

In [45]:
%%sql

WITH median_value AS (
   SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY (s.quantity * s.netprice * s.exchangerate)) AS median
   FROM
 sales s
   WHERE
 s.orderdate BETWEEN '2022-01-01' AND '2023-12-31')



SELECT
  p.categoryname,
  SUM(CASE
   WHEN (s.quantity * s.netprice * s.exchangerate) < mv.median
   AND s.orderdate BETWEEN '2022-01-01' AND '2022-12-31'
   THEN (s.quantity * s.netprice * s.exchangerate) END) AS low_net_revenue_2022,
  SUM(CASE
   WHEN (s.quantity * s.netprice * s.exchangerate) >= mv.median
   AND s.orderdate BETWEEN '2022-01-01' AND '2022-12-31'
   THEN (s.quantity * s.netprice * s.exchangerate) END) AS high_net_revenue_2022,
  SUM(CASE
   WHEN (s.quantity * s.netprice * s.exchangerate) < mv.median
   AND s.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
   THEN (s.quantity * s.netprice * s.exchangerate) END) AS low_net_revenue_2023,
  SUM(CASE
   WHEN (s.quantity * s.netprice * s.exchangerate) >= mv.median
   AND s.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
   THEN (s.quantity * s.netprice * s.exchangerate) END) AS high_net_revenue_2023
FROM
  sales s
LEFT JOIN product p ON s.productkey = p.productkey,
  median_value mv
 GROUP BY
  p.categoryname
 ORDER BY
  p.categoryname

Unnamed: 0,categoryname,low_net_revenue_2022,high_net_revenue_2022,low_net_revenue_2023,high_net_revenue_2023
0,Audio,222337.83,544600.39,180251.13,508439.06
1,Cameras and camcorders,133004.54,2249528.02,104869.46,1878676.83
2,Cell phones,814449.53,7305215.55,729699.39,5272448.24
3,Computers,624340.42,17237873.07,590790.31,11060076.9
4,Games and Toys,231979.63,84147.67,206103.36,64271.6
5,Home Appliances,219797.07,6392649.61,176261.35,5743731.52
6,"Music, Movies and Audio Books",685808.49,2303488.8,574958.76,1605809.37
7,TV and Video,272338.29,5542998.32,164275.35,4247902.87


MULTIPLE WHEN CLAUSES IN CASE

   Segement Order By Percentile



In [57]:
%%sql

WITH percentile AS (
   SELECT
     PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY (s.quantity * s.netprice * s.exchangerate)) AS revenue_25_perentile,
     PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY (s.quantity * s.netprice * s.exchangerate)) AS revenue_75_perentile
   FROM
 sales s
   WHERE
 s.orderdate BETWEEN '2022-01-01' AND '2023-12-31')



SELECT
  p.categoryname,
  CASE
    WHEN (s.quantity * s.netprice * s.exchangerate) <= pctl.revenue_25_perentile THEN '3-LOW'
    WHEN (s.quantity * s.netprice * s.exchangerate) >= pctl.revenue_25_perentile THEN '1-HIGH'
    ELSE '2-MEDIAN'
  END AS revenue_tier,
  SUM(s.quantity * s.netprice * s.exchangerate) AS total_revenue
FROM
  sales s
  LEFT JOIN
     product p ON s.productkey = p.productkey,
     percentile pctl
GROUP BY
  p.categoryname,
  revenue_tier
ORDER BY
  p.categoryname

Unnamed: 0,categoryname,revenue_tier,total_revenue
0,Audio,1-HIGH,5045681.1
1,Audio,3-LOW,267217.01
2,Cameras and camcorders,1-HIGH,18439327.73
3,Cameras and camcorders,3-LOW,81032.92
4,Cell phones,1-HIGH,32213956.36
5,Cell phones,3-LOW,410309.35
6,Computers,1-HIGH,90415815.0
7,Computers,3-LOW,203207.06
8,Games and Toys,1-HIGH,1135131.0
9,Games and Toys,3-LOW,533443.13


# **DATE FORMAT**

*   DATE_TRUNC()
*   TO_CHAR()



   Revenue and Customers By Month

In [63]:
%%sql

SELECT
 DATE_TRUNC('month', orderdate)::DATE AS order_month,
 SUM(quantity * netprice * exchangerate) AS net_revenue,
 COUNT(DISTINCT customerkey) AS total_unique_customers
FROM
 sales
GROUP BY
 order_month

Unnamed: 0,order_month,net_revenue,total_unique_customers
0,2015-01-01,384092.66,200
1,2015-02-01,706374.12,291
2,2015-03-01,332961.59,139
3,2015-04-01,160767.00,78
4,2015-05-01,548632.63,236
...,...,...,...
107,2023-12-01,2928550.93,1484
108,2024-01-01,2677498.55,1340
109,2024-02-01,3542322.55,1718
110,2024-03-01,1692854.89,877


PREVIOUS ONE USING TO_CHAR

In [64]:
%%sql

SELECT
 TO_CHAR(orderdate,'YYYY-MM') AS order_month,
 SUM(quantity * netprice * exchangerate) AS net_revenue,
 COUNT(DISTINCT customerkey) AS total_unique_customers
FROM
 sales
GROUP BY
 order_month

Unnamed: 0,order_month,net_revenue,total_unique_customers
0,2015-01,384092.66,200
1,2015-02,706374.12,291
2,2015-03,332961.59,139
3,2015-04,160767.00,78
4,2015-05,548632.63,236
...,...,...,...
107,2023-12,2928550.93,1484
108,2024-01,2677498.55,1340
109,2024-02,3542322.55,1718
110,2024-03,1692854.89,877


# **DATE & TIME** ------ *DATE AND TIME FILTERING*

* CATEGORY NET REVENUE PER YEAR USING EXTRACT()



In [70]:
%%sql

SELECT
 EXTRACT(YEAR FROM orderdate) AS order_year,
 EXTRACT(MONTH FROM orderdate) AS order_month,
 SUM(quantity * netprice * exchangerate) AS net_revenue,
 COUNT(DISTINCT customerkey) AS total_unique_customers
FROM
 sales
GROUP BY
 order_year,order_month
ORDER BY
 order_year,order_month

Unnamed: 0,order_year,order_month,net_revenue,total_unique_customers
0,2015,1,384092.66,200
1,2015,2,706374.12,291
2,2015,3,332961.59,139
3,2015,4,160767.00,78
4,2015,5,548632.63,236
...,...,...,...,...
107,2023,12,2928550.93,1484
108,2024,1,2677498.55,1340
109,2024,2,3542322.55,1718
110,2024,3,1692854.89,877


# **CURRENT_DATE & NOW()**

*   Net Revenue Last 5 Years

In [None]:
%%sql

SELECT
 s.orderdate,
 p.categoryname,
 SUM(s.quantity * s.netprice * s.exchangerate) AS net_revenue
FROM
 sales s
LEFT JOIN product p ON p.productkey = p.productkey
GROUP BY
 s.orderdate,p.categoryname
ORDER BY
 s.orderdate
LIMIT 10