<a href="https://colab.research.google.com/github/JonasWetzel94/google_collab_sql/blob/main/sql_case_statements.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a target="_blank" href="https://colab.research.google.com/github/lukebarousse/Int_SQL_Data_Analytics_Course/blob/main/Resources/Blank_SQL_Notebook.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# SQL CASE Statements Notebook

#### Import Libraries & Database

In [None]:
# Notebook bootstrap: import libraries, provision a local PostgreSQL instance
# with the contoso_100k sample database when running in Google Colab, then
# configure the `sql` magic and pandas display options.
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# If running in Google Colab, install PostgreSQL and restore the database
# NOTE(review): every shell command below discards stdout and stderr, so a
# failed install, download, or restore is silent and only surfaces later as a
# connection or query error.
if 'google.colab' in sys.modules:
    # Install PostgreSQL
    !sudo apt-get install postgresql -qq > /dev/null 2>&1

    # Start PostgreSQL service (suppress output)
    !sudo service postgresql start > /dev/null 2>&1

    # Set password for the 'postgres' user to avoid authentication errors (suppress output)
    # NOTE(review): the password is hard-coded here and in the connection URL
    # below; acceptable only because this is a throwaway local instance.
    !sudo -u postgres psql -c "ALTER USER postgres WITH PASSWORD 'password';" > /dev/null 2>&1

    # Create the 'contoso_100k' database (suppress output)
    !sudo -u postgres psql -c "CREATE DATABASE contoso_100k;" > /dev/null 2>&1

    # Download the PostgreSQL .sql dump
    !wget -q -O contoso_100k.sql https://github.com/lukebarousse/Int_SQL_Data_Analytics_Course/releases/download/v.0.0.0/contoso_100k.sql

    # Restore the dump file into the PostgreSQL database (suppress output)
    !sudo -u postgres psql contoso_100k < contoso_100k.sql > /dev/null 2>&1

    # Shift libraries from ipython-sql to jupysql
    !pip uninstall -y ipython-sql > /dev/null 2>&1
    !pip install jupysql > /dev/null 2>&1

# Load the sql extension for SQL magic
%load_ext sql

# Connect to the PostgreSQL database
# (user 'postgres', password 'password', host localhost, port 5432, database contoso_100k)
%sql postgresql://postgres:password@localhost:5432/contoso_100k

# Enable automatic conversion of SQL results to pandas DataFrames
%config SqlMagic.autopandas = True

# Disable named parameters for SQL magic
%config SqlMagic.named_parameters = "disabled"

# Display pandas number to two decimal places
pd.options.display.float_format = '{:.2f}'.format

In [None]:
%%sql
-- List every table that lives in the public schema of the connected database.
SELECT
    t.table_name
FROM information_schema.tables AS t
WHERE t.table_schema = 'public';


Unnamed: 0,table_name
0,currencyexchange
1,customer
2,sales
3,date
4,product
5,store


In [None]:
%%sql
-- Preview two rows of the product table to inspect its columns.
SELECT p.*
FROM product AS p
LIMIT 2;

Unnamed: 0,productkey,productcode,productname,manufacturer,brand,color,weightunit,weight,cost,price,categorykey,categoryname,subcategorykey,subcategoryname
0,1,101001,Contoso 512MB MP3 Player E51 Silver,"Contoso, Ltd",Contoso,Silver,ounces,4.8,6.62,12.99,1,Audio,101,MP4&MP3
1,2,101002,Contoso 512MB MP3 Player E51 Blue,"Contoso, Ltd",Contoso,Blue,ounces,4.1,6.62,12.99,1,Audio,101,MP4&MP3


In [None]:
%%sql
-- Distinct order counts split into three customer age bands.
-- A CASE without ELSE yields NULL and COUNT(DISTINCT ...) skips NULLs,
-- so each column counts only the orders whose customer falls in that band.
SELECT
    COUNT(DISTINCT CASE WHEN cu.age < 25 THEN sl.orderkey END) AS total_purchase_youngins,
    COUNT(DISTINCT CASE WHEN cu.age BETWEEN 25 AND 44 THEN sl.orderkey END) AS total_purchase_mid_age,
    COUNT(DISTINCT CASE WHEN cu.age > 44 THEN sl.orderkey END) AS total_purchase_seniors
FROM sales AS sl
LEFT JOIN customer AS cu
    ON sl.customerkey = cu.customerkey


Unnamed: 0,total_purchase_youngins,total_purchase_mid_age,total_purchase_seniors
0,7528,25040,50562


In [None]:
# Switch SqlMagic.named_parameters from "disabled" (set in the setup cell) to "enabled".
# NOTE(review): presumably intended to allow named placeholders in later %%sql cells,
# but none of the queries in this notebook appear to use one - confirm this cell is needed.
%config SqlMagic.named_parameters="enabled"

In [None]:
%%sql
-- Net revenue per customer for orders dated in 2023, computed once in a CTE
-- and then labelled as a low, medium or high spender.
WITH customer_revenue AS (
    SELECT
        cu.customerkey,
        SUM(sl.quantity * sl.netprice * sl.exchangerate) AS net_revenue
    FROM sales AS sl
    LEFT JOIN customer AS cu
        ON sl.customerkey = cu.customerkey
    WHERE sl.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
    GROUP BY cu.customerkey
)
SELECT
    customerkey,
    net_revenue,
    CASE
        WHEN net_revenue < 500 THEN 'Low_spender'
        WHEN net_revenue BETWEEN 500 AND 2000 THEN 'Medium_spender'
        ELSE 'High_spender'
    END AS customer_type
FROM customer_revenue
ORDER BY net_revenue DESC;

Unnamed: 0,customerkey,net_revenue,customer_type
0,228672,42485.69,High_spender
1,342151,33510.80,High_spender
2,269309,32367.99,High_spender
3,242286,30296.71,High_spender
4,308957,30108.03,High_spender
...,...,...,...
13741,2094077,3.35,Low_spender
13742,1433067,3.32,Low_spender
13743,440697,3.28,Low_spender
13744,1064373,3.27,Low_spender


In [None]:
%%sql
-- Bucket products by weight unit and weight, then count the products per bucket.
-- CASE returns the first matching branch, so each pounds branch only needs an
-- upper bound. The bounds are contiguous, which means fractional weights such
-- as 25.5 pounds land in the medium bucket instead of falling through to the
-- catch-all bucket.
-- Rows with a unit but a NULL weight match no weight comparison and end up in
-- the catch-all bucket unless the unit is grams.
SELECT CASE WHEN p.weightunit IS NULL THEN '1 - No Weight_specified'
            WHEN p.weightunit = 'pounds' AND p.weight < 5 THEN '2 - very_light'
            WHEN p.weightunit = 'pounds' AND p.weight <= 25 THEN '3 - light'
            WHEN p.weightunit = 'pounds' AND p.weight <= 100 THEN '4 - medium'
            WHEN p.weightunit = 'pounds' AND p.weight > 100 THEN '5 - heavy'
            WHEN p.weightunit = 'ounces' AND p.weight < 5 THEN '6 - light_ounces'
            WHEN p.weightunit = 'ounces' AND p.weight >= 5 THEN '7 - heavy_ounces'
            WHEN p.weightunit = 'grams' THEN '8 - metric_weights'
            ELSE '9 - other_weight_categories'
       END AS weight_categories,
       COUNT(p.productkey) AS categorie_count
FROM product AS p
GROUP BY weight_categories
ORDER BY weight_categories;


Unnamed: 0,weight_categories,categorie_count
0,1 - No Weight_specified,222
1,2 - very_light,568
2,3 - light,751
3,4 - medium,385
4,5 - heavy,112
5,6 - light_ounces,225
6,7 - heavy_ounces,176
7,8 - metric_weights,10
8,9 - other_weight_categories,68


In [None]:
%%sql
-- Preview two rows of the store table to inspect its columns.
SELECT st.*
FROM store AS st
LIMIT 2;

Unnamed: 0,storekey,storecode,geoareakey,countrycode,countryname,state,opendate,closedate,description,squaremeters,status
0,10,1,1,AU,Australia,Australian Capital Territory,2008-01-01,,Contoso Store Australian Capital Territory,595.0,
1,20,2,3,AU,Australia,Northern Territory,2008-01-12,2016-07-07,Contoso Store Northern Territory,665.0,Closed


In [None]:
%%sql
-- Step 1. Revenue per store for orders dated in 2023, carrying the store floor area along.
WITH store_revenue AS (
    SELECT
        loc.storekey,
        MAX(loc.squaremeters) AS squaremeters,
        SUM(sl.quantity * sl.netprice * sl.exchangerate) AS revenue
    FROM sales AS sl
    LEFT JOIN store AS loc
        ON sl.storekey = loc.storekey
    WHERE sl.orderdate BETWEEN '2023-01-01' AND '2023-12-31'
    GROUP BY loc.storekey
)
-- Step 2. Label each store by floor area and revenue, then total the revenue per
-- label and express it as a percentage of the revenue across all stores.
-- Rows with NULL squaremeters are labelled as the online store.
SELECT
    CASE
        WHEN squaremeters < 1000 AND revenue < 100000 THEN '1 - Small Store - Low Revenue'
        WHEN squaremeters < 1000 AND revenue >= 100000 THEN '2 - Small Store - High Revenue'
        WHEN squaremeters BETWEEN 1000 AND 2000 AND revenue < 300000 THEN '3 - Medium Store - Low Revenue'
        WHEN squaremeters BETWEEN 1000 AND 2000 AND revenue >= 300000 THEN '4 - Medium Store - High Revenue'
        WHEN squaremeters > 2000 AND revenue < 500000 THEN '5 - Large Store - Low Revenue'
        WHEN squaremeters > 2000 AND revenue >= 500000 THEN '6 - Large Store - High Revenue'
        WHEN squaremeters IS NULL THEN '7 - Online Store'
    END AS store_category,
    SUM(revenue) AS total_net_revenue,
    (SUM(revenue) / (SELECT SUM(revenue) FROM store_revenue) * 100.0) AS percentage_contribution
FROM store_revenue
GROUP BY store_category
ORDER BY store_category;

Unnamed: 0,store_category,total_net_revenue,percentage_contribution
0,1 - Small Store - Low Revenue,387769.84,1.17
1,2 - Small Store - High Revenue,922092.43,2.79
2,3 - Medium Store - Low Revenue,5148573.92,15.55
3,4 - Medium Store - High Revenue,4318808.5,13.04
4,5 - Large Store - Low Revenue,1638484.83,4.95
5,6 - Large Store - High Revenue,603045.14,1.82
6,7 - Online Store,20089790.85,60.68
