## BALANCED TREE CLOTHING CO. SQL CHALLENGE
This is a sql project created by [Danny Ma]("https://www.linkedin.com/in/datawithdanny/") to help establish the foundational knowledge of sql while testing and developing logical problem skills.
For this case study, I chose to create my database with pgAdmin and then remotely connected this to Jupyter Notebooks. 

To get access to this and other of Danny's projects - [8 Week SQL Challenge]("https://8weeksqlchallenge.com/").

Before getting started, I would also recommend installing "ipython-sql". This allows you use the 'jupyter magic' function to interact with your relational database.

#### Importing Libraries

In [1]:
from sqlalchemy import create_engine
import sqlite3 as sql

#### Create a postgresql engine to connect to database

In [2]:
engine = create_engine('postgresql://postgres:pgres95.@localhost:5432/balanced_tree')

#### Load the sql extension

In [3]:
%load_ext sql

#### Set up the connection

In [4]:
%sql engine

#### High Level Sales Analysis

##### 1. What was the total quantity sold for all products?

In [5]:
%%sql
SELECT SUM(qty) AS total_sales 
FROM sales;

total_sales
45216


##### 2. What is the total generated revenue for all products before discounts?

In [6]:
%%sql
SELECT SUM(s.qty*s.price) AS total_revenue_b4_discnt
FROM sales s;

total_revenue_b4_discnt
1289453


##### 3. What was the total discount amount for all products?

In [7]:
%%sql
SELECT SUM((ROUND(s.discount*1.0/100,2)*s.price*s.qty)) AS total_discount 
FROM sales s;

total_discount
156229.14


#### Transaction Analysis

##### 1. How many unique transactions were there?

In [8]:
%%sql
SELECT COUNT(DISTINCT(s.txn_id)) AS unique_trnstn_id 
FROM sales s;

unique_trnstn_id
2500


##### 2. What is the average unique products purchased in each transaction?

In [9]:
%%sql
WITH uniq_prods AS (
SELECT s.txn_id, COUNT(DISTINCT s.prod_id) AS unique_products
FROM sales s
GROUP BY s.txn_id

)
SELECT ROUND(AVG(unique_products),3) AS avg_uniq_prods
FROM uniq_prods;

avg_uniq_prods
6.038


##### 3. What are the 25th, 50th and 75th percentile values for the revenue per transaction?
- This will be split into 2:
    - revenue before discount 
    - revenue after discount

Revenue Before Discount

In [10]:
%%sql
WITH rev_per_transaction AS (
SELECT s.txn_id, ROUND((SUM(s.qty*s.price)/COUNT(DISTINCT(s.txn_id))),2) AS rev_per_trans
FROM sales s
GROUP BY s.txn_id

)

SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY rpt.rev_per_trans) twenty_fifth_perc
, PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY rpt.rev_per_trans) fiftieth_perc
, PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY rpt.rev_per_trans) seventy_fifth_perc
FROM rev_per_transaction rpt;

twenty_fifth_perc,fiftieth_perc,seventy_fifth_perc
375.75,509.5,647.0


Revenue After Discount

In [11]:
%%sql
WITH rev_per_transaction AS (
SELECT s.txn_id, SUM((ROUND(1 - s.discount*1.0/100,2)*s.price*s.qty)) AS rev_per_trans
FROM sales s
GROUP BY s.txn_id

)

SELECT PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY rpt.rev_per_trans)  twenty_fifth_perc
, PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY rpt.rev_per_trans) fiftieth_perc
, PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY rpt.rev_per_trans) seventy_fifth_perc
FROM rev_per_transaction rpt;

twenty_fifth_perc,fiftieth_perc,seventy_fifth_perc
326.40500000000003,441.225,572.7625


##### 4. What is the average discount value per transaction?

In [12]:
%%sql
SELECT ROUND((SUM((ROUND(s.discount*1.0/100,2)*s.price*s.qty))/COUNT(DISTINCT(s.txn_id))),2) AS avg_discnt_per_trans 
FROM sales s;

avg_discnt_per_trans
62.49


##### 5. What is the percentage split of all transactions for members vs non-members?

In [13]:
%%sql
WITH uniq_transaction AS (
SELECT DISTINCT(s.txn_id), s.member
FROM sales s

)

SELECT ROUND((COUNT(CASE WHEN ut.member ='t' THEN 1 END)*1.0/COUNT(*))*100,2)||'%' member
, ROUND((COUNT(CASE WHEN ut.member ='f' THEN 1 END)*1.0/COUNT(*))*100,2)||'%' non_member
FROM uniq_transaction ut;

member,non_member
60.20%,39.80%


##### 6. What is the average revenue for member transactions and non-member transactions?
- Using Revenue After Discount         

In [14]:
%%sql
WITH rev_after_discount AS (
SELECT s.txn_id, s.member
, SUM((ROUND(1 - s.discount*1.0/100,2)*s.price*s.qty)) filter (WHERE member = 't') member_rev
, SUM((ROUND(1 - s.discount*1.0/100,2)*s.price*s.qty)) filter (WHERE member = 'f') non_member_rev
, SUM(qty) filter (WHERE member = 't') member_qty
, SUM(qty) filter (WHERE member = 'f') non_member_qty
FROM sales s
GROUP BY s.txn_id, s.member

)
SELECT ROUND((SUM(member_rev)/SUM(member_qty)),2) avg_member_rev
, ROUND((SUM(non_member_rev)/SUM(non_member_qty)),2) avg_non_member_rev
FROM rev_after_discount rad;

avg_member_rev,avg_non_member_rev
25.17,24.89


#### Product Analysis

##### 1. What are the top 3 products by total revenue before discount?

In [15]:
%%sql
SELECT pd.product_name, SUM(s.qty*s.price) AS total_revenue_b4_discnt
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.product_name
ORDER BY total_revenue_b4_discnt DESC
LIMIT 3;

product_name,total_revenue_b4_discnt
Blue Polo Shirt - Mens,217683
Grey Fashion Jacket - Womens,209304
White Tee Shirt - Mens,152000


##### 2. What is the total quantity, revenue and discount for each segment?

In [16]:
%%sql
SELECT pd.segment_name, SUM(s.qty) Qty, SUM(s.qty*s.price) total_revenue_b4_discnt
, SUM((ROUND(s.discount*1.0/100,2)*s.price*s.qty)) total_discount
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.segment_name;

segment_name,qty,total_revenue_b4_discnt,total_discount
Shirt,11265,406143,49594.27
Jeans,11349,208350,25343.97
Jacket,11385,366983,44277.46
Socks,11217,307977,37013.44


##### 3. What is the top selling product for each segment?

In [17]:
%%sql
WITH seg_prod AS (
SELECT pd.segment_name, pd.product_name , SUM(s.qty) Qty
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.segment_name, pd.product_name 
ORDER BY pd.segment_name, pd.product_name 

)
, comp AS (
SELECT *
, ROW_NUMBER() OVER (PARTITION BY sp.segment_name ORDER BY sp.Qty DESC ) rank
FROM seg_prod sp

)
SELECT c.segment_name, c.product_name, c.Qty
FROM comp c
WHERE 1=1
AND rank = 1;

segment_name,product_name,qty
Jacket,Grey Fashion Jacket - Womens,3876
Jeans,Navy Oversized Jeans - Womens,3856
Shirt,Blue Polo Shirt - Mens,3819
Socks,Navy Solid Socks - Mens,3792


##### 4. What is the total quantity, revenue and discount for each category?

In [18]:
%%sql
SELECT pd.category_name, SUM(s.qty) Qty, SUM(s.qty*s.price) total_revenue_b4_discnt
, SUM((ROUND(s.discount*1.0/100,2)*s.price*s.qty)) total_discount
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.category_name;

category_name,qty,total_revenue_b4_discnt,total_discount
Mens,22482,714120,86607.71
Womens,22734,575333,69621.43


##### 5. What is the top selling product for each category?

In [19]:
%%sql
WITH seg_prod AS (
SELECT pd.category_name, pd.product_name , SUM(s.qty) Qty
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.category_name, pd.product_name 
ORDER BY pd.category_name, pd.product_name

)
, comp AS (
SELECT *
, ROW_NUMBER() OVER (PARTITION BY sp.category_name ORDER BY sp.Qty DESC) rank
FROM seg_prod sp

)
SELECT c.category_name, c.product_name, c.Qty
FROM comp c
WHERE 1=1
AND rank = 1
ORDER BY c.qty DESC;

category_name,product_name,qty
Womens,Grey Fashion Jacket - Womens,3876
Mens,Blue Polo Shirt - Mens,3819


##### 6. What is the percentage split of revenue by product for each segment?
- Using Revenue After Discount

In [20]:
%%sql
WITH seg_prod AS (
SELECT pd.segment_name, pd.product_name 
, SUM((ROUND(1 - s.discount*1.0/100,2)*s.price*s.qty)) revenue
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.segment_name, pd.product_name 
ORDER BY pd.segment_name, pd.product_name, revenue DESC

)
, comp AS (
SELECT *
, SUM(sp.revenue) OVER (PARTITION BY sp.segment_name) tot_rev
FROM seg_prod sp

)
SELECT c.segment_name, c.product_name
, ROUND((c.revenue/c.tot_rev)*100,2)||'%' rev_perc
FROM comp c;

segment_name,product_name,rev_perc
Jacket,Grey Fashion Jacket - Womens,56.99%
Jacket,Indigo Rain Jacket - Womens,19.44%
Jacket,Khaki Suit Jacket - Womens,23.57%
Jeans,Black Straight Jeans - Womens,58.14%
Jeans,Cream Relaxed Jeans - Womens,17.82%
Jeans,Navy Oversized Jeans - Womens,24.04%
Shirt,Blue Polo Shirt - Mens,53.53%
Shirt,Teal Button Up Shirt - Mens,8.99%
Shirt,White Tee Shirt - Mens,37.48%
Socks,Navy Solid Socks - Mens,44.24%


##### 7. What is the percentage split of revenue by segment for each category?
- Using Revenue After Discount

In [21]:
%%sql
WITH seg_prod AS (
SELECT pd.category_name, pd.segment_name 
, SUM((ROUND(1 - s.discount*1.0/100,2)*s.price*s.qty)) revenue
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.category_name, pd.segment_name 
ORDER BY pd.category_name, pd.segment_name, revenue DESC

)
, comp AS (
SELECT *
, SUM(sp.revenue) OVER (PARTITION BY sp.category_name) tot_rev
FROM seg_prod sp

)
SELECT c.category_name, c.segment_name
, ROUND((c.revenue/c.tot_rev)*100,2)||'%' rev_perc
FROM comp c;

category_name,segment_name,rev_perc
Mens,Shirt,56.82%
Mens,Socks,43.18%
Womens,Jacket,63.81%
Womens,Jeans,36.19%


##### 8. What is the percentage split of total revenue by category?
- Using Revenue After Discount

In [22]:
%%sql
WITH seg_prod AS (
SELECT pd.category_name
, SUM((ROUND(1 - s.discount*1.0/100,2)*s.price*s.qty)) revenue
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.category_name
ORDER BY pd.category_name, revenue DESC

)
, comp AS (
SELECT *
, SUM(sp.revenue) OVER () tot_rev
FROM seg_prod sp

)
SELECT c.category_name
, ROUND((c.revenue/c.tot_rev)*100,2)||'%' rev_perc
FROM comp c;

category_name,rev_perc
Mens,55.37%
Womens,44.63%


##### 9. What is the total transaction “penetration” for each product? 
- (hint: penetration = number of transactions where at least 1 quantity of a product was purchased divided by total number of transactions)

In [23]:
%%sql
WITH pen AS (
SELECT pd.product_name, s.txn_id
, SUM((ROUND(1 - s.discount*1.0/100,2)*s.price*s.qty)) revenue
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id
GROUP BY pd.product_name, s.txn_id
ORDER BY pd.product_name, revenue DESC

)
, comp AS (
SELECT p.product_name
, (SELECT COUNT(*) FROM sales) trans_count
, COUNT(p.product_name) OVER (PARTITION BY p.product_name) prod_pen
FROM pen p
ORDER BY p.product_name
)
SELECT c.product_name, c.prod_pen
, ROUND((c.prod_pen*1.0/c.trans_count)*100,2)||'%' trans_penetration
FROM comp c
GROUP BY C.product_name, c.prod_pen, ROUND((c.prod_pen*1.0/c.trans_count)*100,2)
ORDER BY trans_penetration DESC;

product_name,prod_pen,trans_penetration
Navy Solid Socks - Mens,1281,8.49%
Grey Fashion Jacket - Womens,1275,8.45%
Navy Oversized Jeans - Womens,1274,8.44%
White Tee Shirt - Mens,1268,8.40%
Blue Polo Shirt - Mens,1268,8.40%
Pink Fluro Polkadot Socks - Mens,1258,8.33%
Indigo Rain Jacket - Womens,1250,8.28%
Khaki Suit Jacket - Womens,1247,8.26%
Black Straight Jeans - Womens,1246,8.25%
Cream Relaxed Jeans - Womens,1243,8.23%


##### 10. What is the most common combination of at least 1 quantity of any 3 products in a 1 single transaction?

In [24]:
%%sql
WITH bas_dt AS (
SELECT  pd.product_id, pd.product_name, s.txn_id
FROM sales s
LEFT JOIN product_details pd
ON s.prod_id = pd.product_id

)
, conc_dt AS (
SELECT bd.product_name, bd.txn_id, bd.product_id, bd1.product_id, bd2.product_id
,CONCAT(bd.product_id,'_',bd1.product_id,'_',bd2.product_id) conc_prod_id
, CONCAT(bd.product_name,'_',bd1.product_name,'_',bd2.product_name) conc_name
FROM bas_dt bd
INNER JOIN bas_dt bd1
ON bd.txn_id = bd1.txn_id AND bd.product_id < bd1.product_id
INNER JOIN bas_dt bd2
ON bd.txn_id = bd2.txn_id AND bd1.product_id < bd2.product_id 

)
, conc_freq AS (
SELECT *
, COUNT(cd.conc_prod_id) OVER (PARTITION BY cd.conc_prod_id) freq
FROM conc_dt cd
ORDER BY freq DESC

)
SELECT cf.conc_name, cf.conc_prod_id, cf.freq
FROM conc_freq cf
GROUP BY cf.conc_name,cf.conc_prod_id, cf.freq
ORDER BY cf.freq DESC
LIMIT 1;

conc_name,conc_prod_id,freq
White Tee Shirt - Mens_Grey Fashion Jacket - Womens_Teal Button Up Shirt - Mens,5d267b_9ec847_c8d436,352
