### 1. Distinct 2. Order By (default ASC)

In [None]:
-- Distinct author_id values from rows whose author_id equals view_id,
-- returned under the name "id" in ascending order.
SELECT DISTINCT
    author_id AS id
FROM view
WHERE view_id = author_id
ORDER BY author_id ASC

### 1. Common Table Expression (CTE) 2. Case When 3. Coalesce 4. nullif

In [None]:
-- Click-through rate per ad: 'Clicked' rows as a percentage of
-- 'Clicked' + 'Viewed' rows, rounded to two decimals.
WITH click_counts AS (
    -- number of 'Clicked' rows per ad
    SELECT
        ad_id,
        SUM(CASE WHEN action = 'Clicked' THEN 1 ELSE 0 END) AS clicked
    FROM ads
    GROUP BY ad_id
),
impression_counts AS (
    -- number of 'Clicked' or 'Viewed' rows per ad
    SELECT
        ad_id AS ad,
        SUM(CASE WHEN action IN ('Clicked', 'Viewed') THEN 1 ELSE 0 END) AS total
    FROM ads
    GROUP BY ad_id
)
SELECT
    c.ad_id,
    -- "+ 0.0" forces decimal division; NULLIF turns a zero denominator into
    -- NULL and COALESCE then reports that case as 0
    COALESCE(
        ROUND((c.clicked + 0.0) / NULLIF(i.total + 0.0, 0) * 100, 2),
        0
    ) AS ctr
FROM click_counts AS c
INNER JOIN impression_counts AS i
    ON c.ad_id = i.ad
ORDER BY ctr DESC, c.ad_id

### Having (filters groups produced by Group By; its condition normally uses an aggregate function)

In [None]:
-- (actor_id, director_id) pairs that occur in at least three rows.
SELECT
    actor_id,
    director_id
FROM actordirector
GROUP BY
    actor_id,
    director_id
HAVING COUNT(*) >= 3

### 1. Subquery 2. Between (with date)

In [None]:
-- Average selling price per product: total revenue divided by total units
-- sold, with each sale priced by the price period containing its purchase
-- date. BETWEEN is inclusive on both ends of the period.
-- NOTE(review): assumes both prices and unitssold carry a product_id column
-- (the original selected d.product_id from their join) -- confirm.
SELECT
    p.product_id,
    -- "* 1.0" forces decimal division; NULLIF avoids a division-by-zero error
    -- when a product's units sum to 0
    ROUND(SUM(p.price * u.units) * 1.0 / NULLIF(SUM(u.units), 0), 2) AS average_price
FROM prices AS p
INNER JOIN unitssold AS u
    -- a sale must match its own product; without this condition every sale
    -- would be paired with every product's price period
    ON u.product_id = p.product_id
    AND u.purchase_date BETWEEN p.start_date AND p.end_date
GROUP BY p.product_id

### Pivot table

+----+------+-------+
| ID | Type | Value |
+----+------+-------+
| 1  | A    | 10    |
| 1  | B    | 20    |
| 2  | A    | 30    |
| 2  | B    | 40    |
+----+------+-------+
to 
+----+---------+---------+
| ID | A_Value | B_Value |
+----+---------+---------+
| 1  | 10      | 20      |
| 2  | 30      | 40      |
+----+---------+---------+



In [None]:
-- Pivot long (ID, Type, Value) rows into one row per ID.
-- Each CASE yields Value for the matching Type and NULL otherwise; MAX()
-- skips those NULLs, so GROUP BY ID collapses the rows into one value per column.
SELECT
    ID,
    MAX(CASE Type WHEN 'A' THEN Value END) AS A_Value,
    MAX(CASE Type WHEN 'B' THEN Value END) AS B_Value
FROM your_table_name
GROUP BY ID;


In [None]:
-- Candidates that have a row for each of Python, Tableau and PostgreSQL.
WITH skill_flags AS (
    -- one row per candidate; each flag is 'yes' when the skill appears, else NULL
    SELECT
        candidate_id,
        MAX(CASE skill WHEN 'Python' THEN 'yes' END) AS python,
        MAX(CASE skill WHEN 'Tableau' THEN 'yes' END) AS tableau,
        MAX(CASE skill WHEN 'PostgreSQL' THEN 'yes' END) AS postgresql
    FROM candidates
    GROUP BY candidate_id
)
SELECT candidate_id
FROM skill_flags
WHERE python IS NOT NULL
  AND tableau IS NOT NULL
  AND postgresql IS NOT NULL
ORDER BY candidate_id;

### Date Calculation in PostgreSql

In [None]:
-- Per user: days between the earliest and latest post dated in 2021,
-- keeping only users with more than one counted post_id that year.
-- Subtracting two DATE values in PostgreSQL gives a whole number of days.
SELECT
    user_id,
    MAX(CAST(post_date AS DATE)) - MIN(CAST(post_date AS DATE)) AS days_between
FROM posts
WHERE DATE_PART('year', CAST(post_date AS DATE)) = 2021
GROUP BY user_id
HAVING COUNT(post_id) > 1;

### Date Calculation with INTERVAL

In [None]:
-- Count distinct policy holders that have two consecutive calls
-- less than 7 days apart.
WITH call_pairs AS (
    -- pair every call with the same holder's previous call
    SELECT
        policy_holder_id,
        call_received AS current_call,
        LAG(call_received) OVER (
            PARTITION BY policy_holder_id
            ORDER BY call_received
        ) AS pre_call
    FROM callers
)
SELECT COUNT(DISTINCT policy_holder_id) AS patient_count
FROM call_pairs
-- a holder's first call has pre_call = NULL, so the comparison is not true
-- and that row is filtered out
WHERE current_call - pre_call < INTERVAL '7 days';

### Find Duplicates with Group By

In [None]:
-- Number of companies that have at least one (title, description)
-- combination listed more than once.
WITH repeated_listings AS (
    -- keep only the listing groups that occur more than once
    SELECT
        company_id,
        title,
        description
    FROM job_listings
    GROUP BY
        company_id,
        title,
        description
    HAVING COUNT(job_id) > 1
)
SELECT COUNT(DISTINCT company_id) AS duplicate_companies
FROM repeated_listings;

### Concat Number and symbol or character

In [None]:
-- Total sales per manufacturer, rounded to whole millions and rendered
-- as text of the form '$<n> million'.
WITH manufacturer_totals AS (
    SELECT
        manufacturer,
        ROUND(SUM(total_sales) / 1000000) AS millions
    FROM pharmacy_sales
    GROUP BY manufacturer
)
SELECT
    manufacturer,
    CONCAT('$', millions, ' million') AS sales_mil
FROM manufacturer_totals;

### Row_number() over(Partition_by ... Order_by ...)

In [None]:
-- Each user's third transaction when ordered by transaction_date.
WITH ordered_transactions AS (
    -- number every user's transactions from the earliest onwards
    SELECT
        user_id,
        spend,
        transaction_date,
        ROW_NUMBER() OVER (
            PARTITION BY user_id
            ORDER BY transaction_date
        ) AS txn_seq
    FROM transactions
)
SELECT
    user_id,
    spend,
    transaction_date
FROM ordered_transactions
WHERE txn_seq = 3;

### Using average with "partition by" and "rows between" to get rolling average

In [None]:
-- Rolling average of tweet_count per user over the current row and the
-- two preceding rows (ordered by tweet_date), rounded to two decimals.
SELECT
    user_id,
    tweet_date,
    ROUND(AVG(tweet_count) OVER last_three_rows, 2) AS rolling_avg_3d
FROM tweets
WINDOW last_three_rows AS (
    PARTITION BY user_id
    ORDER BY tweet_date
    -- ROWS counts physical rows, not calendar days
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
);

### Getting the top two values per group

In [None]:
-- The two highest-spend products in each category for 2022.
WITH product_totals AS (
    -- total 2022 spend per (category, product), ranked within the category;
    -- RANK() gives tied totals the same rank, so ties can return extra rows
    SELECT
        category,
        product,
        SUM(spend) AS total_spend,
        RANK() OVER (
            PARTITION BY category
            ORDER BY SUM(spend) DESC
        ) AS ranking
    FROM product_spend
    WHERE DATE_PART('year', transaction_date) = 2022
    GROUP BY
        category,
        product
)
SELECT
    category,
    product,
    total_spend
FROM product_totals
WHERE ranking <= 2
ORDER BY
    category,
    ranking;

### Dense_rank() and join lots of tables

In [None]:
-- Artists ranked by how many global_song_rank rows they have with
-- rank <= 10, keeping the artists in the first five rank positions.
WITH artist_ranking AS (
    SELECT
        ar.artist_name,
        -- DENSE_RANK leaves no gaps after ties, so more than five artists
        -- can qualify
        DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS artist_rank
    FROM global_song_rank AS gsr
    INNER JOIN songs AS sg
        ON sg.song_id = gsr.song_id
    INNER JOIN artists AS ar
        ON ar.artist_id = sg.artist_id
    WHERE gsr.rank <= 10
    GROUP BY ar.artist_name
)
SELECT
    artist_name,
    artist_rank
FROM artist_ranking
WHERE artist_rank <= 5
;

### Left join 2 times to get caller and receiver

In [None]:
-- Percentage (one decimal) of phone calls whose caller and receiver
-- have different country_id values.
WITH call_countries AS (
    -- phone_info is joined twice: once keyed by the caller, once by the receiver
    SELECT
        calls.caller_id,
        calls.receiver_id,
        from_info.country_id AS caller_country,
        to_info.country_id AS receiver_country
    FROM phone_calls AS calls
    LEFT JOIN phone_info AS from_info
        ON calls.caller_id = from_info.caller_id
    LEFT JOIN phone_info AS to_info
        ON calls.receiver_id = to_info.caller_id
)
SELECT
    -- numerator: rows passing the WHERE filter; denominator: all calls
    ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM call_countries), 1)
FROM call_countries
-- rows where either country is NULL are not counted as different
WHERE caller_country <> receiver_country
;

### Create 2 tables for current month and previous month with lag

In [9]:
-- LAG() looks at the previous ROW, so the data is first reduced to one row
-- per (user_id, month); DISTINCT de-duplicates on the whole selected row.
-- NOTE(review): only the month number is kept, so the same month in
-- different years is not distinguished -- confirm the data spans one year.

WITH user_months AS (
    SELECT DISTINCT
        user_id,
        DATE_PART('month', event_date) AS active_month
    FROM user_actions
),

month_pairs AS (
    -- each active month next to the same user's previous active month
    SELECT
        active_month AS cur_mth,
        LAG(active_month, 1) OVER (
            PARTITION BY user_id
            ORDER BY active_month
        ) AS pre_mth
    FROM user_months
)

-- users active in month 7 whose previous active month is month 6
SELECT
    cur_mth AS mth,
    COUNT(*) AS count
FROM month_pairs
WHERE cur_mth = 7
  AND cur_mth - pre_mth = 1
GROUP BY cur_mth
;

SyntaxError: invalid syntax (2954275414.py, line 1)

### Median: Percentile_cont(0.5) (available in PostgreSQL, not in MySQL)
### Create one row per unit of frequency using a recursive CTE

In [None]:
-- Median of searches when each search_frequency row stands for num_users users.
-- Step 1 flattens the table: the recursive CTE emits num_users copies of
-- every (searches, num_users) row, numbered 1..num_users in "value".
WITH RECURSIVE nums AS (
    -- anchor: the first copy of each row (rows with no users contribute nothing)
    SELECT
        1 AS value,
        searches,
        num_users
    FROM search_frequency
    WHERE num_users > 0
    UNION ALL
    -- recursive step: must read FROM nums, otherwise the query has nothing to
    -- iterate over; stops once a row has been emitted num_users times
    SELECT
        value + 1 AS value,
        searches,
        num_users
    FROM nums
    WHERE value < num_users
)
-- Step 2: median over the flattened rows (PostgreSQL).
-- PERCENTILE_CONT returns double precision and ROUND(x, n) only exists for
-- numeric, so the aggregate's RESULT is cast before rounding.
SELECT
    ROUND(
        CAST(PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY searches) AS DECIMAL),
        1
    ) AS median
FROM nums

### Create the numbers from 1 to 5

In [None]:
-- Generate the integers 1 through 5, one per row.
WITH RECURSIVE numbers (n) AS (
    -- anchor row
    SELECT 1
    UNION ALL
    -- add 1 until the upper bound is reached; change 5 to extend the sequence
    SELECT n + 1
    FROM numbers
    WHERE n < 5
)
SELECT n
FROM numbers;


### Cross join to make combination of the same table

In [None]:
-- All three-topping combinations from pizza_toppings with their summed cost.
-- Requiring the names to be strictly increasing (p1 < p2 < p3) keeps exactly
-- one ordering of each combination and prevents a topping pairing with itself.
SELECT
    CONCAT(p1.topping_name, ',', p2.topping_name, ',', p3.topping_name) AS pizza,
    p1.ingredient_cost + p2.ingredient_cost + p3.ingredient_cost AS total_cost
FROM pizza_toppings AS p1
INNER JOIN pizza_toppings AS p2
    ON p1.topping_name < p2.topping_name
INNER JOIN pizza_toppings AS p3
    ON p2.topping_name < p3.topping_name
ORDER BY
    total_cost DESC,
    pizza ASC;

### REGEXP_LIKE() in the WHERE clause

In [None]:
-- Employee names matching the pattern '^J..': '^' anchors at the start of
-- the name, 'J' is literal, and each '.' stands for one further character.
SELECT emp.name
FROM employees AS emp
WHERE REGEXP_LIKE(emp.name, '^J..');


### Nested IF expressions to exchange adjacent seats

In [None]:
-- Swap the ids of adjacent seats (1<->2, 3<->4, ...):
--   even id              -> id - 1
--   odd id below the max -> id + 1
--   odd id equal to max  -> unchanged (no partner to swap with)
SELECT
    CASE
        WHEN id % 2 = 0 THEN id - 1
        WHEN id < (SELECT MAX(id) FROM seat) THEN id + 1
        ELSE id
    END AS id,
    student
FROM seat
ORDER BY id;

# Great advanced practices

### Retrieve the 5 highest-paid employees in each department

In [None]:

-- The five highest-paid employees in each department.
with ranked_salaries as (
    select
        department,
        employee,
        salary,
        -- the alias avoids the word "rank", which is reserved in MySQL 8;
        -- "employee" breaks salary ties so the chosen five are deterministic
        row_number() over (
            partition by department
            order by salary desc, employee
        ) as salary_rank
    from employees
)

select
    department,
    employee,
    salary
from ranked_salaries
where salary_rank <= 5;

### Calculate total sales for each month of the current year (including months with zero sales)
### This uses PostgreSQL functions (generate_series(), date_trunc(), current_date); in MySQL, build the list of months with a recursive CTE instead

  sale_date  
------------
 2023-01-01
 2023-02-01
 2023-03-01
 2023-04-01
 2023-05-01
 2023-06-01
 2023-07-01
 2023-08-01
 2023-09-01
 2023-10-01
 2023-11-01
 2023-12-01
(12 rows)


In [None]:
-- Total sales for every month of the current year, including months
-- without any sales (reported as 0). PostgreSQL.
-- The CTE builds the calendar: one row per month, holding the month's first day.
with months as (
    select
        generate_series(
            date_trunc('year', current_date), -- from: first day of this year
            date_trunc('year', current_date) + interval '1 year' - interval '1 day', -- to: last day of this year
            interval '1 month' -- step
        ) as month_date
)

select
    to_char(m.month_date, 'YYYY-MM') as month,
    -- SUM over no matching rows is NULL; report it as 0
    coalesce(sum(s.sales), 0) as monthly_sales
from
    months as m
-- the LEFT JOIN keeps months without sales; a half-open date range is used
-- instead of comparing to_char() strings so an index on sales_date stays usable
left join sales as s
    on s.sales_date >= m.month_date
    and s.sales_date < m.month_date + interval '1 month'
group by m.month_date
order by m.month_date;

### Find customers who have made a purchase in every one of the last six months

In [None]:
-- Customers with at least one purchase in each of the six most recent
-- complete calendar months (the current, unfinished month is excluded). PostgreSQL.
select
    customer_id
from
    purchase
where
    -- half-open window: [first day of the month six months ago, first day of this month)
    purchase_date >= date_trunc('month', current_date) - interval '6 months'
    and purchase_date < date_trunc('month', current_date)
group by
    customer_id
having
    -- the window holds exactly six months, so six distinct months means all of them
    count(distinct date_trunc('month', purchase_date)) = 6;

### Calculate running total of sales for each day within the past month

In [None]:
-- Running total of sales for each day of the previous calendar month,
-- one row per day, including days without sales. PostgreSQL.
-- The CTE builds the calendar of days.
with days as (
    select
        cast(d as date) as date
    from generate_series(
        date_trunc('month', current_date) - interval '1 month', -- from: first day of last month
        date_trunc('month', current_date) - interval '1 day', -- to: last day of last month (inclusive)
        interval '1 day' -- step
    ) as d
)
select
    days.date,
    -- inner SUM: that day's sales (0 when there are none);
    -- outer windowed SUM: accumulates the daily totals in date order
    sum(coalesce(sum(s.sales_amount), 0)) over (
        order by days.date
        rows between unbounded preceding and current row
    ) as running_total
from
    days
-- NOTE(review): assumes sales.date holds a calendar date without a
-- time-of-day part -- confirm
left join sales as s
    on s.date = days.date
-- grouping per day keeps one output row per day even with several sales that day
group by days.date
order by days.date;

### Find the employees who manage the same number of employees as their manager

In [None]:
-- Employees whose managed_count equals the managed_count of their manager.
-- The employees table is joined to itself: e1 is the employee, e2 the manager.
select
    e1.employee_name as employee,
    e1.managed_count as managed_count
from
    employees e1
join
    -- NOTE(review): the original read "manger_id", which looks like a typo;
    -- confirm the column is named manager_id
    employees e2 on e1.manager_id = e2.employee_id
where
    e1.managed_count = e2.managed_count;

### calculate 30-day moving average of sales for each product (range between)

In [None]:
-- Moving average of sales_amount per product over a trailing date range.
-- RANGE frames by the ORDER BY value rather than by row count: each row is
-- averaged with every row of the same product whose sales_date lies between
-- 30 days earlier and the current row's sales_date, inclusive.
SELECT
    product_id,
    sales_date,
    sales_amount,
    AVG(sales_amount) OVER trailing_30_days AS moving_avg
FROM sales
WINDOW trailing_30_days AS (
    PARTITION BY product_id
    ORDER BY sales_date
    RANGE BETWEEN INTERVAL '30 days' PRECEDING AND CURRENT ROW
);