### Pivot table

+----+------+-------+
| ID | Type | Value |
+----+------+-------+
| 1  | A    | 10    |
| 1  | B    | 20    |
| 2  | A    | 30    |
| 2  | B    | 40    |
+----+------+-------+
is pivoted into
+----+---------+---------+
| ID | A_Value | B_Value |
+----+---------+---------+
| 1  | 10      | 20      |
| 2  | 30      | 40      |
+----+---------+---------+



In [None]:
-- Pivot long (ID, Type, Value) rows into one wide row per ID.
-- An aggregate is needed because GROUP BY ID collapses several source rows
-- into one output row. Each CASE yields NULL for rows of the other Type, and
-- MAX ignores NULLs, so it returns the Value of the matching Type (the largest
-- one if a Type occurs more than once for the same ID).
SELECT
    ID,
    MAX(
        CASE
            WHEN Type = 'A' THEN Value
        END
    ) AS A_Value,
    MAX(
        CASE
            WHEN Type = 'B' THEN Value
        END
    ) AS B_Value
FROM your_table_name
GROUP BY ID;


In [None]:
-- Candidates who have all three skills: Python, Tableau and PostgreSQL.
-- skill_flags pivots the skill rows into one row per candidate; a flag is
-- 'yes' when the candidate has that skill and NULL otherwise.
WITH skill_flags AS (
    SELECT
        candidate_id,
        MAX(CASE WHEN skill = 'Python' THEN 'yes' END) AS python,
        MAX(CASE WHEN skill = 'Tableau' THEN 'yes' END) AS tableau,
        MAX(CASE WHEN skill = 'PostgreSQL' THEN 'yes' END) AS postgresql
    FROM candidates
    GROUP BY candidate_id
)

SELECT candidate_id
FROM skill_flags
WHERE python IS NOT NULL
  AND tableau IS NOT NULL
  AND postgresql IS NOT NULL
ORDER BY candidate_id ASC;

### Date Calculation in PostgreSQL

In [None]:
-- For each user, the gap between their first and last post dated in 2021.
-- Subtracting two DATE values in PostgreSQL yields a whole number of days.
-- HAVING keeps only users with more than one post in that year.
SELECT
    user_id,
    MAX(CAST(post_date AS DATE)) - MIN(CAST(post_date AS DATE)) AS days_between
FROM posts
WHERE EXTRACT(YEAR FROM CAST(post_date AS DATE)) = 2021
GROUP BY user_id
HAVING COUNT(post_id) > 1;

### Date Calculation with INTERVAL

In [None]:
-- Number of distinct policy holders with at least one call placed less than
-- 7 days after their previous call.
-- call_pairs lines each call up with the same policy holder's preceding call.
WITH call_pairs AS (
    SELECT
        policy_holder_id,
        call_received AS current_call,
        LAG(call_received) OVER (
            PARTITION BY policy_holder_id
            ORDER BY call_received
        ) AS pre_call
    FROM callers
)

SELECT COUNT(DISTINCT policy_holder_id) AS patient_count
FROM call_pairs
-- A holder's first call has pre_call = NULL; the comparison is then not true,
-- so that row is dropped without an explicit IS NOT NULL check.
WHERE current_call - pre_call < INTERVAL '7 days';

### Find Duplicates with Group By

In [None]:
-- Number of companies that posted the same listing (same company, title and
-- description) more than once.
-- duplicate_listings keeps only the (company, title, description) groups that
-- contain more than one job_id.
WITH duplicate_listings AS (
    SELECT company_id
    FROM job_listings
    GROUP BY
        company_id,
        title,
        description
    HAVING COUNT(job_id) > 1
)

-- DISTINCT: a company with several different duplicated listings counts once.
SELECT COUNT(DISTINCT company_id) AS duplicate_companies
FROM duplicate_listings;

### Concatenate a number with a symbol or characters

In [None]:
-- Total sales per manufacturer, formatted as text such as '$12 million'.
-- The sum is divided by 1,000,000 and rounded to a whole number before the
-- '$' prefix and ' million' suffix are attached.
SELECT
    manufacturer,
    CONCAT(
        '$',
        ROUND(SUM(total_sales) / 1000000),
        ' million'
    ) AS sales_mil
FROM pharmacy_sales
GROUP BY manufacturer;

### Row_number() over(Partition_by ... Order_by ...)

In [None]:
-- Each user's third transaction in transaction_date order.
-- numbered_transactions assigns 1, 2, 3, ... to a user's transactions from
-- earliest to latest; users with fewer than three transactions return no row.
WITH numbered_transactions AS (
    SELECT
        user_id,
        spend,
        transaction_date,
        ROW_NUMBER() OVER (
            PARTITION BY user_id
            ORDER BY transaction_date
        ) AS txn_seq
    FROM transactions
)

SELECT
    user_id,
    spend,
    transaction_date
FROM numbered_transactions
WHERE txn_seq = 3;

### Using average with "partition by" and "rows between" to get rolling average

In [None]:
-- Rolling average of tweet_count per user, rounded to 2 decimals.
-- The frame is row-based: the current row plus up to two preceding rows of the
-- same user in tweet_date order. It is not a calendar window, so gaps between
-- dates do not shrink the frame.
SELECT
    user_id,
    tweet_date,
    ROUND(AVG(tweet_count) OVER three_row_window, 2) AS rolling_avg_3d
FROM tweets
WINDOW three_row_window AS (
    PARTITION BY user_id
    ORDER BY tweet_date
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
);



### Getting the top two values

In [None]:
-- Highest-spend products per category for transactions dated in 2022.
-- ranked_products totals spend per (category, product) and ranks products
-- inside each category from highest to lowest total.
WITH ranked_products AS (
    SELECT
        category,
        product,
        SUM(spend) AS total_spend,
        RANK() OVER (
            PARTITION BY category
            ORDER BY SUM(spend) DESC
        ) AS spend_rank
    FROM product_spend
    WHERE EXTRACT(YEAR FROM transaction_date) = 2022
    GROUP BY
        category,
        product
)

-- RANK gives tied products the same rank, so a category can return more than
-- two rows when totals tie.
SELECT
    category,
    product,
    total_spend
FROM ranked_products
WHERE spend_rank <= 2
ORDER BY
    category,
    spend_rank;

### Dense_rank() and join lots of tables

In [None]:
-- Artists ranked by how many global_song_rank rows with rank <= 10 belong to
-- their songs; keeps artists whose dense rank is 5 or better.
-- DENSE_RANK leaves no gaps after ties, so more than five artists can be
-- returned.
WITH artist_appearances AS (
    SELECT
        artists.artist_name,
        DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS artist_rank
    FROM global_song_rank AS chart
    INNER JOIN songs
        ON chart.song_id = songs.song_id
    INNER JOIN artists
        ON songs.artist_id = artists.artist_id
    WHERE chart.rank <= 10
    GROUP BY artists.artist_name
)

SELECT
    artist_name,
    artist_rank
FROM artist_appearances
WHERE artist_rank <= 5;

### Left join 2 times to get caller and receiver

In [None]:
-- Percentage (one decimal place) of phone calls whose caller and receiver are
-- registered in different countries.
-- phone_info is joined twice: once keyed by the caller and once keyed by the
-- receiver, so each call row carries both country ids.
WITH call_countries AS (
    SELECT
        caller_info.country_id AS caller_country,
        receiver_info.country_id AS receiver_country
    FROM phone_calls AS pc
    LEFT JOIN phone_info AS caller_info
        ON pc.caller_id = caller_info.caller_id
    LEFT JOIN phone_info AS receiver_info
        ON pc.receiver_id = receiver_info.caller_id
)

-- Numerator and denominator come from a single pass over the CTE.
-- A call with a missing country on either side (NULL from the LEFT JOIN) is
-- not counted as international but still counts toward the total.
-- NULLIF returns NULL instead of raising a division-by-zero error when
-- phone_calls is empty.
-- The result column is intentionally left unaliased, as in the original query.
SELECT
    ROUND(
        100.0 * COUNT(*) FILTER (WHERE caller_country != receiver_country)
        / NULLIF(COUNT(*), 0),
        1
    )
FROM call_countries;

### Create 2 tables for current month and previous month with lag

In [9]:
-- Count of users active in July who were also active in the immediately
-- preceding calendar month.
-- Note: LAG works row by row, while DISTINCT de-duplicates on the whole
-- selected column combination (user_id + month here), so each user has at
-- most one row per calendar month before LAG is applied.

-- One row per user per calendar month with activity. date_trunc keeps the
-- year, so the same month number in different years is not merged and months
-- from different years are never treated as consecutive.
WITH active_months AS (
    SELECT DISTINCT
        user_id,
        date_trunc('month', event_date) AS month_start
    FROM user_actions
),

-- Pair each active month with the same user's previous active month.
month_pairs AS (
    SELECT
        month_start AS cur_mth,
        LAG(month_start, 1) OVER (
            PARTITION BY user_id
            ORDER BY month_start ASC
        ) AS pre_mth
    FROM active_months
)

SELECT
    date_part('month', cur_mth) AS mth,
    COUNT(*) AS count
FROM month_pairs
-- The previous active month must be exactly one calendar month earlier.
WHERE cur_mth = pre_mth + INTERVAL '1 month'
  AND date_part('month', cur_mth) = 7
GROUP BY date_part('month', cur_mth);

SyntaxError: invalid syntax (2954275414.py, line 1)

### Median: Percentile_cont(0.5), and
### creating rows based on frequency using a recursive CTE

In [None]:
-- Median number of searches, computed from a frequency table
-- (searches, num_users).
-- Flatten the table: emit each searches value num_users times so that the
-- median can be taken over individual users.
WITH RECURSIVE nums AS (
    -- Anchor: the first copy of every bucket that has at least one user.
    SELECT
        1 AS value,
        searches,
        num_users
    FROM search_frequency
    WHERE num_users >= 1
    UNION ALL
    -- Recursive step: the term must read FROM nums, otherwise value and
    -- num_users are undefined. It adds one copy per pass until value reaches
    -- num_users.
    SELECT
        value + 1 AS value,
        searches,
        num_users
    FROM nums
    WHERE value < num_users
)

-- Find the median (PostgreSQL). percentile_cont returns double precision and
-- ROUND(x, n) is only defined for numeric, so the aggregate result is cast
-- before rounding.
SELECT
    ROUND(
        (PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY searches))::DECIMAL,
        1
    ) AS median
FROM nums;

### Cross join to make combination of the same table

In [None]:
-- Every combination of three different toppings with its total cost.
-- The table is cross joined with itself twice using explicit CROSS JOINs
-- (no implicit comma join).
-- The WHERE clause makes each combination unique: requiring
-- p1 < p2 < p3 by name keeps one ordering per set of three and excludes
-- rows where a topping is paired with itself.
SELECT
    CONCAT(p1.topping_name, ',', p2.topping_name, ',', p3.topping_name) AS pizza,
    p1.ingredient_cost + p2.ingredient_cost + p3.ingredient_cost AS total_cost
FROM pizza_toppings AS p1
CROSS JOIN pizza_toppings AS p2
CROSS JOIN pizza_toppings AS p3
WHERE p1.topping_name < p2.topping_name
  AND p2.topping_name < p3.topping_name
ORDER BY
    total_cost DESC,
    pizza ASC;

### REGEXP_LIKE() in a WHERE clause

In [None]:
-- Employees whose name begins with "J" followed by at least two more
-- characters ('^' anchors the start; each '.' matches one character).
-- NOTE(review): case sensitivity of REGEXP_LIKE depends on the engine and
-- collation; confirm for the target database.
SELECT employees.name
FROM employees
WHERE REGEXP_LIKE(employees.name, '^J..');
