What is the median follower count for users in the following age groups:

- 18-24
- 25-35
- 36-50
- +50

In [0]:
%run "/Users/sarfaraj_ahmed@outlook.com/Clean"

In [0]:
# Expose the DataFrames to the %sql cells as temp views named `pin` and `user`.
# NOTE(review): df_pin / df_user are presumably defined by the notebook pulled in
# via %run above -- confirm.
df_pin.createOrReplaceTempView("pin")
df_user.createOrReplaceTempView("user")

In [0]:
%sql
-- follower_count_rank: one row per pin/user match (joined on ind), carrying the
-- user's age bucket and the pin's follower_count.
--
-- Only rows that fall into one of the four requested age groups are kept:
-- users younger than 18 or with a NULL age match no CASE branch, and would
-- otherwise form a NULL age_group that leaks into the per-group median.
-- NOTE(review): the BETWEEN ranges assume integer ages -- confirm.
CREATE OR REPLACE TEMP VIEW follower_count_rank AS
SELECT
  CASE
    WHEN u.age BETWEEN 18 AND 24 THEN '18-24'
    WHEN u.age BETWEEN 25 AND 35 THEN '25-35'
    WHEN u.age BETWEEN 36 AND 50 THEN '36-50'
    WHEN u.age > 50 THEN '+50'
  END AS age_group,
  p.follower_count,
  -- Global position by descending follower count. Kept so the view's column
  -- list is unchanged; the later cells in this notebook only read age_group
  -- and follower_count.
  ROW_NUMBER() OVER (ORDER BY p.follower_count DESC) AS rank
FROM
  pin AS p
INNER JOIN
  user AS u
  ON p.ind = u.ind
WHERE
  -- Rows without a follower count, or with a count of zero, are excluded
  -- from the median calculation.
  p.follower_count IS NOT NULL
  AND p.follower_count != 0
  -- Drop users outside the requested age groups (also removes NULL ages).
  AND u.age >= 18;


In [0]:
%sql
-- ranked_followers: for every row of follower_count_rank, adds
--   row_num     - 1-based position of the row inside its age group when the
--                 group is sorted by follower_count ascending
--   total_count - number of rows in that age group
-- Together these let a later query pick the middle row(s) of each group.
CREATE OR REPLACE TEMP VIEW ranked_followers AS
SELECT
  fcr.age_group,
  fcr.follower_count,
  ROW_NUMBER() OVER group_sorted_asc AS row_num,
  COUNT(*) OVER whole_group AS total_count
FROM follower_count_rank AS fcr
WINDOW
  whole_group AS (PARTITION BY fcr.age_group),
  group_sorted_asc AS (PARTITION BY fcr.age_group ORDER BY fcr.follower_count ASC);

In [0]:
%sql
-- Median follower count per age group, one row per group.
--
-- The middle position(s) of a group with n rows are FLOOR((n + 1) / 2) and
-- FLOOR((n + 2) / 2): the same row when n is odd, the two central rows when n
-- is even. AVG collapses them into a single median value.
-- FLOOR is required because `/` in Spark SQL is fractional division: a plain
-- n / 2 yields e.g. 2.5 for n = 5, which never equals an integer row_num, so
-- odd-sized groups would return no median at all. The FLOOR form is also
-- correct in dialects where `/` on integers truncates.
SELECT
  age_group,
  AVG(follower_count) AS median_follower_count
FROM
  ranked_followers
WHERE
  -- Ignore rows that did not fall into any of the requested age groups.
  age_group IS NOT NULL
  AND row_num IN (
    FLOOR((total_count + 1) / 2),
    FLOOR((total_count + 2) / 2)
  )
GROUP BY
  age_group
ORDER BY
  -- Present the groups youngest to oldest rather than alphabetically.
  CASE age_group
    WHEN '18-24' THEN 1
    WHEN '25-35' THEN 2
    WHEN '36-50' THEN 3
    WHEN '+50' THEN 4
  END;