# 08. 여러 개의 테이블 조작

In [2]:
import os

import pandas as pd
from sqlalchemy import create_engine

In [3]:
# Connection URL for the sample PostgreSQL database. It can be overridden via
# the SQL_RECIPE_DATABASE_URL environment variable so credentials do not have
# to live in the notebook.
# NOTE(review): the fallback keeps the original hard-coded URL (including its
# password) for backward compatibility; remove it once the variable is set
# wherever this notebook runs.
DB_URL = os.environ.get(
    "SQL_RECIPE_DATABASE_URL",
    "postgresql+psycopg2://postgres:skt1faker@localhost:5432/PostgreSQL",
)
db = create_engine(DB_URL, echo=False)


def query(query_state, db=db):
    """Run a SQL statement and return its result as a DataFrame.

    Args:
        query_state: SQL text to execute.
        db: Connection handed to ``pandas.read_sql``; defaults to the
            module-level engine created above.

    Returns:
        The ``pandas.DataFrame`` produced by ``pandas.read_sql``.
    """
    return pd.read_sql(query_state, db)

# 
# 
# 
### 8-1 여러 개의 테이블을 세로로 결합

### ```UNION ALL``` : 비슷한 구조를 가지는 테이블의 데이터를 일괄 처리
### ```UNION DISTINCT``` : 데이터의 중복을 제외한 결과 (```UNION```)

In [4]:
# List every row of app1_mst_users.
query("SELECT * FROM app1_mst_users;")

Unnamed: 0,user_id,name,email
0,U001,Sato,sato@example.com
1,U002,Suzuki,suzuki@example.com


In [5]:
# List every row of app2_mst_users.
query("SELECT * FROM app2_mst_users;")

Unnamed: 0,user_id,name,phone
0,U001,Ito,080-xxxx-xxxx
1,U002,Tanaka,070-xxxx-xxxx


In [6]:
# Stack the two user tables vertically with UNION ALL. A literal app_name
# column tags which table each row came from, and the app2 branch supplies
# NULL AS email so both SELECTs expose the same column list.
query("""
SELECT 'app1' AS app_name, user_id, name, email FROM app1_mst_users
    UNION ALL
SELECT 'app2' AS app_name, user_id, name, NULL AS email FROM app2_mst_users;""")

Unnamed: 0,app_name,user_id,name,email
0,app1,U001,Sato,sato@example.com
1,app1,U002,Suzuki,suzuki@example.com
2,app2,U001,Ito,
3,app2,U002,Tanaka,


# 
# 
# 
### 8-2 여러 개의 테이블을 가로로 정렬

- 카테고리 마스터 테이블

In [7]:
# List every row of mst_categories.
query("SELECT * FROM mst_categories;")

Unnamed: 0,category_id,name
0,1,dvd
1,2,cd
2,3,book


- 카테고리별 매출 테이블

In [8]:
# List every row of category_sales.
query("SELECT * FROM category_sales;")

Unnamed: 0,category_id,sales
0,1,850000
1,2,500000


- 카테고리별 상품 매출 순위 테이블

In [9]:
# List every row of product_sale_ranking.
query("SELECT * FROM product_sale_ranking;")

Unnamed: 0,category_id,rank,product_id,sales
0,1,1,D001,50000
1,1,2,D002,20000
2,1,3,D003,10000
3,2,1,C001,30000
4,2,2,C002,20000
5,2,3,C003,10000


# 
# 
### ```JOIN``` : 교집합 JOIN

In [16]:
# Inner-join the category table with category sales and the product ranking.
# Only categories that match in all three tables remain, and each category
# row is repeated once per matching ranking row because the join to r has no
# condition narrowing it to a single product.
query("""
SELECT m.category_id,
    m.name,
    s.sales
FROM mst_categories AS m
    JOIN
        category_sales AS s
        ON m.category_id = s.category_id
        JOIN 
            product_sale_ranking AS r
            ON m.category_id = r.category_id
""")

Unnamed: 0,category_id,name,sales
0,1,dvd,850000
1,1,dvd,850000
2,1,dvd,850000
3,2,cd,500000
4,2,cd,500000
5,2,cd,500000


# 
# 
### ```LEFT JOIN```

In [19]:
# LEFT JOIN keeps every row of mst_categories even when the other tables have
# no match (those columns come back as NULL). The extra `r.rank = 1` join
# condition limits the ranking side to the rank-1 product, so category rows
# are not multiplied.
query("""
SELECT m.category_id,
    m.name,
    s.sales,
    r. product_id AS top_sale_product
FROM mst_categories AS m
    LEFT JOIN
        category_sales AS s
        ON m.category_id = s.category_id
        LEFT JOIN
            product_sale_ranking AS r
            ON m.category_id = r.category_id
            AND r.rank = 1""")

Unnamed: 0,category_id,name,sales,top_sale_product
0,1,dvd,850000.0,D001
1,2,cd,500000.0,C001
2,3,book,,


# 
- 상관 서브쿼리로 여러 개의 테이블을 가로로 정렬

In [20]:
# Same shape of result as the LEFT JOIN version, built with correlated
# scalar subqueries instead of joins: one subquery fetches the category's
# sales, the other picks the product with the highest sales via
# ORDER BY ... DESC LIMIT 1.
# The unqualified `sales` in the second subquery's ORDER BY is the column of
# product_sale_ranking (the only table in that subquery's FROM clause).
query("""
SELECT m.category_id,
    m.name,
    (SELECT s.sales
    FROM category_sales AS s
    WHERE m.category_id = s.category_id) AS sales,
    (SELECT r.product_id
    FROM product_sale_ranking AS r 
    WHERE m.category_id = r.category_id
    ORDER BY sales DESC
    LIMIT 1) AS top_sale_product 
FROM mst_categories AS m""")

Unnamed: 0,category_id,name,sales,top_sale_product
0,1,dvd,850000.0,D001
1,2,cd,500000.0,C001
2,3,book,,


# 
# 
# 
### 8-3 조건 플래그를 0과 1로 표현

# 

### ```CASE WHEN ~ THEN ~ ELSE ~ END```
### ```SIGN()``` : 양수는 1 / 0은 0 / 음수는 -1 을 반환

# 

In [22]:
# List every row of mst_users_with_card_number.
query("SELECT * FROM mst_users_with_card_number;")

Unnamed: 0,user_id,card_number
0,U001,1234-xxxx-xxxx-xxxx
1,U002,
2,U003,5678-xxxx-xxxx-xxxx


In [23]:
# List every row of purchase_log.
query("SELECT * FROM purchase_log;")

Unnamed: 0,purchase_id,user_id,amount,stamp
0,10001,U001,200,2017-01-30 10:00:00
1,10002,U001,500,2017-02-10 10:00:00
2,10003,U001,200,2017-02-12 10:00:00
3,10004,U002,800,2017-03-01 10:00:00
4,10005,U002,400,2017-03-02 10:00:00


# 

In [24]:
# Per user: count matching purchase_log rows, flag card ownership with CASE
# (1 when card_number is not NULL, else 0), and flag purchase history by
# applying SIGN to the purchase count.
# COUNT(p.user_id) skips the NULLs that the LEFT JOIN produces for users
# without purchases, so those users get a count of 0.
query("""
SELECT m.user_id,
    m.card_number,
    COUNT(p.user_id) AS purchase_count,
    CASE WHEN m.card_number IS NOT NULL THEN 1 ELSE 0 END AS has_card,
    SIGN(COUNT(p.user_id)) AS has_purchased
FROM 
    mst_users_with_card_number AS m
    LEFT JOIN
        purchase_log AS p
        ON m.user_id = p.user_id
    GROUP BY m.user_id, m.card_number""")

Unnamed: 0,user_id,card_number,purchase_count,has_card,has_purchased
0,U002,,2,0,1.0
1,U003,5678-xxxx-xxxx-xxxx,0,1,0.0
2,U001,1234-xxxx-xxxx-xxxx,3,1,1.0


# 
# 
# 
### 8-4 계산한 테이블에 이름 붙이기

# 
### ```WITH <테이블 이름> AS (SELECT ~)```

# 

In [25]:
# List every row of product_sales.
query("SELECT * FROM product_sales;")

Unnamed: 0,category_name,product_id,sales
0,dvd,D001,50000
1,dvd,D002,20000
2,dvd,D003,10000
3,cd,C001,30000
4,cd,C002,20000
5,cd,C003,10000
6,book,B001,20000
7,book,B002,15000
8,book,B003,10000
9,book,B004,5000


# 
- **공통 테이블 식 (CTE : Common Table Expression)**

In [26]:
# Define a CTE named product_sale_ranking that numbers the products inside
# each category by descending sales (ROW_NUMBER over a per-category
# partition), then show the CTE's rows.
query(""" 
WITH
product_sale_ranking AS (
    SELECT
        category_name,
        product_id,
        sales,
        ROW_NUMBER() OVER(PARTITION BY category_name ORDER BY sales DESC) AS rank
            FROM 
                product_sales
) 
SELECT * FROM product_sale_ranking;""")

Unnamed: 0,category_name,product_id,sales,rank
0,book,B001,20000,1
1,book,B002,15000,2
2,book,B003,10000,3
3,book,B004,5000,4
4,cd,C001,30000,1
5,cd,C002,20000,2
6,cd,C003,10000,3
7,dvd,D001,50000,1
8,dvd,D002,20000,2
9,dvd,D003,10000,3


In [27]:
# Chain a second CTE, mst_rank, on top of product_sale_ranking: it holds the
# distinct rank values that occur in any category. No ORDER BY is given, so
# the row order of the result is not fixed.
query(""" 
WITH
product_sale_ranking AS (
    SELECT
        category_name,
        product_id,
        sales,
        ROW_NUMBER() OVER(PARTITION BY category_name ORDER BY sales DESC) AS rank
            FROM 
                product_sales
),
    mst_rank AS (
        SELECT DISTINCT rank
        FROM product_sale_ranking
    )
SELECT * FROM mst_rank;""")

Unnamed: 0,rank
0,4
1,2
2,3
3,1


In [29]:
# Lay the per-category rankings side by side, one row per rank.
# mst_rank supplies the list of ranks; product_sale_ranking is LEFT JOINed
# three times (once each for 'dvd', 'cd' and 'book') on the same rank, so a
# category with fewer products than the maximum rank yields NULLs in its
# columns instead of dropping the row.
query(""" 
WITH
product_sale_ranking AS (
    SELECT
        category_name,
        product_id,
        sales,
        ROW_NUMBER() OVER(PARTITION BY category_name ORDER BY sales DESC) AS rank
            FROM 
                product_sales
),
    mst_rank AS (
        SELECT DISTINCT rank
        FROM product_sale_ranking
    )
SELECT m.rank,
    r1.product_id AS dvd,
    r1.sales AS dvd_sales,
    r2.product_id AS cd,
    r2.sales AS cd_sales,
    r3.product_id AS book,
    r3.sales AS book_sales
FROM mst_rank AS m
    LEFT JOIN
        product_sale_ranking AS r1
        ON m.rank = r1.rank
        AND r1.category_name = 'dvd'
        LEFT JOIN
            product_sale_ranking AS r2
            ON m.rank = r2.rank
            AND r2.category_name = 'cd'
            LEFT JOIN
                product_sale_ranking AS r3
                ON m.rank = r3.rank
                AND r3.category_name = 'book' 
ORDER BY m.rank;""")

Unnamed: 0,rank,dvd,dvd_sales,cd,cd_sales,book,book_sales
0,1,D001,50000.0,C001,30000.0,B001,20000
1,2,D002,20000.0,C002,20000.0,B002,15000
2,3,D003,10000.0,C003,10000.0,B003,10000
3,4,,,,,B004,5000


# 
# 
# 
### 8-5 유사 테이블 생성

# 

### 임의의 레코드를 가진 유사 테이블 생성

# 

In [30]:
# Build a small ad-hoc table inside a CTE by chaining literal SELECTs with
# UNION ALL; each SELECT contributes one (device_id, device_name) row.
query(""" 
WITH mst_devices AS (
    SELECT 1 AS device_id, 'PC' AS device_name
    UNION ALL SELECT 2 AS device_id, 'SP' AS device_name
        UNION ALL SELECT 3 AS device_id, 'app' AS device_name
) 
SELECT * FROM mst_devices;""")

Unnamed: 0,device_id,device_name
0,1,PC
1,2,SP
2,3,app


# 
# 
- **```WITH <테이블이름>(테이블 열1, 테이블 열2 ...) VALUES```구문 사용**

In [34]:
# Build the same kind of ad-hoc table with a VALUES list; the column names
# are declared next to the CTE name instead of through per-row aliases.
query(""" 
WITH mst_devices(device_id, device_name) AS (
    VALUES
        (1, 'PC'),
        (2, 'SP'),
        (3, 'APP')
)
SELECT * FROM mst_devices;""")

Unnamed: 0,device_id,device_name
0,1,PC
1,2,SP
2,3,APP


# 
- **순번을 사용하여 테이블 작성**

### ```generate_series``` : 순번을 가진 유사 테이블 작성 **(PostgreSQL에서만 가능)**

In [37]:
# Build a one-column table of the sequential integers 1 through 5 with
# generate_series, wrapped in a CTE named series.
query(""" 
WITH 
series AS (
    SELECT generate_series(1, 5) AS idx
)

SELECT * FROM series;""")

# BigQuery equivalent:
# SELECT idx FROM unnest(generate_array(1, 5)) AS idx

Unnamed: 0,idx
0,1
1,2
2,3
3,4
4,5


# 
### ```repeat( 문자열, n )``` : 지정 문자열을 n번 반복