# 09. 시계열 기반 데이터 집계

In [1]:
from sqlalchemy import create_engine
import pandas as pd

In [2]:
# NOTE(review): the connection URL below embeds the database password in
# source; consider loading it from the environment or a config file.
db = create_engine(
    "postgresql+psycopg2://postgres:skt1faker@localhost:5432/PostgreSQL",
    echo=False,
)


def query(query_state, db=db):
    """Run a SQL statement and return its result as a pandas DataFrame.

    Args:
        query_state: SQL text handed to ``pandas.read_sql``.
        db: Connectable to execute against; defaults to the engine
            created above.

    Returns:
        The object returned by ``pd.read_sql(query_state, db)``.
    """
    result = pd.read_sql(query_state, db)
    return result

# 
# 
# 
### 9-1 날짜별 매출 집계

In [4]:
# Preview purchase_log: fetch every row, then keep only the leading rows
# that DataFrame.head() returns.
query("SELECT * FROM purchase_log;").head()

Unnamed: 0,dt,order_id,user_id,purchase_amount
0,2014-01-01,1,rhwpvvitou,13900
1,2014-01-01,2,hqnwoamzic,10616
2,2014-01-02,3,tzlmqryunr,21156
3,2014-01-02,4,wkmqqwbyai,14893
4,2014-01-03,5,ciecbedwbq,13054


In [5]:
# Daily sales summary: for each dt, the number of purchase rows, the total
# purchase_amount and the average purchase_amount, ordered by date.
query(""" 
SELECT dt, 
    COUNT(*) AS purchase_count,
    SUM(purchase_amount) AS total_amount,
    AVG(purchase_amount) AS avg_amount
FROM purchase_log 
GROUP BY dt 
ORDER BY dt;""")

Unnamed: 0,dt,purchase_count,total_amount,avg_amount
0,2014-01-01,2,24516,12258.0
1,2014-01-02,2,36049,18024.5
2,2014-01-03,3,53029,17676.333333
3,2014-01-04,3,29299,9766.333333
4,2014-01-05,3,48256,16085.333333
5,2014-01-06,3,29440,9813.333333
6,2014-01-07,3,47679,15893.0
7,2014-01-08,3,19760,6586.666667
8,2014-01-09,2,22944,11472.0
9,2014-01-10,2,27923,13961.5


# 
# 
# 
### 9-2 이동평균

### `AVG ~ OVER`

# 

- 7일 이동 평균

In [7]:
# Moving average of the daily totals over a 7-row window.
#   seven_day_avg        - average of the current row and up to 6 preceding
#                          rows, so the first rows average fewer than 7 days.
#   seven_day_avg_strict - same average, but NULL until the window actually
#                          contains 7 rows.
# NOTE(review): the frame is ROWS-based, so it counts grouped rows rather
# than calendar days; this presumably relies on purchase_log having at least
# one row for every date - confirm.
query(""" 
SELECT dt,
    SUM(purchase_amount) AS total_amount,
    
    AVG(SUM(purchase_amount)) OVER(
            ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
        ) AS seven_day_avg,
        
    CASE WHEN 
            COUNT(*) OVER(
                ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
            ) = 7 
        THEN 
            AVG(SUM(purchase_amount)) OVER(
                ORDER BY dt ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
            )
    END AS seven_day_avg_strict
FROM purchase_log
GROUP BY dt ORDER BY dt;""")

Unnamed: 0,dt,total_amount,seven_day_avg,seven_day_avg_strict
0,2014-01-01,24516,24516.0,
1,2014-01-02,36049,30282.5,
2,2014-01-03,53029,37864.666667,
3,2014-01-04,29299,35723.25,
4,2014-01-05,48256,38229.8,
5,2014-01-06,29440,36764.833333,
6,2014-01-07,47679,38324.0,38324.0
7,2014-01-08,19760,37644.571429,37644.571429
8,2014-01-09,22944,35772.428571,35772.428571
9,2014-01-10,27923,32185.857143,32185.857143


# 
# 
# 
### 9-3 당월 매출 집계

In [9]:
# Month-to-date sales: the daily total plus a running total (agg_amount)
# that restarts for each year-month, where the year-month is the first
# 7 characters of dt.
# NOTE(review): substr() on dt assumes dt is stored as 'YYYY-MM-DD' text.
query(""" 
SELECT dt,
    substr(dt, 1, 7) AS year_month,

    -- 합계
    SUM(purchase_amount) AS total_amount,

    -- 누적 합계
    SUM(SUM(purchase_amount)) OVER(
        PARTITION BY substr(dt, 1, 7) ORDER BY dt ROWS UNBOUNDED PRECEDING
    ) agg_amount
FROM purchase_log
GROUP BY dt ORDER BY dt;""")

Unnamed: 0,dt,year_month,total_amount,agg_amount
0,2014-01-01,2014-01,24516,24516.0
1,2014-01-02,2014-01,36049,60565.0
2,2014-01-03,2014-01,53029,113594.0
3,2014-01-04,2014-01,29299,142893.0
4,2014-01-05,2014-01,48256,191149.0
5,2014-01-06,2014-01,29440,220589.0
6,2014-01-07,2014-01,47679,268268.0
7,2014-01-08,2014-01,19760,288028.0
8,2014-01-09,2014-01,22944,310972.0
9,2014-01-10,2014-01,27923,338895.0


# 


In [13]:
# Inspect the daily_purchase CTE on its own: one row per dt with the
# year / month / date substrings of dt and the summed purchase_amount.
query(""" 
WITH daily_purchase AS (
    SELECT dt,
    substr(dt, 1, 4) AS year,
    substr(dt, 6, 2) AS month,
    substr(dt, 9, 2) AS date,
    SUM(purchase_amount) AS purchase_amount

    FROM purchase_log
    GROUP BY dt
)
SELECT * FROM daily_purchase ORDER BY dt;""")

Unnamed: 0,dt,year,month,date,purchase_amount
0,2014-01-01,2014,1,1,24516
1,2014-01-02,2014,1,2,36049
2,2014-01-03,2014,1,3,53029
3,2014-01-04,2014,1,4,29299
4,2014-01-05,2014,1,5,48256
5,2014-01-06,2014,1,6,29440
6,2014-01-07,2014,1,7,47679
7,2014-01-08,2014,1,8,19760
8,2014-01-09,2014,1,9,22944
9,2014-01-10,2014,1,10,27923


# 

In [15]:
# Month-to-date sales again, this time on top of the daily_purchase CTE:
# the running total (agg_amount) is partitioned by the CTE's year and month
# columns, and year_month is rebuilt with concat() for display.
query(""" 
WITH daily_purchase AS (
    SELECT dt,
    substr(dt, 1, 4) AS year,
    substr(dt, 6, 2) AS month,
    substr(dt, 9, 2) AS date,
    SUM(purchase_amount) AS purchase_amount

    FROM purchase_log
    GROUP BY dt
)
SELECT dt,
    concat(year, '-', month) AS year_month,
    purchase_amount, 
    SUM(purchase_amount) OVER(
        PARTITION BY year, month ORDER BY dt ROWS UNBOUNDED PRECEDING
    ) AS agg_amount
FROM daily_purchase ORDER BY dt;""")

Unnamed: 0,dt,year_month,purchase_amount,agg_amount
0,2014-01-01,2014-01,24516,24516.0
1,2014-01-02,2014-01,36049,60565.0
2,2014-01-03,2014-01,53029,113594.0
3,2014-01-04,2014-01,29299,142893.0
4,2014-01-05,2014-01,48256,191149.0
5,2014-01-06,2014-01,29440,220589.0
6,2014-01-07,2014-01,47679,268268.0
7,2014-01-08,2014-01,19760,288028.0
8,2014-01-09,2014-01,22944,310972.0
9,2014-01-10,2014-01,27923,338895.0


# 
# 
# 
### 9-4 월별 매출 작대비

In [20]:
# Year-over-year comparison by calendar month: pivot the daily totals into
# 2014 and 2015 columns and express the 2015 total as a percentage of the
# 2014 total.
# NULLIF turns a zero 2014 total into NULL, so `rate` comes back NULL for
# that month instead of the whole query failing with a division-by-zero
# error. Months with no 2014 rows already yielded NULL and still do.
query(""" 
WITH daily_purchase AS (
    SELECT dt,
    substr(dt, 1, 4) AS year,
    substr(dt, 6, 2) AS month,
    substr(dt, 9, 2) AS date,
    SUM(purchase_amount) AS purchase_amount
    FROM purchase_log GROUP BY dt
)
SELECT month,
    SUM(CASE year WHEN '2014' THEN purchase_amount END) AS amount_2014,
    SUM(CASE year WHEN '2015' THEN purchase_amount END) AS amount_2015,
    100.0 * SUM(CASE year WHEN '2015' THEN purchase_amount END)
        / NULLIF(SUM(CASE year WHEN '2014' THEN purchase_amount END), 0) AS rate
FROM daily_purchase GROUP BY month ORDER BY month;""")

Unnamed: 0,month,amount_2014,amount_2015,rate
0,1,13900.0,22111.0,159.071942
1,2,28469.0,11965.0,42.028171
2,3,18899.0,20215.0,106.963331
3,4,12394.0,11792.0,95.142811
4,5,2282.0,18087.0,792.594216
5,6,10180.0,18859.0,185.255403
6,7,4027.0,14919.0,370.474298
7,8,6243.0,12906.0,206.727535
8,9,3832.0,5696.0,148.643006
9,10,6716.0,13398.0,199.493746


# 
# 
# 
### 9-5 Z차트
- **월차매출 : 매출 합계를 월별로 집계**

- **매출누계 : 해당 월의 매출에 이전 월까지의 매출 누계를 합한 값**
    - 월차매출이 일정할 경우 매출누계는 직선형태
        - 가로축에서 오른쪽으로 갈수록 그래프의 기울기가 급해지면, 최근 매출이 상승
        - 가로축에서 오른쪽으로 갈수록 그래프의 기울기가 완만해지면, 최근 매출이 감소

- **이동년계 : 해당 월의 매출에 과거 11개월의 매출을 합한 값**
    - 작년과 올해의 매출이 일정하면 이동년계는 직선
    - 오른쪽 위로 올라가면 매출이 오르는 경향
    - 오른쪽 아래로 내려가면 매출이 감소하는 경향

# 


In [26]:
# Z-chart inputs for 2015, built in three CTE steps:
#   daily_purchase   - per-day totals with year / month / date substrings.
#   monthly_purchase - per (year, month) totals.
#   calc_index       - agg_amount: running sum of the 2015 months only;
#                      year_avg_amount: sum of the current row and up to 11
#                      preceding rows (despite the name it is a rolling SUM,
#                      not an average).
# The final SELECT keeps only the 2015 rows; the 2014 rows are still needed
# inside calc_index so the rolling window can look back across the year
# boundary.
# NOTE(review): the 11-PRECEDING frame counts rows, so it presumably relies
# on monthly_purchase having exactly one row per month with no gaps - confirm.
query(""" 
WITH daily_purchase AS (
    SELECT dt,
    substr(dt, 1, 4) AS year,
    substr(dt, 6, 2) AS month,
    substr(dt, 9, 2) AS date,
    SUM(purchase_amount) AS purchase_amount
    FROM purchase_log GROUP BY dt
),
    -- 월별 매출
    monthly_purchase AS (
        SELECT year, month,
            SUM(purchase_amount) AS amount
        FROM daily_purchase
        GROUP BY year, month
    ),

    calc_index AS (
        SELECT year, month, amount,

        -- 매출누계
            SUM(CASE WHEN year = '2015' THEN amount END) OVER (
                ORDER BY year, month ROWS UNBOUNDED PRECEDING
            ) AS agg_amount,
        
        -- 이동년계
            SUM(amount) OVER(
                ORDER BY year, month ROWS BETWEEN 11 PRECEDING AND CURRENT ROW
            ) AS year_avg_amount
        FROM monthly_purchase 
        ORDER BY year, month
    )
    
    SELECT 
        concat(year, '-', month) AS year_month,
        amount, 
        agg_amount,
        year_avg_amount
        FROM calc_index
        WHERE year = '2015' 
        ORDER BY year_month;""")

Unnamed: 0,year_month,amount,agg_amount,year_avg_amount
0,2015-01,22111.0,22111.0,160796.0
1,2015-02,11965.0,34076.0,144292.0
2,2015-03,20215.0,54291.0,145608.0
3,2015-04,11792.0,66083.0,145006.0
4,2015-05,18087.0,84170.0,160811.0
5,2015-06,18859.0,103029.0,169490.0
6,2015-07,14919.0,117948.0,180382.0
7,2015-08,12906.0,130854.0,187045.0
8,2015-09,5696.0,136550.0,188909.0
9,2015-10,13398.0,149948.0,195591.0


# 
# 
# 
### 9-6 매출 주요 지표 종합 (월 매출 · 누계 · 작대비)

In [36]:
# Monthly indicators in one result set: the average daily total
# (avg_amount), the monthly total (monthly), the running total within each
# year (agg_amount), the value 12 rows earlier (last_year) and the
# year-over-year percentage (rate).
# NULLIF turns a zero last-year total into NULL, so `rate` comes back NULL
# instead of the whole query failing with a division-by-zero error. Rows
# with no row 12 positions earlier already yielded NULL and still do.
# NOTE(review): LAG(monthly, 12) steps back 12 rows, so it presumably relies
# on monthly_purchase having exactly one row per month with no gaps - confirm.
query(""" 
WITH daily_purchase AS (
    SELECT dt,
    substr(dt, 1, 4) AS year,
    substr(dt, 6, 2) AS month,
    substr(dt, 9, 2) AS date,
    SUM(purchase_amount) AS purchase_amount
    FROM purchase_log GROUP BY dt
),
    monthly_purchase AS (
        SELECT year, month, 
            AVG(purchase_amount) AS avg_amount,
            SUM(purchase_amount) AS monthly
        FROM daily_purchase
        GROUP BY year, month
    )
SELECT 
    concat(year, '-', month) AS year_month,
    avg_amount,
    monthly,
    SUM(monthly) OVER(
        PARTITION BY year ORDER BY month ROWS UNBOUNDED PRECEDING
    ) AS agg_amount,

    -- 12개월 전의 매출
    LAG(monthly, 12) OVER(
        ORDER BY year, month
    ) AS last_year,

    -- 작대비
    (monthly / NULLIF(LAG(monthly, 12) OVER(ORDER BY year, month), 0)) * 100.0 AS rate
FROM monthly_purchase 
ORDER BY year_month; """)

Unnamed: 0,year_month,avg_amount,monthly,agg_amount,last_year,rate
0,2014-01,13900.0,13900.0,13900.0,,
1,2014-02,28469.0,28469.0,42369.0,,
2,2014-03,18899.0,18899.0,61268.0,,
3,2014-04,12394.0,12394.0,73662.0,,
4,2014-05,2282.0,2282.0,75944.0,,
5,2014-06,10180.0,10180.0,86124.0,,
6,2014-07,4027.0,4027.0,90151.0,,
7,2014-08,6243.0,6243.0,96394.0,,
8,2014-09,3832.0,3832.0,100226.0,,
9,2014-10,6716.0,6716.0,106942.0,,
