In [1]:
!pip install prophet --user



In [2]:
from prophet import Prophet

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Prophetを使う
## 単純な時系列の予測をするのではなく、時系列成分を学習データのカラムとして扱う

$y = g(t) + s(t) + h(t) + e(t)$

$g(t)$：トレンド

$s(t)$：周期性

$h(t)$：イベント

$e(t)$：ノイズ


In [4]:
data = pd.read_csv("data_raw_Robyn.csv", parse_dates = ["DATE"])
data.columns = [col.lower() if col in ["DATE"] else col for col in data.columns]
data

Unnamed: 0,date,revenue,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter
0,2015-11-23,2.754372e+06,167687.6,0,95463.666667,7.290385e+07,0.000000,0,8125009,228213.987444,na,19401.653846
1,2015-11-30,2.584277e+06,214600.9,0,0.000000,1.658110e+07,29511.715457,31000,7901549,34258.573511,na,14791.000000
2,2015-12-07,2.547387e+06,0.0,248022,3404.000000,4.995477e+07,36132.358958,28400,8300197,127691.261335,na,14544.000000
3,2015-12-14,2.875220e+06,625877.3,0,132600.000000,3.164930e+07,36804.210958,31900,8122883,84014.720306,na,2800.000000
4,2015-12-21,2.215953e+06,0.0,520005,0.000000,8.802269e+06,28401.744069,27100,7105985,20687.478156,na,15478.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
203,2019-10-14,2.456240e+06,0.0,60433,153723.666667,0.000000e+00,152840.323412,112100,7612762,0.000000,na,28157.000000
204,2019-10-21,2.182825e+06,154917.6,0,0.000000,5.688196e+07,103680.047821,103700,6701667,133624.575524,na,10644.000000
205,2019-10-28,2.377707e+06,21982.5,14094,17476.000000,0.000000e+00,138387.704138,114700,7688920,0.000000,na,9597.000000
206,2019-11-04,2.732825e+06,22453.0,0,24051.333333,0.000000e+00,151707.990462,134100,8815710,0.000000,na,90189.000000


In [5]:
holidays = pd.read_csv("holidays_daily.csv", parse_dates = ["ds"])
holidays.head()

Unnamed: 0,ds,holiday,country,year
0,1995-01-01,New Year's Day,AE,1995
1,1995-03-01,Eid al-Fitr,AE,1995
2,1995-03-02,Eid al-Fitr,AE,1995
3,1995-03-03,Eid al-Fitr,AE,1995
4,1995-03-04,Eid al-Fitr,AE,1995


In [6]:
holidays["begin_week"] = holidays["ds"].dt.to_period('W-SUN').dt.start_time #はじめの月曜日を取得
holidays.head()

Unnamed: 0,ds,holiday,country,year,begin_week
0,1995-01-01,New Year's Day,AE,1995,1994-12-26
1,1995-03-01,Eid al-Fitr,AE,1995,1995-02-27
2,1995-03-02,Eid al-Fitr,AE,1995,1995-02-27
3,1995-03-03,Eid al-Fitr,AE,1995,1995-02-27
4,1995-03-04,Eid al-Fitr,AE,1995,1995-02-27


In [7]:
holidays.groupby(["begin_week", "country", "year"], as_index = False).agg({"holiday":"__________".join})

Unnamed: 0,begin_week,country,year,holiday
0,1994-12-26,AE,1995,New Year's Day
1,1994-12-26,AR,1995,Ano Nuevo [New Year's Day]
2,1994-12-26,AT,1995,Neujahr
3,1994-12-26,AU,1995,New Year's Day
4,1994-12-26,AW,1995,Ana Nobo [New Year's Day]
...,...,...,...,...
35854,2044-12-26,SK,2044,Druhy sviatok vianocny
35855,2044-12-26,TH,2044,New Year's Eve
35856,2044-12-26,UK,2044,Boxing Day__________Christmas Day (Observed)
35857,2044-12-26,US,2044,Christmas Day (Observed)


In [8]:
#休日を一つにし、Robynと同じDEという国名のみにする
holidays_weekly = holidays.groupby(["begin_week", "country", "year"], as_index = False).agg({'holiday':'#'.join, 'country': 'first', 'year': 'first'}).rename(columns = {'begin_week': 'ds'})
holidays_weekly_de = holidays_weekly.query("(country == 'DE')").copy()
holidays_weekly_de

Unnamed: 0,ds,holiday,country,year
12,1994-12-26,Neujahr,DE,1995
183,1995-04-10,Karfreitag,DE,1995
222,1995-04-17,Ostermontag,DE,1995
270,1995-05-01,Erster Mai,DE,1995
346,1995-05-22,Christi Himmelfahrt,DE,1995
...,...,...,...,...
35445,2044-05-23,Christi Himmelfahrt,DE,2044
35481,2044-06-06,Pfingstmontag,DE,2044
35646,2044-10-03,Tag der Deutschen Einheit,DE,2044
35781,2044-12-19,Erster Weihnachtstag,DE,2044


In [9]:
prophet_data = data.rename(columns = {'revenue': 'y', 'date': 'ds'})
#ダミー変数化して、データに挿入
prophet_data = pd.concat([prophet_data, pd.get_dummies(prophet_data["events"], drop_first = True, prefix = "events")], axis = 1)
prophet_data.head()

Unnamed: 0,ds,y,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter,events_event2,events_na
0,2015-11-23,2754372.0,167687.6,0,95463.666667,72903850.0,0.0,0,8125009,228213.987444,na,19401.653846,False,True
1,2015-11-30,2584277.0,214600.9,0,0.0,16581100.0,29511.715457,31000,7901549,34258.573511,na,14791.0,False,True
2,2015-12-07,2547387.0,0.0,248022,3404.0,49954770.0,36132.358958,28400,8300197,127691.261335,na,14544.0,False,True
3,2015-12-14,2875220.0,625877.3,0,132600.0,31649300.0,36804.210958,31900,8122883,84014.720306,na,2800.0,False,True
4,2015-12-21,2215953.0,0.0,520005,0.0,8802269.0,28401.744069,27100,7105985,20687.478156,na,15478.0,False,True


In [10]:
prophet_data.plot(kind="line", x="ds", y="y")

<Axes: xlabel='ds'>

Error in callback <function _draw_all_if_interactive at 0x13c1e4040> (for post_execute), with arguments args (),kwargs {}:


ValueError: object __array__ method not producing an array

ValueError: object __array__ method not producing an array

<Figure size 640x480 with 1 Axes>

# １つの外部変数を入れて計算

In [11]:
prophet = Prophet(yearly_seasonality=True, weekly_seasonality=True, holidays=holidays_weekly_de)
prophet.add_regressor(name = "events_event2")


<prophet.forecaster.Prophet at 0x169b6eb50>

In [12]:
prophet.fit(prophet_data[["ds", "y", "events_event2"]])
prophet_predict = prophet.predict(prophet_data[["ds", "y", "events_event2"]])

21:56:17 - cmdstanpy - INFO - Chain [1] start processing
21:56:18 - cmdstanpy - INFO - Chain [1] done processing


In [13]:
plot = prophet.plot_components(prophet_predict, figsize = (20, 10))

  fcst_t = fcst['ds'].dt.to_pydatetime()
  artists += ax.plot(df_y['ds'].dt.to_pydatetime(), seas[name], ls='-',
  df_y['ds'].dt.to_pydatetime(), seas[name + '_lower'],
  df_y['ds'].dt.to_pydatetime(), seas[name], ls='-', c='#0072B2')
  df_y['ds'].dt.to_pydatetime(), seas[name + '_lower'],
  fcst_t = fcst['ds'].dt.to_pydatetime()


Error in callback <function _draw_all_if_interactive at 0x13c1e4040> (for post_execute), with arguments args (),kwargs {}:


ValueError: object __array__ method not producing an array

ValueError: object __array__ method not producing an array

<Figure size 2000x1000 with 5 Axes>

# ２つの外部変数を入れて計算

In [14]:
prophet = Prophet(yearly_seasonality=True, weekly_seasonality=True, holidays=holidays_weekly_de)
prophet.add_regressor(name = "events_event2")
prophet.add_regressor(name = "events_na")

<prophet.forecaster.Prophet at 0x16f220c40>

In [15]:
prophet.fit(prophet_data[["ds", "y", "events_event2", "events_na"]])
prophet_predict = prophet.predict(prophet_data[["ds", "y", "events_event2", "events_na"]])

21:56:19 - cmdstanpy - INFO - Chain [1] start processing
21:56:19 - cmdstanpy - INFO - Chain [1] done processing


In [16]:
plot = prophet.plot_components(prophet_predict, figsize = (20, 10))

  fcst_t = fcst['ds'].dt.to_pydatetime()
  artists += ax.plot(df_y['ds'].dt.to_pydatetime(), seas[name], ls='-',
  df_y['ds'].dt.to_pydatetime(), seas[name + '_lower'],
  df_y['ds'].dt.to_pydatetime(), seas[name], ls='-', c='#0072B2')
  df_y['ds'].dt.to_pydatetime(), seas[name + '_lower'],
  fcst_t = fcst['ds'].dt.to_pydatetime()


Error in callback <function _draw_all_if_interactive at 0x13c1e4040> (for post_execute), with arguments args (),kwargs {}:


ValueError: object __array__ method not producing an array

ValueError: object __array__ method not producing an array

<Figure size 2000x1000 with 5 Axes>

In [17]:
prophet_predict

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,Christi Himmelfahrt,Christi Himmelfahrt_lower,Christi Himmelfahrt_upper,Christi Himmelfahrt#Erster Mai,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2015-11-23,2.809293e+06,2.572395e+06,3.130213e+06,2.809293e+06,2.809293e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,1.125405e+06,1.125405e+06,1.125405e+06,0.0,0.0,0.0,2.848821e+06
1,2015-11-30,2.810359e+06,2.355340e+06,2.896327e+06,2.810359e+06,2.810359e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,8.970257e+05,8.970257e+05,8.970257e+05,0.0,0.0,0.0,2.621507e+06
2,2015-12-07,2.811425e+06,2.151335e+06,2.685756e+06,2.811425e+06,2.811425e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,7.128434e+05,7.128434e+05,7.128434e+05,0.0,0.0,0.0,2.438391e+06
3,2015-12-14,2.812491e+06,2.145148e+06,2.663472e+06,2.812491e+06,2.812491e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,6.955211e+05,6.955211e+05,6.955211e+05,0.0,0.0,0.0,2.422135e+06
4,2015-12-21,2.813557e+06,2.738939e+06,3.292074e+06,2.813557e+06,2.813557e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,7.950228e+05,7.950228e+05,7.950228e+05,0.0,0.0,0.0,3.010421e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,2019-10-14,2.817754e+06,2.191155e+06,2.751281e+06,2.817754e+06,2.817754e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,7.411661e+05,7.411661e+05,7.411661e+05,0.0,0.0,0.0,2.473043e+06
204,2019-10-21,2.816925e+06,2.256816e+06,2.804528e+06,2.816925e+06,2.816925e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,7.950647e+05,7.950647e+05,7.950647e+05,0.0,0.0,0.0,2.526113e+06
205,2019-10-28,2.816095e+06,2.306658e+06,2.871610e+06,2.816095e+06,2.816095e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,8.684650e+05,8.684650e+05,8.684650e+05,0.0,0.0,0.0,2.598684e+06
206,2019-11-04,2.815266e+06,2.465875e+06,3.022384e+06,2.815266e+06,2.815266e+06,0.0,0.0,0.0,0.0,...,178004.759904,178004.759904,178004.759904,1.015303e+06,1.015303e+06,1.015303e+06,0.0,0.0,0.0,2.744693e+06


In [18]:
prophet.holidays

Unnamed: 0,ds,holiday,country,year
12,1994-12-26,Neujahr,DE,1995
183,1995-04-10,Karfreitag,DE,1995
222,1995-04-17,Ostermontag,DE,1995
270,1995-05-01,Erster Mai,DE,1995
346,1995-05-22,Christi Himmelfahrt,DE,1995
...,...,...,...,...
35445,2044-05-23,Christi Himmelfahrt,DE,2044
35481,2044-06-06,Pfingstmontag,DE,2044
35646,2044-10-03,Tag der Deutschen Einheit,DE,2044
35781,2044-12-19,Erster Weihnachtstag,DE,2044


In [19]:
holname = prophet.holidays["holiday"]

In [20]:
prophet_predict[holname.to_list()]

Unnamed: 0,Neujahr,Karfreitag,Ostermontag,Erster Mai,Christi Himmelfahrt,Pfingstmontag,Tag der Deutschen Einheit,Erster Weihnachtstag#Zweiter Weihnachtstag,Neujahr.1,Karfreitag.1,...,Erster Weihnachtstag#Zweiter Weihnachtstag.1,Neujahr.2,Karfreitag.2,Ostermontag.1,Erster Mai.1,Christi Himmelfahrt.1,Pfingstmontag.1,Tag der Deutschen Einheit.1,Erster Weihnachtstag,Zweiter Weihnachtstag
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,487717.90848,0.0,0.0,...,487717.90848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
205,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,...,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
prophet_predict[holname.to_list()].max()[:10]

Neujahr                                       202218.598656
Karfreitag                                     27755.146454
Ostermontag                                   986309.801280
Erster Mai                                    355490.488800
Christi Himmelfahrt                                0.000000
Pfingstmontag                                 316331.514432
Tag der Deutschen Einheit                      35340.487315
Erster Weihnachtstag#Zweiter Weihnachtstag    487717.908480
Neujahr                                       202218.598656
Karfreitag                                     27755.146454
dtype: float64

In [22]:
prophet_predict[holname.to_list()].max()[:10].plot.barh()

<Axes: >

Error in callback <function _draw_all_if_interactive at 0x13c1e4040> (for post_execute), with arguments args (),kwargs {}:


ValueError: object __array__ method not producing an array

ValueError: object __array__ method not producing an array

<Figure size 640x480 with 1 Axes>

# MMMの学習データとして織り込む

In [23]:
prophet_columns = [col for col in prophet_predict.columns if (col.endswith("upper") == False) & (col.endswith("lower") == False)]
events_numeric = prophet_predict[prophet_columns].filter(like = "events_").sum(axis = 1)


final_data = data.copy()
final_data["trend"] = prophet_predict["trend"]
final_data["season"] = prophet_predict["yearly"]
final_data["holiday"] = prophet_predict["holidays"]
final_data["events"] = (events_numeric - np.min(events_numeric)).values

In [24]:
final_data.head()

Unnamed: 0,date,revenue,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter,trend,season,holiday
0,2015-11-23,2754372.0,167687.6,0,95463.666667,72903850.0,0.0,0,8125009,228213.987444,0.0,19401.653846,2809293.0,1125405.0,0.0
1,2015-11-30,2584277.0,214600.9,0,0.0,16581100.0,29511.715457,31000,7901549,34258.573511,0.0,14791.0,2810359.0,897025.7,0.0
2,2015-12-07,2547387.0,0.0,248022,3404.0,49954770.0,36132.358958,28400,8300197,127691.261335,0.0,14544.0,2811425.0,712843.4,0.0
3,2015-12-14,2875220.0,625877.3,0,132600.0,31649300.0,36804.210958,31900,8122883,84014.720306,0.0,2800.0,2812491.0,695521.1,0.0
4,2015-12-21,2215953.0,0.0,520005,0.0,8802269.0,28401.744069,27100,7105985,20687.478156,0.0,15478.0,2813557.0,795022.8,487717.90848
