In [73]:
# 仮想環境は mmm_down_grade を使用
from prophet import Prophet

In [105]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import japanize_matplotlib 
%matplotlib inline

In [75]:
data = pd.read_csv("data_raw_Robyn.csv", parse_dates=["DATE"])    # 裏で"DATE"カラムが日付データであることを宣言している

In [76]:
data.head()

Unnamed: 0,DATE,revenue,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter
0,2015-11-23,2754372.0,167687.6,0,95463.666667,72903850.0,0.0,0,8125009,228213.987444,na,19401.653846
1,2015-11-30,2584277.0,214600.9,0,0.0,16581100.0,29511.715457,31000,7901549,34258.573511,na,14791.0
2,2015-12-07,2547387.0,0.0,248022,3404.0,49954770.0,36132.358958,28400,8300197,127691.261335,na,14544.0
3,2015-12-14,2875220.0,625877.3,0,132600.0,31649300.0,36804.210958,31900,8122883,84014.720306,na,2800.0
4,2015-12-21,2215953.0,0.0,520005,0.0,8802269.0,28401.744069,27100,7105985,20687.478156,na,15478.0


In [77]:
# リスト内包表記
[col.lower() if col in ["DATE"] else col for col in data.columns]    # data.columns を順に取得して返す際に、それが "DATE" カラムなら小文字にする。

['date',
 'revenue',
 'tv_S',
 'ooh_S',
 'print_S',
 'facebook_I',
 'search_clicks_P',
 'search_S',
 'competitor_sales_B',
 'facebook_S',
 'events',
 'newsletter']

In [78]:
data.columns = [col.lower() if col in ["DATE"] else col for col in data.columns]
data

Unnamed: 0,date,revenue,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter
0,2015-11-23,2.754372e+06,167687.6,0,95463.666667,7.290385e+07,0.000000,0,8125009,228213.987444,na,19401.653846
1,2015-11-30,2.584277e+06,214600.9,0,0.000000,1.658110e+07,29511.715457,31000,7901549,34258.573511,na,14791.000000
2,2015-12-07,2.547387e+06,0.0,248022,3404.000000,4.995477e+07,36132.358958,28400,8300197,127691.261335,na,14544.000000
3,2015-12-14,2.875220e+06,625877.3,0,132600.000000,3.164930e+07,36804.210958,31900,8122883,84014.720306,na,2800.000000
4,2015-12-21,2.215953e+06,0.0,520005,0.000000,8.802269e+06,28401.744069,27100,7105985,20687.478156,na,15478.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
203,2019-10-14,2.456240e+06,0.0,60433,153723.666667,0.000000e+00,152840.323412,112100,7612762,0.000000,na,28157.000000
204,2019-10-21,2.182825e+06,154917.6,0,0.000000,5.688196e+07,103680.047821,103700,6701667,133624.575524,na,10644.000000
205,2019-10-28,2.377707e+06,21982.5,14094,17476.000000,0.000000e+00,138387.704138,114700,7688920,0.000000,na,9597.000000
206,2019-11-04,2.732825e+06,22453.0,0,24051.333333,0.000000e+00,151707.990462,134100,8815710,0.000000,na,90189.000000


In [79]:
holidays = pd.read_csv("holidays_daily.csv", parse_dates=["ds"])
holidays.head()

Unnamed: 0,ds,holiday,country,year
0,1995-01-01,New Year's Day,AE,1995
1,1995-03-01,Eid al-Fitr,AE,1995
2,1995-03-02,Eid al-Fitr,AE,1995
3,1995-03-03,Eid al-Fitr,AE,1995
4,1995-03-04,Eid al-Fitr,AE,1995


In [80]:
holidays["begin_week"] = holidays["ds"].dt.to_period('W-SUN').dt.start_time    # 初めの月曜日を取得
holidays_weekly = holidays.groupby(["begin_week", "country", "year"], as_index=False).agg({'holiday':'#'.join, 'country': 'first', 'year': 'first'}).rename(columns = {'begin_week': 'ds'})
holidays_weekly_de = holidays_weekly.query("(country == 'DE')").copy()
holidays_weekly_de

Unnamed: 0,ds,holiday,country,year
12,1994-12-26,Neujahr,DE,1995
183,1995-04-10,Karfreitag,DE,1995
222,1995-04-17,Ostermontag,DE,1995
270,1995-05-01,Erster Mai,DE,1995
346,1995-05-22,Christi Himmelfahrt,DE,1995
...,...,...,...,...
35445,2044-05-23,Christi Himmelfahrt,DE,2044
35481,2044-06-06,Pfingstmontag,DE,2044
35646,2044-10-03,Tag der Deutschen Einheit,DE,2044
35781,2044-12-19,Erster Weihnachtstag,DE,2044


In [81]:
data["events"]

0      na
1      na
2      na
3      na
4      na
       ..
203    na
204    na
205    na
206    na
207    na
Name: events, Length: 208, dtype: object

In [82]:
# カラムが１行情報を持たなくなる
pd.get_dummies(data["events"], drop_first=True, prefix="events")

Unnamed: 0,events_event2,events_na
0,False,True
1,False,True
2,False,True
3,False,True
4,False,True
...,...,...
203,False,True
204,False,True
205,False,True
206,False,True


In [83]:
prophet_data = data.rename(columns={"revenue":"y", "date":"ds"})
prophet_data

Unnamed: 0,ds,y,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter
0,2015-11-23,2.754372e+06,167687.6,0,95463.666667,7.290385e+07,0.000000,0,8125009,228213.987444,na,19401.653846
1,2015-11-30,2.584277e+06,214600.9,0,0.000000,1.658110e+07,29511.715457,31000,7901549,34258.573511,na,14791.000000
2,2015-12-07,2.547387e+06,0.0,248022,3404.000000,4.995477e+07,36132.358958,28400,8300197,127691.261335,na,14544.000000
3,2015-12-14,2.875220e+06,625877.3,0,132600.000000,3.164930e+07,36804.210958,31900,8122883,84014.720306,na,2800.000000
4,2015-12-21,2.215953e+06,0.0,520005,0.000000,8.802269e+06,28401.744069,27100,7105985,20687.478156,na,15478.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
203,2019-10-14,2.456240e+06,0.0,60433,153723.666667,0.000000e+00,152840.323412,112100,7612762,0.000000,na,28157.000000
204,2019-10-21,2.182825e+06,154917.6,0,0.000000,5.688196e+07,103680.047821,103700,6701667,133624.575524,na,10644.000000
205,2019-10-28,2.377707e+06,21982.5,14094,17476.000000,0.000000e+00,138387.704138,114700,7688920,0.000000,na,9597.000000
206,2019-11-04,2.732825e+06,22453.0,0,24051.333333,0.000000e+00,151707.990462,134100,8815710,0.000000,na,90189.000000


In [84]:
pd.concat([prophet_data, pd.get_dummies(data["events"], drop_first=True, prefix="events")], axis=1)    # axis=1で横に合体

Unnamed: 0,ds,y,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter,events_event2,events_na
0,2015-11-23,2.754372e+06,167687.6,0,95463.666667,7.290385e+07,0.000000,0,8125009,228213.987444,na,19401.653846,False,True
1,2015-11-30,2.584277e+06,214600.9,0,0.000000,1.658110e+07,29511.715457,31000,7901549,34258.573511,na,14791.000000,False,True
2,2015-12-07,2.547387e+06,0.0,248022,3404.000000,4.995477e+07,36132.358958,28400,8300197,127691.261335,na,14544.000000,False,True
3,2015-12-14,2.875220e+06,625877.3,0,132600.000000,3.164930e+07,36804.210958,31900,8122883,84014.720306,na,2800.000000,False,True
4,2015-12-21,2.215953e+06,0.0,520005,0.000000,8.802269e+06,28401.744069,27100,7105985,20687.478156,na,15478.000000,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,2019-10-14,2.456240e+06,0.0,60433,153723.666667,0.000000e+00,152840.323412,112100,7612762,0.000000,na,28157.000000,False,True
204,2019-10-21,2.182825e+06,154917.6,0,0.000000,5.688196e+07,103680.047821,103700,6701667,133624.575524,na,10644.000000,False,True
205,2019-10-28,2.377707e+06,21982.5,14094,17476.000000,0.000000e+00,138387.704138,114700,7688920,0.000000,na,9597.000000,False,True
206,2019-11-04,2.732825e+06,22453.0,0,24051.333333,0.000000e+00,151707.990462,134100,8815710,0.000000,na,90189.000000,False,True


In [85]:
prophet_data = pd.concat([prophet_data, pd.get_dummies(prophet_data["events"], drop_first = True, prefix = "events")], axis = 1)    # axis=1で横に合体

In [91]:
prophet_data.values

array([[Timestamp('2015-11-23 00:00:00'), 2754371.66666667, 167687.6,
        ..., 19401.6538461538, False, True],
       [Timestamp('2015-11-30 00:00:00'), 2584276.66666667, 214600.9,
        ..., 14791.0, False, True],
       [Timestamp('2015-12-07 00:00:00'), 2547386.66666667, 0.0, ...,
        14544.0, False, True],
       ...,
       [Timestamp('2019-10-28 00:00:00'), 2377706.66666667, 21982.5, ...,
        9597.0, False, True],
       [Timestamp('2019-11-04 00:00:00'), 2732825.0, 22453.0, ...,
        90189.0, False, True],
       [Timestamp('2019-11-11 00:00:00'), 2767788.33333333, 0.0, ...,
        19401.6538461538, False, True]], dtype=object)

In [106]:
prophet_data.plot(kind="line", x="ds", y="y")

<Axes: xlabel='ds'>

Error in callback <function _draw_all_if_interactive at 0x146a9d040> (for post_execute), with arguments args (),kwargs {}:


ValueError: object __array__ method not producing an array

ValueError: object __array__ method not producing an array

<Figure size 640x480 with 1 Axes>

In [108]:
y = range(0,3)
x = range(3,4)

plt.plot(x , y )

ValueError: x and y must have same first dimension, but have shapes (1,) and (3,)

Error in callback <function _draw_all_if_interactive at 0x146a9d040> (for post_execute), with arguments args (),kwargs {}:


ValueError: object __array__ method not producing an array

ValueError: object __array__ method not producing an array

<Figure size 640x480 with 1 Axes>

In [103]:
prophet_data

Unnamed: 0,ds,y,tv_S,ooh_S,print_S,facebook_I,search_clicks_P,search_S,competitor_sales_B,facebook_S,events,newsletter,events_event2,events_na
0,2015-11-23,2.754372e+06,167687.6,0,95463.666667,7.290385e+07,0.000000,0,8125009,228213.987444,na,19401.653846,False,True
1,2015-11-30,2.584277e+06,214600.9,0,0.000000,1.658110e+07,29511.715457,31000,7901549,34258.573511,na,14791.000000,False,True
2,2015-12-07,2.547387e+06,0.0,248022,3404.000000,4.995477e+07,36132.358958,28400,8300197,127691.261335,na,14544.000000,False,True
3,2015-12-14,2.875220e+06,625877.3,0,132600.000000,3.164930e+07,36804.210958,31900,8122883,84014.720306,na,2800.000000,False,True
4,2015-12-21,2.215953e+06,0.0,520005,0.000000,8.802269e+06,28401.744069,27100,7105985,20687.478156,na,15478.000000,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,2019-10-14,2.456240e+06,0.0,60433,153723.666667,0.000000e+00,152840.323412,112100,7612762,0.000000,na,28157.000000,False,True
204,2019-10-21,2.182825e+06,154917.6,0,0.000000,5.688196e+07,103680.047821,103700,6701667,133624.575524,na,10644.000000,False,True
205,2019-10-28,2.377707e+06,21982.5,14094,17476.000000,0.000000e+00,138387.704138,114700,7688920,0.000000,na,9597.000000,False,True
206,2019-11-04,2.732825e+06,22453.0,0,24051.333333,0.000000e+00,151707.990462,134100,8815710,0.000000,na,90189.000000,False,True
