In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from fbprophet import Prophet 
from fbprophet.plot import plot_plotly
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the raw avocado price data from the Kaggle input directory.
data = pd.read_csv("../input/avocado-prices/avocado.csv")

In [None]:
# Preview the first rows of the raw data.
data.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
data.info()

In [None]:
# Encode the categorical `type` column as integers so it can later be passed
# to Prophet as a numeric regressor.
# The column is addressed by name instead of by position (previously
# `iloc[:, 11]`, presumably the same `type` column -- confirm against
# data.head()), so a change in the CSV column order cannot silently encode a
# different column, and the assignment replaces the column outright with the
# integer codes.
le = LabelEncoder()
data['type'] = le.fit_transform(data['type'])
data.head()

In [None]:
# Columns kept as candidate model inputs (the date plus volume/bag counts and type).
feature_columns = [
    'Date', 'Total Volume', '4046', '4225',
    'Total Bags', 'Small Bags', 'XLarge Bags', 'type',
]
X = data[feature_columns]
# Forecast target: the average price.
y = data['AveragePrice']

The input to Prophet is a DataFrame with at least two columns: `ds` (the datestamp) and `y` (the value to forecast).

In [None]:
# Assemble the two-column frame Prophet is fitted on: `ds` holds the parsed
# dates and `y` holds the average price.
train = pd.DataFrame({
    'ds': pd.to_datetime(X["Date"]),
    'y': data['AveragePrice'],
})

In [None]:
# Baseline model: Prophet with all default settings, fitted on the ds/y frame.
prophet_basic = Prophet()
prophet_basic.fit(train)

Predicting the values for the future
To make predictions with Prophet, we need a DataFrame with a `ds` (datestamp) column containing the dates for which we want predictions.
We use `make_future_dataframe()`, to which we pass the number of periods (days by default) to extend into the future. By default it also includes the dates from the history.

In [None]:
# Build the frame of dates to predict, extending 300 periods beyond the history.
future = prophet_basic.make_future_dataframe(periods=300)
future.head()

In [None]:
# Forecast every date in `future` with the baseline model.
forcast = prophet_basic.predict(future)

**plotting the predicted data**

In [None]:
# Plot the baseline forecast and keep the returned figure in `fig1`.
fig1 = prophet_basic.plot(forcast)

**We can plot the trend and seasonality, components of the forecast**

In [None]:
# Plot the individual components of the baseline forecast (trend and seasonality).
fig2 =  prophet_basic.plot_components(forcast)

By default, Prophet adds 25 changepoints to the initial 80% of the data-set.
Let’s plot the vertical lines where the potential changepoints occurred

In [None]:
from fbprophet.plot import add_changepoints_to_plot
# Redraw the baseline forecast, then overlay the model's changepoints on the
# figure's current axes.
fig = prophet_basic.plot(forcast)
a = add_changepoints_to_plot(fig.gca(),prophet_basic,forcast)

**We can view the dates where the changepoints occurred**

In [None]:
# Display the changepoint dates stored on the fitted baseline model.
prophet_basic.changepoints

**We can change the inferred changepoint range by setting the changepoint_range**

In [None]:
# Refit with changepoint_range=0.9 so changepoints may be placed in the first
# 90% of the history, then forecast the same `future` frame and plot the
# result with the changepoints overlaid.
prophet_pro = Prophet(changepoint_range=0.9)
prophet_pro.fit(train)
new_forcast = prophet_pro.predict(future)
fig4 = prophet_pro.plot(new_forcast)
b = add_changepoints_to_plot(fig4.gca(),prophet_pro,new_forcast)

**Adjusting Trend**

* Prophet allows you to adjust the trend in case of overfitting or underfitting. `changepoint_prior_scale` controls how flexible the trend is.
* The default value of `changepoint_prior_scale` is 0.05. Decrease the value to make the trend less flexible; increase it to make the trend more flexible.
* Here we increase `changepoint_prior_scale` to 0.08 to make the trend more flexible.

In [None]:
# More flexible trend: changepoint_prior_scale set to 0.08, with 20
# changepoints and yearly seasonality explicitly enabled. Forecast the same
# `future` frame and plot it with the changepoints overlaid.
pro_change = Prophet(changepoint_prior_scale = 0.08,n_changepoints = 20,yearly_seasonality=True)
pro_change.fit(train)
more_flexible_forcast = pro_change.predict(future)
fig5 = pro_change.plot(more_flexible_forcast)
b = add_changepoints_to_plot(fig5.gca(),pro_change,more_flexible_forcast)

In [None]:
# Less flexible trend: same settings as the previous model except
# changepoint_prior_scale is lowered to 0.01.
# NOTE(review): this rebinds `pro_change` and `fig5`, so the more flexible
# model and its figure are no longer reachable after this cell runs.
pro_change = Prophet(changepoint_prior_scale = 0.01,n_changepoints = 20,yearly_seasonality=True)
pro_change.fit(train)
less_flexible_forcast = pro_change.predict(future)
fig5 = pro_change.plot(less_flexible_forcast)
b = add_changepoints_to_plot(fig5.gca(),pro_change,less_flexible_forcast)

**Adding Holidays**
* Holidays and events can cause changes in a time series. In our example, National Avocado Day on July 31 and Guacamole Day on September 16 can affect avocado prices.
* We can create a custom holiday list for Prophet by building a DataFrame with two columns, `ds` and `holiday`, containing one row for each occurrence of the holiday.

In [None]:
# One entry per occurrence of the two events (July 31 and September 16) for
# each year from 2014 through 2019.
event_dates = [
    '2014-07-31', '2014-09-16',
    '2015-07-31', '2015-09-16',
    '2016-07-31', '2016-09-16',
    '2017-07-31', '2017-09-16',
    '2018-07-31', '2018-09-16',
    '2019-07-31', '2019-09-16',
]

# Holiday frame in the layout Prophet expects; the windows make each event
# also cover the day before it (lower_window=-1, upper_window=0).
avocado_season = pd.DataFrame({
    'holiday': 'avocado season',
    'ds': pd.to_datetime(event_dates),
    'lower_window': -1,
    'upper_window': 0,
})
avocado_season.head()

* `lower_window` and `upper_window` extend the holiday to the days around its date. To include the day before National Avocado Day and Guacamole Day, we set `lower_window: -1` and `upper_window: 0`.
* If we wanted to use the day after the holiday then set lower_window: 0 upper_window: 1

In [None]:
# Fit a model that is told about the custom avocado holidays.
pro_holiday = Prophet(holidays=avocado_season)
pro_holiday.fit(train)

# Extend the history by 12 monthly steps. 'M' is the documented pandas
# month-end alias for the pandas versions fbprophet runs on; the lowercase 'm'
# used before is not a documented frequency string.
future_data = pro_holiday.make_future_dataframe(periods=12, freq='M')
forcast_data = pro_holiday.predict(future_data)

# Keep the returned figure in a variable, as the other plotting cells do; a
# bare `plot(...)` as the last expression makes the notebook render the figure
# twice.
fig_holiday = pro_holiday.plot(forcast_data)

In [None]:
# Preview the candidate regressor columns selected earlier.
X.head()

**Adding Multiple Regressors**

In [None]:
# Copy the columns that will serve as extra regressors from X onto the Prophet
# frame, next to `ds` and `y`.
for regressor_column in ['Total Volume', '4046', '4225', 'Total Bags', 'Small Bags', 'type']:
    train[regressor_column] = X[regressor_column]

In [None]:
# Chronological hold-out split.
# The frame is never sorted by date in this notebook, so slicing it by row
# position does not necessarily hold out the latest observations. Sort by `ds`
# first (stable sort, so rows sharing a date keep their relative order), then
# train on the earliest TRAIN_ROWS rows and test on the remainder.
TRAIN_ROWS = 18000
train_sorted = train.sort_values('ds', kind='mergesort')
train_X = train_sorted[:TRAIN_ROWS]
test_X = train_sorted[TRAIN_ROWS:]

In [None]:
# Default Prophet model, with each extra column registered as an additional
# regressor before fitting.
pro_regressor = Prophet()
for regressor_name in ('Total Volume', '4046', '4225', 'Total Bags', 'Small Bags', 'type'):
    pro_regressor.add_regressor(regressor_name)
# add_regressor returns the model itself, so ending on the model keeps the
# cell's echoed output the same as the unrolled calls.
pro_regressor

In [None]:
# Fit the regressor-augmented model on the training portion only.
pro_regressor.fit(train_X)


In [None]:
# Predict the held-out rows with the regressor model.
# NOTE(review): this reuses the name `forcast_data`, overwriting the
# holiday-model forecast stored under the same name earlier.
forcast_data = pro_regressor.predict(test_X)

# Keep the returned figure in a variable, as the other plotting cells do; a
# bare `plot(...)` as the last expression makes the notebook render the figure
# twice.
fig_regressor = pro_regressor.plot(forcast_data)