In [1]:
import plotly
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import statsmodels.api as sm
import pylab as py

# Global matplotlib text styling, applied once for every figure drawn afterwards.
plt.rcParams.update({
    "font.family": "Times New Roman",
    "font.size": "17",
})

from greykite.algo.changepoint.adalasso.changepoint_detector import ChangepointDetector
from greykite.algo.common.seasonality_inferrer import SeasonalityInferConfig
from greykite.algo.common.seasonality_inferrer import SeasonalityInferrer
from greykite.algo.common.seasonality_inferrer import TrendAdjustMethodEnum
from greykite.common import constants as cst
from greykite.framework.input.univariate_time_series import UnivariateTimeSeries
from greykite.framework.templates.autogen.forecast_config import EvaluationPeriodParam
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.autogen.forecast_config import ModelComponentsParam
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results
from plotly.offline import init_notebook_mode, iplot

import warnings
from collections import defaultdict
# Installs a blanket "ignore" filter: no category or module is given, so every
# warning raised after this point is hidden, including library diagnostics.
warnings.filterwarnings("ignore")

In [2]:
# Location of the input CSV, relative to the notebook's working directory.
file_path = "AirPassengers.csv"

# Keep the raw frame around: later cells pass `df` straight to the changepoint detector.
df = pd.read_csv(file_path)

# Wrap the frame in greykite's univariate time-series container.
# "MS" is the pandas month-start frequency alias.
Month = UnivariateTimeSeries()
Month.load_data(
    df=df,
    time_col="Month",
    value_col="#Passengers",
    freq="MS",
)

<greykite.framework.input.univariate_time_series.UnivariateTimeSeries at 0x27f69863490>

In [3]:
# Preview the first rows of the raw frame read from the CSV.
df.head()

Unnamed: 0,Month,#Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [4]:
# Print four summaries in order, separated by a dashed rule: time-column stats,
# value-column stats, then the first and last rows of the raw frame.
# Each summary is built just before it is printed, as in a plain sequence of prints.
sections = (Month.describe_time_col, Month.describe_value_col, df.head, df.tail)
for position, build_section in enumerate(sections):
    if position:
        print("-----------------------------")
    print(build_section())

{'data_points': 144, 'mean_increment_secs': 2629460.1398601397, 'min_timestamp': Timestamp('1949-01-01 00:00:00'), 'max_timestamp': Timestamp('1960-12-01 00:00:00')}
-----------------------------
count    144.000000
mean     280.298611
std      119.966317
min      104.000000
25%      180.000000
50%      265.500000
75%      360.500000
max      622.000000
Name: y, dtype: float64
-----------------------------
     Month  #Passengers
0  1949-01          112
1  1949-02          118
2  1949-03          132
3  1949-04          129
4  1949-05          121
-----------------------------
       Month  #Passengers
139  1960-08          606
140  1960-09          508
141  1960-10          461
142  1960-11          390
143  1960-12          432


In [5]:
# Column dtypes and non-null counts (written to stdout by pandas).
df.info()
print("--------------------------")

# Summary statistics for the numeric column; left as the last expression so the
# notebook renders it as a table.
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Month        144 non-null    object
 1   #Passengers  144 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.4+ KB
--------------------------


Unnamed: 0,#Passengers
count,144.0
mean,280.298611
std,119.966317
min,104.0
25%,180.0
50%,265.5
75%,360.5
max,622.0


In [6]:
# Plot the loaded series with the container's default plot settings, then
# render the returned figure inline through plotly's offline `iplot`.
fig = Month.plot()
iplot(fig)

In [7]:
# One thin, semi-transparent line per year plus the overall mean, grouped by the
# "month_dom" time feature; values are not centered and quantile bands are off.
fig = Month.plot_quantiles_and_overlays(
    # grouping and overlays
    groupby_time_feature="month_dom",
    overlay_label_time_feature="year",
    show_overlays=True,
    overlay_style={
        "line": {"width": 1},
        "opacity": 0.5,
    },
    # summary curves
    show_mean=True,
    show_quantiles=False,
    center_values=False,
    # labelling
    title="Yearly Trend for Each Year",
    xlabel="month of year",
    ylabel=Month.original_value_col,
)
iplot(fig)

In [8]:
# Per-year overlay lines plus the overall mean, grouped by the "month_dom" time
# feature, with center_values=True (presumably re-centers the plotted values;
# confirm against the greykite documentation). Quantile bands are off.
fig = Month.plot_quantiles_and_overlays(
    groupby_time_feature="month_dom",
    show_mean=True,
    show_quantiles=False,
    show_overlays=True,
    overlay_label_time_feature="year",
    overlay_style={"line": {"width": 1}, "opacity": 0.5},
    center_values=True,
    # The x axis is the month-based grouping above, not day of year, so the
    # label now says so.
    xlabel="month of year",
    ylabel=Month.original_value_col,
    title="Yearly Trend for Each Year (Centered)",
)
iplot(fig)

In [9]:
# Quantile bands with per-year overlay lines (no mean curve), values not centered.
fig = Month.plot_quantiles_and_overlays(
    # "month_dom" presumably combines month and day-of-month; with month-start
    # data that gives one group per calendar month. Verify against the greykite docs.
    groupby_time_feature="month_dom",
    overlay_label_time_feature="year",
    show_overlays=True,
    overlay_style={
        "line": {"width": 1},
        "opacity": 0.9,
    },
    show_quantiles=True,
    show_mean=False,
    center_values=False,
    title="Yearly and Monthly Trend for Each Year",
    xlabel="Month of the Year",
    ylabel=Month.original_value_col,
)
iplot(fig)

In [10]:
# Mean curve, quantile bands and per-year overlay lines together, with
# center_values=True.
fig = Month.plot_quantiles_and_overlays(
    # "month_dom" presumably combines month and day-of-month; with month-start
    # data that gives one group per calendar month. Verify against the greykite docs.
    groupby_time_feature="month_dom",
    overlay_label_time_feature="year",
    show_overlays=True,
    overlay_style={
        "line": {"width": 1},
        "opacity": 0.9,
    },
    show_mean=True,
    show_quantiles=True,
    center_values=True,
    title="Yearly and Monthly Trend for Each Year (Centered)",
    xlabel="Month of the Year",
    ylabel=Month.original_value_col,
)
iplot(fig)

In [11]:
# Mean curve plus an explicit set of quantile levels; no overlay arguments are
# passed, so the method's defaults apply for those.
fig = Month.plot_quantiles_and_overlays(
    groupby_time_feature="month_dom",
    show_quantiles=[0.1, 0.25, 0.75, 0.9],  # quantile levels to draw
    show_mean=True,
    title="Monthly Trend",
    xlabel="Month of Year",
    ylabel=Month.original_value_col,
)
iplot(fig)

In [12]:
# ChangepointDetector comes from the notebook's top import block, so every
# dependency is declared in one place.
# Only the frame and column names are passed; all detection settings stay at
# the detector's defaults.
model1 = ChangepointDetector()
res = model1.find_trend_changepoints(
    df=df,                     # raw frame read from the CSV
    time_col="Month",          # time column name
    value_col="#Passengers",   # value column name
)

# Show the detected trend changepoints as a one-column table.
pd.DataFrame({"trend_changepoints": res["trend_changepoints"]})

Unnamed: 0,trend_changepoints
0,1949-06-01
1,1949-08-01
2,1950-07-01
3,1953-07-01
4,1953-08-01
5,1954-08-01
6,1958-12-01


In [13]:
# Draw the detector's plot with its default options and render it inline.
fig = model1.plot(plot=False)  # plot = False returns a plotly figure object.
iplot(fig)

In [14]:
# Trend-focused view of the changepoint detection result.
# Seasonality changepoints are not requested here: find_seasonality_changepoints
# has not been called on `model1` at this point in a top-to-bottom run, so asking
# for them would make this figure depend on the order the cells were executed in.
fig = model1.plot(
    observation=True,                       # whether to plot the observations
    observation_original=True,              # whether to plot the unaggregated values
    trend_estimate=True,                    # whether to plot the trend estimation
    trend_change=True,                      # whether to plot detected trend changepoints
    yearly_seasonality_estimate=False,      # whether to plot estimated yearly seasonality
    adaptive_lasso_estimate=True,           # whether to plot the adaptive lasso estimated trend
    seasonality_change=False,               # seasonality changepoints have not been detected yet
    seasonality_change_by_component=True,   # only relevant once seasonality changepoints are plotted
    seasonality_estimate=False,             # whether to plot estimated trend+seasonality
    plot=False)                             # False returns the figure object instead of displaying it
iplot(fig)

In [15]:
# Detect seasonality changepoints on the same frame; only the frame and column
# names are passed, so detection settings stay at their defaults.
res = model1.find_seasonality_changepoints(
    df=df,
    time_col="Month",
    value_col="#Passengers",
)

# The result maps each seasonality component to its changepoints. Components can
# have different counts, so each is wrapped in a Series and pandas pads the
# shorter columns with NaN. res["seasonality_changepoints"] can also be viewed directly.
pd.DataFrame({
    component: pd.Series(changepoints)
    for component, changepoints in res["seasonality_changepoints"].items()
})

Unnamed: 0,weekly,yearly
0,,1949-06-01
1,,1950-05-01


In [16]:
# Seasonality-focused view, drawn after seasonality changepoints have been detected.
fig = model1.plot(
    seasonality_estimate=True,              # include the estimated trend+seasonality curve
    seasonality_change=True,                # mark the detected seasonality changepoints
    seasonality_change_by_component=True,   # distinguish them per component (daily, weekly, etc.)
    plot=False,                             # return the figure instead of displaying it
)
iplot(fig)