In [118]:
%matplotlib inline

from pathlib import Path

import math
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import statsmodels.formula.api as sm
from statsmodels.tsa import tsatools, stattools
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics import tsaplots

In [119]:
pd.set_option('display.max_columns', None)

In [120]:
wine = pd.read_csv('AustralianWines.csv')

In [121]:
wine.head()

Unnamed: 0,Month,Drywhite,Fortified,Red,Rose,Sparkling,Sweetwhite
0,1980-01,1954,2585,464,112.0,1686,85
1,1980-02,2302,3368,675,118.0,1591,89
2,1980-03,3054,3210,703,129.0,2304,109
3,1980-04,2414,3111,887,99.0,1712,95
4,1980-05,2226,3756,1139,116.0,1471,91


In [122]:
len(wine)

187

a) For sweet wine, quadratic trend + multiplicative seasonality will be suitable.
b) For Rose wine, linear trend + multiplicative seasonality will be suitable.
c) For Sparkling wine, linear trend + multiplicative sesonality will be suitable.
d) For Red wine, linear trend + multiplicative seasonality will  be suitable.
e) For Dry white wine, linear trend + multiplicative seasonality will be suitable.
f) For Fortified wine, linear trend + multiplicative seasonality will be suitable.

In [123]:
tified = wine[['Month', 'Fortified']]

In [124]:
tified.head()

Unnamed: 0,Month,Fortified
0,1980-01,2585
1,1980-02,3368
2,1980-03,3210
3,1980-04,3111
4,1980-05,3756


In [125]:
#convert to time series
tified['Date'] = pd.to_datetime(tified.Month, format = '%Y-%m')
Sales_ts = pd.Series(tified.Fortified.values, index = tified.Date, name = 'Sales')
Sales_ts.index = pd.DatetimeIndex(Sales_ts.index, freq = Sales_ts.index.inferred_freq)

#fit a linear trend
sales_df = tsatools.add_trend(Sales_ts, trend = 'ct')
sales_df['Month'] = sales_df.index.month

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [126]:
sales_df

Unnamed: 0_level_0,Sales,const,trend,Month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1980-01-01,2585,1.0,1.0,1
1980-02-01,3368,1.0,2.0,2
1980-03-01,3210,1.0,3.0,3
1980-04-01,3111,1.0,4.0,4
1980-05-01,3756,1.0,5.0,5
1980-06-01,4216,1.0,6.0,6
1980-07-01,5225,1.0,7.0,7
1980-08-01,4426,1.0,8.0,8
1980-09-01,3932,1.0,9.0,9
1980-10-01,3816,1.0,10.0,10


In [127]:
nValid = 19
nTrain = len(Sales_ts) - nValid

#partition the data
train_df = sales_df[:nTrain]
valid_df = sales_df[nTrain:]

In [128]:
train_df

Unnamed: 0_level_0,Sales,const,trend,Month
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1980-01-01,2585,1.0,1.0,1
1980-02-01,3368,1.0,2.0,2
1980-03-01,3210,1.0,3.0,3
1980-04-01,3111,1.0,4.0,4
1980-05-01,3756,1.0,5.0,5
1980-06-01,4216,1.0,6.0,6
1980-07-01,5225,1.0,7.0,7
1980-08-01,4426,1.0,8.0,8
1980-09-01,3932,1.0,9.0,9
1980-10-01,3816,1.0,10.0,10


In [129]:
#fit additive seasonality to the time series

sales_lm = sm.ols(formula = 'Sales ~ trend + C(Month)', data = train_df)