In [None]:

import numpy as np 
import pandas as pd 

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)
import matplotlib.pyplot as plt
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.stattools import acf,pacf


In [None]:
!pip install pmdarima
!pip install sktime
from sktime.performance_metrics.forecasting import MeanAbsoluteScaledError

In [None]:
# Metric object for the mean absolute scaled error; it is called further down
# as mase(y_true, y_pred, y_train=...).
mase = MeanAbsoluteScaledError()

# **Reading the data**

In [None]:
# Load the tourism table and replace every missing cell with 0.
# NOTE(review): after fillna(0) a missing observation is indistinguishable from
# a genuine zero -- confirm this is intended for the series analysed below.
tourism_csv_path = '/kaggle/input/tourism2/tourism2_revision2.csv'
months_quarter_df = pd.read_csv(tourism_csv_path).fillna(0)

In [None]:
# Name of the column to analyse, and the corresponding series taken from the
# loaded table.
series_index = 'm10'
series = months_quarter_df.loc[:, series_index]

In [None]:
import matplotlib as mpl
# Draw figure text, axis labels and tick labels in COLOR; legend labels are
# kept black.
COLOR = 'white'
mpl.rcParams.update({
    'text.color': COLOR,
    'axes.labelcolor': COLOR,
    'xtick.color': COLOR,
    'ytick.color': COLOR,
    'legend.labelcolor': 'black',
})

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Plotting the time series

In [None]:
# Plot the selected series with a faint vertical guide at every observation
# position and a dashed horizontal line at the series mean.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(series)
ax.set_title(f'Tourism sales in series {series_index}', fontsize=20)
ax.set_ylabel('Sales', fontsize=16)
for x_pos in range(len(series)):
    ax.axvline(x_pos, color='k', linestyle='--', alpha=0.2)
ax.axhline(series.mean(), color='r', alpha=0.2, linestyle='--')


# Series difference and ADF test

In [None]:
# Lag-`periods` difference: each value minus the value `periods` rows earlier.
# The first `periods` entries have no predecessor (NaN) and are dropped.
periods = 2
diff = series.diff(periods=periods).iloc[periods:]

fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(diff)
ax.set_title(f'Difference {periods} of series {series_index}', fontsize=20)
ax.set_ylabel('Sales', fontsize=16)
# Guides cover every position of the original series, including the first
# `periods` positions for which the differenced series has no value.
for x_pos in range(len(series)):
    ax.axvline(x_pos, color='k', linestyle='--', alpha=0.2)
ax.axhline(diff.mean(), color='r', alpha=0.2, linestyle='--')


In [None]:
from statsmodels.tsa.stattools import adfuller
# Run the ADF test on the differenced series and report the test statistic,
# the p-value and the critical values.
result = adfuller(diff)
adf_statistic, p_value, critical_values = result[0], result[1], result[4]
print(f'ADF Statistic: {adf_statistic:f}')
print(f'p-value: {p_value:f}')
print('Critical Values:')
for level, threshold in critical_values.items():
    print(f'\t{level}: {threshold:.3f}')

# ACF and PACF

In [None]:
# Autocorrelation values of the differenced series, drawn as one bar per lag.
acf_vals = acf(diff)
acf_lags = range(len(acf_vals))
plt.bar(acf_lags, acf_vals)


In [None]:
# Partial autocorrelation values of the differenced series, one bar per lag.
pacf_vals = pacf(diff)
pacf_lags = range(len(pacf_vals))
plt.bar(pacf_lags, pacf_vals)

# Train test split

In [None]:
# Chronological split: the first `train_ratio` share of the observations is
# used for fitting, the remainder is held out for evaluation.
train_ratio = 0.98
split = int(train_ratio * len(series))
train_data, test_data = series.iloc[:split], series.iloc[split:]
# First and last index labels of the hold-out window; used as forecast bounds.
pred_start_date, pred_end_date = test_data.index[0], test_data.index[-1]


# ARIMA training

In [None]:
from statsmodels.tsa.arima.model import ARIMA
# Fit an ARIMA model of order (2, 2, 5) on the training portion and print the
# fitted model's summary.
arima_order = (2, 2, 5)
model = ARIMA(train_data, order=arima_order)
model_fit = model.fit()
print(model_fit.summary())


In [None]:
# Predict over the hold-out window and take the point-wise error against the
# observed values.
predictions = model_fit.predict(
    start=pred_start_date,
    end=pred_end_date,
)
residuals = test_data.sub(predictions)


In [None]:
# Plot the hold-out residuals with a dashed reference line at zero.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(residuals)
ax.set_title('Residuals from AR Model', fontsize=20)
ax.set_ylabel('Error', fontsize=16)
ax.axhline(0, color='r', linestyle='--', alpha=0.2)

In [None]:
# Overlay the observed hold-out values and the model predictions.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(test_data)
ax.plot(predictions)
ax.legend(('Data', 'Predictions'), fontsize=16, labelcolor='black')
ax.set_title('Data vs Predictions', fontsize=20)
ax.set_ylabel('Sales', fontsize=16)

In [None]:
# Show the overall series mean alongside the predictions. A bare expression is
# rendered only when it is the last line of a cell, so the mean is printed
# explicitly; the predictions remain the cell's displayed output.
print('Series mean:', series.mean())
predictions

In [None]:
# Hold-out accuracy of the current `predictions`. The percentage error is
# reported as a fraction (it is not multiplied by 100).
mape_value = (residuals / test_data).abs().mean()
print('Mean Absolute Percentage Error:', mape_value)
mase_value = mase(test_data, predictions, y_train=train_data)
print('Mean Absolute Scaled Error:', mase_value)

# Exponential Smoothing

In [None]:
# from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing
# Simple exponential smoothing fitted on the training portion.
ses = SimpleExpSmoothing(train_data, initialization_method="estimated")
ses_model = ses.fit()
# Rebinds `predictions`: from here on it holds the smoothing model's output
# for the hold-out window.
predictions = ses_model.predict(start=pred_start_date, end=pred_end_date)

In [None]:
# Settings read by the exponential smoothing cell that follows.
config = dict(
    trend=None,
    damped=False,
    seasonal=None,
    seasonal_periods=12,
    use_boxcox=True,
    remove_bias=True,
)



In [None]:
# Fit an exponential smoothing model with the settings from `config` and
# predict over the hold-out window.
t = config['trend']
d = config['damped']
s = config['seasonal']
p = config['seasonal_periods']
# NOTE(review): `use_boxcox` is read here but never handed to the model, so the
# setting currently has no effect. It is left unapplied because the loaded
# table is zero-filled -- confirm the data is suitable before enabling it.
b = config['use_boxcox']
r = config['remove_bias']
# Fit on the training portion only. Fitting on `series[:-1]` would include
# nearly all of the hold-out observations, so the values compared against
# `test_data` would be in-sample fits rather than forecasts, and the fit would
# disagree with the `y_train=train_data` passed to the scaled-error metric.
model = ExponentialSmoothing(
    train_data,
    trend=t,
    damped_trend=d,
    seasonal=s,
    seasonal_periods=p,
)
model_fit = model.fit(optimized=True, remove_bias=r)
yhat = model_fit.predict(start=pred_start_date, end=pred_end_date)


In [None]:
# Point-wise hold-out error of the exponential smoothing predictions.
residuals = test_data.sub(yhat)

In [None]:
# Hold-out accuracy of `yhat`. The percentage error is reported as a fraction
# (it is not multiplied by 100).
mape_value = (residuals / test_data).abs().mean()
print('Mean Absolute Percentage Error:', mape_value)
mase_value = mase(test_data, yhat, y_train=train_data)
print('Mean Absolute Scaled Error:', mase_value)