In [1]:
import os
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
%matplotlib inline
from pylab import rcParams
from plotly import tools
import chart_studio
import chart_studio.plotly as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected = True)
import plotly.graph_objs as go
import plotly.figure_factory as ff
import statsmodels.api as sm
from numpy.random import normal,seed
from scipy.stats import norm
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.arima_model import ARIMA
import math
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.linear_model import LinearRegression

In [None]:
# Load the Macau arrivals table, indexed by the parsed `date` column.
# Missing values are forward-filled with DataFrame.ffill(); the equivalent
# fillna(method="ffill") spelling relies on the `method` argument, which
# pandas has deprecated.
macau = pd.read_csv('Datasets/Macau-2022.csv', index_col='date', parse_dates=['date']).ffill()
macau.head()

In [None]:
# Preview the end of the frame (five rows, which is tail's default).
macau.tail(n=5)

In [None]:
# Line plot of the arrival column against the frame's index.
arrival_axes = macau["arrival"].plot()
arrival_axes.set_title('Macau tourist arrival')
plt.show()

In [None]:
# Autocorrelation of the arrival series for lags 0..25.
arrival_series = macau["arrival"]
plot_acf(arrival_series, lags=25, title="Auto Correlation of Macau data")
# Label the axes of the figure that plot_acf just drew (the current axes).
plt.gca().set(xlabel="Time Lags", ylabel="Auto Correlation value")
plt.show()

In [None]:
# Partial autocorrelation of the arrival series for lags 0..25.
arrival_series = macau["arrival"]
plot_pacf(arrival_series, lags=25, title="Partial Auto Correlation of Macau data")
# Label the axes of the figure that plot_pacf just drew (the current axes).
plt.gca().set(xlabel="Time Lags", ylabel="Partial Auto Correlation value")
plt.show()

In [None]:
# AR(1): fit on the first-differenced arrivals and plot predictions for indices 100..132.
arrival_changes = macau["arrival"].diff().iloc[1:].values  # skip row 0, which diff() leaves as NaN
arrivals = ARMA(arrival_changes, order=(1, 0))
result = arrivals.fit()
result.plot_predict(start=100, end=132)
plt.show()

In [None]:
# AR(2): fit on the first-differenced arrivals and plot predictions for indices 100..132.
arrival_changes = macau["arrival"].diff().iloc[1:].values  # skip row 0, which diff() leaves as NaN
arrivals = ARMA(arrival_changes, order=(2, 0))
result = arrivals.fit()
result.plot_predict(start=100, end=132)
plt.show()

In [None]:
# AR(3): fit on the first-differenced arrivals and plot predictions for indices 100..132.
arrival_changes = macau["arrival"].diff().iloc[1:].values  # skip row 0, which diff() leaves as NaN
arrivals = ARMA(arrival_changes, order=(3, 0))
result = arrivals.fit()
result.plot_predict(start=100, end=132)
plt.show()

In [None]:
# MAPE of the in-sample predictions of the model held in `result` (the AR(3) fit above).
# That model was fitted on `macau["arrival"].diff().iloc[1:].values`, so model index i is
# position i of that array. Slicing `.diff()` directly (which still carries the leading
# NaN row) would pair every prediction with the previous period's actual value.
diff_actual = macau["arrival"].diff().iloc[1:].values
eval_start = 100
eval_end = min(131, len(diff_actual) - 1)  # stay in-sample so both arrays have the same length
diff_pred = result.predict(start=eval_start, end=eval_end)
# NOTE(review): MAPE divides by the actual values; differences close to zero can dominate
# this score — confirm MAPE is the intended metric for a differenced series.
print('MAPE: ', mean_absolute_percentage_error(diff_actual[eval_start:eval_end + 1], diff_pred))

In [None]:
# RMSE of the in-sample predictions of the model held in `result` (the AR(3) fit above).
# That model was fitted on `macau["arrival"].diff().iloc[1:].values`, so model index i is
# position i of that array. Slicing `.diff()` directly (which still carries the leading
# NaN row) would pair every prediction with the previous period's actual value.
diff_actual = macau["arrival"].diff().iloc[1:].values
eval_start = 100
eval_end = min(131, len(diff_actual) - 1)  # stay in-sample so both arrays have the same length
diff_pred = result.predict(start=eval_start, end=eval_end)
print('RMSE: ', math.sqrt(mean_squared_error(diff_actual[eval_start:eval_end + 1], diff_pred)))

In [None]:
# MA(3) (no AR terms) fitted on the first-differenced arrivals.
arrival_changes = macau["arrival"].diff().iloc[1:].values
model = ARMA(arrival_changes, order=(0, 3))
result = model.fit()
print(result.summary())
# Report only the first two fitted parameters.
fitted_params = result.params
print("μ={} ,θ={}".format(fitted_params[0], fitted_params[1]))
# Note: this cell plots the 60..90 window, unlike the 100..132 window used by the other fits.
result.plot_predict(start=60, end=90)
plt.show()

In [None]:
# MAPE of the in-sample predictions of the model held in `result` (the MA(3) fit above).
# That model was fitted on `macau["arrival"].diff().iloc[1:].values`, so model index i is
# position i of that array. Slicing `.diff()` directly (which still carries the leading
# NaN row) would pair every prediction with the previous period's actual value.
diff_actual = macau["arrival"].diff().iloc[1:].values
eval_start = 100
eval_end = min(131, len(diff_actual) - 1)  # stay in-sample so both arrays have the same length
diff_pred = result.predict(start=eval_start, end=eval_end)
# NOTE(review): MAPE divides by the actual values; differences close to zero can dominate
# this score — confirm MAPE is the intended metric for a differenced series.
print('MAPE: ', mean_absolute_percentage_error(diff_actual[eval_start:eval_end + 1], diff_pred))

In [None]:
# RMSE of the in-sample predictions of the model held in `result` (the MA(3) fit above).
# That model was fitted on `macau["arrival"].diff().iloc[1:].values`, so model index i is
# position i of that array. Slicing `.diff()` directly (which still carries the leading
# NaN row) would pair every prediction with the previous period's actual value.
diff_actual = macau["arrival"].diff().iloc[1:].values
eval_start = 100
eval_end = min(131, len(diff_actual) - 1)  # stay in-sample so both arrays have the same length
diff_pred = result.predict(start=eval_start, end=eval_end)
print('RMSE: ', math.sqrt(mean_squared_error(diff_actual[eval_start:eval_end + 1], diff_pred)))

In [None]:

# ARMA(3,3) fitted on the first-differenced arrivals.
model = ARMA(macau["arrival"].diff().iloc[1:].values, order=(3,3))
result = model.fit()
print(result.summary())
# `params` is laid out as [const, ar.L1..ar.L3, ma.L1..ma.L3], so params[2] is the second
# AR coefficient, not an MA term. Read each coefficient group from its own attribute so
# the ϕ / θ labels match what is printed.
print("μ={}, ϕ={}, θ={}".format(result.params[0], result.arparams, result.maparams))
result.plot_predict(start=100, end=132)
plt.show()

In [None]:
# MAPE of the in-sample predictions of the model held in `result` (the ARMA(3,3) fit above).
# That model was fitted on `macau["arrival"].diff().iloc[1:].values`, so model index i is
# position i of that array. Slicing `.diff()` directly (which still carries the leading
# NaN row) would pair every prediction with the previous period's actual value.
diff_actual = macau["arrival"].diff().iloc[1:].values
eval_start = 100
eval_end = min(131, len(diff_actual) - 1)  # stay in-sample so both arrays have the same length
diff_pred = result.predict(start=eval_start, end=eval_end)
# NOTE(review): MAPE divides by the actual values; differences close to zero can dominate
# this score — confirm MAPE is the intended metric for a differenced series.
print('MAPE: ', mean_absolute_percentage_error(diff_actual[eval_start:eval_end + 1], diff_pred))

In [None]:
# RMSE of the in-sample predictions of the model held in `result` (the ARMA(3,3) fit above).
# That model was fitted on `macau["arrival"].diff().iloc[1:].values`, so model index i is
# position i of that array. Slicing `.diff()` directly (which still carries the leading
# NaN row) would pair every prediction with the previous period's actual value.
diff_actual = macau["arrival"].diff().iloc[1:].values
eval_start = 100
eval_end = min(131, len(diff_actual) - 1)  # stay in-sample so both arrays have the same length
diff_pred = result.predict(start=eval_start, end=eval_end)
print('RMSE: ', math.sqrt(mean_squared_error(diff_actual[eval_start:eval_end + 1], diff_pred)))

In [None]:
rcParams['figure.figsize'] = 16, 6
# ARIMA(3,1,3): hand the model the arrival LEVELS and let d=1 do the single round of
# differencing. Passing an already-differenced series together with d=1 differences the
# data twice, and the default ('linear') predictions are then second differences, which
# are not comparable with the first-difference actuals used by the MAPE/RMSE cells.
# With levels as input, integer start/end refer to the original series, so
# predict(start=100, ...) lines up with `macau["arrival"].diff().iloc[100:...]`.
model = ARIMA(macau["arrival"].values.astype(float), order=(3,1,3))
result = model.fit()
print(result.summary())
result.plot_predict(start=100, end=132)
plt.show()

In [None]:
# MAPE between the observed first differences (rows 100..131) and the predictions of the
# model currently held in `result`.
# NOTE(review): with `typ` left at its default, ARIMA predictions are expressed as
# differences of whatever series the model was fitted on — confirm that series is the
# arrival levels so these values are comparable with `.diff()`.
observed_changes = macau["arrival"].diff().iloc[100:132].values
predicted_changes = result.predict(start=100, end=131)
print('MAPE: ', mean_absolute_percentage_error(observed_changes, predicted_changes))

In [None]:
# RMSE between the observed first differences (rows 100..131) and the predictions of the
# model currently held in `result`.
# NOTE(review): with `typ` left at its default, ARIMA predictions are expressed as
# differences of whatever series the model was fitted on — confirm that series is the
# arrival levels so these values are comparable with `.diff()`.
observed_changes = macau["arrival"].diff().iloc[100:132].values
predicted_changes = result.predict(start=100, end=131)
squared_error = mean_squared_error(observed_changes, predicted_changes)
print('RMSE: ', math.sqrt(squared_error))

In [None]:
# SARIMAX model: ARMA(3,3) with a constant, fitted on the first-differenced arrivals.
train_sample = macau["arrival"].diff().iloc[1:].values
model = sm.tsa.SARIMAX(train_sample,order=(3,0,3),trend='c')
result = model.fit(maxiter=1000,disp=False)
print(result.summary())
# In-sample predictions for observations 0..99 of `train_sample`.
predicted_result = result.predict(start=0, end=99)
result.plot_diagnostics()
# calculating error
# predicted_result[i] is the prediction for train_sample[i], so the matching actuals are
# train_sample[0:100]; a [1:101] slice would score each prediction against the next value.
mape = mean_absolute_percentage_error(train_sample[:100], predicted_result)
print("The Mean Absolute Percentage Error is {}.".format(mape))

In [None]:
from statsmodels.tsa.stattools import adfuller

def ad_test(dataset):
    """Run `adfuller` on `dataset` with AIC lag selection and print a numbered report.

    The first four elements of the `adfuller` result are printed under the headings
    ADF statistic, p-value, number of lags and number of observations; the fifth
    element (a mapping) is printed one critical value per line. Returns None.
    """
    outcome = adfuller(dataset, autolag='AIC')
    headings = (
        "1. ADF: ",
        "2. P-Value: ",
        "3. Num of lags: ",
        "4. Num of observations used for ADF regression and Critical value calculation: ",
    )
    for position, heading in enumerate(headings):
        print(heading, outcome[position])
    print("5. Critical values: ")
    for level, threshold in outcome[4].items():
        print("\t", level, ": ", threshold)

In [None]:
# ADF report for the raw (undifferenced) arrival column.
arrival_series = macau['arrival']
ad_test(arrival_series)

In [None]:
from pmdarima import auto_arima
import warnings 
warnings.filterwarnings("ignore")

In [None]:
# Let pmdarima choose an ARIMA order for the raw arrival column, then show the
# summary of the selected model as the cell output.
arrival_series = macau['arrival']
stepwise_fit = auto_arima(arrival_series, trace=True, suppress_warnings=True)
stepwise_fit.summary()

In [None]:
# Chronological split: the first 100 rows train the model, the remainder is held out.
print(macau.shape)
train, test = macau.iloc[:100], macau.iloc[100:]
print(train.shape, test.shape)

In [None]:
# ARIMA(0,1,1) on the training arrivals. `model` ends up bound to the fitted results
# object (not the unfitted model), which is what the later predict call uses.
model = ARIMA(train['arrival'], order=(0, 1, 1)).fit()
model.summary()

In [None]:
# Forecast one value per held-out row, beginning right after the last training position;
# typ='levels' requests predictions on the scale of the original series.
start = len(train)
end = start + len(test) - 1
pred = model.predict(start=start, end=end, typ='levels')
print(pred)

In [None]:
# Overlay the forecast and the held-out arrivals on one set of axes.
forecast_axes = pred.plot(legend=True)
test['arrival'].plot(ax=forecast_axes, legend=True)