In [None]:
#Importing Libraries
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

# Load the airline-passengers CSV; the 'Month' column is parsed as dates
# and becomes the index of the resulting frame.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv'
df = pd.read_csv(url, index_col='Month', parse_dates=['Month'])

# Line chart of the full passenger series against its date index
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(df, label='Passengers')
ax.set_title('Passengers over the Years')
plt.show()

# Same series as individual points: passenger count against the date index
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(df.index, df['Passengers'])
ax.set_title('Scatter plot of Passengers vs Time')
plt.show()

# Lag plot: pairs each observation with the one 10 rows apart in the series.
# Rows are indexed by 'Month', so a lag here is a number of rows, not days.
from pandas.plotting import lag_plot
fig, ax = plt.subplots(figsize=(5, 5))
lag_plot(df['Passengers'], lag=10, ax=ax)
ax.set_title('Scatter Plot Passengers with lag 10')
plt.show()

# Autocorrelation of the passenger series for up to 50 lags
# (a lag counts rows of the 'Month'-indexed series).
from statsmodels.tsa.stattools import acf

acf_values = acf(df['Passengers'], nlags=50)
lag_positions = range(len(acf_values))

fig, ax = plt.subplots(figsize=(10, 4))
ax.bar(lag_positions, acf_values)
ax.set_title('Auto Correlation Function')
ax.set_xlabel('Lag')
ax.set_ylabel('Auto Correlation Value')
plt.show()

# Hold out the final 12 rows as the test set; everything before them is training data
train = df.iloc[:-12]
test = df.iloc[-12:]

from sklearn.metrics import mean_squared_error
# Naive forecast: repeat the last training observation for every test row
last_train_value = train['Passengers'].iloc[-1]
naive_pred = [last_train_value] * len(test)
naive_rmse = np.sqrt(mean_squared_error(test, naive_pred))
print('RMSE:', naive_rmse)

# Overlay the flat forecast on the training and held-out series
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(train, label='Train')
ax.plot(test, label='Actual')
ax.plot(test.index, naive_pred, label='Naive Forecast')
ax.legend()
plt.show()

# Moving-average forecast: repeat the mean of the last 12 *training*
# observations for every test row.
# The window must come from `train` only. The last 12 rows of the full `df`
# are exactly the held-out test rows, so averaging those would leak the values
# being predicted into the forecast and understate the RMSE.
ma_window=12
ma_pred=[train['Passengers'].rolling(window=ma_window).mean().iloc[-1]]*len(test)
print('RMSE:',np.sqrt(mean_squared_error(test,ma_pred)))

# Overlay the flat forecast on the training and held-out series
plt.figure(figsize=(10,4))
plt.plot(train,label='Train')
plt.plot(test,label='Actual')
plt.plot(test.index,ma_pred,label='Moving Average Forecast')
plt.legend()
plt.show()

In [None]:
# Autoregressive model fitted on the training series with 12 lags,
# then predicted over the date span of the test set.
from statsmodels.tsa.ar_model import AutoReg
import warnings
# Silences every warning for the rest of the session, not only statsmodels'.
warnings.filterwarnings('ignore')
ar_model = AutoReg(train['Passengers'], lags=12).fit()
first_test_date, last_test_date = test.index[0], test.index[-1]
ar_pred = ar_model.predict(start=first_test_date, end=last_test_date)
ar_rmse = np.sqrt(mean_squared_error(test, ar_pred))
print('RMSE:', ar_rmse)

# Overlay the AR forecast on the training and held-out series
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(train, label='Train')
ax.plot(test, label='Actual')
ax.plot(test.index, ar_pred, label='Auto Regressive Forecast')
ax.legend()
plt.show()

In [None]:
# Display the date index of the held-out test rows (used as forecast start/end dates)
test.index

In [None]:
# Extend the AR forecast: predict from the first test date through 10 years
# past the last test date, then plot it next to the train/test data.
# Note: this overwrites `ar_pred` from the evaluation cell with the longer series.
ar_pred=ar_model.predict(start=test.index[0],end=test.index[-1]+pd.DateOffset(years=10))

# Plot against the dates the model itself attached to each prediction.
# The earlier hand-built axis appended month-end dates (freq='M') to the test
# index, so those predictions were drawn at dates other than the ones they
# were predicted for.
# NOTE(review): assumes predict() returns a date-indexed Series, which holds
# when it is called with date start/end on a date-indexed training series.
time=ar_pred.index

plt.figure(figsize=(10,4))
plt.plot(train,label='Train')
plt.plot(test,label='Actual')
plt.plot(time,ar_pred,label='Auto Regressive Forecast')
plt.legend()
plt.show()

In [None]:
# Display the estimated parameters of the fitted AutoReg model
ar_model.params

In [None]:
# Simple exponential smoothing fitted on the training series and
# forecast for as many steps as there are test rows.
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
ses_model = SimpleExpSmoothing(train['Passengers']).fit()
n_test_steps = len(test)
ses_forecast = ses_model.forecast(n_test_steps)

ses_rmse = np.sqrt(mean_squared_error(test, ses_forecast))
print(ses_rmse)

# Overlay the SES forecast on the training and held-out series
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(train, label='Train')
ax.plot(test, label='Actual')
ax.plot(test.index, ses_forecast, label='SES Forecast')
ax.legend()
plt.show()

In [None]:
#ARIMA --> Auto Regressive Integrated Moving Average
#- autoregressive models: AR(p)
#- moving average models: MA(q)
#- mixed autoregressive moving average models: ARMA(p, q)
#- integration models: ARIMA(p, d, q)
#- seasonal models: SARIMA(P, D, Q, s)
#
# Grid search: fit every (p, d, q) combination on the training series and keep
# the order with the lowest AIC in `best_order` / `best_aic`.
# NOTE(review): AIC is generally not comparable between models with different
# d, because differencing changes the data the likelihood is computed on.
# Consider fixing d first and searching only p and q.
from statsmodels.tsa.arima.model import ARIMA
import itertools
p=range(0,4)
d=range(0,3)
q=range(0,4)

best_aic=float("inf")
best_order=None
for order in itertools.product(p,d,q):
    try:
        candidate=ARIMA(train['Passengers'],order=order).fit()
    except Exception:
        # Best effort: skip orders that fail to fit. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit still
        # stop the search.
        continue
    if candidate.aic<best_aic:
        best_aic=candidate.aic
        best_order=order

# Fail here with a clear message instead of passing order=None to ARIMA later.
if best_order is None:
    raise RuntimeError('ARIMA grid search failed: no candidate order could be fitted')

In [None]:
# Refit ARIMA on the training series with the order chosen by the grid search,
# then forecast 12 steps ahead (the test set holds the last 12 rows).
arima_spec = ARIMA(train['Passengers'], order=best_order)
arima_model = arima_spec.fit()
arima_pred = arima_model.forecast(steps=12)

In [None]:
# RMSE of the ARIMA forecast against the held-out rows
arima_rmse = np.sqrt(mean_squared_error(test, arima_pred))
print(arima_rmse)

# Overlay the ARIMA forecast on the training and held-out series
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(train, label='Train')
ax.plot(test, label='Actual')
ax.plot(test.index, arima_pred, label='ARIMA Forecast')
ax.legend()
plt.show()

In [None]:
# SARIMAX
#Seasonal AutoRegressive Integrated Moving Average with eXogenous regressors model
#
# Fits on the training series using the non-seasonal order found by the ARIMA
# grid search plus a fixed seasonal order, then forecasts 12 steps ahead.
# seasonal_order is (P, D, Q, s); s=12 is the seasonal period in rows.
# NOTE(review): (3,1,3,12) is hard-coded and `best_order` was selected without
# a seasonal component -- confirm this pairing is intended.
from statsmodels.tsa.statespace.sarimax import SARIMAX
sarimax_model=SARIMAX(train['Passengers'],order=best_order,seasonal_order=(3,1,3,12)).fit()
sarimax_pred=sarimax_model.forecast(steps=12)

# Labelled like the RMSE prints of the earlier models so the outputs can be told apart
print('RMSE:',np.sqrt(mean_squared_error(test,sarimax_pred)))

plt.figure(figsize=(10,4))
plt.plot(train,label='Train')
plt.plot(test, label='Actual')
# This curve is the SARIMAX forecast; it was previously mislabelled 'ARIMA Forecast'.
plt.plot(test.index, sarimax_pred, label='SARIMAX Forecast')
plt.legend()
plt.show()