# Chapter 5 - AutoRegressive Integrated Moving Average (ARIMA) Models

In [1]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from statsmodels.tsa.stattools import adfuller
import pmdarima as pm
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from itertools import product
from statsmodels.tools.eval_measures import rmspe, rmse
from statsmodels.tsa.seasonal import seasonal_decompose, STL
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import statsmodels.api as sm
import scipy.stats as stats
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.arima_process import ArmaProcess
from pmdarima.arima import CHTest


import warnings
warnings.filterwarnings('ignore')

## 5.24 Table B.10 contains 7 years of monthly data on the number of airline miles flown in the United Kingdom. 
This is seasonal data.

    a. Using the first 6 years of data, develop an appropriate ARIMA model and a procedure for these data.
    
    b. Explain how prediction intervals would be computed.
    
    c. Make one-step-ahead forecasts of the last 12 months. Determine the forecast errors. How well did your procedure work in forecasting the new data?

In [3]:
# Raw data as alternating Month and Miles values
raw_data = [
    ("Jan-1964", 7.269), ("Feb-1964", 6.775), ("Mar-1964", 7.819), ("Apr-1964", 8.371),
    ("May-1964", 9.069), ("Jun-1964", 10.248), ("Jul-1964", 11.030), ("Aug-1964", 10.882),
    ("Sep-1964", 10.333), ("Oct-1964", 9.109), ("Nov-1964", 7.685), ("Dec-1964", 7.682),
    ("Jan-1965", 8.350), ("Feb-1965", 7.829), ("Mar-1965", 8.829), ("Apr-1965", 9.948),
    ("May-1965", 10.638), ("Jun-1965", 11.253), ("Jul-1965", 11.424), ("Aug-1965", 11.391),
    ("Sep-1965", 10.665), ("Oct-1965", 9.396), ("Nov-1965", 7.775), ("Dec-1965", 7.933),
    ("Jan-1966", 8.186), ("Feb-1966", 7.444), ("Mar-1966", 8.484), ("Apr-1966", 9.864),
    ("May-1966", 10.252), ("Jun-1966", 12.282), ("Jul-1966", 11.637), ("Aug-1966", 11.577),
    ("Sep-1966", 12.417), ("Oct-1966", 9.637), ("Nov-1966", 8.094), ("Dec-1966", 9.280),
    ("Jan-1967", 8.334), ("Feb-1967", 7.899), ("Mar-1967", 9.994), ("Apr-1967", 10.078),
    ("May-1967", 10.801), ("Jun-1967", 12.953), ("Jul-1967", 12.222), ("Aug-1967", 12.246),
    ("Sep-1967", 13.281), ("Oct-1967", 10.366), ("Nov-1967", 8.730), ("Dec-1967", 9.614),
    ("Jan-1968", 8.639), ("Feb-1968", 8.772), ("Mar-1968", 10.894), ("Apr-1968", 10.455),
    ("May-1968", 11.179), ("Jun-1968", 10.588), ("Jul-1968", 10.794), ("Aug-1968", 12.770),
    ("Sep-1968", 13.812), ("Oct-1968", 10.857), ("Nov-1968", 9.290), ("Dec-1968", 10.925),
    ("Jan-1969", 9.491), ("Feb-1969", 8.919), ("Mar-1969", 11.607), ("Apr-1969", 8.852),
    ("May-1969", 12.537), ("Jun-1969", 14.759), ("Jul-1969", 13.667), ("Aug-1969", 13.731),
    ("Sep-1969", 15.110), ("Oct-1969", 12.185), ("Nov-1969", 10.645), ("Dec-1969", 12.161),
    ("Jan-1970", 10.840), ("Feb-1970", 10.436), ("Mar-1970", 13.589), ("Apr-1970", 13.402),
    ("May-1970", 13.103), ("Jun-1970", 14.933), ("Jul-1970", 14.147), ("Aug-1970", 14.057),
    ("Sep-1970", 16.234), ("Oct-1970", 12.389), ("Nov-1970", 11.594), ("Dec-1970", 12.772),
]

# Convert to DataFrame
df_air_miles = pd.DataFrame(raw_data, columns=['Month', 'Miles'])

# Convert 'Month' column to datetime format and set as index
df_air_miles['Month'] = pd.to_datetime(df_air_miles['Month'], format='%b-%Y')
df_air_miles.set_index('Month', inplace=True)

# Display the first rows
df_air_miles.head()

Unnamed: 0_level_0,Miles
Month,Unnamed: 1_level_1
1964-01-01,7.269
1964-02-01,6.775
1964-03-01,7.819
1964-04-01,8.371
1964-05-01,9.069


In [6]:
# Split the data
train = df_air_miles[df_air_miles.index.year < 1970]     # All data before 1970
test = df_air_miles[df_air_miles.index.year == 1970]     # All data in 1970

# Show the results
print("Train shape:", train.shape)
print("Test shape:", test.shape)
print()
train.tail(), print(), test.head()

Train shape: (72, 1)
Test shape: (12, 1)




(             Miles
 Month             
 1969-08-01  13.731
 1969-09-01  15.110
 1969-10-01  12.185
 1969-11-01  10.645
 1969-12-01  12.161,
 None,
              Miles
 Month             
 1970-01-01  10.840
 1970-02-01  10.436
 1970-03-01  13.589
 1970-04-01  13.402
 1970-05-01  13.103)