In [None]:
from warnings import filterwarnings
from sklearn.exceptions import ConvergenceWarning

# Silence every warning so optimizer chatter does not flood the notebook output.
filterwarnings('ignore')
# The previous `ConvergenceWarning('ignore')` merely constructed an exception
# instance and discarded it; registering a filter requires `filterwarnings`.
# NOTE(review): statsmodels raises its own ConvergenceWarning class
# (statsmodels.tools.sm_exceptions); the blanket filter above already covers
# it, so this line only matters if the blanket filter is ever removed.
filterwarnings('ignore', category=ConvergenceWarning)




# Import Libraries
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.datasets import get_rdataset

# Load the AirPassengers dataset directly (fetched through statsmodels'
# R-datasets helper, so this needs network access or a local cache).
data = get_rdataset("AirPassengers").data

# Index the observations by monthly periods starting in January 1949.
# Building the PeriodIndex directly avoids the month-end offset alias 'M' in
# `pd.date_range`, which newer pandas deprecates in favour of 'ME'; for
# periods, 'M' is still the correct monthly frequency.
passenger_counts = data['value'].values
df = pd.Series(
    passenger_counts,
    index=pd.period_range(start='1949-01', periods=len(passenger_counts), freq='M'),
)

# Plot the Data
ax = df.plot()
ax.set_title('Airline Passengers Over Time')
plt.show()



# Check for Stationarity (Augmented Dickey-Fuller test on the series in `df`)
result = adfuller(df)
print('ADF Statistic:', result[0])
print('p-value:', result[1])

# Differencing to make the series stationary if necessary.
# The differenced copy gets its own name: `df` is left untouched so that
#   * the ARIMA fit below, whose order already contains a `d` term, does not
#     difference the data a second time, and
#   * re-running this cell does not difference the series again.
if result[1] > 0.05:
    df_diff = df.diff().dropna()

    # Re-test only when differencing was actually applied, so the
    # "after differencing" labels are always truthful.
    result = adfuller(df_diff)
    print('ADF Statistic after differencing:', result[0])
    print('p-value after differencing:', result[1])
else:
    df_diff = df


# Plot ACF and PACF of the (possibly differenced) series; these plots are what
# the p and q orders are read from.
plot_acf(df_diff.dropna())
plt.title('ACF of Differenced Data')
plt.show()

plot_pacf(df_diff.dropna())
plt.title('PACF of Differenced Data')
plt.show()


In [None]:




# Fit ARIMA Model
# Baseline order: replace p, d, q with the values identified from the ACF and
# PACF plots. The baseline fit is kept as a fallback in case no candidate of
# the grid search below can be estimated.
p = 2  # example value
d = 2  # differencing is often necessary for this dataset
q = 2  # example value

model = ARIMA(df, order=(p, d, q))
model_fit = model.fit()
baseline_order = (p, d, q)
baseline_fit = model_fit

# Define the range for p, d, q
p_range = range(6, 18)
d_range = range(1, 3)
q_range = range(1, 5)

best_bic = np.inf
best_order = baseline_order
best_model = None  # set as soon as one candidate fits successfully

# Iterate over different combinations of p, d, q and keep the lowest-BIC fit.
# Dedicated loop names are used so the baseline p, d, q above stay intact.
for cand_p in p_range:
    for cand_d in d_range:
        for cand_q in q_range:
            try:
                model = ARIMA(df, order=(cand_p, cand_d, cand_q))
                model_fit = model.fit()
            except Exception as exc:
                # Some orders cannot be estimated; report and move on.
                # `Exception` (not a bare `except`) keeps Ctrl-C / kernel
                # interrupt working during this long-running search.
                print(f"Skipping ARIMA({cand_p}, {cand_d}, {cand_q}): {exc}")
                continue

            bic = model_fit.bic
            print(f"BIC({cand_p}, {cand_d}, {cand_q}) = {bic}")

            if bic < best_bic:
                best_bic = bic
                best_order = (cand_p, cand_d, cand_q)
                best_model = model_fit

if best_model is None:
    # No grid candidate could be fitted: fall back to the baseline so that
    # best_bic / best_order / best_model describe one and the same model.
    best_bic = baseline_fit.bic
    best_order = baseline_order
    best_model = baseline_fit

print(f"Best BIC: {best_bic}")
print(f"Best order: {best_order}")

# Model Summary
print(best_model.summary())





# Plot Residuals
# Diagnostics must describe the selected model: `best_model` is the lowest-BIC
# fit chosen by the grid search, whereas `model_fit` merely holds whichever
# candidate happened to be fitted last.
residuals = pd.DataFrame(best_model.resid)
plt.figure()
# Positional column access: the frame has exactly one column, whatever its label.
plt.plot(residuals.index.to_timestamp(), residuals.iloc[:, 0])
plt.title('Residuals of ARIMA Model')
plt.show()


# Forecasting: 12 steps beyond the end of the fitted sample
forecast = best_model.forecast(steps=12)
print('Forecast:', forecast)

# Evaluate Model
# You would typically have test data or perform cross-validation
# to evaluate your model's performance

