Step 1: Mount Drive and Import Libraries

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at the mount point below.
mount_point = '/content/drive'
drive.mount(mount_point)

In [None]:
# Data handling and numerics
import pandas as pd
import numpy as np
# Plotting (NOTE(review): seaborn is not referenced in the cells visible here)
import matplotlib.pyplot as plt
import seaborn as sns
# Time-series tooling: seasonal decomposition and the ARIMA estimator
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
# Error metric used to score forecasts
from sklearn.metrics import mean_squared_error
# Silence every warning for the rest of the session; this also hides
# any convergence or frequency warnings raised while fitting models.
import warnings
warnings.filterwarnings('ignore')

Step 2: Load Dataset

In [None]:
# Read the raw CSV first and keep it untouched.
raw_df = pd.read_csv('/content/airline-passengers.csv')

# Parse 'Month' into datetimes and promote it to the index in one chained,
# non-mutating expression so the cell can be re-run safely.
df = (
    raw_df
    .assign(Month=lambda frame: pd.to_datetime(frame['Month']))
    .set_index('Month')
)

Step 3: Explore Dataset

In [None]:
# Basic exploration: dimensions, column dtypes / non-null counts, summary stats
print(df.shape)
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
df.info()
print(df.describe())

# Plot the whole series on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df, color='blue')
ax.set_title('Monthly Airline Passengers')
ax.set_xlabel('Year')
ax.set_ylabel('Passengers')
plt.show()

Step 4: Check for Missing Values

In [None]:
# Count null entries per column and report them.
missing_per_column = df.isna().sum()
print("Missing values:\n", missing_per_column)

Step 5: Decompose Time Series

In [None]:
# Decompose the passenger counts with a multiplicative model and plot the
# components that statsmodels returns.
passenger_series = df['Passengers']
decomposition = seasonal_decompose(passenger_series, model='multiplicative')
decomposition_fig = decomposition.plot()
plt.show()

Step 6: Train-Test Split

In [None]:
# Chronological 80/20 split: the first 80% of rows train the model,
# the remaining rows are held out for evaluation.
train_size = int(0.8 * len(df))
train = df.iloc[:train_size]
test = df.iloc[train_size:]
print("Train shape:", train.shape)
print("Test shape:", test.shape)

Step 7: Fit ARIMA Model

In [None]:
# Fit ARIMA model (p,d,q)
model = ARIMA(train['Passengers'], order=(2,1,2))
model_fit = model.fit()
print(model_fit.summary())

Step 8: Make Predictions

In [None]:
# Forecast one value for every held-out observation.
forecast_values = model_fit.forecast(steps=len(test))

# Attach the test dates POSITIONALLY. Passing an existing Series together with
# index= makes pandas re-align by label, which silently yields NaN wherever the
# forecast's own index does not match test.index exactly.
forecast = pd.Series(np.asarray(forecast_values), index=test.index, name='Forecast')

# Overlay training data, held-out actuals and the forecast.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train['Passengers'], label='Train')
ax.plot(test['Passengers'], label='Test')
ax.plot(forecast, label='Forecast', color='red')
ax.set_title('ARIMA Forecast vs Actual')
ax.set_xlabel('Year')
ax.set_ylabel('Passengers')
ax.legend()
plt.show()

Step 9: Evaluate Model

In [None]:
# Score the forecast against the held-out actuals.
actual_passengers = test['Passengers']
mse = mean_squared_error(actual_passengers, forecast)
rmse = np.sqrt(mse)  # RMSE is in the same units as the passenger counts

print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")

Step 10: Forecast Future

In [None]:
# model_fit was estimated on the training slice only, so forecasting from it
# would predict the first months of the test period, not the months after the
# end of the data. Refit the same specification on the full series so the
# forecast genuinely starts after the last observation.
future_steps = 12
full_model_fit = ARIMA(df['Passengers'], order=(2, 1, 2)).fit()  # keep order in sync with the Step 7 model
future_forecast = full_model_fit.forecast(steps=future_steps)
print("Next 12 months forecast:\n", future_forecast)

# Plot the observed history followed by the out-of-sample forecast.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Passengers'], label='Actual')
ax.plot(future_forecast.index, future_forecast, label='Future Forecast', color='green')
ax.set_title('Airline Passengers: 12-Month Forecast')
ax.set_xlabel('Year')
ax.set_ylabel('Passengers')
ax.legend()
plt.show()


Step 11: Evaluate Forecast Accuracy

In [None]:
# mean_squared_error and numpy (np) are already imported in the top import cell.

# Accuracy can only be measured where actual values exist, and the forecast
# must cover the SAME months as those actuals. Score the train-only model
# (model_fit) against the matching head of the held-out test set instead of
# pairing a forecast with the unrelated last rows of the full series.
forecast_steps = min(len(future_forecast), len(test))
holdout_forecast = model_fit.forecast(steps=forecast_steps)
holdout_actual = test['Passengers'].iloc[:forecast_steps]

# np.asarray drops the forecast's index so values are paired purely by position.
mse = mean_squared_error(holdout_actual, np.asarray(holdout_forecast))
rmse = np.sqrt(mse)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")


Step 12: Save the Model

In [None]:
import joblib

# Persist the fitted ARIMA results object to a pickle in the working directory.
model_path = 'airline_arima_model.pkl'
joblib.dump(model_fit, model_path)
print("Model saved as airline_arima_model.pkl")

Step 13: Load and Use Saved Model

In [None]:
# Load the saved model
loaded_model = joblib.load('airline_arima_model.pkl')

# Forecast next 12 months using loaded model
future_forecast_loaded = loaded_model.forecast(steps=12)
print(future_forecast_loaded)

# Step 14: Interactive Visualization

In [None]:
import plotly.express as px

# Base interactive line chart of the observed series (x comes from the index).
fig = px.line(df, y='Passengers', title='Airline Passengers Interactive')

# Overlay the forecast as an additional line trace.
forecast_trace = dict(
    x=future_forecast.index,
    y=future_forecast,
    mode='lines',
    name='Forecast',
)
fig.add_scatter(**forecast_trace)
fig.show()
