# TSA Chapter 4: Box-Cox Transformation of Airline Data

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_ch4/TSA_ch4_boxcox_airline/TSA_ch4_boxcox_airline.ipynb)

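Box-Cox transformation applied to the Box-Jenkins airline passenger series (monthly, 1949-1960): the original data, the log and square-root transforms, the transform at the optimal lambda, a year-by-year standard-deviation comparison (original vs. log), and the profile log-likelihood over lambda.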

In [None]:
# Install the third-party packages this notebook uses (-q keeps pip's output quiet).
# The leading "!" is an IPython/Jupyter shell escape, so this line is not plain Python.
!pip install numpy pandas matplotlib scipy -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import boxcox, boxcox_llf
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Colour palette used by the charts, keyed by a short colour name.
COLORS = {
    'blue': '#1A3A6E',
    'red': '#DC3545',
    'green': '#2E7D32',
    'orange': '#E67E22',
    'gray': '#666666',
    'purple': '#8E44AD',
}

# Module-level shortcuts so plotting code can write BLUE instead of COLORS['blue'].
BLUE, RED, GREEN, ORANGE, GRAY, PURPLE = (
    COLORS[key] for key in ('blue', 'red', 'green', 'orange', 'gray', 'purple')
)

# Global matplotlib style for every figure created after this cell.
plt.rcParams.update({
    # No background fill on figures, axes, or saved files.
    'figure.facecolor': 'none',
    'axes.facecolor': 'none',
    'savefig.facecolor': 'none',
    'savefig.transparent': True,
    # Minimal frame: no top/right spines, no grid, thin axis lines.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    'axes.linewidth': 0.6,
    # Font sizes.
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
    # Resolution and default line width.
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
    # Legends are drawn without a box.
    'legend.facecolor': 'none',
    'legend.framealpha': 0,
    'legend.edgecolor': 'none',
})

def save_chart(fig, name):
    """Write ``fig`` to ``<name>.pdf`` and ``<name>.png``, then report it.

    Both files are saved with a tight bounding box, a transparent
    background and 150 dpi. ``name`` is used as given, so it may include
    a directory prefix but should not include a file extension.
    """
    for extension in ('pdf', 'png'):
        fig.savefig(f'{name}.{extension}', bbox_inches='tight', transparent=True, dpi=150)
    print(f'Saved: {name}')

In [None]:
# Airline passengers data (Box-Jenkins, 1949-1960): twelve monthly values per year.
_monthly_by_year = {
    1949: [112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118],
    1950: [115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140],
    1951: [145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166],
    1952: [171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194],
    1953: [196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201],
    1954: [204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229],
    1955: [242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278],
    1956: [284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306],
    1957: [315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336],
    1958: [340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337],
    1959: [360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405],
    1960: [417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432],
}

# Flatten the yearly rows, in chronological order, into one 1-D series.
airline = np.array(
    [value for year_values in _monthly_by_year.values() for value in year_values]
)
n = airline.size
# 1-based month index used as the x-axis of the time-series panels.
months = np.arange(n) + 1

fig, axes = plt.subplots(2, 3, figsize=(14, 7))

# Compute every transformed series up front, then draw them in one pass.
y_log = np.log(airline)       # Box-Cox with lambda = 0
y_sqrt = np.sqrt(airline)     # the lambda = 0.5 panel
# Called without a lambda, boxcox estimates it by maximum likelihood and
# returns it together with the transformed data.
y_bc, lam_opt = boxcox(airline)

# One row per time-series panel: (axes, series, colour, title, y-label).
_series_panels = [
    (axes[0, 0], airline, BLUE,
     r'Original Data ($\lambda = 1$)', 'Passengers (thousands)'),
    (axes[0, 1], y_log, GREEN,
     r'Logarithm ($\lambda = 0$)', r'$\ln(Y_t)$'),
    (axes[0, 2], y_sqrt, ORANGE,
     r'Square Root ($\lambda = 0.5$)', r'$\sqrt{Y_t}$'),
    (axes[1, 0], y_bc, RED,
     f'Optimal Box-Cox ($\\lambda = {lam_opt:.3f}$)', r'$Y_t^{(\lambda)}$'),
]
for _panel, _series, _color, _title, _ylabel in _series_panels:
    _panel.plot(months, _series, color=_color, linewidth=1.2)
    _panel.set_title(_title, fontweight='bold')
    _panel.set_ylabel(_ylabel)

# Only the optimal Box-Cox panel carries an x-axis label.
axes[1, 0].set_xlabel('Month (1949-1960)')

# Standard deviation within each year: original vs log.
# Each window selects one year's twelve consecutive monthly values.
_year_windows = [slice(12 * k, 12 * (k + 1)) for k in range(12)]
yearly_std_orig = [np.std(airline[window]) for window in _year_windows]
yearly_std_log = [np.std(y_log[window]) for window in _year_windows]
years = np.arange(1949, 1961)

# The two series live on very different scales, so each gets its own y-axis;
# the bars are offset left/right of the year so they sit side by side.
_bar_style = {'width': 0.35, 'alpha': 0.7}
ax4 = axes[1, 1]
ax4.bar(years - 0.2, yearly_std_orig, color=BLUE, label='Original', **_bar_style)
ax4_twin = ax4.twinx()
ax4_twin.bar(years + 0.2, yearly_std_log, color=GREEN, label='Log', **_bar_style)

ax4.set_title('Standard Deviation by Year', fontweight='bold')
ax4.set_xlabel('Year')
ax4.set_ylabel('Std (original)', color=BLUE)
ax4_twin.set_ylabel('Std (log)', color=GREEN)
ax4.tick_params(axis='x', rotation=45)
ax4_twin.spines['top'].set_visible(False)

# Merge the legend entries of both y-axes into one legend below the panel.
_handles, _labels = [], []
for _axis in (ax4, ax4_twin):
    _axis_handles, _axis_labels = _axis.get_legend_handles_labels()
    _handles = _handles + _axis_handles
    _labels = _labels + _axis_labels
ax4.legend(
    _handles,
    _labels,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.22),
    ncol=2,
    frameon=False,
)

# Profile log-likelihood: Box-Cox log-likelihood evaluated on a grid of lambda values.
lambdas = np.linspace(-1, 2, 200)
llf = [boxcox_llf(candidate, airline) for candidate in lambdas]

ax_llf = axes[1, 2]
ax_llf.plot(lambdas, llf, color=BLUE, linewidth=1.5)

# Vertical markers: the estimated optimum and the log transform (lambda = 0).
ax_llf.axvline(
    lam_opt,
    color=RED,
    linestyle='--',
    linewidth=1.2,
    label=f'$\\lambda^* = {lam_opt:.3f}$',
)
ax_llf.axvline(
    0,
    color=GREEN,
    linestyle=':',
    linewidth=1.0,
    label=r'$\lambda = 0$ (log)',
)

ax_llf.set_title('Profile Log-Likelihood', fontweight='bold')
ax_llf.set_xlabel(r'$\lambda$')
ax_llf.set_ylabel('Log-Likelihood')
ax_llf.legend(
    loc='upper center',
    bbox_to_anchor=(0.5, -0.22),
    ncol=2,
    frameon=False,
)

# Finalize the layout, write the PDF/PNG files, then display the figure.
plt.tight_layout()
save_chart(fig, 'ch4_boxcox_airline')
plt.show()