<img src="../assets/ittc_logo_full.png" height=150>

# Lecture 7: Time Series Analysis

## In this Practical

In this practical you will:

1. Execute code chunks to fit a complex SARIMA model to CO₂ data


# Step 1: Load and Inspect the CO₂ Data
This step loads the weekly atmospheric CO₂ data from the `statsmodels` package, fills missing values, and prepares the dataset for time series analysis.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.datasets import co2
import warnings
# Suppress all warnings so the cell outputs below stay uncluttered.
warnings.filterwarnings("ignore")

# Fetch the CO₂ dataset bundled with statsmodels. The 'co2' column keeps its
# original name and missing values are left as-is at this point.
co2_dataset = co2.load_pandas()
data = co2_dataset.data
data.head()


In [None]:
# Fill the gaps in the 'co2' column by quadratic interpolation, write the
# result back into the frame, and preview the first rows.
co2_filled = data['co2'].interpolate(method='quadratic')
data['co2'] = co2_filled
data.head()


# Step 2: Visualize the Raw CO₂ Data
Plot the full CO₂ time series to explore patterns, trends, and any noticeable anomalies.

In [None]:
# Draw the full CO₂ series on one wide axes, with a grid for easier reading.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(data.index, data['co2'], label='co2')
ax.set_title("Atmospheric co2 Concentration Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("co2 (ppm)")
ax.grid(True)
fig.tight_layout()
plt.show()


# Step 3: Add a Moving Average Trend Smoother
Overlay a 52-week moving average on the raw CO₂ data to better visualize the long-term trend.

In [None]:
# Compute the 52-observation rolling mean once, then overlay it on the raw
# series so the long-term trend stands out against the seasonal swings.
co2_values = data['co2']
rolling_mean = co2_values.rolling(window=52).mean()

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(x=data.index, y=co2_values, label='co2', ax=ax)
sns.lineplot(x=data.index, y=rolling_mean, label='52-week Moving Average', ax=ax)
ax.set_title("co2 with Trend Smoother")
ax.set_xlabel("Date")
ax.set_ylabel("co2 (ppm)")
ax.legend()
fig.tight_layout()
plt.show()


# Step 4: Decompose the Time Series
Use seasonal decomposition to break the CO₂ series into trend, seasonal, and residual components. This helps assess both trend and periodicity.

In [None]:
# Additive decomposition of the 'co2' column from 1990 onward, using a
# seasonal period of 52 observations; the result is plotted as one figure.
co2_clean = data.loc['1990':, 'co2']
decomposition = seasonal_decompose(co2_clean, model='additive', period=52)
decomposition_fig = decomposition.plot()
decomposition_fig.tight_layout()
plt.show()


# Step 5: Plot the ACF and PACF
Generate the autocorrelation function (ACF) and partial autocorrelation function (PACF) plots of the first-differenced series to help identify suitable orders for the SARIMA model.

In [None]:
# First-difference the 'co2' column from 1990 onward and drop the leading NaN
# that differencing produces.
diff_co2 = data.loc['1990':, 'co2'].diff().dropna()

# Draw the ACF and then the PACF, each with 40 lags and its own title.
correlograms = (
    (plot_acf, "ACF of Differenced CO₂"),
    (plot_pacf, "PACF of Differenced CO₂"),
)
for draw_correlogram, heading in correlograms:
    draw_correlogram(diff_co2, lags=40)
    plt.title(heading)
    plt.tight_layout()
    plt.show()


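# Step 6: Fit a Seasonal ARIMA Model to the Data
Fit a SARIMA(0,1,1)(0,1,1) model with a 52-week seasonal period to the interpolated CO₂ series from 1990 onward.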

In [None]:
# Model the 'co2' column from 1990 onward.
co2_series = data.loc['1990':, 'co2']

# Non-seasonal order (p, d, q) and seasonal order (P, D, Q, s) with s = 52.
nonseasonal_order = (0, 1, 1)
seasonal_order_52 = (0, 1, 1, 52)

seasonal_model = SARIMAX(
    co2_series,
    order=nonseasonal_order,
    seasonal_order=seasonal_order_52,
)
seasonal_result = seasonal_model.fit()


In [None]:
# Display the fitted model's summary; as the final bare expression of the
# cell, its value is what the notebook renders.
seasonal_result.summary()


# Step 7: Forecast the Next Two Years
Use the SARIMA model to forecast CO₂ levels for the next 104 weeks, showing a 90% prediction interval.

In [None]:
# Forecast the next two years of weekly observations from the fitted model
# and plot them, with a 90% interval, after the observed series.
n_steps = 104  # 2 years of weekly data
forecast_result = seasonal_result.get_forecast(steps=n_steps)
forecast_mean = forecast_result.predicted_mean
forecast_ci = forecast_result.conf_int(alpha=0.10)  # alpha=0.10 -> 90% interval

# Take the forecast dates from the forecast itself. A hand-built
# pd.date_range(..., freq='W') is anchored on Sundays and can fall on
# different days than the series' own weekly index, which shifts the plotted
# forecast relative to the values it belongs to.
last_date = co2_series.index[-1]  # last observed timestamp
forecast_dates = forecast_mean.index

# Plot forecast
plt.figure(figsize=(12, 6))
plt.plot(co2_series.index, co2_series, label='Original')
plt.plot(forecast_dates, forecast_mean, label='Forecast', linestyle='--')
# Column 0 is the lower bound and column 1 the upper bound of the interval.
plt.fill_between(forecast_dates, forecast_ci.iloc[:, 0], forecast_ci.iloc[:, 1],
                 color='gray', alpha=0.3, label='90% Prediction Interval')
plt.title("Forecast of CO₂ for Next 2 Years (SARIMA)")
plt.xlabel("Date")
plt.ylabel("CO₂ (ppm)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
