# Unemployment forecasting demo
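Fits an ARIMA(1,0,0) model to each country's unemployment-rate series and evaluates it on a small holdout (by default, the last 3 observations of each series), reporting the mean absolute error per country.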

In [None]:
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
# Load the labor panel sample; later cells read its 'country', 'year' and
# 'unemployment_rate' columns.
df = pd.read_csv(filepath_or_buffer='../data/raw/labor_panel_sample.csv')
# Preview the first five rows as the cell output.
df.head(n=5)

In [None]:
def fit_one_country(ts, train_tail=3):
    """Backtest an ARIMA(1,0,0) model on one country's unemployment series.

    Rows are ordered by ``year``; the last ``train_tail`` observations are
    held out, the model is fitted on everything before them, and the held-out
    window is forecast and scored with mean absolute error.

    Args:
        ts: DataFrame with ``year`` and ``unemployment_rate`` columns.
        train_tail: Number of trailing observations held out for evaluation.
            Despite its name this is the size of the *test* window, not of
            the training set. Must be at least 1.

    Returns:
        A ``(mae, fc, y_test)`` tuple -- the mean absolute error as a float,
        the forecast values, and the held-out actual values -- or ``None``
        when the series has ``train_tail + 3`` observations or fewer.

    Raises:
        ValueError: If ``train_tail`` is smaller than 1.
    """
    if train_tail < 1:
        # With 0, y[:-0] is empty and y[-0:] is the whole series; negative
        # values flip the split entirely. Fail loudly instead of fitting on
        # a nonsensical partition.
        raise ValueError(f'train_tail must be >= 1, got {train_tail!r}')

    y = ts.sort_values('year')['unemployment_rate'].to_numpy()
    if len(y) <= train_tail + 3:
        # Too few observations left for training once the holdout is removed.
        return None

    y_train, y_test = y[:-train_tail], y[-train_tail:]
    with warnings.catch_warnings():
        # Fitting/forecasting warnings are silenced on purpose so the demo
        # output stays readable.
        warnings.simplefilter('ignore')
        res = ARIMA(y_train, order=(1, 0, 0)).fit()
        fc = res.forecast(steps=train_tail)

    mae = float(np.mean(np.abs(np.asarray(fc) - y_test)))
    return mae, fc, y_test

# Backtest every country and rank the results by holdout MAE (best first).
rows = []
for country, g in df.groupby('country'):
    out = fit_one_country(g)
    if out is None:
        # fit_one_country returns None for series that are too short.
        continue
    # Only the error is needed for the ranking table.
    rows.append({'country': country, 'mae': out[0]})
# Explicit columns keep sort_values('mae') from raising KeyError when every
# country was skipped and `rows` is empty.
pd.DataFrame(rows, columns=['country', 'mae']).sort_values('mae')

In [None]:
# Plot the actual series against the holdout forecast for a single country.
country = 'United States'
g = df[df['country'] == country].sort_values('year')
out = fit_one_country(g)
if out is None:
    # Happens when the country is missing from the data or its series is too
    # short; unpacking None would otherwise fail with an opaque TypeError.
    raise ValueError(f'Not enough observations to backtest {country!r}')
mae, fc, y_test = out

years = g['year'].values
y = g['unemployment_rate'].values

plt.figure(figsize=(7, 4))
plt.plot(years, y, label='actual')
# The forecast covers the trailing holdout window, so align it with the
# last len(fc) years.
plt.plot(years[-len(fc):], fc, label='forecast')
plt.xlabel('Year')
plt.ylabel('Unemployment rate (%)')
plt.title(country)
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Display the holdout MAE as the cell output.
mae