# Dynamic Discrete Choice Models: Solutions

**Tutorial Series**: Discrete Choice Econometrics with PanelBox

**Notebook**: 08 - Dynamic Discrete Choice (Solutions)

**Author**: PanelBox Contributors

**Date**: 2026-02-17

---

This notebook contains complete solutions for the exercises in `08_dynamic_discrete.ipynb`.

In [None]:
# Setup (same as main notebook)
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, chi2
from scipy.optimize import minimize
import statsmodels.api as sm

from panelbox.models.discrete.dynamic import DynamicBinaryPanel

# Notebook-wide configuration: silence warnings, fix the RNG seed, and set
# the pandas display options.
warnings.filterwarnings('ignore')
np.random.seed(42)
for _option, _value in (('display.max_columns', None), ('display.precision', 4)):
    pd.set_option(_option, _value)

# Plot defaults shared by every figure in the notebook.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({'figure.figsize': (12, 6), 'font.size': 11})

# Input/output locations, relative to the notebook's parent directory.
DATA_DIR = Path("..") / "data"
OUTPUT_DIR = Path("..") / "outputs"
FIG_DIR, TABLE_DIR = OUTPUT_DIR / "figures", OUTPUT_DIR / "tables"
for _directory in (FIG_DIR, TABLE_DIR):
    _directory.mkdir(parents=True, exist_ok=True)

# Load the panel, ordered by person and year so that shift(1) is the
# previous period of the same person.
data = pd.read_csv(DATA_DIR / "labor_dynamics.csv").sort_values(['id', 'year'])

exog_vars = ['age', 'educ', 'kids', 'married']
mean_vars = ['age', 'kids', 'married']

# Dynamic-model regressors: lagged outcome, first observed outcome, and
# within-person means of the variables listed in mean_vars.
data['emp_lag'] = data.groupby('id')['employed'].shift(1)
data['emp_init'] = data.groupby('id')['employed'].transform('first')
data = data.assign(**{
    f'{col}_mean': data.groupby('id')[col].transform('mean')
    for col in mean_vars
})

# Rows without a lag (each person's first row) cannot enter the dynamic model.
data_dyn = data[data['emp_lag'].notna()].copy()

# Estimate base models (needed for exercises)
wooldridge_vars = [
    *exog_vars,
    'emp_lag',
    'emp_init',
    *(f'{v}_mean' for v in mean_vars),
]
X_wool = sm.add_constant(data_dyn[wooldridge_vars])
_wooldridge_model = sm.Probit(data_dyn['employed'], X_wool)
wooldridge_pooled = _wooldridge_model.fit(method='bfgs', disp=0)

print("Setup complete.")
print(f"Dynamic dataset: {len(data_dyn)} obs, {data_dyn['id'].nunique()} individuals")
print(f"Wooldridge gamma: {wooldridge_pooled.params['emp_lag']:.4f}")

---

## Exercise 1: Data Preparation (Easy)

**Task**: Starting from raw panel data, create the dynamic dataset: add lag of y, initial value $y_{i,0}$, and time means of X. Verify dimensions.

In [None]:
# Exercise 1 Solution

# Step 1: Reload the raw panel, order it by person and year, then lag employment
ex1_data = pd.read_csv(DATA_DIR / "labor_dynamics.csv").sort_values(['id', 'year'])
ex1_data['emp_lag'] = ex1_data.groupby('id')['employed'].shift(1)
missing_lags = ex1_data['emp_lag'].isna().sum()
print("Step 1: Created lagged employment")
print(f"  NaN count: {missing_lags} (should be {ex1_data['id'].nunique()})")

# Step 2: Initial value (first observed employment status of each person)
ex1_data['emp_init'] = ex1_data.groupby('id')['employed'].transform('first')
print("\nStep 2: Initial employment")
print(f"  P(emp_init=1): {ex1_data['emp_init'].mean():.3f}")

# Step 3: Within-person time means (for time-varying variables)
time_varying = ['age', 'kids', 'married', 'husbinc']
ex1_data = ex1_data.assign(**{
    f'{col}_mean': ex1_data.groupby('id')[col].transform('mean')
    for col in time_varying
})
print(f"\nStep 3: Time means computed for: {time_varying}")

# Step 4: Keep only rows that have a lag (drops each person's first period)
ex1_dyn = ex1_data[ex1_data['emp_lag'].notna()].copy()
print("\nStep 4: Dropped first period")
print(f"  Before: {len(ex1_data)} obs")
print(f"  After:  {len(ex1_dyn)} obs")

# Step 5: Verify dimensions; the expected count is N x (T - 1), which
# presumes every person is observed in every year.
n_individuals = ex1_data['id'].nunique()
n_periods = ex1_data['year'].nunique()
expected_n = n_individuals * (n_periods - 1)
print("\nStep 5: Verification")
print(f"  Expected: {n_individuals} x {n_periods - 1} = {expected_n}")
print(f"  Got: {len(ex1_dyn)}")
print(f"  Match: {len(ex1_dyn) == expected_n}")

In [None]:
# Answer questions
print("=== Answers ===")

# Q1: How many observations lost?
lost = len(ex1_data) - len(ex1_dyn)
print(f"\n1. Observations lost: {lost} (one per individual = {n_individuals})")

# Q2: Correlation between emp_init and mean employment (one value per person)
grouped_by_id = ex1_data.groupby('id')
emp_mean_by_id = grouped_by_id['employed'].mean()
emp_init_by_id = grouped_by_id['emp_init'].first()
corr = emp_mean_by_id.corr(emp_init_by_id)
print(f"\n2. Corr(emp_init, mean_employed): {corr:.3f}")
print(
    "   This shows that initial employment is positively correlated",
    "   with overall employment history, confirming the initial",
    "   conditions problem: emp_init is not random.",
    sep="\n",
)

# Q3: Why only time-varying variables?
print(
    "\n3. We only include time-varying variables in Mundlak means because:",
    "   - Time-invariant variables (e.g., educ) have X_mean = X for all t",
    "   - Including their mean would create perfect collinearity",
    "   - The Mundlak device aims to capture within-individual variation",
    "     that correlates with the unobserved effect",
    sep="\n",
)

---

## Exercise 2: Naive vs Wooldridge (Medium)

**Task**: Estimate a dynamic Probit that ignores initial conditions (naive) and one that uses the Wooldridge approach. Compare the $\gamma$ estimates.

In [None]:
# Exercise 2 Solution

# Step 1: Naive dynamic probit. The regressors are the exogenous variables
# plus the lagged outcome; emp_init and the time means are left out.
naive_regressors = [*exog_vars, 'emp_lag']
X_naive = sm.add_constant(data_dyn[naive_regressors])
naive_probit = sm.Probit(data_dyn['employed'], X_naive).fit(method='bfgs', disp=0)

print("=" * 60, "   NAIVE DYNAMIC PROBIT", "=" * 60, sep="\n")
print(naive_probit.summary())

In [None]:
# Step 2: Wooldridge probit. The model was fitted in the setup cell, so this
# cell only prints its summary under a banner.
print("=" * 60, "   WOOLDRIDGE DYNAMIC PROBIT", "=" * 60, sep="\n")
print(wooldridge_pooled.summary())

In [None]:
# Step 3: Compare gamma
# The "bias" is the gap between the two fitted lag coefficients, with the
# Wooldridge estimate used as the reference value.
gamma_naive = naive_probit.params['emp_lag']
gamma_wool = wooldridge_pooled.params['emp_lag']
bias = gamma_naive - gamma_wool

print("=== Comparison ===")
print(f"\n{'':20s} {'Naive':>12s} {'Wooldridge':>12s}")
print("-" * 48)

# Only regressors present in both models can be shown side by side.
for var in exog_vars + ['emp_lag']:
    b_n = naive_probit.params[var]
    b_w = wooldridge_pooled.params[var]
    print(f"{var:<20s} {b_n:>12.4f} {b_w:>12.4f}")

print(f"\n{'Log-likelihood':<20s} {naive_probit.llf:>12.2f} {wooldridge_pooled.llf:>12.2f}")
print(f"{'AIC':<20s} {naive_probit.aic:>12.2f} {wooldridge_pooled.aic:>12.2f}")

# Step 4: Compute bias
print("\n=== Bias Analysis ===")
print(f"gamma_naive:      {gamma_naive:.4f}")
print(f"gamma_Wooldridge: {gamma_wool:.4f}")
print(f"Bias (naive - Wooldridge): {bias:+.4f}")
print(f"Relative bias: {bias/gamma_wool:+.1%}")

print("\n=== Answers ===")
print(f"1. Direction: The naive estimate is biased {'upward' if bias > 0 else 'downward'}.")
print(f"   gamma_naive = {gamma_naive:.4f} vs gamma_Wooldridge = {gamma_wool:.4f}")
print("")
print("2. Why upward bias: Ignoring initial conditions means the model")
print("   attributes part of the unobserved heterogeneity (alpha_i) to the")
print("   lagged dependent variable. Since Cov(y_{i,t-1}, alpha_i) > 0,")
print("   the omitted variable bias pushes gamma upward.")
print("")
print("3. Is delta_1 (emp_init) significant?")
delta1 = wooldridge_pooled.params['emp_init']
se_delta1 = wooldridge_pooled.bse['emp_init']
z_delta1 = delta1 / se_delta1
# Two-sided normal p-value. The survival function is used instead of
# 1 - cdf because the subtraction cancels to exactly 0.0 for large |z|.
p_delta1 = 2 * norm.sf(abs(z_delta1))
print(f"   delta_1 = {delta1:.4f}, SE = {se_delta1:.4f}, z = {z_delta1:.2f}, p = {p_delta1:.6f}")
print(f"   {'Yes, significant' if p_delta1 < 0.05 else 'Not significant'} at 5%.")
print("   This confirms initial conditions are correlated with heterogeneity.")

---

## Exercise 3: Persistence Decomposition (Medium)

**Task**: Using estimated parameters, decompose total persistence into state dependence and heterogeneity.

In [None]:
# Exercise 3 Solution

def simulate_persistence(n=1000, T=15, gamma=0.5, sigma_u=0.8, beta_x=0.3):
    """Simulate a dynamic binary panel and return its first-order autocorrelation.

    Each of the ``n`` individuals gets a normal random effect with standard
    deviation ``sigma_u`` and one time-constant normal covariate. The outcome
    in each of the ``T`` periods is 1 when a unit-variance normal draw centred
    on ``-0.3 + beta_x * x + gamma * y_prev + alpha_i`` is positive. The
    pre-sample state is drawn from the same rule without the intercept and
    lag terms.

    Returns the correlation between the outcome and its one-period lag,
    pooled over individuals and periods. All draws come from NumPy's global
    random state.
    """
    effects = np.random.normal(0, sigma_u, n)
    outcomes = np.zeros((n, T))

    for person, effect in enumerate(effects):
        covariate = np.random.normal(0, 1)
        # Pre-sample state that seeds the lag for the first simulated period.
        state = int(np.random.normal(beta_x * covariate + effect, 1) > 0)
        for period in range(T):
            index = -0.3 + beta_x * covariate + gamma * state + effect
            state = int(np.random.normal(index, 1) > 0)
            outcomes[person, period] = state

    # Pair every period-t outcome with the same person's outcome at t-1.
    current = outcomes[:, 1:].ravel()
    lagged = outcomes[:, :-1].ravel()
    return np.corrcoef(current, lagged)[0, 1]

# Decomposition for different parameter combinations
np.random.seed(42)

print("=== Persistence Decomposition Across Parameter Values ===")
print(f"\n{'gamma':>6s} {'sigma_u':>8s} {'Total':>8s} {'SD Only':>8s} {'Het Only':>8s} {'SD Share':>9s} {'Het Share':>10s}")
print("-" * 70)

# Every (gamma, sigma_u) pair, with gamma varying slowest.
param_grid = [(g, s) for g in [0.2, 0.5, 0.8] for s in [0.3, 0.8, 1.5]]

results_list = []
for gamma_val, sigma_val in param_grid:
    # The three simulations share the global RNG stream, so their call
    # order is part of the result.
    total = simulate_persistence(gamma=gamma_val, sigma_u=sigma_val)
    sd_only = simulate_persistence(gamma=gamma_val, sigma_u=0.0)
    het_only = simulate_persistence(gamma=0.0, sigma_u=sigma_val)

    # Shares are reported as 0 when total persistence is not positive.
    if total > 0:
        sd_share = sd_only / total
        het_share = het_only / total
    else:
        sd_share = het_share = 0

    results_list.append({
        'gamma': gamma_val, 'sigma_u': sigma_val,
        'total': total, 'sd_only': sd_only, 'het_only': het_only,
        'sd_share': sd_share, 'het_share': het_share
    })

    print(f"{gamma_val:>6.1f} {sigma_val:>8.1f} {total:>8.3f} {sd_only:>8.3f} {het_only:>8.3f} {sd_share:>8.0%} {het_share:>9.0%}")

In [None]:
# Visualization: heatmap of SD share
# Reshape the long results into gamma-by-sigma_u grids. seaborn draws the
# first index row at the top, so gamma increases downwards and sigma_u
# increases to the right.
results_df = pd.DataFrame(results_list)
pivot_sd = results_df.pivot(index='gamma', columns='sigma_u', values='sd_share')
pivot_total = results_df.pivot(index='gamma', columns='sigma_u', values='total')

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Panel A: State dependence share
sns.heatmap(pivot_sd, annot=True, fmt='.0%', cmap='Blues', ax=axes[0],
            linewidths=2, vmin=0, vmax=1)
axes[0].set_title('State Dependence Share of Total Persistence', fontweight='bold')
axes[0].set_xlabel('$\\sigma_u$ (heterogeneity)')
axes[0].set_ylabel('$\\gamma$ (state dependence)')

# Panel B: Total persistence
sns.heatmap(pivot_total, annot=True, fmt='.3f', cmap='YlOrRd', ax=axes[1],
            linewidths=2)
axes[1].set_title('Total Persistence (Autocorrelation)', fontweight='bold')
axes[1].set_xlabel('$\\sigma_u$ (heterogeneity)')
axes[1].set_ylabel('$\\gamma$ (state dependence)')

plt.suptitle('Persistence Decomposition Across Parameter Space',
             fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()

# Heatmap corners: high gamma with low sigma_u is the bottom-left cell,
# and low gamma with high sigma_u is the top-right cell.
print("=== Policy Implications ===")
print("\nWhen state dependence share is HIGH (bottom-left of heatmap):")
print("  -> Temporary employment programs have lasting effects")
print("  -> Getting someone into a job creates momentum")
print("\nWhen heterogeneity share is HIGH (top-right of heatmap):")
print("  -> Need to target specific groups with persistent barriers")
print("  -> Temporary programs won't help in the long run")
print("  -> Focus on structural interventions (education, training)")

---

## Exercise 4: Counterfactual Simulation (Hard)

**Task**: Simulate employment trajectories for a job-loss scenario. Compare recovery under different levels of state dependence.

In [None]:
# Exercise 4 Solution

# Simulation size; the seed makes the cell reproducible.
np.random.seed(42)
n_cf, n_periods_cf = 500, 20

# Pieces of the linear index, taken from the fitted Wooldridge probit and
# evaluated at the sample means of the exogenous variables.
gamma_est = wooldridge_pooled.params.loc['emp_lag']
intercept = wooldridge_pooled.params.loc['const']
beta_dict = {name: wooldridge_pooled.params.loc[name] for name in exog_vars}
X_mean = data_dyn.loc[:, exog_vars].mean()

def simulate_counterfactual(gamma, sigma_u=0.8, shock_period=5, *,
                            n_sim=None, horizon=None, base_index=None):
    """Simulate mean employment paths with and without a forced job loss.

    Everyone starts employed. In each period the outcome is 1 when
    ``base_index + alpha_i + gamma * y_prev + eps_it > 0``. The shock
    scenario instead forces the outcome to 0 in period ``shock_period``
    (1-based).

    Both scenarios use the same ``alpha_i`` and the same ``eps_it`` (common
    random numbers). The two paths therefore coincide before the shock, and
    any later gap comes from the forced job loss alone rather than from
    independent simulation noise.

    Parameters
    ----------
    gamma : float
        Coefficient on the lagged outcome.
    sigma_u : float
        Standard deviation of the individual effect ``alpha_i``.
    shock_period : int
        1-based period in which the shock scenario is forced to 0.
    n_sim : int, optional
        Number of simulated individuals; defaults to the notebook global
        ``n_cf``.
    horizon : int, optional
        Number of simulated periods; defaults to the notebook global
        ``n_periods_cf``.
    base_index : float, optional
        Time-invariant part of the index before ``alpha_i`` is added.
        Defaults to ``intercept`` plus the ``beta_dict`` coefficients times
        the matching ``X_mean`` entries (notebook globals).

    Returns
    -------
    tuple of numpy.ndarray
        ``(baseline_mean, shock_mean)``, each of length ``horizon``, holding
        the per-period employment rate across simulated individuals.

    Notes
    -----
    Draws come from NumPy's global random state, so seed it before the call
    for reproducible output.
    """
    n_sim = n_cf if n_sim is None else n_sim
    horizon = n_periods_cf if horizon is None else horizon
    if base_index is None:
        base_index = intercept + sum(beta_dict[v] * X_mean[v] for v in beta_dict)

    alpha = np.random.normal(0, sigma_u, n_sim)
    eps = np.random.normal(0, 1, (n_sim, horizon))  # shared by both scenarios
    index = base_index + alpha  # constant over time, so built once

    traj_base = np.zeros((n_sim, horizon))
    traj_shock = np.zeros((n_sim, horizon))
    y_b = np.ones(n_sim)  # Both scenarios start employed
    y_s = np.ones(n_sim)

    for t in range(horizon):
        # Baseline
        y_b = (index + gamma * y_b + eps[:, t] > 0).astype(float)

        # Shock: force y=0 at shock_period (period numbers are 1-based)
        if t == shock_period - 1:
            y_s = np.zeros(n_sim)
        else:
            y_s = (index + gamma * y_s + eps[:, t] > 0).astype(float)

        traj_base[:, t] = y_b
        traj_shock[:, t] = y_s

    return traj_base.mean(axis=0), traj_shock.mean(axis=0)

# Compare different gamma values, one panel per value
gamma_values = [0.3, 0.8, 1.5]
periods = np.arange(1, n_periods_cf + 1)
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

print("=== Counterfactual: Job Loss at t=5 ===")
print(f"\n{'gamma':>6s} {'Gap at t=6':>12s} {'Gap at t=10':>12s} {'Recovery t':>12s}")
print("-" * 48)

for ax, gamma_cf in zip(axes, gamma_values):
    np.random.seed(42)  # Same random draws for comparability
    mean_base, mean_shock = simulate_counterfactual(gamma=gamma_cf)
    gap = mean_base - mean_shock

    # Both trajectories, with the area between them shaded
    ax.plot(periods, mean_base, 'b-o', linewidth=2, markersize=4, label='Baseline')
    ax.plot(periods, mean_shock, 'r-s', linewidth=2, markersize=4, label='Job loss at t=5')
    ax.fill_between(periods, mean_base, mean_shock, alpha=0.15, color='red')
    ax.axvline(x=5, color='gray', linestyle='--', alpha=0.7)

    # Panel cosmetics
    ax.set_title(f'$\\gamma = {gamma_cf}$', fontweight='bold')
    ax.set_xlabel('Period')
    ax.set_ylabel('Mean Employment Rate')
    ax.set_ylim(0, 1)
    ax.grid(True, alpha=0.3)
    ax.legend(loc='lower right', fontsize=9)

    # Recovery time: first period after the shock whose gap is below 0.05.
    # Array position k holds period k + 1, so gap[5:] starts at period 6.
    closed = np.flatnonzero(gap[5:] < 0.05)
    if len(closed) > 0:
        recovery_t = closed[0] + 6
    else:
        recovery_t = '>20'

    print(f"{gamma_cf:>6.1f} {gap[5]:>12.3f} {gap[9]:>12.3f} {str(recovery_t):>12s}")

plt.suptitle('Counterfactual Analysis: How Long Does Job Loss Effect Last?',
             fontsize=15, fontweight='bold', y=1.03)
plt.tight_layout()
plt.show()

In [None]:
# Answers (written out as a single block of text, one entry per output line)
print(
    "=== Answers ===",
    "",
    "1. Does the gap ever fully close?",
    "   For small gamma (0.3), the gap closes relatively quickly because",
    "   state dependence is weak. For large gamma (1.5), the gap persists",
    "   much longer because past employment strongly determines future.",
    "   With heterogeneity (sigma_u > 0), the gap may never fully close",
    "   because some individuals drawn with low alpha_i may never recover.",
    "",
    "2. Recovery time vs gamma:",
    "   Higher gamma -> longer recovery. With gamma=1.5, the effect of",
    "   job loss can persist for 10+ periods. This is because each period",
    "   of non-employment feeds back into the next period.",
    "",
    "3. Policy implications:",
    "   - If gamma is large, temporary job programs are very valuable",
    "     because getting someone back to work creates lasting momentum.",
    "   - Early intervention matters: the longer someone is unemployed,",
    "     the harder it is to recover (vicious cycle).",
    "   - Programs should focus on preventing job loss (retention support)",
    "     as much as re-employment assistance.",
    sep="\n",
)

---

## Exercise 5: Testing State Dependence (Hard)

**Task**: Formally test $H_0: \gamma = 0$ using both Wald and LR tests.

In [None]:
# Exercise 5 Solution

# Step 1: Wald test for gamma = 0
# z is the lag coefficient divided by its standard error, both taken from
# the Wooldridge probit fitted in the setup cell.
gamma_hat = wooldridge_pooled.params['emp_lag']
se_gamma = wooldridge_pooled.bse['emp_lag']
z_stat = gamma_hat / se_gamma
# Two-sided normal p-value. The survival function is used instead of
# 1 - cdf because the subtraction cancels to exactly 0.0 for large |z|.
p_wald = 2 * norm.sf(abs(z_stat))

print("=== Wald Test for State Dependence ===")
print("\nH0: gamma = 0 (no true state dependence)")
print("H1: gamma != 0")
print(f"\ngamma_hat  = {gamma_hat:.4f}")
print(f"SE(gamma)  = {se_gamma:.4f}")
print(f"z-stat     = {z_stat:.4f}")
print(f"p-value    = {p_wald:.10f}")
print(f"\nConclusion: {'Reject H0' if p_wald < 0.05 else 'Fail to reject H0'} at 5%")

In [None]:
# Step 2: LR test
# Restricted model: Wooldridge without emp_lag (static CRE probit). It is
# fitted on the same rows as the unrestricted model, so the two
# log-likelihoods are comparable.
restricted_vars = exog_vars + ['emp_init'] + [f'{v}_mean' for v in mean_vars]
X_restricted = sm.add_constant(data_dyn[restricted_vars])
restricted_probit = sm.Probit(data_dyn['employed'], X_restricted).fit(method='bfgs', disp=0)

# LR statistic
lr_stat = -2 * (restricted_probit.llf - wooldridge_pooled.llf)
# 1 restriction (gamma=0). The survival function avoids the loss of
# precision of 1 - cdf when the statistic is large.
p_lr = chi2.sf(lr_stat, 1)

print("=== Likelihood Ratio Test ===")
print("\nH0: gamma = 0")
print(f"\nLog-L (restricted, no emp_lag): {restricted_probit.llf:.4f}")
print(f"Log-L (unrestricted, with emp_lag): {wooldridge_pooled.llf:.4f}")
print(f"\nLR statistic: {lr_stat:.4f}")
print("df: 1")
print(f"p-value: {p_lr:.10f}")
print(f"\nConclusion: {'Reject H0' if p_lr < 0.05 else 'Fail to reject H0'} at 5%")

In [None]:
# Step 3: Compute APE of emp_lag
# Rebuild the design matrix in the column order used for estimation, so
# the matrix product below lines up with the fitted parameter vector.
X_eval = sm.add_constant(data_dyn[wooldridge_vars])
lp = X_eval.values @ wooldridge_pooled.params.values
phi = norm.pdf(lp)
mean_phi = np.mean(phi)

# Scale the coefficient by the average normal density at the fitted index.
ape_gamma = mean_phi * gamma_hat
# Delta method approximation: scales SE(gamma) by mean_phi and treats
# mean_phi as a fixed number.
ape_se = mean_phi * se_gamma

print("=== Average Partial Effect of y_{t-1} ===")
print("\nAPE = mean(phi(X'beta)) * gamma")
print(f"    = {mean_phi:.4f} * {gamma_hat:.4f}")
print(f"    = {ape_gamma:.4f}")
print(f"\nSE(APE) = {ape_se:.4f}")
print(f"95% CI: [{ape_gamma - 1.96*ape_se:.4f}, {ape_gamma + 1.96*ape_se:.4f}]")
print("\nInterpretation:")
print("  Having worked in period t-1 increases the probability of")
# The APE is a probability difference, so it is multiplied by 100 and
# printed without a percent sign in front of "percentage points".
print(f"  working in period t by {ape_gamma * 100:.1f} percentage points,")
print("  holding all other factors constant.")

print("\n=== Summary ===")
print(f"Both Wald (p={p_wald:.6f}) and LR (p={p_lr:.6f}) tests reject H0.")
print("There is strong evidence of true state dependence.")
print("Past employment causally increases current employment probability")
print(f"by approximately {ape_gamma * 100:.1f} percentage points.")

---

**End of Solutions**