# APM466 Assignment 1 - Empirical Questions (4, 5, 6)
## Mathematical Finance - Winter 2026

This notebook contains solutions for Questions 4, 5, and 6 of the assignment.

**Data File**: `selected_bonds_data_final.xlsx`  
**Selected Bonds**: 10 Canadian Government Bonds (0.5 to 5-year maturities)  
**Date Range**: 2026-01-05 to 2026-01-19 (11 trading days)

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from scipy.interpolate import interp1d, CubicSpline
from scipy.optimize import fsolve
import seaborn as sns

# Set plotting style
# NOTE(review): the 'seaborn-v0_8-darkgrid' style name is presumably only
# registered by newer matplotlib releases — confirm the installed version has it.
plt.style.use('seaborn-v0_8-darkgrid')
# Default colour cycle for every figure drawn below
sns.set_palette("husl")
%matplotlib inline

In [None]:
# Read the bond workbook into a DataFrame
file_path = 'selected_bonds_data_final.xlsx'
df = pd.read_excel(file_path)

# Quick overview of what was loaded.
# The trading-day count treats every column beyond the first four as a price
# column (assumes four descriptive columns per bond — TODO confirm against the sheet).
bond_count, column_count = df.shape
print("Bond Data Overview:")
print(f"Number of bonds: {bond_count}")
print(f"Number of trading days: {column_count - 4}")
print("\nFirst few rows:")
df.head()

In [None]:
# Valuation anchor used for the static 'years_to_maturity' column
reference_date = pd.to_datetime('2026-01-05')

# Convert both descriptive date columns to pandas datetimes
for date_field in ('Issue date', 'maturity date'):
    df[date_field] = pd.to_datetime(df[date_field])

# Remaining life in years from the reference date (actual days / 365.25)
days_to_maturity = (df['maturity date'] - reference_date).dt.days
df['years_to_maturity'] = days_to_maturity / 365.25

# Price columns are the string headers that begin with '2026'
# NOTE(review): headers that pandas parsed as datetime objects would not match
# this test and date_columns would be empty — confirm the sheet's header types.
date_columns = []
for header in df.columns:
    if isinstance(header, str) and header.startswith('2026'):
        date_columns.append(header)
trading_dates = list(map(pd.to_datetime, date_columns))

print(f"Reference Date: {reference_date.strftime('%Y-%m-%d')}")
print(f"Trading Dates: {len(trading_dates)} days from {trading_dates[0].strftime('%Y-%m-%d')} to {trading_dates[-1].strftime('%Y-%m-%d')}")
print("\nBond Maturities:")
df[['ISIN', 'Coupon', 'maturity date', 'years_to_maturity']]

---
## Question 4a: Calculate YTM and Plot Yield Curves (10 points)

Calculate yield to maturity for each bond on each trading day and plot the 5-year yield curve.

In [None]:
def calculate_ytm(price, coupon, years_to_maturity, face_value=100, freq=2, max_iter=1000, tol=1e-6):
    """
    Calculate Yield to Maturity by numerically solving the bond pricing equation.

    The bond is modelled with a whole number of remaining coupon periods,
    n = round(years_to_maturity * freq), with a minimum of one period. Every
    period pays coupon * face_value / freq and the last period also repays
    face_value. The per-period yield y that solves

        price = sum_{t=1..n} c / (1 + y)^t + face_value / (1 + y)^n

    is found with scipy's fsolve and annualised as y * freq.

    Parameters:
    - price: Current bond price (as % of par)
    - coupon: Annual coupon rate (as decimal, e.g., 0.04 for 4%)
    - years_to_maturity: Years until maturity
    - face_value: Par value (default 100)
    - freq: Coupon frequency per year (2 for semi-annual)
    - max_iter: Maximum number of function evaluations given to the solver
    - tol: Relative convergence tolerance handed to the solver

    Returns:
    - ytm: Yield to maturity (annual, as decimal). np.nan is returned when the
      inputs are unusable (non-numeric, non-finite, non-positive price or
      maturity) or when the solver does not report convergence.
    """
    try:
        price = float(price)
        coupon = float(coupon)
        years_to_maturity = float(years_to_maturity)
    except (TypeError, ValueError):
        return np.nan

    if not (np.isfinite(price) and np.isfinite(coupon) and np.isfinite(years_to_maturity)):
        return np.nan
    if price <= 0 or years_to_maturity <= 0:
        return np.nan

    # Round to the nearest whole period (as bootstrap-style period counting does);
    # truncating would drop a whole coupon period whenever the maturity falls
    # a few days short of a coupon date (e.g. 1.99 years -> 3 periods instead of 4).
    n = max(1, int(round(years_to_maturity * freq)))
    c = coupon * face_value / freq      # Coupon payment per period
    periods = np.arange(1, n + 1)

    # Initial guess: textbook approximate yield, already expressed per period
    ytm_guess = (c + (face_value - price) / n) / ((face_value + price) / 2)

    def bond_price_diff(ytm_per_period):
        """Difference between the model price at this per-period yield and the actual price."""
        # fsolve passes a length-1 array. Negative yields are legitimate (premium
        # bonds), so only keep the discount base (1 + y) strictly positive.
        y = max(float(np.ravel(ytm_per_period)[0]), -1.0 + 1e-9)
        discount = (1.0 + y) ** periods
        return np.sum(c / discount) + face_value / discount[-1] - price

    # Solve for the per-period yield; full_output exposes the convergence flag
    try:
        solution, _info, status, _message = fsolve(
            bond_price_diff, ytm_guess, xtol=tol, maxfev=max_iter, full_output=True
        )
    except Exception:
        return np.nan

    if status != 1:  # fsolve reports 1 only when a solution was found
        return np.nan

    return float(solution[0]) * freq  # Convert to annual rate

# Sanity check: first bond, priced on the first trading day
test_price, test_coupon, test_years = (
    df[field].iat[0] for field in (date_columns[0], 'Coupon', 'years_to_maturity')
)
test_ytm = calculate_ytm(test_price, test_coupon, test_years)

print(f"Test YTM calculation:")
print(f"  Bond: {df['ISIN'].iloc[0]}")
print(f"  Price: {test_price:.3f}")
print(f"  Coupon: {test_coupon*100:.3f}%")
print(f"  Years to Maturity: {test_years:.3f}")
print(f"  YTM: {test_ytm*100:.3f}%")

In [None]:
# Yield to maturity for every bond (rows) on every trading day (columns)
ytm_data = {}

for date_col in date_columns:
    # Remaining life of each bond measured from this particular trading day
    trading_date = pd.to_datetime(date_col)
    remaining_years = (df['maturity date'] - trading_date).dt.days / 365.25

    ytm_data[date_col] = [
        calculate_ytm(price, coupon, years_to_mat)
        for price, coupon, years_to_mat in zip(df[date_col], df['Coupon'], remaining_years)
    ]

# One row per ISIN; the maturity column is the one measured from the reference date
ytm_df = pd.DataFrame(ytm_data, index=df['ISIN'])
ytm_df['years_to_maturity'] = df['years_to_maturity'].values

print("YTM Data (first 5 bonds, first 5 dates):")
print(ytm_df.iloc[:5, :6])

In [None]:
# Plot 4a: one interpolated YTM curve per trading day
fig, ax = plt.subplots(figsize=(12, 7))

# Bond maturities are shared by every trading day, so they are ordered once up front
maturities = ytm_df['years_to_maturity'].values
sort_idx = np.argsort(maturities)
maturities_sorted = maturities[sort_idx]
maturities_smooth = np.linspace(maturities_sorted[0], 5.0, 100)

for date_col in date_columns:
    # Yields in percent, arranged in the same ascending-maturity order
    yields_sorted = (ytm_df[date_col].values * 100)[sort_idx]

    # Cubic spline through the observed points, evaluated on a dense grid up to 5 years
    cs = CubicSpline(maturities_sorted, yields_sorted)
    ax.plot(maturities_smooth, cs(maturities_smooth), alpha=0.6, linewidth=1.5,
            label=pd.to_datetime(date_col).strftime('%Y-%m-%d'))

ax.set_xlabel('Time to Maturity (Years)', fontsize=12)
ax.set_ylabel('Yield to Maturity (%)', fontsize=12)
ax.set_title('5-Year Yield Curves (YTM) - All Trading Days', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
fig.tight_layout()
fig.savefig('ytm_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Plot saved as 'ytm_curves.png'")
print("\nInterpolation method used: Cubic Spline")
print("Rationale: Cubic spline provides smooth, continuous curves that pass through")
print("all data points while maintaining realistic shape for yield curves.")

---
## Question 4b: Derive Spot Curve using Bootstrapping (15 points)

### Pseudo-code Algorithm:

```
ALGORITHM: Bootstrap Spot Rates from Bond Prices

INPUT: Bond prices, coupons, maturities (semi-annual payment frequency)
OUTPUT: Spot rates for 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0 years

1. Sort bonds by maturity (ascending)

2. FOR each bond i (starting from shortest maturity):
   
   a. Identify bond i's maturity T_i and coupon C_i
   
   b. Calculate number of coupon payments: n = T_i × 2 (semi-annual)
   
   c. IF i == 1 (first bond, shortest maturity):
      - Use direct formula: r_1 = ((C_1/2 + 100) / P_1)^(1/n) - 1
      - Convert to annual rate: spot_1 = r_1 × 2
   
   d. ELSE (subsequent bonds):
      - Set up bond pricing equation:
        P_i = Σ[t=1 to n-1] (C_i/2) / (1 + r_t/2)^t  +  (C_i/2 + 100) / (1 + r_n/2)^n
              └─ known from previous bonds ─┘              └─ unknown ─┘
      
      - Solve for r_n (spot rate at maturity n):
        PV_known = Σ[t=1 to n-1] (C_i/2) / (1 + r_t/2)^t
        r_n = 2 × [((C_i/2 + 100) / (P_i - PV_known))^(1/n) - 1]
   
   e. Store spot rate r_n

3. RETURN spot rates for all maturities

KEY SIMPLIFICATION (from assignment):
- All Canadian Gov bonds pay semi-annual coupons (frequency = 2)
- The 10 selected bonds mature roughly half a year apart, so each one maps to one of the 10 half-year nodes (maturities are rounded to the nearest half-year)
- Day count convention simplified to actual days / 365.25
```

In [None]:
def bootstrap_spot_rates(prices, coupons, maturities, face_value=100, freq=2):
    """
    Bootstrap spot rates from bond prices using the iterative method.

    Bonds are processed in ascending maturity order. Each bond's maturity is
    mapped to a whole number of coupon periods, round(maturity * freq) with a
    minimum of one, and the spot rate at that node is solved from the bond's
    price after discounting its earlier coupons with the spot rates already
    found (linearly interpolated between known maturities, held flat outside them).

    The shortest bond is priced as a single payment of coupon + face value
    at its final period, because no earlier spot rates exist yet.

    Parameters:
    - prices: Array of bond prices
    - coupons: Array of annual coupon rates (as decimals)
    - maturities: Array of years to maturity
    - face_value: Par value
    - freq: Coupon frequency per year

    Returns:
    - spot_rates: Array of spot rates (annual, as decimals), ordered by maturity
    - spot_maturities: Array of maturities corresponding to spot rates

    Warns:
    - RuntimeWarning when the discounted coupons of a bond exceed its price;
      the residual is then clamped to 0.01 and the resulting rate is not meaningful.
    """
    import warnings  # local import keeps this definition self-contained

    # Sort by maturity
    sort_idx = np.argsort(maturities)
    prices = np.asarray(prices, dtype=float)[sort_idx]
    coupons = np.asarray(coupons, dtype=float)[sort_idx]
    maturities = np.asarray(maturities, dtype=float)[sort_idx]

    spot_rates = []
    spot_maturities = []

    for i in range(len(prices)):
        price = prices[i]
        coupon = coupons[i]
        maturity = maturities[i]

        # A bond closer than a quarter period to maturity would round to zero
        # periods and make the 1 / n_periods exponent divide by zero.
        n_periods = max(1, int(round(maturity * freq)))  # Number of coupon payments
        coupon_payment = coupon * face_value / freq
        final_payment = coupon_payment + face_value

        if i == 0:
            # First bond: direct calculation
            # P = (C + F) / (1 + r)^n  =>  r = ((C + F) / P)^(1/n) - 1
            r_per_period = (final_payment / price) ** (1 / n_periods) - 1
        else:
            # Subsequent bonds: discount every cash flow except the last one
            pv_known = 0.0
            for t in range(1, n_periods):
                period_years = t / freq

                if period_years in spot_maturities:
                    # Exact node already bootstrapped
                    r_t = spot_rates[spot_maturities.index(period_years)]
                else:
                    # Linear interpolation between known nodes
                    r_t = np.interp(period_years, spot_maturities, spot_rates)

                pv_known += coupon_payment / (1 + r_t / freq) ** t

            # P = PV_known + (C + F) / (1 + r_n)^n
            remaining_pv = price - pv_known
            if remaining_pv <= 0:
                warnings.warn(
                    f"Discounted coupons ({pv_known:.4f}) exceed price ({price:.4f}) "
                    f"for maturity {maturity:.4f}; spot rate at this node is unreliable",
                    RuntimeWarning,
                )
                remaining_pv = 0.01  # Prevent division errors

            r_per_period = (final_payment / remaining_pv) ** (1 / n_periods) - 1

        spot_rates.append(r_per_period * freq)  # annualise the per-period rate
        spot_maturities.append(maturity)

    return np.array(spot_rates), np.array(spot_maturities)

# Smoke test: bootstrap the spot curve for the first trading day,
# using maturities measured from the reference date
test_date = date_columns[0]
test_prices, test_coupons, test_maturities = (
    df[field].values for field in (test_date, 'Coupon', 'years_to_maturity')
)

spot_rates_test, spot_mat_test = bootstrap_spot_rates(test_prices, test_coupons, test_maturities)

print("Test Spot Rate Calculation:")
print(f"Date: {test_date}\n")
for i, _ in enumerate(spot_rates_test):
    print(f"Maturity {spot_mat_test[i]:.2f}y: Spot Rate = {spot_rates_test[i]*100:.3f}%")

In [None]:
# Bootstrap one spot curve per trading day
spot_data = {}
coupons = df['Coupon'].values  # identical for every date

for date_col in date_columns:
    # Maturities are re-measured from each trading day (actual days / 365.25)
    trading_date = pd.to_datetime(date_col)
    years_to_mat = ((df['maturity date'] - trading_date).dt.days / 365.25).tolist()

    prices = df[date_col].values
    spot_rates, spot_maturities = bootstrap_spot_rates(prices, coupons, years_to_mat)
    spot_data[date_col] = dict(rates=spot_rates, maturities=spot_maturities)

print(f"✓ Calculated spot rates for all {len(date_columns)} trading days")

In [None]:
# Plot 4b: one interpolated spot curve per trading day
fig, ax = plt.subplots(figsize=(12, 7))

for date_col in date_columns:
    day_curve = spot_data[date_col]
    maturities = day_curve['maturities']
    rates = day_curve['rates'] * 100  # percent

    # Cubic spline through the bootstrapped nodes, drawn from the shortest maturity to 5 years
    cs = CubicSpline(maturities, rates)
    maturities_smooth = np.linspace(maturities[0], 5.0, 100)
    ax.plot(maturities_smooth, cs(maturities_smooth), alpha=0.6, linewidth=1.5,
            label=pd.to_datetime(date_col).strftime('%Y-%m-%d'))

ax.set_xlabel('Time to Maturity (Years)', fontsize=12)
ax.set_ylabel('Spot Rate (%)', fontsize=12)
ax.set_title('5-Year Spot Curves (Bootstrapped) - All Trading Days', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
fig.tight_layout()
fig.savefig('spot_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Plot saved as 'spot_curves.png'")

---
## Question 4c: Derive 1-Year Forward Curve (15 points)

### Pseudo-code Algorithm:

```
ALGORITHM: Calculate 1-Year Forward Rates from Spot Rates

INPUT: Spot rates for maturities 1, 2, 3, 4, 5 years
OUTPUT: 1-year forward rates: 1yr-1yr, 1yr-2yr, 1yr-3yr, 1yr-4yr

FORMULA (Semi-annual compounding):
Forward rate from year t to year (t+1), quoted as an annual rate:

F(t, t+1) = 2 × { [((1 + S(t+1)/2)^(2(t+1))) / ((1 + S(t)/2)^(2t))]^(1/2) - 1 }

where:
- S(t) = spot rate for maturity t years (annual rate, semi-annual compounding)
- F(t, t+1) = forward rate from year t to year t+1 (annual rate, semi-annual compounding)
- Division by 2 accounts for semi-annual compounding
- Exponent 2t is number of periods (2 per year × t years)
- The term in braces is the forward rate per half-year; multiplying by 2 annualizes it so it is comparable with S(t)

STEPS:
1. FOR t in {1, 2, 3, 4}:
   
   a. Get S(t) and S(t+1) from spot curve
   
   b. Calculate forward rate:
      numerator = (1 + S(t+1)/2)^(2×(t+1))
      denominator = (1 + S(t)/2)^(2×t)
      F(t,t+1) = 2 × [(numerator / denominator)^(1/2) - 1]
   
   c. Store F(t,t+1) as the 1yr-t forward rate

2. RETURN forward rates for 1yr-1yr, 1yr-2yr, 1yr-3yr, 1yr-4yr

EXAMPLE:
If S(1) = 3.0%, S(2) = 3.5%:
F(1,2) = 2 × { [((1 + 0.035/2)^4) / ((1 + 0.030/2)^2)]^(1/2) - 1 } ≈ 4.00%
       = 1yr-1yr forward rate (rate for the year running from year 1 to year 2, implied by today's spot curve)
```

In [None]:
def calculate_forward_rates(spot_rates, spot_maturities, freq=2):
    """
    Calculate 1-year forward rates from spot rates.

    Spot rates are first linearly interpolated at 1, 2, 3, 4 and 5 years
    (np.interp holds the end values flat outside the supplied maturities).
    For t = 1..4 the forward rate over [t, t+1] is the annual rate f, with
    `freq` compounding periods per year, that satisfies

        (1 + s_t/freq)^(freq*t) * (1 + f/freq)^freq = (1 + s_{t+1}/freq)^(freq*(t+1))

    The result is annualised (per-period rate times freq) so that it is
    expressed in the same convention as the input spot rates.

    Parameters:
    - spot_rates: Array of spot rates (annual, as decimals)
    - spot_maturities: Array of maturities for spot rates (must be increasing)
    - freq: Compounding frequency per year

    Returns:
    - forward_rates: Array of 1-year forward rates (annual, as decimals)
    - forward_labels: Labels for forward rates (e.g., '1yr-1yr')
    """
    # Target maturities for forward calculation
    target_maturities = [1, 2, 3, 4, 5]

    # Interpolate spot rates at target maturities
    spot_interp = np.interp(target_maturities, spot_maturities, spot_rates)

    forward_rates = []
    forward_labels = []

    # Calculate 1-year forward rates: 1yr-1yr, 1yr-2yr, 1yr-3yr, 1yr-4yr
    for t in range(1, 5):  # t = 1, 2, 3, 4
        s_t = spot_interp[t - 1]      # Spot rate at year t (index t-1)
        s_t_plus_1 = spot_interp[t]   # Spot rate at year t+1 (index t)

        # Growth of one unit invested to t+1 and to t under per-period compounding
        growth_to_t_plus_1 = (1 + s_t_plus_1 / freq) ** (freq * (t + 1))
        growth_to_t = (1 + s_t / freq) ** (freq * t)

        # Rate per compounding period over the forward year ...
        per_period_forward = (growth_to_t_plus_1 / growth_to_t) ** (1 / freq) - 1
        # ... annualised the same way the spot rates are (per-period rate × freq).
        # Without this step the forward would be roughly half the spot level.
        forward_rates.append(per_period_forward * freq)
        forward_labels.append(f'1yr-{t}yr')

    return np.array(forward_rates), forward_labels

# Smoke test: forward rates implied by the first trading day's spot curve
first_day_curve = spot_data[date_columns[0]]
test_spot_rates = first_day_curve['rates']
test_spot_maturities = first_day_curve['maturities']

forward_test, forward_labels = calculate_forward_rates(test_spot_rates, test_spot_maturities)

print("Test Forward Rate Calculation:")
print(f"Date: {date_columns[0]}\n")
for i, label in enumerate(forward_labels):
    print(f"{label}: {forward_test[i]*100:.3f}%")

In [None]:
# Derive the 1-year forward rates from each trading day's spot curve
forward_data = {}

for date_col in date_columns:
    spot_rates, spot_maturities = (
        spot_data[date_col][key] for key in ('rates', 'maturities')
    )
    forward_rates, forward_labels = calculate_forward_rates(spot_rates, spot_maturities)
    forward_data[date_col] = dict(rates=forward_rates, labels=forward_labels)

print(f"✓ Calculated forward rates for all {len(date_columns)} trading days")

In [None]:
# Plot 4c: one interpolated 1-year forward curve per trading day
fig, ax = plt.subplots(figsize=(12, 7))

# Each forward rate is drawn at the end year of its forward period (2, 3, 4, 5)
forward_x_positions = [2, 3, 4, 5]
x_smooth = np.linspace(2, 5, 100)

for date_col in date_columns:
    forward_rates = forward_data[date_col]['rates'] * 100  # percent

    # Cubic spline through the four forward rates
    cs = CubicSpline(forward_x_positions, forward_rates)
    ax.plot(x_smooth, cs(x_smooth), alpha=0.6, linewidth=1.5,
            label=pd.to_datetime(date_col).strftime('%Y-%m-%d'))

ax.set_xlabel('End Year of Forward Period', fontsize=12)
ax.set_ylabel('1-Year Forward Rate (%)', fontsize=12)
ax.set_title('1-Year Forward Curves (Years 2-5) - All Trading Days', fontsize=14, fontweight='bold')
ax.set_xticks([2, 3, 4, 5])
ax.set_xticklabels(['1yr-1yr\n(Year 2)', '1yr-2yr\n(Year 3)', '1yr-3yr\n(Year 4)', '1yr-4yr\n(Year 5)'])
ax.grid(True, alpha=0.3)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)
fig.tight_layout()
fig.savefig('forward_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Plot saved as 'forward_curves.png'")

---
## Question 5: Covariance Matrices of Log-Returns (20 points)

Calculate covariance matrices for:
1. Daily log-returns of yield rates (1yr, 2yr, 3yr, 4yr, 5yr)
2. Daily log-returns of forward rates (1yr-1yr, 1yr-2yr, 1yr-3yr, 1yr-4yr)

Formula: $X_{i,j} = \log(r_{i,j+1} / r_{i,j})$ for $j = 1, ..., 10$ (10 log-returns from the 11 trading days)

In [None]:
# Prepare yield rate data at target maturities (1, 2, 3, 4, 5 years)
target_years = [1, 2, 3, 4, 5]

# np.interp needs increasing x-coordinates. ytm_df keeps the bonds in sheet row
# order, which is not guaranteed to be ascending in maturity, so order them first.
maturity_order = np.argsort(ytm_df['years_to_maturity'].values)
maturities = ytm_df['years_to_maturity'].values[maturity_order]

# One column per trading day, one row per target maturity
yield_rates_matrix = np.column_stack([
    np.interp(target_years, maturities, ytm_df[date_col].values[maturity_order])
    for date_col in date_columns
])  # Shape: (5, 11)

print("Yield Rates Matrix (5 maturities × 11 days):")
print(f"Shape: {yield_rates_matrix.shape}")
print(f"\nFirst 3 maturities, first 5 days (as %):\n{yield_rates_matrix[:3, :5] * 100}")

In [None]:
# Daily log-returns of the interpolated yields
# X_{i,j} = log(r_{i,j+1} / r_{i,j})
next_day_yields = yield_rates_matrix[:, 1:]
prev_day_yields = yield_rates_matrix[:, :-1]
log_returns_yield = np.log(next_day_yields / prev_day_yields)

print("Log-Returns of Yield Rates:")
print(f"Shape: {log_returns_yield.shape} (5 maturities × 10 log-returns)")
print(f"\nFirst 3 maturities, first 5 log-returns:\n{log_returns_yield[:3, :5]}")

# Covariance across maturities (np.cov treats each row as one variable)
cov_matrix_yield = np.cov(log_returns_yield)

print("\nCovariance Matrix for Yield Log-Returns (5×5):")
print(cov_matrix_yield)

# Labelled copy of the matrix for display and export
yield_labels = [f'{y}yr' for y in target_years]
cov_df_yield = pd.DataFrame(cov_matrix_yield, index=yield_labels, columns=yield_labels)
print("\nFormatted Covariance Matrix (Yields):")
print(cov_df_yield)

In [None]:
# Arrange the forward rates as a matrix: one row per forward rate
# (1yr-1yr, 1yr-2yr, 1yr-3yr, 1yr-4yr), one column per trading day
forward_rates_matrix = np.array([
    [forward_data[date_col]['rates'][i] for date_col in date_columns]
    for i in range(4)
])  # Shape: (4, 11)

print("Forward Rates Matrix (4 forward rates × 11 days):")
print(f"Shape: {forward_rates_matrix.shape}")
print(f"\nFirst 2 forward rates, first 5 days (as %):\n{forward_rates_matrix[:2, :5] * 100}")

In [None]:
# Daily log-returns of the forward rates: log(next day / previous day)
next_day_forwards = forward_rates_matrix[:, 1:]
prev_day_forwards = forward_rates_matrix[:, :-1]
log_returns_forward = np.log(next_day_forwards / prev_day_forwards)

print("Log-Returns of Forward Rates:")
print(f"Shape: {log_returns_forward.shape} (4 forward rates × 10 log-returns)")
print(f"\nFirst 2 forward rates, first 5 log-returns:\n{log_returns_forward[:2, :5]}")

# Covariance across forward rates (np.cov treats each row as one variable)
cov_matrix_forward = np.cov(log_returns_forward)

print("\nCovariance Matrix for Forward Log-Returns (4×4):")
print(cov_matrix_forward)

# Labelled copy of the matrix for display and export
forward_labels_display = ['1yr-1yr', '1yr-2yr', '1yr-3yr', '1yr-4yr']
cov_df_forward = pd.DataFrame(
    cov_matrix_forward,
    index=forward_labels_display,
    columns=forward_labels_display,
)
print("\nFormatted Covariance Matrix (Forwards):")
print(cov_df_forward)

In [None]:
# Side-by-side heatmaps of the two covariance matrices
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

heatmap_panels = [
    (cov_df_yield, 'Covariance Matrix: Yield Log-Returns'),
    (cov_df_forward, 'Covariance Matrix: Forward Log-Returns'),
]
for panel_ax, (cov_frame, panel_title) in zip(axes, heatmap_panels):
    sns.heatmap(cov_frame, annot=True, fmt='.2e', cmap='coolwarm',
                ax=panel_ax, cbar_kws={'label': 'Covariance'})
    panel_ax.set_title(panel_title, fontweight='bold')

fig.tight_layout()
fig.savefig('covariance_matrices.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Plot saved as 'covariance_matrices.png'")

---
## Question 6: Eigenvalues and Eigenvectors (15 points)

Calculate eigenvalues and eigenvectors for both covariance matrices, and explain what the largest eigenvalue and its eigenvector represent.

In [None]:
# Calculate eigenvalues and eigenvectors for yield covariance matrix.
# A covariance matrix is symmetric, so use the symmetric solver: eigh always
# returns real eigenvalues and orthonormal eigenvectors, whereas the general
# solver eig makes neither guarantee.
eigenvalues_yield, eigenvectors_yield = np.linalg.eigh(cov_matrix_yield)

# Sort by eigenvalue (descending); eigh returns them in ascending order
idx_yield = eigenvalues_yield.argsort()[::-1]
eigenvalues_yield = eigenvalues_yield[idx_yield]
eigenvectors_yield = eigenvectors_yield[:, idx_yield]

print("="*80)
print("YIELD RATES - Eigenanalysis")
print("="*80)
print("\nEigenvalues (sorted descending):")
for i, ev in enumerate(eigenvalues_yield, 1):
    variance_explained = ev / eigenvalues_yield.sum() * 100
    print(f"  λ{i} = {ev:.6e}  ({variance_explained:.2f}% of total variance)")

# Column k of the table is the eigenvector paired with the k-th largest eigenvalue.
# The overall sign of each eigenvector is arbitrary.
print("\nEigenvectors (columns are eigenvectors):")
eigenvec_df_yield = pd.DataFrame(eigenvectors_yield,
                                 index=[f'{y}yr' for y in target_years],
                                 columns=[f'PC{i+1}' for i in range(len(eigenvalues_yield))])
print(eigenvec_df_yield)

In [None]:
# Calculate eigenvalues and eigenvectors for forward covariance matrix.
# A covariance matrix is symmetric, so use the symmetric solver: eigh always
# returns real eigenvalues and orthonormal eigenvectors, whereas the general
# solver eig makes neither guarantee.
eigenvalues_forward, eigenvectors_forward = np.linalg.eigh(cov_matrix_forward)

# Sort by eigenvalue (descending); eigh returns them in ascending order
idx_forward = eigenvalues_forward.argsort()[::-1]
eigenvalues_forward = eigenvalues_forward[idx_forward]
eigenvectors_forward = eigenvectors_forward[:, idx_forward]

print("="*80)
print("FORWARD RATES - Eigenanalysis")
print("="*80)
print("\nEigenvalues (sorted descending):")
for i, ev in enumerate(eigenvalues_forward, 1):
    variance_explained = ev / eigenvalues_forward.sum() * 100
    print(f"  λ{i} = {ev:.6e}  ({variance_explained:.2f}% of total variance)")

# Column k of the table is the eigenvector paired with the k-th largest eigenvalue.
# The overall sign of each eigenvector is arbitrary.
print("\nEigenvectors (columns are eigenvectors):")
eigenvec_df_forward = pd.DataFrame(eigenvectors_forward,
                                   index=forward_labels_display,
                                   columns=[f'PC{i+1}' for i in range(len(eigenvalues_forward))])
print(eigenvec_df_forward)

In [None]:
# Bar charts of the three leading eigenvectors: yields on the top row, forwards below
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# Top row: yield eigenvectors, one bar per target maturity
yield_tick_labels = [f'{y}yr' for y in target_years]
for i, panel_ax in enumerate(axes[0]):
    panel_ax.bar(range(5), eigenvectors_yield[:, i], color='steelblue', alpha=0.7)
    panel_ax.set_xticks(range(5))
    panel_ax.set_xticklabels(yield_tick_labels)
    panel_ax.axhline(0, color='black', linewidth=0.5)
    panel_ax.set_title(f'Yield PC{i+1} ({eigenvalues_yield[i]/eigenvalues_yield.sum()*100:.1f}%)',
                       fontweight='bold')
    panel_ax.set_ylabel('Eigenvector Component')
    panel_ax.grid(True, alpha=0.3)

# Bottom row: forward eigenvectors, one bar per forward rate
for i, panel_ax in enumerate(axes[1]):
    panel_ax.bar(range(4), eigenvectors_forward[:, i], color='coral', alpha=0.7)
    panel_ax.set_xticks(range(4))
    panel_ax.set_xticklabels(forward_labels_display, rotation=45)
    panel_ax.axhline(0, color='black', linewidth=0.5)
    panel_ax.set_title(f'Forward PC{i+1} ({eigenvalues_forward[i]/eigenvalues_forward.sum()*100:.1f}%)',
                       fontweight='bold')
    panel_ax.set_ylabel('Eigenvector Component')
    panel_ax.grid(True, alpha=0.3)

fig.suptitle('Principal Components Analysis - First 3 Components',
             fontsize=16, fontweight='bold', y=1.00)
fig.tight_layout()
fig.savefig('pca_components.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Plot saved as 'pca_components.png'")

### Interpretation of First Eigenvalue and Eigenvector:

**For Yield Rates:**

The first (largest) eigenvalue represents the amount of variance in the yield curve movements that is explained by the first principal component. The associated eigenvector shows how this dominant mode of variation affects different maturities. 

Typically, the first principal component represents a **parallel shift** in the yield curve - all maturities move up or down together in the same direction. This accounts for the majority of yield curve movements and reflects changes in the overall level of interest rates driven by monetary policy, inflation expectations, or general economic conditions.

**For Forward Rates:**

Similarly, the first eigenvalue for forward rates captures the dominant pattern of variation in forward rate movements. The first principal component typically represents a common factor affecting all forward rates simultaneously, reflecting market expectations about future interest rate changes.

The percentage of total variance explained by the first eigenvalue indicates how much of the interest rate risk can be hedged by managing exposure to this single dominant factor.

In [None]:
# Text summary of the leading principal component for yields and forwards
print("="*80)
print("SUMMARY - Principal Component Analysis")
print("="*80)

summary_inputs = (
    ("\nYIELD RATES:", eigenvalues_yield, eigenvectors_yield),
    ("\nFORWARD RATES:", eigenvalues_forward, eigenvectors_forward),
)
for heading, eigenvalues, eigenvectors in summary_inputs:
    first_vector = eigenvectors[:, 0]
    # The label below is 'same' only if every component shares one strict sign
    same_sign = np.all(first_vector > 0) or np.all(first_vector < 0)

    print(heading)
    print(f"  First eigenvalue: {eigenvalues[0]:.6e}")
    print(f"  Variance explained: {eigenvalues[0]/eigenvalues.sum()*100:.2f}%")
    print(f"  First eigenvector: {first_vector}")
    print(f"  Interpretation: All components have {'same' if same_sign else 'mixed'} sign")
    print(f"                  → Represents {'parallel shift' if same_sign else 'complex movement'}")

print("\n" + "="*80)

---
## Summary and Export Results

Export all calculated data to Excel for reference.

In [None]:
# Write every result table to one workbook, one sheet per table
with pd.ExcelWriter('APM466_Assignment1_Results.xlsx', engine='openpyxl') as writer:
    # YTM per bond and date (the helper maturity column is left out)
    ytm_export = ytm_df.drop(columns=['years_to_maturity'])
    ytm_export.index.name = 'ISIN'
    ytm_export.to_excel(writer, sheet_name='YTM')

    # Spot rates for every trading day; row labels are taken from the
    # first trading day's maturities
    spot_export_data = {date_col: spot_data[date_col]['rates'] for date_col in date_columns}
    first_day_maturities = spot_data[date_columns[0]]['maturities']
    spot_export = pd.DataFrame(spot_export_data,
                               index=[f'{m:.2f}yr' for m in first_day_maturities])
    spot_export.index.name = 'Maturity'
    spot_export.to_excel(writer, sheet_name='Spot Rates')

    # Forward rates for every trading day
    forward_export_data = {date_col: forward_data[date_col]['rates'] for date_col in date_columns}
    forward_export = pd.DataFrame(forward_export_data, index=forward_labels_display)
    forward_export.index.name = 'Forward Rate'
    forward_export.to_excel(writer, sheet_name='Forward Rates')

    # Covariance matrices
    cov_df_yield.to_excel(writer, sheet_name='Cov Matrix Yields')
    cov_df_forward.to_excel(writer, sheet_name='Cov Matrix Forwards')

    # Eigenvalues with each one's share of the total
    yield_variance_share = eigenvalues_yield / eigenvalues_yield.sum() * 100
    eigen_yield_df = pd.DataFrame(
        {'Eigenvalue': eigenvalues_yield, 'Variance %': yield_variance_share},
        index=[f'PC{i+1}' for i in range(len(eigenvalues_yield))],
    )
    eigen_yield_df.to_excel(writer, sheet_name='Eigenvalues Yields')

    forward_variance_share = eigenvalues_forward / eigenvalues_forward.sum() * 100
    eigen_forward_df = pd.DataFrame(
        {'Eigenvalue': eigenvalues_forward, 'Variance %': forward_variance_share},
        index=[f'PC{i+1}' for i in range(len(eigenvalues_forward))],
    )
    eigen_forward_df.to_excel(writer, sheet_name='Eigenvalues Forwards')

    # Eigenvectors
    eigenvec_df_yield.to_excel(writer, sheet_name='Eigenvectors Yields')
    eigenvec_df_forward.to_excel(writer, sheet_name='Eigenvectors Forwards')

# Report what was written
export_report = (
    "✓ All results exported to 'APM466_Assignment1_Results.xlsx'",
    "\nSheets created:",
    "  1. YTM - Yield to maturity for all bonds and dates",
    "  2. Spot Rates - Bootstrapped spot rates",
    "  3. Forward Rates - 1-year forward rates",
    "  4. Cov Matrix Yields - Covariance matrix of yield log-returns",
    "  5. Cov Matrix Forwards - Covariance matrix of forward log-returns",
    "  6. Eigenvalues Yields - Eigenvalues and variance explained",
    "  7. Eigenvalues Forwards - Eigenvalues and variance explained",
    "  8. Eigenvectors Yields - Principal components for yields",
    "  9. Eigenvectors Forwards - Principal components for forwards",
)
for report_line in export_report:
    print(report_line)

---
## Conclusion

This notebook has completed all empirical questions (4, 5, 6) for APM466 Assignment 1:

### Question 4:
- **4a**: Calculated YTM for all 10 bonds across 11 trading days and plotted yield curves
- **4b**: Derived spot curves using bootstrapping method with detailed pseudo-code
- **4c**: Calculated 1-year forward curves with detailed pseudo-code

### Question 5:
- Calculated covariance matrices for:
  - Daily log-returns of yield rates (5×5 matrix)
  - Daily log-returns of forward rates (4×4 matrix)

### Question 6:
- Computed eigenvalues and eigenvectors for both covariance matrices
- Interpreted the first principal component as representing parallel shifts in rates

### Generated Files:
1. `ytm_curves.png` - Yield curves plot
2. `spot_curves.png` - Spot curves plot
3. `forward_curves.png` - Forward curves plot
4. `covariance_matrices.png` - Heatmaps of covariance matrices
5. `pca_components.png` - Principal components visualization
6. `APM466_Assignment1_Results.xlsx` - All numerical results