# Canadian Bond Day Count Arbitrage - Data Exploration

This notebook explores the Canadian Government bond universe to identify
potential arbitrage opportunities arising from day count convention mismatches.

## Objectives
1. Load and explore Canadian Government bond data
2. Analyze coupon period distributions
3. Identify bonds with 181- or 182-day coupon periods
4. Visualize the opportunity landscape

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

from data_acquisition import CanadianBondDataAcquisition
from feature_engineering import CanadianBondFeatureEngineering

# Pandas display: no column cap, at most 100 rows, floats shown with 4 decimals
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 100),
    ('display.float_format', '{:.4f}'.format),
):
    pd.set_option(option_name, option_value)

# Plot defaults: seaborn white grid and a 12x6 default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## 1. Data Acquisition

In [None]:
# Create the acquisition client, configured from ../config.yaml
acquirer = CanadianBondDataAcquisition('../config.yaml')

# Retrieve the Canadian Government bond list through the client
bonds = acquirer.get_canadian_government_bonds()

retrieved_count = len(bonds)
print(f"Total bonds retrieved: {retrieved_count}")

# Last top-level expression of the cell, so the notebook renders the first rows
bonds.head()

## 2. Feature Engineering

In [None]:
# Fetch per-bond details and engineer the analysis features.
#
# `features` starts as an empty frame so that every later cell's
# `len(features) > 0` guard still works when no bonds were retrieved;
# without this, `features` would be undefined and those cells would raise
# NameError instead of being skipped.
features = pd.DataFrame()

if len(bonds) > 0:
    # Details are requested for every identifier in the retrieved universe
    bond_details = acquirer.get_bond_details(bonds['identifier'].tolist())

    # Engineer features
    fe = CanadianBondFeatureEngineering('../config.yaml')
    features = fe.engineer_features(bond_details)

print(f"Bonds with features: {len(features)}")

# Kept as the cell's last top-level expression: a bare expression inside an
# `if` body is evaluated but never rendered by the notebook.
features.head()

## 3. Coupon Period Distribution Analysis

In [None]:
# Coupon period lengths: binned histogram (left) and exact per-length counts (right)
if len(features) > 0:
    period_lengths = features['coupon_period_length']
    fig, (hist_ax, counts_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: histogram with the two period lengths of interest marked
    period_lengths.hist(bins=30, ax=hist_ax, edgecolor='black')
    hist_ax.set_xlabel('Coupon Period Length (days)')
    hist_ax.set_ylabel('Frequency')
    hist_ax.set_title('Distribution of Coupon Period Lengths')
    for marker_day, marker_color, marker_label in (
        (181, 'red', '181 days'),
        (182, 'orange', '182 days'),
    ):
        hist_ax.axvline(marker_day, color=marker_color, linestyle='--', label=marker_label)
    hist_ax.legend()

    # Right panel: one bar per distinct period length, ordered by length
    period_counts = period_lengths.value_counts().sort_index()
    period_counts.plot.bar(ax=counts_ax, color='steelblue')
    counts_ax.set_xlabel('Coupon Period Length (days)')
    counts_ax.set_ylabel('Count')
    counts_ax.set_title('Coupon Period Length Frequency')
    counts_ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Numeric summary of the same column
    print("\nCoupon Period Length Statistics:")
    print(period_lengths.describe())

## 4. Target Period Identification (181/182 days)

In [None]:
# Select the bonds flagged as target-period (181/182 days) and list them
if len(features) > 0:
    # `== True` (rather than plain boolean indexing) also maps missing flags to False
    target_periods = features[features['is_target_period'] == True]  # noqa: E712

    print(f"\nBonds with 181 or 182-day coupon periods: {len(target_periods)}")
    print(f"Percentage of universe: {len(target_periods)/len(features)*100:.2f}%")

    if len(target_periods) > 0:
        print("\nBreakdown by period length:")
        print(target_periods['coupon_period_length'].value_counts())

        # Display target bonds
        display_cols = [
            'identifier', 'CPN', 'coupon_period_length',
            'days_to_next_coupon', 'arbitrage_profit_bps'
        ]
        # Show only the columns that are actually present: the profit analysis
        # cell treats 'arbitrage_profit_bps' as optional, and indexing with a
        # missing label would raise KeyError and hide the whole table.
        available_cols = [col for col in display_cols if col in target_periods.columns]
        missing_cols = [col for col in display_cols if col not in target_periods.columns]
        if missing_cols:
            print(f"\nColumns not available (skipped): {missing_cols}")

        print("\nTarget Period Bonds:")
        print(target_periods[available_cols])

## 5. Arbitrage Profit Analysis

In [None]:
# Analyze theoretical arbitrage profits on a 2x2 grid:
#   [0, 0] profit histogram          [0, 1] profit boxplot per period length
#   [1, 0] profit vs days to coupon  [1, 1] target vs non-target bonds
# Skipped entirely when the profit column was not produced.
if len(features) > 0 and 'arbitrage_profit_bps' in features.columns:
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # 1. Arbitrage profit distribution
    features['arbitrage_profit_bps'].hist(bins=50, ax=axes[0, 0], edgecolor='black')
    axes[0, 0].set_xlabel('Arbitrage Profit (bps)')
    axes[0, 0].set_ylabel('Frequency')
    axes[0, 0].set_title('Distribution of Arbitrage Profits')

    # 2. Profit by coupon period length
    features.boxplot(column='arbitrage_profit_bps', by='coupon_period_length', ax=axes[0, 1])
    # A grouped pandas boxplot stamps a "Boxplot grouped by ..." title on the
    # whole figure; clear it so it does not headline the other three panels.
    fig.suptitle('')
    axes[0, 1].set_xlabel('Coupon Period Length (days)')
    axes[0, 1].set_ylabel('Arbitrage Profit (bps)')
    axes[0, 1].set_title('Arbitrage Profit by Period Length')

    # 3. Profit vs days to coupon, coloured by coupon period length
    profit_scatter = axes[1, 0].scatter(features['days_to_next_coupon'],
                                        features['arbitrage_profit_bps'],
                                        alpha=0.5, c=features['coupon_period_length'])
    # Without a colorbar the colour encoding above cannot be read
    fig.colorbar(profit_scatter, ax=axes[1, 0], label='Coupon Period Length (days)')
    axes[1, 0].set_xlabel('Days to Next Coupon')
    axes[1, 0].set_ylabel('Arbitrage Profit (bps)')
    axes[1, 0].set_title('Profit vs Days to Coupon')

    # 4. Target periods highlight
    target = features[features['is_target_period']]
    non_target = features[~features['is_target_period']]

    # Non-target bonds are drawn first so the target points sit on top
    axes[1, 1].scatter(non_target['days_to_next_coupon'],
                       non_target['arbitrage_profit_bps'],
                       alpha=0.3, label='Non-target', color='gray')
    axes[1, 1].scatter(target['days_to_next_coupon'],
                       target['arbitrage_profit_bps'],
                       alpha=0.8, label='Target (181/182)', color='red')
    axes[1, 1].set_xlabel('Days to Next Coupon')
    axes[1, 1].set_ylabel('Arbitrage Profit (bps)')
    axes[1, 1].set_title('Target vs Non-Target Periods')
    axes[1, 1].legend()

    plt.tight_layout()
    plt.show()

    print("\nArbitrage Profit Statistics:")
    print("All bonds:")
    print(features['arbitrage_profit_bps'].describe())

    if len(target) > 0:
        print("\nTarget periods (181/182 days):")
        print(target['arbitrage_profit_bps'].describe())

## 6. Entry Window Analysis

In [None]:
# Entry window: count the flagged bonds and chart their mean profit per day-to-coupon
if len(features) > 0:
    window_mask = features['in_entry_window'] == True
    in_window = features.loc[window_mask]

    print(f"\nBonds in entry window: {len(in_window)}")

    if len(in_window) > 0:
        # One bar per distinct days-to-next-coupon value, height = mean profit
        fig, ax = plt.subplots(figsize=(12, 6))

        mean_profit_by_day = in_window.groupby('days_to_next_coupon')['arbitrage_profit_bps'].mean()
        mean_profit_by_day.plot.bar(ax=ax, color='green', alpha=0.7)

        ax.set_xlabel('Days to Next Coupon')
        ax.set_ylabel('Average Arbitrage Profit (bps)')
        ax.set_title('Average Profit by Days to Coupon (Entry Window Only)')
        plt.tight_layout()
        plt.show()

## 7. Summary Statistics

In [None]:
# Generate a one-screen summary of the bond universe
if len(features) > 0:
    # Filter once and reuse, instead of re-filtering for every metric
    target_bonds = features[features['is_target_period']]
    entry_window_bonds = features[features['in_entry_window']]

    summary = {
        'Total Bonds': len(features),
        'Target Period Bonds (181/182)': len(target_bonds),
        'Bonds in Entry Window': len(entry_window_bonds),
    }

    # The profit column is treated as optional by the profit analysis cell;
    # apply the same guard here so the summary still prints without it.
    if 'arbitrage_profit_bps' in features.columns:
        summary['Avg Arbitrage Profit (all)'] = features['arbitrage_profit_bps'].mean()
        summary['Avg Arbitrage Profit (target)'] = target_bonds['arbitrage_profit_bps'].mean()
        summary['Max Arbitrage Profit'] = features['arbitrage_profit_bps'].max()

    summary['Min Coupon Period'] = features['coupon_period_length'].min()
    summary['Max Coupon Period'] = features['coupon_period_length'].max()

    print("\n" + "="*60)
    print("BOND UNIVERSE SUMMARY")
    print("="*60)
    for key, value in summary.items():
        # Floats follow the notebook-wide 4-decimal display format; counts
        # and other values are printed unchanged.
        if isinstance(value, (float, np.floating)):
            value = f"{value:.4f}"
        print(f"{key:40s}: {value}")
    print("="*60)

## 8. Save Processed Data

In [None]:
# Save engineered features for further analysis
from pathlib import Path

if len(features) > 0:
    output_file = '../data/bond_features.csv'

    # to_csv does not create missing directories; make sure ../data exists
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)

    features.to_csv(output_file, index=False)
    # The check mark was previously mojibake (UTF-8 bytes decoded as cp1252)
    print(f"\n✅ Saved bond features to {output_file}")