# 1. Data Exploration

This notebook explores the synthetic loan data and macro scenarios generated by the `privatecredit` package.

## Contents
1. Loan Tape Generation
2. Loan Characteristics
3. Macro Scenario Generation
4. Correlation Analysis

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot appearance: pick the matplotlib style sheet and the seaborn colour palette
PLOT_STYLE = 'seaborn-v0_8-whitegrid'
COLOR_PALETTE = 'husl'

plt.style.use(PLOT_STYLE)
sns.set_palette(COLOR_PALETTE)

# Import privatecredit
from privatecredit.data import LoanTapeGenerator, MacroScenarioGenerator

## 1. Loan Tape Generation

Generate a synthetic loan portfolio with multiple asset classes.

In [None]:
# Settings for the synthetic loan tape generator, collected in one place
loan_gen_kwargs = dict(
    n_loans=1000,
    start_date='2020-01-01',
    end_date='2025-01-01',
    seed=42,
)
loan_gen = LoanTapeGenerator(**loan_gen_kwargs)

# generate() hands back two objects: the loan tape and the panel data
loans_df, panel_df = loan_gen.generate()

# Report the dimensions of both outputs
print(f"Loan tape shape: {loans_df.shape}")
print(f"Panel data shape: {panel_df.shape}")

In [None]:
# Preview the first five rows of the loan tape (five is head()'s default, written out)
loans_df.head(n=5)

In [None]:
# Descriptive statistics for the loan tape, with describe()'s default quartiles written out
loans_df.describe(percentiles=[0.25, 0.5, 0.75])

## 2. Loan Characteristics

In [None]:
# 2x2 overview of the loan tape: asset-class counts plus three histograms
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Top-left panel: number of loans per asset class
class_ax = axes[0, 0]
loans_df['asset_class'].value_counts().plot(kind='bar', ax=class_ax, color='steelblue')
class_ax.set_title('Asset Class Distribution')
class_ax.set_xlabel('Asset Class')
class_ax.set_ylabel('Count')

# Remaining panels, drawn in order: (grid position, column, bin count, title, x-axis label)
hist_panels = [
    ((0, 1), 'original_balance', 50, 'Original Balance Distribution', 'Balance ($)'),
    ((1, 0), 'interest_rate', 30, 'Interest Rate Distribution', 'Rate (%)'),
    ((1, 1), 'credit_score', 30, 'Credit Score Distribution', 'FICO Score'),
]
for position, column, n_bins, title, x_label in hist_panels:
    panel_ax = axes[position]
    loans_df[column].hist(bins=n_bins, ax=panel_ax, color='steelblue', edgecolor='white')
    panel_ax.set_title(title)
    panel_ax.set_xlabel(x_label)
    panel_ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

In [None]:
# Box plot of interest rates, one box per asset class
fig, ax = plt.subplots(figsize=(10, 6))
loans_df.boxplot(column='interest_rate', by='asset_class', ax=ax)
ax.set(
    title='Interest Rate by Asset Class',
    xlabel='Asset Class',
    ylabel='Interest Rate (%)',
)
fig.suptitle('')  # Blank out the automatic figure-level title
plt.tight_layout()
plt.show()

## 3. Macro Scenario Generation

In [None]:
# Instantiate the (seeded) macro scenario generator
macro_gen = MacroScenarioGenerator(n_months=60, start_date='2020-01-01', seed=42)

# One generated scenario per scenario type, keyed by the type's name
scenario_types = ('baseline', 'adverse', 'severely_adverse', 'stagflation')
scenarios = {
    scenario_type: macro_gen.generate_scenario(scenario_type)
    for scenario_type in scenario_types
}

# Show the first rows of the baseline scenario
scenarios['baseline'].head()

In [None]:
# Compare four macro variables across every scenario, one panel per variable
macro_vars = ['gdp_growth_yoy', 'unemployment_rate', 'inflation_rate', 'credit_spread_hy']
scenario_colors = {
    'baseline': 'green',
    'adverse': 'orange',
    'severely_adverse': 'red',
    'stagflation': 'purple'
}

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

for ax, macro_var in zip(axes.flat, macro_vars):
    # One line per scenario, coloured by scenario type
    for scenario_name, scenario_df in scenarios.items():
        legend_label = scenario_name.replace('_', ' ').title()
        ax.plot(
            scenario_df['date'],
            scenario_df[macro_var],
            label=legend_label,
            color=scenario_colors[scenario_name],
            linewidth=2,
        )
    # Panel title is the column name with underscores turned into spaces, title-cased
    ax.set(title=macro_var.replace('_', ' ').title(), xlabel='Date')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Correlation Analysis

In [None]:
# Pairwise correlations between the baseline scenario's macro variables
numeric_cols = [
    'gdp_growth_yoy', 'unemployment_rate', 'inflation_rate',
    'policy_rate', 'yield_10y', 'credit_spread_ig',
    'credit_spread_hy', 'property_price_index', 'equity_return',
]

baseline_macro = scenarios['baseline'][numeric_cols]
corr_matrix = baseline_macro.corr()

# Annotated heatmap with a diverging colour map centred on zero correlation
heatmap_options = dict(annot=True, fmt='.2f', cmap='RdBu_r', center=0, square=True)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, ax=ax, **heatmap_options)
ax.set_title('Macro Variable Correlation Matrix (Baseline Scenario)')
plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlations between selected loan-tape columns
loan_numeric = [
    'original_balance', 'interest_rate', 'term_months',
    'credit_score', 'dti_ratio', 'ltv_ratio',
]

loan_features = loans_df[loan_numeric]
loan_corr = loan_features.corr()

# Annotated heatmap with a diverging colour map centred on zero correlation
heatmap_options = dict(annot=True, fmt='.2f', cmap='RdBu_r', center=0, square=True)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(loan_corr, ax=ax, **heatmap_options)
ax.set_title('Loan Characteristic Correlations')
plt.tight_layout()
plt.show()

## Summary

This notebook demonstrated:
- Generation of synthetic loan tapes with realistic characteristics
- Four macro scenario types: baseline, adverse, severely adverse, stagflation
- Correlation structure of macro variables and loan features

**Next Steps:**
- Train Macro VAE on scenario data (Notebook 02)
- Analyze state transitions (Notebook 03)