# Transaction Cost Analysis (TCA)
## Strategy Capacity and Cost Decomposition

This notebook analyzes:
- Implementation shortfall decomposition
- Slippage analysis
- Impact cost vs opportunity cost
- Strategy capacity curves
- Scalability assessment

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.config import *
from src.seven_evaluation_metrics import PnLCalculator, TCAAnalyzer, CapacityAnalyzer
from utils.plotting_utils import plot_capacity_curve, plot_pnl_attribution
from utils.metrics_utils import calculate_tca_metrics, calculate_implementation_shortfall

# Render matplotlib figures inline in the notebook output (harmless, though
# usually already the default on modern Jupyter/IPython).
%matplotlib inline
# Apply the 'seaborn-v0_8-darkgrid' style sheet to the figures drawn below.
# NOTE(review): this style name presumably requires a recent matplotlib release — confirm
# against the pinned environment.
plt.style.use('seaborn-v0_8-darkgrid')

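## 1. Load Trade Execution Data

*Note: the cell below generates synthetic trades for demonstration; replace it with a real execution-data load before drawing conclusions.*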

In [None]:
# Generate synthetic trade data for demonstration.
#
# All random draws come from one explicitly seeded generator so the notebook
# produces the same trades (and therefore the same statistics and plots) on
# every Restart Kernel -> Run All.
SYNTHETIC_DATA_SEED = 42
n_trades = 200
rng = np.random.default_rng(SYNTHETIC_DATA_SEED)

trades_df = pd.DataFrame({
    # One trade per minute starting at 2025-01-01 00:00.
    'timestamp': pd.date_range('2025-01-01', periods=n_trades, freq='min'),
    # Arrival and execution prices are drawn independently around 100
    # (standard deviation 0.5), so slippage here is pure noise.
    'arrival_price': 100 + rng.standard_normal(n_trades) * 0.5,
    'execution_price': 100 + rng.standard_normal(n_trades) * 0.5,
    # Integer sizes in [10, 100) — the upper bound is exclusive.
    'quantity': rng.integers(10, 100, n_trades),
    'side': rng.choice(['B', 'S'], n_trades),
    # Per-trade fee, uniform in [1, 3).
    'fee': rng.random(n_trades) * 2 + 1,
})

print(f"Loaded {len(trades_df)} trades")
print(trades_df.head())

## 2. Calculate TCA Metrics

In [None]:
# Calculate slippage and costs.
# Each keyword tells the helper which `trades_df` column holds that field.
tca_column_args = {
    'arrival_price_col': 'arrival_price',
    'execution_price_col': 'execution_price',
    'side_col': 'side',
    'quantity_col': 'quantity',
    'fee_col': 'fee',
}
tca_df = calculate_tca_metrics(trades_df, **tca_column_args)

# The summary relies on the result exposing these two cost columns.
summary_columns = ['slippage_bps', 'total_cost_bps']
print("TCA Summary Statistics:")
print(tca_df[summary_columns].describe())

## 3. Slippage Distribution

In [None]:
def _plot_cost_histogram(axis, series, x_label, title, show_median=False, **hist_kwargs):
    """Draw a 50-bin histogram of `series` on `axis` with a dashed mean marker.

    When `show_median` is true, a dashed green median marker is added as well.
    Extra keyword arguments are forwarded to `axis.hist`.
    """
    axis.hist(series, bins=50, alpha=0.7, edgecolor='black', **hist_kwargs)
    mean_value = series.mean()
    axis.axvline(mean_value, color='red', linestyle='--',
                 label=f"Mean: {mean_value:.2f} bps")
    if show_median:
        median_value = series.median()
        axis.axvline(median_value, color='green', linestyle='--',
                     label=f"Median: {median_value:.2f} bps")
    axis.set_xlabel(x_label)
    axis.set_ylabel('Frequency')
    axis.set_title(title)
    axis.legend()
    axis.grid(True, alpha=0.3)


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: slippage histogram with mean and median markers.
_plot_cost_histogram(ax1, tca_df['slippage_bps'],
                     'Slippage (bps)', 'Slippage Distribution',
                     show_median=True)

# Right panel: total cost histogram (orange) with the mean marker only.
_plot_cost_histogram(ax2, tca_df['total_cost_bps'],
                     'Total Cost (bps)', 'Total Cost Distribution',
                     color='orange')

plt.tight_layout()
plt.show()

## 4. Implementation Shortfall Decomposition

In [None]:
# Calculate IS components.
# The decision price is a fixed benchmark; the target size is the sum of the
# `quantity` column over every row of `trades_df`.
decision_price = 100.0
total_quantity = trades_df['quantity'].sum()

# NOTE(review): all trades are evaluated as a single buy order, yet the `side`
# column of `trades_df` is drawn from both 'B' and 'S' — confirm that
# calculate_implementation_shortfall expects this, or filter to buys first.
order_side = 'B'  # Buy side
is_result = calculate_implementation_shortfall(
    trades_df, decision_price, order_side, total_quantity
)

print("Implementation Shortfall Components:")
for component, amount in is_result.items():
    print(f"{component}: {amount:.2f}")

In [None]:
# Plot IS decomposition.
# Pair each display label with the `is_result` key it reads, in plotting order.
component_keys = (
    ('Impact Cost', 'impact_cost'),
    ('Fees', 'fees'),
    ('Opportunity Cost', 'opportunity_cost'),
)
is_components = {label: is_result[key] for label, key in component_keys}

fig = plot_pnl_attribution(is_components, title='Implementation Shortfall Decomposition')
plt.show()

## 5. Slippage Over Time

In [None]:
# Plot slippage time series: raw per-trade values plus a rolling mean.
trade_times = tca_df['timestamp']
slippage_series = tca_df['slippage_bps']
# The first 19 points of a 20-row rolling mean are NaN and are not drawn.
rolling_slippage = slippage_series.rolling(20).mean()

fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(trade_times, slippage_series, alpha=0.6, linewidth=0.5)
ax.plot(trade_times, rolling_slippage, color='red', linewidth=2, label='20-period MA')
# Zero line separates positive from negative slippage.
ax.axhline(0, color='black', linestyle='--', alpha=0.5)
ax.set(xlabel='Time', ylabel='Slippage (bps)', title='Slippage Over Time')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Strategy Capacity Analysis

In [None]:
# Simulate capacity at different scale levels.
# Both arrays are hand-picked illustrative values, not measured results.
adv_percentages = np.array([0.1, 0.5, 1.0, 2.0, 5.0])
pnl_per_share = np.array([0.05, 0.048, 0.042, 0.035, 0.020])  # degrades with scale

capacity_df = pd.DataFrame(
    {'adv_pct': adv_percentages, 'pnl_per_share': pnl_per_share}
).assign(
    # Slippage increases linearly with size: 0.5 bps per unit of `adv_pct`.
    slippage_bps=lambda frame: frame['adv_pct'] * 0.5
)

print("Capacity Analysis:")
print(capacity_df)

In [None]:
# Plot capacity curve: per-share PnL against scale level.
scale_levels = capacity_df['adv_pct'].to_numpy()
pnl_levels = capacity_df['pnl_per_share'].to_numpy()

fig = plot_capacity_curve(scale_levels, pnl_levels, title='Strategy Capacity Curve')
plt.show()

## 7. Cost by Trade Size

In [None]:
# Analyze cost vs trade size: scatter of every trade plus a fitted straight line.
trade_sizes = tca_df['quantity']
trade_costs = tca_df['total_cost_bps']

fig, ax = plt.subplots(figsize=(10, 6))

ax.scatter(trade_sizes, trade_costs, alpha=0.5)
ax.set_xlabel('Trade Size')
ax.set_ylabel('Total Cost (bps)')
ax.set_title('Transaction Cost vs Trade Size')
ax.grid(True, alpha=0.3)

# Add trend line: degree-1 least-squares fit, evaluated at the sizes in
# ascending order so the line is drawn left to right.
trend_coeffs = np.polyfit(trade_sizes, trade_costs, 1)
trend_fn = np.poly1d(trend_coeffs)
sizes_ascending = sorted(trade_sizes)
ax.plot(sizes_ascending, trend_fn(sizes_ascending),
        'r--', linewidth=2, label='Trend')
ax.legend()

plt.tight_layout()
plt.show()

## 8. Summary and Recommendations

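Key findings (**TODO:** placeholders — fill in after running the notebook on real execution data; results from the synthetic inputs above are not meaningful):
- Average slippage: X bps
- Average total cost: Y bps
- Strategy remains profitable up to Z% of ADV
- Recommended maximum position size: TBD
- Cost optimization opportunities: TBD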