In [3]:
pd.set_option('mode.chained_assignment', None)
import numpy as np
from scipy.optimize import linprog
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gmean


def calculate_bond_returns(data):
    # Create a copy to avoid modifying original data
    df = data.copy()
    
    # Initialize returns column
    df['Returns'] = 0.0
    df['IsCouponDay'] = False
    
    # Extract numeric ID and ensure it's sorted
    df['Id'] = df['SecurityId'].str.extract(r'(\d+)').astype(int)
    df = df.sort_values(by=['Id'])  # Added Date to sorting
    
    for security in df['Id'].unique():
        # Create mask for current security
        mask = df['Id'] == security
        security_data = df[mask].copy()
        
        # Calculate previous day's values
        security_data['PrevDirtyPrice'] = security_data['DirtyPrice'].shift(1)
        security_data['PrevCleanPrice'] = security_data['CleanPrice'].shift(1)
        
        # Identify coupon days (when accrued interest is minimum for this security)
        min_accrued = security_data['AccruedInterest'].min()
        coupon_days = (security_data['AccruedInterest'] == min_accrued).astype(int)
        security_data['IsCouponDay'] = coupon_days
        
        # Calculate returns for coupon days
        coupon_returns = (
            (security_data['DirtyPrice'] - security_data['CleanPrice']) - 
            (security_data['PrevDirtyPrice'] - security_data['PrevCleanPrice'])
        ) / security_data['PrevDirtyPrice']
        
        # Calculate returns for non-coupon days
        price_returns = (
            security_data['DirtyPrice'] - security_data['PrevDirtyPrice']
        ) / security_data['PrevDirtyPrice']
        
        # Combine returns based on whether it's a coupon day
        security_data['Returns'] = np.where(coupon_days, coupon_returns, price_returns)
        
        # Update the main dataframe with calculated returns
        df.loc[mask, 'Returns'] = security_data['Returns']
        df.loc[mask, 'IsCouponDay'] = security_data['IsCouponDay']
    
    # Pivot the returns into a matrix
    returns_matrix = df.pivot(
        index='Date',
        columns='Id',
        values='Returns'
    )

    returns_matrix = returns_matrix.fillna(0)

    
    return returns_matrix

pd.set_option('mode.chained_assignment', None)
bond_data = pd.read_csv('bonds_w_exp_returns_final.csv')
bond_data['Date'] = pd.to_datetime(bond_data['Date'])
bond_data['DTS'] = bond_data['ModifiedDuration'] * bond_data['spread']
bond_data['transaction_cost'] = bond_data['BidAskSpread']
bond_data['OAS'] = pd.to_numeric(bond_data['spread'], errors='coerce')

returns_matrix = calculate_bond_returns(bond_data)

model = 'cVaR'
constraints = []
# Initial budget to invest ($100,000)
initialVal = 100000  

# Length of investment period 
investPeriod = 1

# Identify the tickers and the dates
tickers = bond_data['SecurityId'].unique()
dates = bond_data['Date'].unique()

n = len(tickers)   # Number of bonds
NoPeriods = len(dates) // investPeriod  

# Preallocate space for portfolio weights (x0 will track turnover)
x = np.zeros([n, NoPeriods])
x0 = np.zeros([n, NoPeriods])
weights0 = np.zeros(n)


lookback_window = 122  # Past 122 days
start_period = lookback_window // investPeriod  # Determine the first valid period

# Preallocate space for `currentVal` to match testing periods
currentVal = np.zeros(NoPeriods - start_period + 1)
currentVal[0] = initialVal  # Initialize with initial portfolio value

# Preallocate space for portfolio returns to match testing periods
portfolio_returns = np.zeros(NoPeriods - start_period)
turnover = np.zeros(NoPeriods - start_period)
transaction_costs = np.zeros(NoPeriods - start_period)

for i, period in enumerate(range(start_period, NoPeriods)):
    current_date = dates[period]  # The current day
    print(current_date)
    daily_real_return = returns_matrix.iloc[period].values
    print("period returns: ", daily_real_return)
    # Get the last 126 unique trading days (from the 'Date' column)
    end_date = current_date  # Current date is the last day
    start_date = dates[period - lookback_window]  # Start date is 126 days earlier

    # Filter data to only include the unique trading dates
    trading_days_in_range = bond_data[(bond_data['Date'] >= start_date) & (bond_data['Date'] <= end_date)]
    unique_trading_dates = trading_days_in_range['Date'].unique()

    if len(unique_trading_dates) < lookback_window:
        print(f"Skipping period {period}: not enough unique trading days.")
        continue
    
    # Use only the last 122 unique trading days
    last_122_trading_days = unique_trading_dates[-lookback_window:]

    # Filter the original data to keep only rows from the last 122 unique trading days
    rolling_window_data = bond_data[bond_data['Date'].isin(last_122_trading_days)]

    # Pivot to create a matrix of daily returns (rows: days, columns: bonds)
    daily_returns_matrix = rolling_window_data.pivot(index='Date', columns='SecurityId', values='ExpectedReturn')

    # Drop any columns (bonds) with missing data
    daily_returns_matrix = daily_returns_matrix.dropna(axis=1)

    # Check if there are enough scenarios (days) and bonds
    if daily_returns_matrix.shape[0] < lookback_window or daily_returns_matrix.shape[1] < len(tickers):
        print(f"Skipping period {period}: insufficient historical data for bonds.")
        continue

    current_bonds = bond_data[bond_data['Date'] == current_date]
    # Use the daily returns matrix as `scenario_returns`
    scenario_returns = daily_returns_matrix.values  # Convert to NumPy array
    weights = CVaR_optimization2(bond_data, scenario_returns, weights0 , alpha=0.95)
    print(weights)

    if weights is None:
        print("Model returned nothing for weights")
        continue

    # Debugging before the assignment to x[:, period]
    #print(f"Period {period}: Weights before assignment: {weights}")
    #print(f"Period {period}: x[:, {period}] before assignment: {x[:, period]}")

    # Perform the assignment
    x[:, period] = weights

    # Debugging after the assignment to x[:, period]
    #print(f"Period {period}: x[:, {period}] after assignment: {x[:, period]}")

    x0[:, period] = weights

    #print(f"Period {period}: x0[:, {period}] after assignment: {x0[:, period]}")

    # Portfolio calculations
    portfolio_return = np.sum(weights * daily_real_return)
    print("day: ", period, "return: ", portfolio_return)
    
    # Update `currentVal` using relative index `i`
    if not np.isnan(portfolio_return):  # Only update if the return is not NaN
        currentVal[i + 1] = currentVal[i] * (1 + portfolio_return)
    else:
        print(f"Skipping period {period}: missing data for portfolio return.")
    currentVal[i + 1] = currentVal[i] * (1 + portfolio_return)
    
    portfolio_returns[i] = portfolio_return

    # Turnover and transaction costs
    turnover_weights = np.abs(weights - weights0)
    turnover[i] = np.sum(turnover_weights) / 2
    transaction_costs[i] = np.sum(turnover_weights * current_bonds['BidAskSpread'].values)

    # Deduct transaction costs from portfolio value
    currentVal[i + 1] -= transaction_costs[i]

    weights0 = weights


# Calculate Sharpe ratio
excess_returns = portfolio_returns
SR = (gmean(excess_returns + 1) - 1) / excess_returns.std()

# Average turnover and cumulative transaction costs
avgTurnover = np.mean(turnover)
total_transaction_cost = np.sum(transaction_costs)

print("currenval; ", currentVal)

print('Sharpe ratio: ', SR)
print('Avg. turnover: ', avgTurnover)
print('Total transaction costs: ', total_transaction_cost)

# Plot Portfolio Value Evolution
plt.figure(figsize=(10, 6))
plt.plot(range(len(currentVal)), currentVal, label='Portfolio Value')
plt.title('Portfolio Value Evolution')
plt.xlabel('Periods')
plt.ylabel('Portfolio Value ($)')
plt.legend()
plt.grid(True)
plt.show()


 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]' has dtype incompatible with bool, please explicitly cast to a compatible dtype first.
  df.loc[mask, 'IsCouponDay'] = security_data['IsCouponDay']
