# Imports

In [48]:
# From Imports
from math import log, sqrt, pi, exp
from scipy.stats import norm
from datetime import datetime, date
from pandas import DataFrame
from datetime import datetime
from scipy.optimize import minimize
from arch import arch_model
from sklearn.linear_model import LinearRegression

# Alias Imports
import numpy_financial as npf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

# Imports
import math
import arch
import openpyxl 
import pprint

print("Import Complete")

Import Complete


# Importing Bond Trading Data

In [49]:
# Importing Excel data of 2Y and 10Y Tbills
tbill2Y = pd.read_excel("Treasury_Data_V1.xlsx", "T 4.625 02 28 25 Govt TRADES")
tbill10Y = pd.read_excel("Treasury_Data_V1.xlsx", "T 3.375 05 15 33 Govt TRADES")
#note, volume is traded in increments of $1000 USD at STGT Exchange
#note, need to clean 10Y data, bids / asks don't line up

# Making Trade Time, Trade Volume, and Ask Time into lists for 2Y Bond Data
dates2Y = list(tbill2Y["Trade Time"])
volumes2Y = list(tbill2Y["Trade Volume"])
asks2Y = list(tbill2Y["Ask Time"]) # Currently Not Used

print("Excel Data Imported")

Excel Data Imported


# Cleaning Bond Trading Data

In [50]:
# Cleaning Trade Time data
clean_dates2Y = []

# Going through the range and converting dates to Numpy datetime64s
for i in range(len(dates2Y)):
    date = np.datetime64(str(dates2Y[i].date()))

    # Removing all NaT values from the list
    if not np.isnat(date):

        # Adding all dates to our clean_dates2Y list
        clean_dates2Y.append(date)

    # If no date given, we make datetime max value for easy spotting
    else:
        clean_dates2Y.append(np.datetime64(datetime.max))

# Cleaning Ask Time data
clean_asks2Y = []

# Going through the range and converting dates to Numpy datetime64s
for i in range(len(asks2Y)):
    date = np.datetime64(str(asks2Y[i].date()))

    # Removing all NaT values from the list
    if not np.isnat(date):

        # Adding all dates to our clean_asks2Y list
        clean_asks2Y.append(date)

    # If no date given, we make datetime max value for easy spotting
    else:
        clean_dates2Y.append(np.datetime64(datetime.min))

print("Data cleaned")

Data cleaned


# Calculating Average Daily Volume

In [52]:
# Adding new Date object column to data
tbill2Y['Trade Time Date'] = clean_dates2Y
tbill2Y['Ask Time Date'] = clean_asks2Y

# Calculating the total volume on each specific day     
day_volumes = tbill2Y.groupby(['Trade Time Date'])['Trade Volume'].sum()

# Getting all the unique dates from Trade Time Date
unique_dates = tbill2Y['Trade Time Date'].unique()

# Creating Average Hourly Trading Volumes from Daily Volumes in Dictionary
average_volume_daily = {} #key is date, value is total_volume

# Creating dictionary around unique dates, and summed daily volumes
for day, vol in zip(unique_dates, day_volumes):
    
    # Calculate the average volume for the date and store it in the dictionary
    average_volume_daily[day] = vol / 12.0

# Splitting into lists of Dates, Volumes 
dates = average_volume_daily.keys()
hourly_volumes = average_volume_daily.values()

# Print Dates, Volumes
# print(dates)
# print(daily_volumes)

# Print the resulting dictionary
# pprint.pprint(average_volume_daily)

# Formatting Float in Pandas
pd.options.display.float_format = '{:.2f}'.format

# Print the resulting list
daily_volumes = pd.DataFrame()
daily_volumes['Trading Day'] = dates
daily_volumes['Hourly Volume'] = hourly_volumes
# daily_volumes

print("Daily Volumes")

# Simulating Price and Spread Process

In [165]:
#1) Use Cox-Ingersoll-Ross Model (CIR) model to simulate bond yield process (approximation of price process)
#2) Use GARCH model to simulate bid-ask spreads in one step ahead forecasts
#3) Z is simulated price, ask is Zu, bid is Zl, utilize to calculate daily portfolio value and loss
#4) Calculate compounded interest loss
#5) Add final portfolio loss to interest loss, print resuslts of all simulations
#OTHER NOTES#
#determine what k value we want to keep, assume constant k, $50B issue size

# (HELPER FUNCTION) Calculate instantaneous LP position value as a function of k, Zl (bid), Zu (ask), and Z (price)

In [166]:
def alpha_t(k, Z, Zl, Zu):
    if Zl < Z < Zu:
        xi = k * (Z**(1/2) - (Zl)**(1/2))
        yi = k * (Z**(-1/2) - Zu**(-1/2))
        alpha = xi + yi * Z
    elif Z < Zl:
        xi = 0
        yi = k * ((Zl)**(-1/2) - (Zu)**(-1/2))
        alpha = xi + yi * Z
    elif Z > Zu:
        xi = k * ((Zu)**(1/2) - (Zl)**(1/2))
        yi = 0
        alpha = xi + yi * Z
    return alpha

# Calculating Historical Midpoint between Bid and Ask

In [167]:
historical_bid_data = tbill2Y["Bid"]
historical_ask_data = tbill2Y["Ask"]
historical_midpoint = (historical_bid_data + historical_ask_data) / 2
historical_spread = abs(historical_bid_data - historical_ask_data)
# historical_midpoint

# (HELPER FUNCTION) Calculating Price to Yield

In [168]:
def price_to_yield(price, face_value, coupon_rate, coupon_frequency, current_date, maturity_date):
    time_to_maturity_days = (maturity_date - current_date).days
    time_to_maturity_hours = time_to_maturity_days * 24  # Convert to hours
    
    full_coupon_periods = int(time_to_maturity_hours / (365.0 * 24 / coupon_frequency))
    partial_coupon_period_hours = time_to_maturity_hours % (365.0 * 24 / coupon_frequency)
    
    cash_flows = [(coupon_rate * face_value) / coupon_frequency] * full_coupon_periods
    
    # Handle the partial coupon period
    if partial_coupon_period_hours > 0:
        partial_coupon_payment = (coupon_rate * face_value * partial_coupon_period_hours) / (365.0 * 24)
        cash_flows.append(partial_coupon_payment)
    
    cash_flows[-1] += face_value  # Add the face value at maturity
    yield_value = np.nan
    
    try:
        yield_value = npf.irr([-price] + cash_flows)
    except ValueError:
        pass
    
    return yield_value

# (HELPER FUNCTION) Estimate CIR model parameters using historical yields

In [169]:
def cir_likelihood(parameters, data):
    mu, sigma, kappa, theta = parameters
    dt = 1 / (252.0 * 12)  # Hourly data assumed
    n = len(data)
    log_likelihood = 0.0
    
    for i in range(1, n):
        Zl = data[i - 1]
        Z = data[i]
        Z_diff = Z - Zl
        gamma = np.sqrt(kappa**2 + 2 * sigma**2)
        
        log_likelihood += (
            -(n - 1) * (np.log(2 * gamma) - np.log(sigma))
            - (kappa + gamma) * Z_diff
            - 2 * np.log((2 * gamma * np.exp(kappa + gamma * dt)) / (2 * gamma + (kappa + gamma) * (np.exp(gamma * dt) - 1)))
        )
    
    return -log_likelihood

initial_parameters = [0.05, 0.1, 0.2, 0.03]  # Initial guesses for parameters
result = minimize(cir_likelihood, initial_parameters, args=(historical_midpoint,), method='L-BFGS-B')
mu, sigma, kappa, theta = result.x
print(result.x)

[ 0.05        1.05484352 -0.00940215  0.03      ]


# Simulate bond yields based on the estimated CIR parameters

In [170]:
T = 1  # Time to maturity, years
n_simulations = 1000  # Num of sims
n_periods = 252 * 12  # Num of hourly periods in a year
coupon_rate = 0.04625  
coupon_frequency = 2  

simulated_yields = np.zeros((n_simulations, n_periods + 1)) #init array

for i in range(n_simulations):
    dt = T / n_periods
    current_date = tbill2Y["Bid Time"][0] 
    maturity_date = current_date + pd.DateOffset(years=T)
    bond_price = historical_midpoint.iloc[0]  # Initial bond price at first midpoint value
    
    for j in range(1, len(historical_midpoint)):
        bond_yield = price_to_yield(bond_price, 1000, coupon_rate, coupon_frequency, current_date, maturity_date)
        Z_diff = bond_yield - historical_midpoint.iloc[j - 1]
        gamma = np.sqrt(kappa**2 + 2 * sigma**2)
        bond_yield += kappa * (theta - bond_yield) * dt + sigma * np.sqrt(bond_yield) * np.random.normal(0, np.sqrt(dt))  # CIR process
        bond_price = 1000 / ((1 + bond_yield / coupon_frequency) ** (coupon_frequency * (maturity_date - current_date).days / (365 * 24)))  # Convert yield to price
        simulated_yields[i, j] = bond_yield

# Train a volume model based on historical volume data 

In [None]:
historical_volume_data = daily_volumes

# Prepare data for training the volume model
data = pd.DataFrame({'Bond_Price': bond_prices[:, 1:].flatten(), 'Interest_Rate': simulated_interest_rates[:, 1:].flatten()})
X = data.values
y = historical_volume_data.values

# Train a linear regression model, replace process with more realistic simulation later
volume_model = LinearRegression()
volume_model.fit(X, y)

# Predict volumes for the simulated data
simulated_volumes = volume_model.predict(X).reshape(n_simulations, n_periods)

# Replacing Spread And Volume Simulation Code

In [62]:
#Estimating OU (Ornstein–Uhlenbeck) Process Paramters for Spread Simulation

# Historical spread data is used from above calculation

# Function to calculate the OU likelihood
def ou_likelihood(parameters, data):
    mean_reversion, vol, initial_spread_min = parameters
    dt = 1  # Time step (you can adjust this based on your data frequency)
    log_likelihood = 0.0
    spread = initial_spread_min
    
    for i in range(0, len(historical_spread)):
        spread_diff = historical_spread[i]
        log_likelihood += -0.5 * (spread_diff / vol) ** 2
        log_likelihood -= 0.5 * np.log(2 * np.pi * vol ** 2 * dt)
        spread += mean_reversion * (initial_spread_min - spread) * dt

    return -log_likelihood

# Initial parameter guesses
intial_mean_reversion = 0.1
intial_vol = 0.05
intial_min_arg = historical_spread[0]

# Packing them into parameter tupe
initial_parameters = (intial_mean_reversion, intial_vol, intial_min_arg)

# Minimize the negative log-likelihood to estimate OU parameters
result = minimize(ou_likelihood, initial_parameters, args=(historical_spread,), method='L-BFGS-B')

# Extract estimated parameters
mean_reversion, volatility, initial_spread = result.x

TypeError: float() argument must be a string or a real number, not 'ellipsis'

In [59]:
#Simulating Spreads with OU Process

n_simulations = 1000  # Number of simulations
n_periods = len(historical_spread)  # Number of periods to simulate

simulated_spreads = np.zeros((n_simulations, n_periods))

for i in range(n_simulations):
    spread = initial_spread
    for j in range(n_periods):
        spread += mean_reversion * (initial_spread - spread) * dt + volatility * np.sqrt(dt) * np.random.normal(0, 1)
        simulated_spreads[i, j] = spread

In [60]:
#Simulating Volume using VWAP and volume and Poisson Process

# Replace with volume data
historical_hourly_volume = [10, 15, 12, 9, 14, 11, 8, 13, 10, 16, 12, 9]

# Calculate the average trading rate (λ) from historical data
average_trading_rate = np.mean(historical_hourly_volume)

# Simulate trading volume using a Poisson process
simulated_hourly_volume = np.random.poisson(average_trading_rate, len(historical_hourly_volume))

# Adjust for daily variation (for example, increase trading rate during high-activity hours)
# Can customize this adjustment based on your market's characteristics
simulated_hourly_volume[0] *= 1.2  # Increase volume during market open
simulated_hourly_volume[-1] *= 1.2  # Increase volume during market close

# Normalize simulated volume to match historical data
simulated_hourly_volume *= sum(historical_hourly_volume) / sum(simulated_hourly_volume)

# Create a DataFrame to store the simulated volume data
simulated_data = pd.DataFrame({'Hourly_Volume': simulated_hourly_volume})

# Simulate VWAP prices based on historical VWAP data
# Assuming a simple linear relationship between volume and price
simulated_data['VWAP'] = np.interp(simulated_data['Hourly_Volume'],
                                   np.cumsum(historical_hourly_volume),
                                   historical_hourly_vwap)


UFuncTypeError: Cannot cast ufunc 'multiply' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'

In [46]:
daily_profits = forecasted_spreads * simulated_volumes * fee_rate / 2  # Divide by 2 to account for bid and ask

total_daily_pnl = np.array(portfolio_losses) + np.array(daily_opportunity_cost) + daily_profits

NameError: name 'forecasted_spreads' is not defined

# Plot the LP PnL time series

In [47]:
plt.figure(figsize=(12, 6))
plt.plot(total_daily_pnl, label='LP PnL')
plt.title('Liquidity Provider (LP) PnL Over Time')
plt.xlabel('Day')
plt.ylabel('PnL')
plt.legend()
plt.grid(True)
plt.show()

NameError: name 'total_daily_pnl' is not defined

<Figure size 1200x600 with 0 Axes>

# Create a heatmap of LP PnL

In [None]:
plt.figure(figsize=(12, 6))
sns.heatmap(np.array([total_daily_pnl]), annot=True, cmap='coolwarm', fmt='.2f', cbar=True)
plt.title('Liquidity Provider (LP) PnL Heatmap')
plt.xlabel('Day')
plt.ylabel('Simulation')
plt.show()