In [1]:
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from polygon import RESTClient
from datetime import datetime, timedelta
import pandas_market_calendars as mcal
import polygon
import os, pickle
from scipy.optimize import minimize
from nelson_siegel_svensson.calibrate import calibrate_nss_ols

In [2]:
datetime_diff = lambda date1, date2 : (datetime.strptime(date1, '%Y-%m-%d') - datetime.strptime(date2, '%Y-%m-%d')).days

### Data Collection

In [3]:
load_dotenv("/Users/brad/mlprojects/guidelight/guidelight-api/.env")
token = os.getenv("POLYGON_TOKEN")

In [4]:
client = RESTClient(api_key=token)

In [5]:
import pickle
# pickle.dump(data, open("options_data/AAPL-2023-03-27.pkl", "wb"))
if not os.path.exists("all_contracts.pkl"):
	reqs = client.list_options_contracts("AAPL",as_of="2024-04-16", expired=True, expiration_date_gt="2023-04-16")
	all_contracts = list(reqs)
	pickle.dump(all_contracts, open("all_contracts.pkl", "wb"))
else:
	all_contracts = pickle.load(open("all_contracts.pkl", "rb"))

In [6]:
len(all_contracts)

6164

In [7]:
import pandas as pd

# data = {
#     ('AAPL_2207C00500000', '2023-07-21'): [
#         {'trading_date': '2023-07-01', 'strike': 500, 'open': 15.5, 'high': 16.0, 'low': 15.0, 'close': 15.75, 'volume': 300},
#         {'trading_date': '2023-07-02', 'strike': 500, 'open': 15.6, 'high': 17.0, 'low': 15.2, 'close': 16.50, 'volume': 350}
#     ],
#     ('MSFT_2207P00250000', '2023-07-21'): [
#         {'trading_date': '2023-07-01', 'strike': 250, 'open': 8.5, 'high': 9.0, 'low': 8.0, 'close': 8.75, 'volume': 200},
#         {'trading_date': '2023-07-02', 'strike': 250, 'open': 8.6, 'high': 10.0, 'low': 8.1, 'close': 9.75, 'volume': 250}
#     ]
# }
if not os.path.exists("data.pkl"):
    indices = [(contract.ticker, contract.expiration_date, contract.strike_price) for contract in all_contracts]
    data = {}
    for index in indices:
        ticker, expiration_date, strike_price = index
        current_date = datetime.strptime(expiration_date, "%Y-%m-%d")
        past_date = current_date - timedelta(days=14)

        # get key value data for each agg

        a = [vars(agg) for agg in client.get_aggs(ticker, 1, 'day', past_date, current_date)]
        data[index] = a


    pickle.dump(data, open("data.pkl", "wb"))

else:
    data = pickle.load(open("data.pkl", "rb"))

In [9]:

# dataset = pd.DataFrame(columns=['Weight', 'price', 'maturity', 'S'])
# dataset['Weight'] = df['volume'] / df.volume.sum()
# dataset['price']  = df['price']
# dataset['maturity'] = df['maturity']
# dataset['S']        = df['strike_price']
# dataset['time_from_last_trade'] = -df['days since last trade']
# dataset

In [8]:
# Flatten the data while preserving the option ticker and expiration date
flattened_data = []
for (ticker, expiration, strike_price), entries in data.items():
    for entry in entries:
        entry.update({
            "ticker": ticker,
            "expiration_date": expiration,
            "strike_price": strike_price
        })
        flattened_data.append(entry)

# Create a DataFrame
df = pd.DataFrame(flattened_data)

# Set a MultiIndex using the ticker, expiration date, and trading date
df.set_index(['ticker', "strike_price", 'expiration_date'], inplace=True)

# get by ticker
# 1681099200000
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms').dt.strftime("%Y-%m-%d")
# df.index = df.index.set_levels(pd.to_datetime(df.index.get_level_values('timestamp'), unit='ms').strftime('%Y-%m-%d %H:%M:%S'), level='timestamp')

In [9]:
df.to_csv("options_data-aapl.csv", index_label=['ticker', "strike_price", 'expiration_date'])

In [10]:
df2 = pd.read_csv('options_data-aapl.csv', index_col=[0, 1, 2])
# get only the tickers traded on 2023-04-10
# df2.groupby('timestamp').filter(lambda x: x['timestamp']== '2023-04-10')
# get the level values

agg_series= df.loc[('O:AAPL230421C00050000', slice(None), slice(None))]
target_timestamp = '2023-04-19'

# Create a boolean mask for rows where the timestamp matches the target
i = np.where(agg_series['timestamp'].values == target_timestamp)[0][0]
# agg_series
diff = datetime_diff(agg_series['timestamp'].iloc[i], agg_series['timestamp'].iloc[i-1])
diff

9

In [11]:
def daily_option_data(underlying_ticker:str, date:str):
	# client = RESTClient(api_key=token)
	if os.path.exists(f"options_data/{underlying_ticker}-{date}.csv"):
		return pd.read_csv(f"options_data/{underlying_ticker}-{date}.csv")

	df = pd.read_csv(f'options_data-{underlying_ticker}.csv', index_col=[0, 1, 2])
	option_contracts = df.loc[df['timestamp'] == date]
	option_contracts.reset_index(inplace=True)
	# print(option_contracts)
	colnames = ["ticker", "maturity", "Weight", 'price', 'days since last trade', 'strike', 'S']
	volsurface = pd.DataFrame(columns=colnames)

	for ticker in option_contracts['ticker'].unique():
		agg_series= df.loc[(ticker, slice(None), slice(None))]
		i = np.where(agg_series['timestamp'].values == date)[0][0]
	# agg_series
		if i <= 0:
			continue

		diff = datetime_diff(agg_series['timestamp'].iloc[i], agg_series['timestamp'].iloc[i-1])
		if diff <= 3:
			expiration_date = agg_series.index.get_level_values(1).unique()[0]
			row = pd.DataFrame({
				'ticker': ticker,
				'maturity': datetime_diff(expiration_date, date) / 365,
				'price': agg_series["vwap"].values[i],
				'Weight': agg_series["volume"].values[i] / agg_series["volume"].sum(),
				'days since last trade': diff,
				'strike': agg_series.index.get_level_values(0).unique()[0],
				'S': agg_series['open'].values[i]
			}, columns=colnames, index=[0])

			volsurface = pd.concat([volsurface, row], ignore_index=True)
			

	
	volsurface.to_csv( os.path.join(os.getcwd(), f"options_data/{underlying_ticker}-{date}.csv"))
	return volsurface

In [12]:
cleaned = daily_option_data("AAPL", "2023-04-18")

In [13]:
cleaned

Unnamed: 0.1,Unnamed: 0,ticker,maturity,Weight,price,days since last trade,strike,S
0,0,O:AAPL230421C00100000,0.008219,0.012232,66.7050,1,100.0,67.00
1,1,O:AAPL230421C00115000,0.008219,0.217021,51.6200,1,115.0,51.62
2,2,O:AAPL230421C00120000,0.008219,0.011641,46.5000,1,120.0,46.50
3,3,O:AAPL230421C00125000,0.008219,0.094000,41.2028,1,125.0,41.35
4,4,O:AAPL230421C00130000,0.008219,0.061602,36.0035,1,130.0,35.37
...,...,...,...,...,...,...,...,...
110,110,O:AAPL230428P00170000,0.027397,0.042426,4.3512,1,170.0,4.70
111,111,O:AAPL230428P00172500,0.027397,0.026523,6.2053,1,172.5,5.85
112,112,O:AAPL230428P00175000,0.027397,0.002534,8.2420,1,175.0,8.00
113,113,O:AAPL230428P00177500,0.027397,0.010563,12.2000,1,177.5,12.55


# Multiprocessing 

In [18]:
import pandas as pd
from multiprocessing import Pool
import os

def worker(date, underlying_ticker):
    return daily_option_data(underlying_ticker, date)


def process_multiple_days(underlying_ticker, start_date, end_date):
    # Generate list of dates
    dates = mcal.get_calendar("NYSE").valid_days(start_date=start_date, end_date=end_date)
    
    # Define a helper to wrap your existing function for use with starmap
    
    dataset = [daily_option_data(underlying_ticker, timestamp.date().strftime("%Y-%m-%d")) for timestamp in dates]
    # Setup multiprocessing pool
    with Pool() as pool:
        pool.starmap(worker, dataset)

    # print("Data processing complete for all specified dates.")
    

    # return dataset

dfs = process_multiple_days('AAPL', '2023-05-01', '2023-05-14')
dfs

  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)
  volsurface = pd.concat([volsurface, row], ignore_index=True)


[                    ticker  maturity    Weight    price days since last trade  \
 0    O:AAPL230505C00120000  0.010959  0.009412  49.3000                     3   
 1    O:AAPL230505C00130000  0.010959  0.005747  40.0200                     3   
 2    O:AAPL230505C00135000  0.010959  0.241176  34.6490                     3   
 3    O:AAPL230505C00140000  0.010959  0.055888  30.0039                     3   
 4    O:AAPL230505C00142000  0.010959  0.064327  27.4309                     3   
 ..                     ...       ...       ...      ...                   ...   
 126  O:AAPL230512P00167500  0.030137  0.014162   3.0838                     3   
 127  O:AAPL230512P00170000  0.030137  0.008740   3.9983                     3   
 128  O:AAPL230512P00172500  0.030137  0.002902   5.2753                     3   
 129  O:AAPL230512P00175000  0.030137  0.001630   6.7016                     3   
 130  O:AAPL230512P00177500  0.030137  0.000546   8.4900                     3   
 
      strike  

In [17]:
yield_rates = pd.read_csv("five-year-rates.csv")
d = datetime.strftime(datetime.strptime("2023-03-27", "%Y-%m-%d"), "%m/%d/%Y")
yields = yield_rates.loc[yield_rates["Date"]==d].values[:,1:].astype(np.float64).reshape(-1)
yields
# 

array([4.22, 4.47, 4.91, 4.9 , 4.86, 4.51, 3.94, 3.79, 3.59, 3.57, 3.53,
       3.9 , 3.77])

In [18]:
aapl_aggs.groupby("day")

NameError: name 'aapl_aggs' is not defined

In [None]:
daily_aggs =  [aapl_aggs.groupby('day').get_group(x) for x in aapl_aggs['day'].unique()]

In [None]:
hist_voilatilities = torch.empty(len(daily_aggs))
for i, day in enumerate(daily_aggs):
	hist_voilatilities[i] = estimate_historical_volatility(day['close'].values)

# calculate the historical volatility for each day





In [None]:
import pickle
from heston_param import *
hist_voilatilities = pickle.load(open("hist_voilatilities.pkl", "rb"))
daily_aggs = pickle.load(open("daily_aggs.pkl", "rb"))

In [None]:
while True:
	try:
		print(calibrate_daily_parameters(hist_voilatilities[0], 0.1, daily_aggs[0]["close"].values, 0.0237, daily_aggs[0]["close"].values.shape[0], 50))
		break
	except RuntimeError:
		continue

In [None]:
def single_day_calibration(args):
		i, hist_volatilities, daily_agg = args
		while True:
			try:
				params = calibrate_daily_parameters(hist_volatilities[i], 0.1, day["close"].values, 0.0237, day["close"].values.shape[0], 300)
				
				return i, params
			except RuntimeError:
				continue

In [None]:
len(daily_aggs)

### Naive Monte Carlo:
Runtime: 1.15 Hour. Suboptimal.

In [None]:
daily_params = np.empty((len(daily_aggs), 5))
for i, day in enumerate((daily_aggs[:20])):
	while True:
		try:
			daily_params[i] = calibrate_daily_parameters(hist_voilatilities[i], 0.1, daily_aggs[i]["close"].values, 0.0237, daily_aggs[i]["close"].values.shape[0], 50)
			if (i + 1) % 10 == 0:
					rate =  100 *  np.round((i + 1) /len(daily_params[:20]), 2)
					print(f"{rate}% completed.")
			
			break
		except RuntimeError:
			continue



In [None]:
def calibrate_worker(args):
	single_day_calibration(args)

In [None]:
daily_params = pickle.load(open("parameters.pkl", "rb"))

In [None]:
# runtime is 1.5 hours
daily_params.shape

In [None]:
from multiprocess import Pool

def calibrate_parameters_multiprocessing(daily_aggs, hist_volatilities):
    # Prepare arguments for each task
    tasks = [(i, hist_volatilities[i], daily_aggs[i]) for i in range(len(daily_aggs))]

    # Number of processes, could be set to the number of CPUs or cores
    num_processes = 4

    # Create a multiprocessing pool and map tasks to worker function
    with Pool(processes=num_processes) as pool:
        results = pool.map(calibrate_worker, tasks)

    # Process results
    daily_params = np.empty((len(daily_aggs[:20]), 5))
    for index, params in results:
        if params is not None:
            daily_params[index] = params
        else:
            print(f"Calibration failed for index {index}")

    return daily_params



In [None]:
calibrate_parameters_multiprocessing(daily_aggs[:20], hist_voilatilities[:20])

###  Simulation using FFT for the characteristic equations

https://medium.com/@alexander.tsoskounoglou/pricing-options-with-fourier-series-p3-the-heston-model-d157369a217a

In [19]:
def heston_char(u, params):
    kappa, theta, zeta, rho, v0, r, q, T, S0 = params 
    t0 = 0.0 ;  q = 0.0
    m = np.log(S0) + (r - q)*(T-t0)
    D = np.sqrt((rho*zeta*1j*u - kappa)**2 + zeta**2*(1j*u + u**2))
    C = (kappa - rho*zeta*1j*u - D) / (kappa - rho*zeta*1j*u + D)
    beta = ((kappa - rho*zeta*1j*u - D)*(1-np.exp(-D*(T-t0)))) / (zeta**2*(1-C*np.exp(-D*(T-t0))))
    alpha = ((kappa*theta)/(zeta**2))*((kappa - rho*zeta*1j*u - D)*(T-t0) - 2*np.log((1-C*np.exp(-D*(T-t0))/(1-C))))
    return np.exp(1j*u*m + alpha + beta*v0)

In [20]:
import numpy as np
from numpy import sqrt, exp, pi, cos, sin, log, abs
from numba import njit

@njit
def Fourier_Heston_Put(S0, K, T, r, 
                  # Heston Model Paramters
                  kappa, # Speed of the mean reversion 
                  theta, # Long term mean
                  rho,   # correlation between 2 random variables
                  zeta,  # Volatility of volatility
                  v0,    # Initial volatility 
                  opt_type,
                  N = 1_012,
                  z = 24
                  ):

  def heston_char(u): 
    t0 = 0.0 ;  q = 0.0
    m = log(S0) + (r - q)*(T-t0)
    D = sqrt((rho*zeta*1j*u - kappa)**2 + zeta**2*(1j*u + u**2))
    C = (kappa - rho*zeta*1j*u - D) / (kappa - rho*zeta*1j*u + D)
    beta = ((kappa - rho*zeta*1j*u - D)*(1-exp(-D*(T-t0)))) / (zeta**2*(1-C*exp(-D*(T-t0))))
    alpha = ((kappa*theta)/(zeta**2))*((kappa - rho*zeta*1j*u - D)*(T-t0) - 2*log((1-C*exp(-D*(T-t0))/(1-C))))
    return exp(1j*u*m + alpha + beta*v0)
  
  # # Parameters for the Function to make sure the approximations are correct.
  c1 = log(S0) + r*T - .5*theta*T
  c2 = theta/(8*kappa**3)*(-zeta**2*exp(-2*kappa*T) + 4*zeta*exp(-kappa*T)*(zeta-2*kappa*rho) 
        + 2*kappa*T*(4*kappa**2 + zeta**2 - 4*kappa*zeta*rho) + zeta*(8*kappa*rho - 3*zeta))
  a = c1 - z*sqrt(abs(c2))
  b = c1 + z*sqrt(abs(c2))
  
  h       = lambda n : (n*pi) / (b-a) 
  g_n     = lambda n : (exp(a) - (K/h(n))*sin(h(n)*(a - log(K))) - K*cos(h(n)*(a - log(K)))) / (1 + h(n)**2)
  g0      = K*(log(K) - a - 1) + exp(a)
  
  F = g0 
  for n in range(1, N+1):
    h_n = h(n)
    F += 2*heston_char(h_n) * exp(-1j*a*h_n) * g_n(n)

  F = exp(-r*T)/(b-a) * np.real(F)
  F = F if opt_type == 'p' else F + S0 - K*exp(-r*T)
  return F if F > 0 else 0




In [21]:
S0      = 100.      # initial asset price
K       = 50.       # strike
r       = 0.03      # risk free rate
T       = 1/365     # time to maturity

v0=0.4173 ; kappa=0.4352 ; theta=0.2982 ; zeta=1.3856 ; rho=-0.0304

In [22]:
import py_vollib_vectorized

price = 0.10 ; S = 95 ; K = 100 ; t = .2 ; r = .2 ; flag = 'c'

def implied_volatility(price, S, K, t, r, flag):
  return py_vollib_vectorized.vectorized_implied_volatility(
    price, S, K, t, r, flag, q=0.0, on_error='ignore', model='black_scholes_merton',return_as='numpy') 


In [None]:
print('Speed Analysis of the implied volatility Function: Per Option')
%timeit implied_volatility(price, S, K, t, r, flag)

Speed Analysis of the implied volatility Function: Per Option
432 µs ± 127 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [23]:
# %pip install pyFFTW
import pyfftw

In [24]:
import numpy as np
from numpy import exp, pi, log, sqrt
# from  numpy.fft import fft
import pyfftw
# @njit
def heston_fft2(S0, K, T, r, kappa, theta, rho, zeta, v0, opt_type, N=1024, alpha=1.5):
    eta = 0.25  # Grid spacing for the integration variable
    lambda_u = 2 * pi / (N * eta)  # Grid spacing for log strike

    # Adjustments for the damping factor
    alpha = alpha  # Damping factor, typically 1 or 1.5

    # Characteristic function for the Heston model as before
    def heston_char(u):
        t0 = 0.0 ; q = 0.0
        m = log(S0) + (r - q) * (T - t0)
        D = sqrt((rho * zeta * 1j * u - kappa) ** 2 + zeta ** 2 * (1j * u + u ** 2))
        C = (kappa - rho * zeta * 1j * u - D) / (kappa - rho * zeta * 1j * u + D)
        beta = ((kappa - rho * zeta * 1j * u - D) * (1 - exp(-D * (T - t0)))) / (zeta ** 2 * (1 - C * exp(-D * (T - t0))))
        alpha = ((kappa * theta) / (zeta ** 2)) * ((kappa - rho * zeta * 1j * u - D) * (T - t0) - 2 * log((1 - C * exp(-D * (T - t0))) / (1 - C)))
        return exp(1j * u * m + alpha + beta * v0)

    # Array of discretized u values (integration variable)
    u = np.arange(1, N) * eta
    u = np.hstack((np.array([0.000001]), u))  # Avoid division by zero in calculations

    # Weights for the integration
    weights = np.ones(N)
    weights[0] = 0.5  # Trapezoidal rule: first weight is 0.5
    weights = weights * eta

    # Damping factor applied to characteristic function
    adjusted_char_fn = exp(-r * T) * (heston_char(u - (alpha + 1) * 1j) / (alpha ** 2 + alpha - u ** 2 + 1j * (2 * alpha + 1) * u))

    # FFT calculation
    fft_object = pyfftw.builders.rfft(adjusted_char_fn.real * weights, threads=4)
    fft_values = fft_object()
    fft_values = fft_values[:N // 2]  # Only need the first half of the FFT output

    # Calculate strike prices corresponding to FFT output
    strikes = S0 * exp(-lambda_u * np.arange(N // 2))

    # Option prices
    prices = np.exp(-alpha * np.log(strikes)) / pi * fft_values.real

    # Find the index of the strike closest to K
    index = np.argmin(np.abs(strikes - K))
    price = prices[index]
    return price if opt_type == 'p' else price + S0 - K * exp(-r * T)

# Example call to function
price = heston_fft2(S0=100, K=100, T=1, r=0.05, kappa=0.2, theta=0.04, rho=-0.7, zeta=0.2, v0=0.04, opt_type='p')
                   


In [None]:
price

0.09904877056696768

In [None]:
Fourier_Heston_Put(S0=100, K=100, T=1, r=0.05, kappa=0.2, theta=0.04, rho=-0.7, zeta=0.2, v0=0.04, opt_type='p')

8.117640116356107

In [25]:
from nelson_siegel_svensson.calibrate import calibrate_nss_ols
yield_maturities = np.array([1/12, 2/12, 3/12, 4/12, 6/12, 1, 2, 3, 5, 7, 10, 20, 30])
# yields  = np.array([5.30,5.39,5.50,5.50,5.44,5.11,4.33,3.98,3.70,3.66,3.61,3.98,3.84])
# get the first row of the yield rates
yields = yield_rates.iloc[0].values[1:].astype(np.float64)
curve_fit, status = calibrate_nss_ols(yield_maturities,yields)
# yields

In [26]:
curve_fit, status

(NelsonSiegelSvenssonCurve(beta0=-5.345961485007193, beta1=10.920561543787652, beta2=11.074852298011132, beta3=-4.034323942085258, tau1=43.44535134546507, tau2=3.30260991718184),
   message: Desired error not necessarily achieved due to precision loss.
   success: False
    status: 2
       fun: 0.010378206853388534
         x: [ 4.345e+01  3.303e+00]
       nit: 31
       jac: [ 7.105e-07 -6.778e-05]
  hess_inv: [[ 3.425e+04 -2.641e+02]
             [-2.641e+02  2.338e+01]]
      nfev: 312
      njev: 100)

In [27]:
print('Speed Analysis of Fourier_Heston_Put: Per Option')
%timeit Fourier_Heston_Put(S0,K, T, r, kappa, theta, rho, zeta, v0, 'p', N = 1_012, z = 24)

Speed Analysis of Fourier_Heston_Put: Per Option
214 µs ± 59.9 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
def get_implied_volatility(price, S, K, t, r, flag):
    return py_vollib_vectorized.vectorized_implied_volatility(
        price, S, K, t, r, flag, q=0.0, on_error='ignore', model='black_scholes_merton',return_as='numpy') 

In [35]:
def SqErr(x, volSurface, _S, _K, _T, _r, _IV, _Weight):
    v0, kappa, theta, zeta, rho = x
    
    # Calculate prices using Heston Model
    Price_Heston = get_resutls_array_Heston(
        volSurface, v0, kappa, theta, zeta, rho, N=1_012, z=24
    )
    
    # Calculate implied volatilities
    IV_Heston = get_implied_volatility(
        price=Price_Heston, S=_S, K=_K, t=_T, r=_r, flag='p'
    )
    
    # Handle undefined IV calculations
    diff = IV_Heston - _IV
    idx = np.isnan(diff) | np.isinf(diff)
    diff[idx] = 0 - _IV[idx]
    IV_Heston[idx] = 0
    
    # Calculate RMSE
    rmse = sqrt(np.mean((diff * 100) ** 2 * _Weight))
    
    # Debugging info
    zeros = int(np.where(IV_Heston == 0, 1, 0).sum())
    wmae  = np.mean(np.abs(diff * 100) * _Weight)
    # TODO: heston debug 
    # print_debug_info(v0, kappa, theta, zeta, rho, wmae, idx, zeros, rmse)
    
    return rmse
def get_error_Heston(volSurface, v0, kappa, theta, zeta, rho):
    """Calculates the error between the Heston model and the market prices.
    Arguments:
        volSurface: DataFrame with the market prices.
        v0: Initial variance.
        kappa: Mean reversion speed.
        theta: Long-run variance.
        zeta: Volatility of volatility.
        rho: Correlation between the variance and the asset.
    """
    error = 0
    for _, row in volSurface.iterrows():
        P = row['price']
        HP = Fourier_Heston_Put(S0=row['S'], K=row['strike'], v0=v0, kappa=kappa, theta=theta, zeta=zeta, rho=rho, T=row['maturity'], r=row['rate'], N=2048)
        error += (P - HP)**2

    return error / volSurface.shape[0]

def get_resutls_array_Heston(volSurface, v0, kappa, theta, zeta, rho, N=10_000, z=64):
    # Initialize the results array
    results = -np.ones(volSurface.shape[0])
    # reset the index of the options dataframe
    volSurface.index = np.arange(0, volSurface.shape[0])
    # loop through the rows of the options dataframe and run the Fourier_Heston_Put function
    for idx, row in volSurface.iterrows():
        results[idx] = Fourier_Heston_Put(S0=int(row['S']), K=int(row['strike']), v0=v0, kappa=kappa, theta=theta, zeta=zeta, rho=rho, T=row['maturity'], r=row['rate'], N=N, opt_type='p',z=z)
    return results

def get_resutls_df_Heston(volSurface, v0, kappa, theta, zeta, rho, N=2048, z=100):
    observed = volSurface.copy(deep=True)
    heston = volSurface.copy(deep=True)
    observed['source'] = 'Observed'
    heston['source'] = 'Heston Model'

    heston_prices = [] 
    implied_volatilities = []
    for _, row in volSurface.iterrows():
        heston_price = Fourier_Heston_Put(S0=row['S'], K=row['strike'], v0=v0, kappa=kappa, theta=theta, zeta=zeta, rho=rho, T=row['maturity'], r=row['rate'], N=N, opt_type='p', z=z)
        heston_prices.append(heston_price)
        # np.array(... , ndmin=1) So the type of the input is compatible with what numba expects
        maturity  = np.array(row['maturity'],ndmin=1)
        observed_price  = np.array(heston_price,ndmin=1)
        S0 = np.array(row['S'],ndmin=1)
        K  = np.array(row['strike'],ndmin=1)
        r  = np.array(row['rate'],ndmin=1)
        implied_volatility = get_implied_volatility(price=observed_price, S=S0, K=K, t=maturity, r=r, flag=option_type)
        implied_volatilities.append(implied_volatility[0])

    heston['price'] = heston_prices
    heston['IV']    = implied_volatilities

    return pd.concat([observed, heston])

def get_error_df_Heston(volSurface, v0, kappa, theta, zeta, rho, diff='Price', error='Error', weighted=True, N=10_000, z=64):
    if   error == 'Error':          _name = f'Weighted Error {diff}'             if weighted else f'Error {diff}'
    elif error == 'Perc Error':     _name = f'Weighted Persentage Error {diff}'  if weighted else f'Persentage Error {diff}'
    elif error == 'Squared Error':  _name = f'Weighted Squared Error {diff}'     if weighted else f'Squared Error {diff}'
    else: raise Exception("Error: variable 'error' is not defined correctly")
    
    results_df = {'strike':[], 'maturity':[], _name:[], 'Opt. Type':[], 'Weight':[]}

    for _, row in volSurface.copy(deep=True).iterrows():
        _P = Fourier_Heston_Put(S0=row['S'], K=row['strike'], v0=v0, kappa=kappa, theta=theta, zeta=zeta, rho=rho, T=row['maturity'], r=row['rate'], N=N, z=z, opt_type=row['Type'])
        # np.array(... , ndmin=1) So the type of the input is compatible with what numba expects
        _T  = np.array(row['maturity'],ndmin=1)
        _C  = np.array(_P,ndmin=1)
        _P  = np.array(row['price'],ndmin=1)
        _S0 = np.array(row['S'],ndmin=1)
        _K  = np.array(row['strike'],ndmin=1)
        _r  = np.array(row['rate'],ndmin=1)

        _IV  = get_implied_volatility(price=_C, S=_S0, K=_K, t=_T, r=_r, flag='p')
        _IV2 = get_implied_volatility(price=row['price'], S=_S0, K=_K, t=_T, r=_r, flag='p')

        if error    == 'Error':
            if diff == 'IV':  _error  = (_IV - _IV2) *                (row['Weight'] if weighted else 1)
            else           :  _error  = (_C - _P) *                   (row['Weight'] if weighted else 1)
        elif error  == 'Perc Error':
            if diff == 'IV':  _error  = ((_IV - _IV2)/_IV2) * 100 *   (row['Weight'] if weighted else 1)
            else           :  _error  = ((_C - _P)/_P) * 100 *        (row['Weight'] if weighted else 1)
        elif error  == 'Squared Error':
            if diff == 'IV':  _error  = (_IV - _IV2)**2 *             (row['Weight'] if weighted else 1)
            else           :  _error  = (_C - _P)**2 *                (row['Weight'] if weighted else 1)

        results_df[_name].append(_error[0])
        results_df['maturity'].append(_T[0])
        results_df['strike'].append(_K[0])
        results_df['Weight'].append(row['Weight']*10)

    return pd.DataFrame(results_df)

In [37]:
import py_vollib_vectorized
def heston_volSurface(cleaned_df, yields):
 
    volSurface = cleaned_df.drop(columns=['days since last trade', 'ticker'])
    price = 0.10 ; S = 95 ; K = 100 ; t = .2 ; r = .2 ; flag = 'p'

    

    def implied_volatility(price, S, K, t, r, flag):
        return py_vollib_vectorized.vectorized_implied_volatility(
            price, S, K, t, r, flag, q=0.0, on_error='ignore', model='black_scholes_merton',return_as='numpy')

    yield_maturities = np.array([1/12, 2/12, 3/12, 4/12, 6/12, 1, 2, 3, 5, 7, 10, 20, 30])
    # yields  = np.array([5.30,5.39,5.50,5.50,5.44,5.11,4.33,3.98,3.70,3.66,3.61,3.98,3.84])
    # get the first row of the yield rates
    # TODO: set K's
    
    curve_fit, status = calibrate_nss_ols(yield_maturities,yields)
    volSurface['rate'] = volSurface['maturity'].apply(curve_fit) / 100
    volSurface['IV'] = implied_volatility(volSurface['price'], volSurface['S'], volSurface['strike'], volSurface['maturity'], volSurface['rate'], flag)
    return volSurface

def heston_daily_volSurface(underlying_ticker, date):
    cleaned = daily_option_data(underlying_ticker, date)
    yield_rates = pd.read_csv("five-year-rates.csv")
    d = datetime.strftime(datetime.strptime("2023-03-27", "%Y-%m-%d"), "%m/%d/%Y")
    yields = yield_rates.loc[yield_rates["Date"]==d].values[:,1:].astype(np.float64).reshape(-1)
    volSurface = heston_volSurface(cleaned, yields)
    return volSurface


def heston_parameters(VolSurface):
	# Extract data from dailyVolSurface DataFrame
    _K = VolSurface['strike'].to_numpy()

    _C = VolSurface['price'].to_numpy()
    _T      = VolSurface['maturity'].to_numpy()
    _r      = VolSurface['rate'].to_numpy()
    _S      = VolSurface['S'].to_numpy()
    _IV     = VolSurface['IV'].to_numpy()
    _Weight = VolSurface['Weight'].to_numpy()

    # Initial parameters and bounds for optimization
    params = {
        "v0": {"x0": 0.002874, "lbub": [1e-3, 1.2]},
        "kappa": {"x0": 1.6891, "lbub": [1e-3, 10]},
        "theta": {"x0": 0.0190, "lbub": [1e-3, 1.2]},
        "zeta": {"x0": 3.7472, "lbub": [1e-2, 4]},
        "rho": {"x0": -0.2731, "lbub": [-1, 1]}
    }
    x0 = [param["x0"] for _, param in params.items()]
    bnds = [param["lbub"] for _, param in params.items()]
    result = minimize(
    SqErr, x0, args=(VolSurface,  _S, _K, _T, _r, _IV, _Weight),  tol=1e-5, method='SLSQP',
    options={'maxiter': 80, 'ftol': 1e-5, 'disp': True},
    bounds=bnds, jac='3-point'
	)

    return result.x

def heston_day_params(underlying_ticker, date):
    volSurface = heston_daily_volSurface(underlying_ticker, date)
    return heston_parameters(volSurface)

In [39]:

heston_day_params('AAPL', "2023-04-18")

Optimization terminated successfully    (Exit mode 0)
            Current function value: 78.54496509870489
            Iterations: 25
            Function evaluations: 393
            Gradient evaluations: 25


array([ 0.00453845,  1.66746411,  0.02271885,  3.77448265, -0.37226964])

In [None]:
import numpy as np
from numpy import sqrt, exp, pi, cos, sin, log, abs
from numba import njit, prange

S0      = 100.      # initial asset price
K       = 50.       # strike
r       = 0.03      # risk free rate
T       = 1/365     # time to maturity

v0=0.4173 ; kappa=0.4352 ; theta=0.2982 ; zeta=1.3856 ; rho=-0.0304

@njit(parallel=True, fastmath=False, cache=True)
def Fourier_Heston_Put(S0, K, T, r, 
                    # Heston Model Paramters
                    kappa, # Speed of the mean reversion 
                    theta, # Long term mean
                    rho,   # correlation between 2 random variables
                    zeta,  # Volatility of volatility
                    v0,    # Initial volatility 
                    opt_type,
                    N = 10_012,
                    z = 24
                    ):

    def heston_char(u): 
        t0 = 0.0 ;  q = 0.0
        m = log(S0) + (r - q)*(T-t0)
        D = sqrt((rho*zeta*1j*u - kappa)**2 + zeta**2*(1j*u + u**2))
        C = (kappa - rho*zeta*1j*u - D) / (kappa - rho*zeta*1j*u + D)
        beta = ((kappa - rho*zeta*1j*u - D)*(1-exp(-D*(T-t0)))) / (zeta**2*(1-C*exp(-D*(T-t0))))
        alpha = ((kappa*theta)/(zeta**2))*((kappa - rho*zeta*1j*u - D)*(T-t0) - 2*log((1-C*exp(-D*(T-t0))/(1-C))))
        return exp(1j*u*m + alpha + beta*v0)
    
    c1 = log(S0) + r*T - .5*theta*T
    c2 = theta/(8*kappa**3)*(-zeta**2*exp(-2*kappa*T) + 4*zeta*exp(-kappa*T)*(zeta-2*kappa*rho) 
            + 2*kappa*T*(4*kappa**2 + zeta**2 - 4*kappa*zeta*rho) + zeta*(8*kappa*rho - 3*zeta))
    a = c1 - z*sqrt(abs(c2))
    b = c1 + z*sqrt(abs(c2))
    
    h       = lambda n : (n*pi) / (b-a) 
    g_n     = lambda n : (exp(a) - (K/h(n))*sin(h(n)*(a - log(K))) - K*cos(h(n)*(a - log(K)))) / (1 + h(n)**2)
    g0      = K*(log(K) - a - 1) + exp(a)
    
    F = g0 
    for n in prange(1, N+1):
        h_n = h(n)
        F += 2*heston_char(h_n) * exp(-1j*a*h_n) * g_n(n)

    F = exp(-r*T)/(b-a) * np.real(F)
    F = F if opt_type == 'p' else F + S0 - K*exp(-r*T)
    return F if F > 0 else 0

N = 1_012
print(f"Speed Analysis of Fourier_Heston_Put: Per Option. N = {N} : {Fourier_Heston_Put(S0,K, T, r, kappa, theta, rho, zeta, v0, 'p', N = N, z = 24)}")
%timeit Fourier_Heston_Put(S0,K, T, r, kappa, theta, rho, zeta, v0, 'p', N = N, z = 24)

Speed Analysis of Fourier_Heston_Put: Per Option. N = 1012 : 4.2574521354627604e-05
117 µs ± 12.7 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
