In [34]:
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from polygon import RESTClient
from datetime import datetime,timezone
import os

### Data Collection

In [35]:
load_dotenv("/Users/brad/mlprojects/guidelight/guidelight-api/.env")
token = os.getenv("POLYGON_TOKEN")

In [36]:
client = RESTClient(api_key=token)

In [37]:
a = client.list_options_contracts(underlying_ticker="AAPL", as_of="2023-04-04", contract_type="put")
option_contracts =  list(a)

In [38]:
df = pd.DataFrame(columns=list(option_contracts[0].__dict__.keys()))
for contract in option_contracts:
	a = pd.DataFrame([contract.__dict__.values()], columns=contract.__dict__.keys())
	df = pd.concat([df, a])

df = df.drop(columns=["additional_underlyings", "correction"])
df

Unnamed: 0,cfi,contract_type,exercise_style,expiration_date,primary_exchange,shares_per_contract,strike_price,ticker,underlying_ticker
0,OPASPS,put,american,2023-04-06,BATO,100,50,O:AAPL230406P00050000,AAPL
0,OPASPS,put,american,2023-04-06,BATO,100,55,O:AAPL230406P00055000,AAPL
0,OPASPS,put,american,2023-04-06,BATO,100,60,O:AAPL230406P00060000,AAPL
0,OPASPS,put,american,2023-04-06,BATO,100,65,O:AAPL230406P00065000,AAPL
0,OPASPS,put,american,2023-04-06,BATO,100,70,O:AAPL230406P00070000,AAPL
...,...,...,...,...,...,...,...,...,...
0,OPASPS,put,american,2025-12-19,BATO,100,270,O:AAPL251219P00270000,AAPL
0,OPASPS,put,american,2025-12-19,BATO,100,280,O:AAPL251219P00280000,AAPL
0,OPASPS,put,american,2025-12-19,BATO,100,290,O:AAPL251219P00290000,AAPL
0,OPASPS,put,american,2025-12-19,BATO,100,300,O:AAPL251219P00300000,AAPL


In [71]:
yield_rates = pd.read_csv("five-year-rates.csv")

array([5.47, 5.5, 5.43, 5.41, 5.34, 5.05, 4.73, 4.54, 4.38, 4.39, 4.39,
       4.65, 4.54], dtype=object)

In [None]:
ticker = "AAPL"

# List Aggregates (Bars)
aggs = []
for a in client.list_aggs(ticker=ticker, multiplier=1, timespan="minute", from_="2022-01-01", to="2023-12-30", limit=50000, options=T):
    aggs.append(a)

print(len(aggs))



In [None]:
aggs[-1]

In [None]:
#unix millisecond timestamp to datetime
# from pytz import timezone

# Convert timestamp to datetime in UTC timezone
def ts_to_dt(timestamp):
    return datetime.fromtimestamp(timestamp/1000, tz=timezone.utc)

timestamps = np.vectorize(ts_to_dt)

# Example usage
a = np.array([a.timestamp for a in aggs])
times = timestamps(a)
aapl_aggs = pd.DataFrame({"day": [time.strftime("%Y-%m-%d") for time in times],
                            "timestamp": [time.strftime("%Y-%m-%d, %H:%M") for time in times],
                           "close": [a.close for a in aggs], 
                           "volume": [a.volume for a in aggs],
                           "low": [a.low for a in aggs],
                           "high": [a.high for a in aggs]})



In [None]:
from heston_param import *

In [None]:
aapl_aggs.groupby("day")

In [None]:
daily_aggs =  [aapl_aggs.groupby('day').get_group(x) for x in aapl_aggs['day'].unique()]

In [None]:
hist_voilatilities = torch.empty(len(daily_aggs))
for i, day in enumerate(daily_aggs):
	hist_voilatilities[i] = estimate_historical_volatility(day['close'].values)

# calculate the historical volatility for each day





In [None]:
import pickle
from heston_param import *
hist_voilatilities = pickle.load(open("hist_voilatilities.pkl", "rb"))
daily_aggs = pickle.load(open("daily_aggs.pkl", "rb"))

In [None]:
while True:
	try:
		print(calibrate_daily_parameters(hist_voilatilities[0], 0.1, daily_aggs[0]["close"].values, 0.0237, daily_aggs[0]["close"].values.shape[0], 50))
		break
	except RuntimeError:
		continue

In [None]:
def single_day_calibration(args):
		i, hist_volatilities, daily_agg = args
		while True:
			try:
				params = calibrate_daily_parameters(hist_volatilities[i], 0.1, day["close"].values, 0.0237, day["close"].values.shape[0], 300)
				
				return i, params
			except RuntimeError:
				continue

In [None]:
len(daily_aggs)

### Naive Monte Carlo:
Runtime: 1.15 Hour. Suboptimal.

In [None]:
daily_params = np.empty((len(daily_aggs), 5))
for i, day in enumerate((daily_aggs[:20])):
	while True:
		try:
			daily_params[i] = calibrate_daily_parameters(hist_voilatilities[i], 0.1, daily_aggs[i]["close"].values, 0.0237, daily_aggs[i]["close"].values.shape[0], 50)
			if (i + 1) % 10 == 0:
					rate =  100 *  np.round((i + 1) /len(daily_params[:20]), 2)
					print(f"{rate}% completed.")
			
			break
		except RuntimeError:
			continue



In [None]:
def calibrate_worker(args):
	single_day_calibration(args)

In [None]:
daily_params = pickle.load(open("parameters.pkl", "rb"))

In [None]:
# runtime is 1.5 hours
daily_params.shape

In [None]:
from multiprocess import Pool

def calibrate_parameters_multiprocessing(daily_aggs, hist_volatilities):
    # Prepare arguments for each task
    tasks = [(i, hist_volatilities[i], daily_aggs[i]) for i in range(len(daily_aggs))]

    # Number of processes, could be set to the number of CPUs or cores
    num_processes = 4

    # Create a multiprocessing pool and map tasks to worker function
    with Pool(processes=num_processes) as pool:
        results = pool.map(calibrate_worker, tasks)

    # Process results
    daily_params = np.empty((len(daily_aggs[:20]), 5))
    for index, params in results:
        if params is not None:
            daily_params[index] = params
        else:
            print(f"Calibration failed for index {index}")

    return daily_params



In [None]:
calibrate_parameters_multiprocessing(daily_aggs[:20], hist_voilatilities[:20])

https://medium.com/@alexander.tsoskounoglou/pricing-options-with-fourier-series-p3-the-heston-model-d157369a217a

In [None]:
def heston_char(u, params):
    kappa, theta, zeta, rho, v0, r, q, T, S0 = params 
    t0 = 0.0 ;  q = 0.0
    m = np.log(S0) + (r - q)*(T-t0)
    D = np.sqrt((rho*zeta*1j*u - kappa)**2 + zeta**2*(1j*u + u**2))
    C = (kappa - rho*zeta*1j*u - D) / (kappa - rho*zeta*1j*u + D)
    beta = ((kappa - rho*zeta*1j*u - D)*(1-np.exp(-D*(T-t0)))) / (zeta**2*(1-C*np.exp(-D*(T-t0))))
    alpha = ((kappa*theta)/(zeta**2))*((kappa - rho*zeta*1j*u - D)*(T-t0) - 2*np.log((1-C*np.exp(-D*(T-t0))/(1-C))))
    return np.exp(1j*u*m + alpha + beta*v0)

In [1]:
import numpy as np
from numpy import sqrt, exp, pi, cos, sin, log, abs
from numba import njit

@njit
def Fourier_Heston_Put(S0, K, T, r, 
                  # Heston Model Paramters
                  kappa, # Speed of the mean reversion 
                  theta, # Long term mean
                  rho,   # correlation between 2 random variables
                  zeta,  # Volatility of volatility
                  v0,    # Initial volatility 
                  opt_type,
                  N = 1_012,
                  z = 24
                  ):

  def heston_char(u): 
    t0 = 0.0 ;  q = 0.0
    m = log(S0) + (r - q)*(T-t0)
    D = sqrt((rho*zeta*1j*u - kappa)**2 + zeta**2*(1j*u + u**2))
    C = (kappa - rho*zeta*1j*u - D) / (kappa - rho*zeta*1j*u + D)
    beta = ((kappa - rho*zeta*1j*u - D)*(1-exp(-D*(T-t0)))) / (zeta**2*(1-C*exp(-D*(T-t0))))
    alpha = ((kappa*theta)/(zeta**2))*((kappa - rho*zeta*1j*u - D)*(T-t0) - 2*log((1-C*exp(-D*(T-t0))/(1-C))))
    return exp(1j*u*m + alpha + beta*v0)
  
  # # Parameters for the Function to make sure the approximations are correct.
  c1 = log(S0) + r*T - .5*theta*T
  c2 = theta/(8*kappa**3)*(-zeta**2*exp(-2*kappa*T) + 4*zeta*exp(-kappa*T)*(zeta-2*kappa*rho) 
        + 2*kappa*T*(4*kappa**2 + zeta**2 - 4*kappa*zeta*rho) + zeta*(8*kappa*rho - 3*zeta))
  a = c1 - z*sqrt(abs(c2))
  b = c1 + z*sqrt(abs(c2))
  
  h       = lambda n : (n*pi) / (b-a) 
  g_n     = lambda n : (exp(a) - (K/h(n))*sin(h(n)*(a - log(K))) - K*cos(h(n)*(a - log(K)))) / (1 + h(n)**2)
  g0      = K*(log(K) - a - 1) + exp(a)
  
  F = g0 
  for n in range(1, N+1):
    h_n = h(n)
    F += 2*heston_char(h_n) * exp(-1j*a*h_n) * g_n(n)

  F = exp(-r*T)/(b-a) * np.real(F)
  F = F if opt_type == 'p' else F + S0 - K*exp(-r*T)
  return F if F > 0 else 0



""" Results (Macbook Air M1):
Speed Analysis of Fourier_Heston_Put: Per Option
159 µs ± 8.51 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
"""

Speed Analysis of Fourier_Heston_Put: Per Option


NameError: name 'S0' is not defined

In [1]:
import py_vollib_vectorized

price = 0.10 ; S = 95 ; K = 100 ; t = .2 ; r = .2 ; flag = 'c'

def implied_volatility(price, S, K, t, r, flag):
  return py_vollib_vectorized.vectorized_implied_volatility(
    price, S, K, t, r, flag, q=0.0, on_error='ignore', model='black_scholes_merton',return_as='numpy') 


In [2]:
print('Speed Analysis of the implied volatility Function: Per Option')
%timeit implied_volatility(price, S, K, t, r, flag)

Speed Analysis of the implied volatility Function: Per Option
130 µs ± 70.7 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [90]:
from nelson_siegel_svensson.calibrate import calibrate_nss_ols
yield_maturities = np.array([1/12, 2/12, 3/12, 4/12, 6/12, 1, 2, 3, 5, 7, 10, 20, 30])
# yields  = np.array([5.30,5.39,5.50,5.50,5.44,5.11,4.33,3.98,3.70,3.66,3.61,3.98,3.84])
# get the first row of the yield rates
yields = yield_rates.iloc[0].values[1:].astype(np.float64)
curve_fit, status = calibrate_nss_ols(yield_maturities,yields)
# yields

In [91]:
curve_fit, status

(NelsonSiegelSvenssonCurve(beta0=-5.345961485007193, beta1=10.920561543787652, beta2=11.074852298011132, beta3=-4.034323942085258, tau1=43.44535134546507, tau2=3.30260991718184),
   message: Desired error not necessarily achieved due to precision loss.
   success: False
    status: 2
       fun: 0.010378206853388534
         x: [ 4.345e+01  3.303e+00]
       nit: 31
       jac: [ 7.105e-07 -6.778e-05]
  hess_inv: [[ 3.425e+04 -2.641e+02]
             [-2.641e+02  2.338e+01]]
      nfev: 312
      njev: 100)

In [None]:
print('Speed Analysis of Fourier_Heston_Put: Per Option')
# %timeit Fourier_Heston_Put(S0,K, T, r, kappa, theta, rho, zeta, v0, 'p', N = 1_012, z = 24)

In [15]:
import yfinance as yf

In [19]:
ticker = yf.Ticker("AAPL")
ticker.option_chain("2024-04-12").puts

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency
0,AAPL240412P00100000,2024-04-03 19:14:39+00:00,100.0,0.01,0.0,0.02,0.0,0.0,1,166,1.500002,False,REGULAR,USD
1,AAPL240412P00105000,2024-04-02 19:45:06+00:00,105.0,0.01,0.0,0.02,0.0,0.0,1,8,1.375003,False,REGULAR,USD
2,AAPL240412P00110000,2024-04-05 19:57:11+00:00,110.0,0.01,0.0,0.01,0.0,0.0,20,136,1.187504,False,REGULAR,USD
3,AAPL240412P00115000,2024-04-05 19:03:11+00:00,115.0,0.01,0.0,0.02,0.0,0.0,2,362,1.125004,False,REGULAR,USD
4,AAPL240412P00120000,2024-04-05 17:11:49+00:00,120.0,0.01,0.0,0.0,0.0,0.0,1,108,0.500005,False,REGULAR,USD
5,AAPL240412P00125000,2024-04-05 19:56:31+00:00,125.0,0.01,0.0,0.03,0.0,0.0,32,267,0.937501,False,REGULAR,USD
6,AAPL240412P00130000,2024-04-08 13:30:03+00:00,130.0,0.02,0.0,0.02,0.01,100.0,10,381,0.796877,False,REGULAR,USD
7,AAPL240412P00135000,2024-04-05 19:49:06+00:00,135.0,0.01,0.0,0.02,0.0,0.0,249,1980,0.687503,False,REGULAR,USD
8,AAPL240412P00140000,2024-04-05 19:45:54+00:00,140.0,0.01,0.0,0.0,0.0,0.0,92,437,0.250007,False,REGULAR,USD
9,AAPL240412P00144000,2024-04-05 19:11:40+00:00,144.0,0.02,0.0,0.03,0.0,0.0,51,283,0.531255,False,REGULAR,USD


In [29]:
from alpaca.data.historical.option import OptionHistoricalDataClient
from alpaca.data.requests import OptionBarsRequest, OptionChainRequest
from alpaca.data.timeframe import TimeFrame
from datetime import datetime

In [30]:
req_params = OptionChainRequest(underlying_symbol="AAPL", start=datetime(2022, 7, 1), end=datetime(2022, 9, 1), timeframe=TimeFrame.Minute)
req_params

{'feed': None, 'underlying_symbol': 'AAPL'}

In [8]:
alphaca_client = OptionHistoricalDataClient('PK6PBCFA2EOGAHFCXKUR','wGEBkkRj10O6QRDjs3yZnr2Fm5EUCwzob06T2dlU')

In [31]:
alphaca_client.get_option_chain(req_params)

{'AAPL240503C00135000': {   'daily_bar': None,
     'latest_quote': {   'ask_exchange': 'Z',
                         'ask_price': 34.48,
                         'ask_size': 1.0,
                         'bid_exchange': 'T',
                         'bid_price': 33.92,
                         'bid_size': 31.0,
                         'conditions': 'A',
                         'symbol': 'AAPL240503C00135000',
                         'tape': None,
                         'timestamp': datetime.datetime(2024, 4, 8, 14, 1, 14, 241696, tzinfo=TzInfo(UTC))},
     'latest_trade': {   'conditions': 'I',
                         'exchange': 'C',
                         'id': None,
                         'price': 35.86,
                         'size': 3.0,
                         'symbol': 'AAPL240503C00135000',
                         'tape': None,
                         'timestamp': datetime.datetime(2024, 4, 3, 19, 34, 13, 681539, tzinfo=TzInfo(UTC))},
     'minute_bar': None,
  