In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.optimize import brentq
import os

# Heston model for underlying asset prices
def heston_model(S0, V0, T, mu, kappa, theta, xi, rho, dt, N):
    """Simulate one Heston price/variance path on a fixed time grid.

    Parameters
    ----------
    S0, V0 : float
        Initial asset price and initial variance.
    T : float
        Nominal horizon. It is not used in the recursion (the simulated
        horizon is ``N * dt``); it is kept so existing callers keep working.
    mu : float
        Drift of the asset price.
    kappa, theta, xi : float
        Mean-reversion speed, long-run level and vol-of-vol of the variance.
    rho : float
        Correlation between the price shock and the variance shock.
    dt : float
        Time-step size.
    N : int
        Number of steps; the returned arrays have ``N + 1`` entries.

    Returns
    -------
    (S, V) : tuple of numpy.ndarray
        Price path and variance path, both of length ``N + 1``.

    Notes
    -----
    Random numbers come from the global ``np.random`` state, two draws per
    step, so seed it before calling for reproducible paths.
    """
    S = np.zeros(N + 1)
    V = np.zeros(N + 1)
    S[0] = S0
    V[0] = V0

    # Weight of the independent draw when building the correlated variance shock.
    rho_perp = np.sqrt(1 - rho ** 2)

    for t in range(1, N + 1):
        v_prev = V[t - 1]
        z1 = np.random.normal()
        z2 = rho * z1 + rho_perp * np.random.normal()
        # Absolute value keeps the variance non-negative after the Euler update.
        V[t] = np.abs(v_prev + kappa * (theta - v_prev) * dt + xi * np.sqrt(v_prev * dt) * z2)
        # The price step must use the variance known at the START of the step.
        # V[t] already contains z2, which is correlated with z1, so using it
        # here would make the step look ahead and bias the simulated drift.
        S[t] = S[t - 1] * np.exp((mu - 0.5 * v_prev) * dt + np.sqrt(v_prev * dt) * z1)
    return S, V

def black_scholes_call(S, K, T, r, sigma):
    """Black-Scholes price of a European call.

    Parameters
    ----------
    S : spot price of the underlying.
    K : strike.
    T : time to expiry, in the same time unit that ``r`` and ``sigma`` are quoted in.
    r : continuously compounded risk-free rate.
    sigma : volatility.

    Returns
    -------
    The call price ``S * N(d1) - K * exp(-r * T) * N(d2)``.

    No guard is applied for ``T == 0`` or ``sigma == 0``; both divide by zero.
    """
    vol_sqrt_t = sigma * np.sqrt(T)
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    discounted_strike = K * np.exp(-r * T)
    return S * norm.cdf(d1) - discounted_strike * norm.cdf(d2)

def find_iv(market_price, S, K, T, r):
    """Implied volatility of a call, found by root search on the pricing error.

    Searches sigma in ``[1e-6, 3.0]`` for the value at which
    ``black_scholes_call(S, K, T, r, sigma)`` equals ``market_price``.

    Returns
    -------
    The implied volatility, or ``np.nan`` when ``brentq`` raises ``ValueError``
    (for example when the pricing error has the same sign at both ends of the
    search interval, so no root is bracketed).
    """
    def pricing_gap(sigma):
        return market_price - black_scholes_call(S, K, T, r, sigma)

    try:
        implied_vol = brentq(pricing_gap, 1e-6, 3.0, xtol=1e-6)
    except ValueError:
        # Best effort: callers get NaN instead of an exception.
        implied_vol = np.nan
    return implied_vol

# Calculate log return
def calculate_log_return(S):
    """Return the log returns ``log(S[k + 1] / S[k])`` of a price series.

    ``S`` must support slicing and element-wise division (e.g. a NumPy array);
    the result has one element fewer than ``S``.
    """
    later_prices = S[1:]
    earlier_prices = S[:-1]
    return np.log(later_prices / earlier_prices)

# Adjusted calculate_skew_slope function to dynamically use TTM
def calculate_skew_slope(iv_row, ttm, moneyness_levels, shortest_ttm=1, longest_ttm=24):
    """Compute a smile-curvature feature and an ATM term-structure slope.

    Parameters
    ----------
    iv_row : mapping
        Implied volatilities keyed by ``(ttm, moneyness)`` tuples. Anything
        exposing ``.get(key, default)`` works; missing keys are read as 0.
    ttm :
        Maturity whose smile (moneyness 0.85, 1 and 1.15) is used for the skew.
    moneyness_levels :
        Not used by the computation; kept so existing callers keep working.
    shortest_ttm, longest_ttm :
        Maturities whose ATM (moneyness 1) vols define the slope. The defaults
        1 and 24 are the values that used to be hard-coded here; pass others
        when the dataset carries a different maturity grid.

    Returns
    -------
    (skew, slope) : tuple
        ``skew`` is ``(iv_85 + iv_115 - 2 * atm_iv) / 2`` and ``slope`` is
        ``iv_long - iv_short``.
    """
    # Select IV values for the requested maturity.
    atm_iv = iv_row.get((ttm, 1), 0)  # ATM IV for the specified TTM
    iv_85 = iv_row.get((ttm, 0.85), 0)  # IV for moneyness=0.85
    iv_115 = iv_row.get((ttm, 1.15), 0)  # IV for moneyness=1.15

    # A missing (or exactly zero) ATM quote yields skew 0 instead of a value
    # built from absent data. Nothing is divided by atm_iv here.
    skew = (iv_85 + iv_115 - 2 * atm_iv) / 2 if atm_iv else 0

    # ATM term structure between the two chosen maturities.
    iv_short = iv_row.get((shortest_ttm, 1), 0)
    iv_long = iv_row.get((longest_ttm, 1), 0)
    slope = iv_long - iv_short

    return skew, slope


# Model parameters
S0 = 100  # initial stock price
V0 = 0.04  # initial variance
T = 1  # simulation horizon
mu = 0.05  # drift of the asset (long-term return)
kappa = 3.0  # rate of mean reversion of the variance
theta = 0.04  # long-term average variance
xi = 0.1  # volatility of volatility
rho = -0.7  # correlation between the stochastic processes S and V
# Time step (a daily grid would be dt = 1 / 252).
dt = 1/6000
# Round before converting: T / dt is a float quotient, and plain int() would
# truncate a value such as 251.99999999999997 down to 251 steps.
N = int(round(T / dt))
r = 0.05  # Risk-free rate

# Moneyness levels and times to maturity.
# NOTE: despite its name, time_to_maturity_years holds MONTHS; the values are
# divided by 12 where they are turned into year fractions for pricing.
moneyness_levels = [0.7, 0.85, 1, 1.15, 1.3]
time_to_maturity_years = [1, 3, 6, 12, 24]

# Seed BEFORE simulating, so that the Heston path itself (and not only the IV
# noise drawn further down) is reproducible after a kernel restart.
np.random.seed(42)  # For reproducibility

# Simulate Heston model
S, V = heston_model(S0, V0, T, mu, kappa, theta, xi, rho, dt, N)


# Calculate historical log returns
log_returns = calculate_log_return(S)

# Creating DataFrame for IV surfaces and additional features
columns = pd.MultiIndex.from_product([time_to_maturity_years, moneyness_levels], names=['TTM', 'Moneyness'])
iv_surfaces = pd.DataFrame(index=range(1, N + 1), columns=columns)
additional_features = pd.DataFrame(index=range(1, N), columns=['Log Return', 'Skew', 'Slope'])


# Synthetic IV surface, built per time step from the simulated volatility:
#   * moneyness effect: ATM quotes get a +0.02 offset, all other strikes -0.01,
#     a crude stand-in for the smile/skew seen in real data;
#   * maturity effect: a 0.005 * log(ttm) term;
#   * each effect carries its own N(0, 0.005) noise.
# The skew feature uses the smile at the last listed maturity. This is the value
# the loop variable `ttm` used to hold when it leaked out of the inner loops;
# it is now named explicitly.
skew_ttm = time_to_maturity_years[-1]

for i in range(1, N + 1):
    S_t = S[i]  # Current simulated asset price
    sigma_t = np.sqrt(V[i])  # Current simulated volatility
    iv_row = {}

    for ttm in time_to_maturity_years:
        # Listed maturities are in months; pricing needs year fractions.
        # A dedicated name is used so the model horizon `T` is not overwritten.
        maturity_years = ttm / 12
        for moneyness in moneyness_levels:
            K = S_t * moneyness
            # The price is generated from sigma_t, so inverting it below gives
            # back approximately sigma_t whenever the root search succeeds.
            market_price = black_scholes_call(S_t, K, maturity_years, r, sigma_t)

            if moneyness == 1:  # ATM
                iv_adjustment = 0.02 + np.random.normal(0, 0.005)
            else:
                iv_adjustment = -0.01 + np.random.normal(0, 0.005)

            ttm_effect = 0.005 * np.log(ttm) + np.random.normal(0, 0.005)
            # find_iv returns NaN when no root is found; the NaN survives abs()
            # and the affected row is removed by dropna() when the frames are
            # combined below.
            iv = find_iv(market_price, S_t, K, maturity_years, r) + iv_adjustment + ttm_effect
            iv_row[(ttm, moneyness)] = abs(iv)

    iv_surfaces.loc[i] = pd.Series(iv_row)
    if i > 1:  # Calculate additional features for i > 1
        skew, slope = calculate_skew_slope(iv_row, skew_ttm, moneyness_levels)
        # NOTE(review): Skew/Slope written to row i - 1 come from the surface
        # of step i, the same surface that shift(-1) below places in row i - 1.
        # Confirm this alignment (features taken from the target surface) is intended.
        additional_features.at[i - 1, 'Log Return'] = log_returns[i - 2]
        additional_features.at[i - 1, 'Skew'] = skew
        additional_features.at[i - 1, 'Slope'] = slope

# Combining IV surfaces with additional features
combined_data = pd.concat([iv_surfaces.shift(-1), additional_features], axis=1).dropna()

# Save asset prices to a new file.
# Paths are built with os.path.join instead of concatenating '/', so the
# separator is correct on every platform and is not doubled when the working
# directory is a filesystem root.
asset_prices_data = pd.DataFrame({'Asset Price': S})
asset_prices_file_path = os.path.join(os.getcwd(), 'asset_prices.csv')
asset_prices_data.to_csv(asset_prices_file_path, index_label='Timestep')

print(f"Asset prices saved to: {asset_prices_file_path}")

# Save the combined IV-surface / feature dataset in the same directory.
cwd = os.getcwd()
file_path = os.path.join(cwd, 'combined_iv_data_new.csv')
combined_data.to_csv(file_path)

print(f"Dataset saved to: {file_path}")

Asset prices saved to: /Users/abicem/dev/y3/thesis-prototype/cvae-hull/asset_prices.csv
Dataset saved to: /Users/abicem/dev/y3/thesis-prototype/cvae-hull/combined_iv_data_new.csv


In [3]:
pip install mibian


Collecting mibian
  Downloading mibian-0.1.3.zip (4.3 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: mibian
  Building wheel for mibian (setup.py) ... [?25ldone
[?25h  Created wheel for mibian: filename=mibian-0.1.3-py3-none-any.whl size=4024 sha256=d527ec6d155e95c4342489182295578d3bb2fd5acadeb46615245bd120763c97
  Stored in directory: /Users/abicem/Library/Caches/pip/wheels/2c/4f/a7/be034e17cc306b0850f5f1a5b4541281b49475c58620a7ff40
Successfully built mibian
Installing collected packages: mibian
Successfully installed mibian-0.1.3
Note: you may need to restart the kernel to use updated packages.


In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import mibian


# Parameters
r = 0.05  # Risk-free interest rate
sigma = 0.2  # Volatility

moneyness_levels = [0.7, 0.85, 1, 1.15, 1.3]
time_to_maturity_months = [1, 3, 6, 12, 24]


def black_scholes(S, K, T, r, sigma):
    """Return the Black-Scholes ``(call, put)`` prices of European options.

    Defined here because the loop below needs both prices and no function of
    this name is defined earlier in the notebook, so the cell raised NameError
    on a fresh kernel.

    Parameters
    ----------
    S : spot price.
    K : strike.
    T : time to expiry in years.
    r : continuously compounded risk-free rate.
    sigma : volatility.
    """
    sqrt_T = np.sqrt(T)
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / (sigma * sqrt_T)
    d2 = d1 - sigma * sqrt_T
    discounted_strike = K * np.exp(-r * T)
    call = S * norm.cdf(d1) - discounted_strike * norm.cdf(d2)
    put = discounted_strike * norm.cdf(-d2) - S * norm.cdf(-d1)
    return call, put


# Dedicated generator, so the price noise is reproducible even when this cell
# is run on its own.
rng = np.random.default_rng(42)

# Read the time series of asset prices from the CSV file
file_path = 'asset_prices.csv'
asset_prices_df = pd.read_csv(file_path, index_col=0)
asset_prices = asset_prices_df['Asset Price'].values
num_steps = len(asset_prices)

# Generate option prices at each time step
data = []
for i in range(num_steps):
    # Named `spot` rather than `S` so an `S` price array defined in an earlier
    # cell is not overwritten by a scalar.
    spot = asset_prices[i]
    for m in moneyness_levels:
        K = spot * m
        for T_months in time_to_maturity_months:
            T_years = T_months / 12
            call, put = black_scholes(spot, K, T_years, r, sigma)

            # Add noise to the option prices (standard deviation = 1% of the price)
            noise_call = rng.normal(0, 0.01 * call)
            noise_put = rng.normal(0, 0.01 * put)

            data.append([i, spot, K, T_months, call + noise_call, put + noise_put])

# Create a DataFrame
df = pd.DataFrame(data, columns=["Time Step", "Asset Price", "Strike", "Time to Maturity (Months)", "Call Price", "Put Price"])

# Save the DataFrame to a CSV file
df.to_csv("option_prices_timeseries.csv", index=False)