# Option data

In [1]:
import pandas as pd
# Load the screener export, then keep the first 200 rows after sorting by
# 'Market Cap' in descending order.
ticker_data = pd.read_csv('data/nasdaq_screener_1717056609861.csv')
top_cap = (
    ticker_data
    .sort_values(by='Market Cap', ascending=False)
    .head(200)
)
# 'Symbol' values of those rows, in the same (descending market cap) order.
top_tickers = top_cap.loc[:, 'Symbol']

In [2]:
import yfinance as yf
import pandas as pd

def get_ticker(name, data):
    """Append the call-option chains of one ticker to ``data``.

    For every expiration listed by ``yf.Ticker(name).options`` the calls
    table of that expiration is fetched, tagged with two extra columns
    (``ticker`` = ``name`` and ``exp_date`` = the expiration) and stacked
    below the rows already present in ``data``.

    Parameters
    ----------
    name
        Ticker symbol passed to ``yf.Ticker``.
    data : pandas.DataFrame
        Rows collected so far. It is not modified in place.

    Returns
    -------
    pandas.DataFrame
        ``data`` followed by the newly fetched rows. ``data`` itself is
        returned unchanged when reading the expirations raises ``TypeError``
        or when no expirations are listed.
    """
    ticker = yf.Ticker(name)
    try:
        exp_dates = ticker.options
    except TypeError:
        # Best effort: skip this ticker and keep what was collected so far.
        return data

    # Gather the per-expiration frames and concatenate once at the end;
    # concatenating inside the loop re-copies all accumulated rows each time.
    frames = []
    for dt in exp_dates:
        calls = ticker.option_chain(dt).calls
        calls['ticker'] = name
        calls['exp_date'] = dt
        frames.append(calls)

    if not frames:
        return data
    return pd.concat([data, *frames])

def get_data(names, max_rows=100000):
    """Collect option rows for several tickers into one DataFrame.

    Tickers are processed in the order given. After each ticker the running
    row count is checked, and collection stops as soon as it exceeds
    ``max_rows`` (the ticker that crosses the limit is kept in full).

    Parameters
    ----------
    names : iterable
        Ticker symbols; each one is passed to ``get_ticker``.
    max_rows : int, optional
        Row count above which no further tickers are fetched.
        Defaults to 100000.

    Returns
    -------
    pandas.DataFrame
        All collected rows, or an empty DataFrame when ``names`` is empty.
    """
    # Keep one frame per ticker and concatenate once at the end, so the
    # accumulated rows are not re-copied for every additional ticker.
    frames = []
    total_rows = 0
    for nm in names:
        ticker_rows = get_ticker(nm, pd.DataFrame())
        frames.append(ticker_rows)
        total_rows += ticker_rows.shape[0]
        if total_rows > max_rows:
            break

    if not frames:
        # pd.concat raises on an empty list; mirror the empty-input result.
        return pd.DataFrame()
    return pd.concat(frames)

In [6]:
# Fetch call-option rows for the top-cap symbols; get_data stops adding
# tickers once its row limit has been exceeded.
option_features_tmp = get_data(names=top_tickers)

# Market data

In [11]:
import pandas as pd
import yfinance as yf
import numpy as np

# Date range passed as start/end to yf.download, both for the stock prices
# and for the risk-free-rate series used in the loop further down.
start_date = '2024-01-01'
end_date = '2024-05-15'

# Symbols to download market data for: the values of top_tickers, which must
# already have been defined by an earlier cell. Replace with your own tickers
# if desired.
stock_tickers = top_tickers.values

def get_stock_data(stock_ticker, start_date, end_date):
    """Download price history for ``stock_ticker`` and return its 'Close' data.

    The arguments are forwarded to ``yf.download`` as the ticker and the
    ``start`` / ``end`` keywords; only the ``'Close'`` selection of the
    downloaded frame is returned.
    """
    downloaded = yf.download(stock_ticker, start=start_date, end=end_date)
    closes = downloaded['Close']
    return closes

def get_risk_free_rate(ticker="^TNX", start_date="2023-01-01", end_date="2023-12-31"):
    """Download the series used as the risk-free rate and return its 'Close' data.

    ``ticker`` (default ``"^TNX"``) is fetched with ``yf.download`` between
    ``start_date`` and ``end_date``; the ``'Close'`` selection is returned
    exactly as downloaded, with no rescaling.

    NOTE(review): presumably ^TNX is quoted in percent rather than as a
    decimal fraction - confirm before feeding it into a pricing formula.
    """
    downloaded = yf.download(ticker, start=start_date, end=end_date)
    closes = downloaded['Close']
    return closes

def calculate_historical_volatility(stock_prices, window=63):
    """Rolling standard deviation of simple returns, scaled by sqrt(252).

    Parameters
    ----------
    stock_prices : pandas.Series or pandas.DataFrame
        Prices in chronological order.
    window : int, optional
        Number of consecutive returns in each rolling window (default 63).

    Returns
    -------
    Same type as ``stock_prices``
        ``std(returns over window) * sqrt(252)`` with all NaN rows removed,
        i.e. the rows before a full window of returns is available are
        dropped.
    """
    returns = stock_prices.pct_change()
    returns = returns.dropna()
    scale = np.sqrt(252)
    rolling_std = returns.rolling(window=window).std()
    scaled = rolling_std * scale
    return scaled.dropna()

# The risk-free series does not depend on the ticker, so download it once
# instead of once per loop iteration.
risk_free_rates = get_risk_free_rate(start_date=start_date, end_date=end_date)
if isinstance(risk_free_rates, pd.DataFrame):
    # NOTE(review): presumably some yfinance versions return a one-column
    # DataFrame for ['Close']; reduce it to a Series so zip() below iterates
    # values rather than column labels.
    risk_free_rates = risk_free_rates.squeeze(axis='columns')
# Sort and forward-fill up front so the per-ticker lookup can carry the last
# known rate forward onto dates that have no quote of their own.
risk_free_rates = risk_free_rates.sort_index().ffill()

data = []

for ticker in stock_tickers:
    stock_prices = get_stock_data(ticker, start_date, end_date)
    if isinstance(stock_prices, pd.DataFrame):
        # Same one-column DataFrame guard as for the risk-free series.
        stock_prices = stock_prices.squeeze(axis='columns')
    historical_volatility = calculate_historical_volatility(stock_prices)

    # Only dates that have a volatility value produce output rows.
    dates = historical_volatility.index
    prices_at_dates = stock_prices.loc[dates]
    # reindex() tolerates dates missing from the rate series (plain label
    # indexing with a list raises KeyError) and forward-fills those gaps.
    rates_at_dates = risk_free_rates.reindex(dates, method='ffill')

    for date, price, rate, volatility in zip(dates, prices_at_dates, rates_at_dates, historical_volatility):
        data.append([date, ticker, rate, price, volatility])

# One row per (date, ticker) pair.
market_data = pd.DataFrame(data, columns=['spot_date', 'ticker', 'risk_free_rate', 'spot_price', 'volatility'])

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [12]:
# Pair every option row with every market-data row of the same ticker.
# merge() matches on the 'ticker' column of both frames; join(on='ticker')
# would instead compare the ticker with market_data's integer row index.
option_market = option_features_tmp.merge(market_data, on='ticker', how='inner')

# NOTE(review): no cell visible in this notebook creates a 'date' column on
# either frame (get_ticker adds 'exp_date'); confirm which column is meant
# and that it is datetime-typed, otherwise this lookup raises KeyError.
option_date = option_market['date']

# Keep market rows observed in the 14 days up to and including the option
# date. Both conditions must hold, so they are combined with & instead of
# letting the second mask overwrite the first.
mask = (
    (option_market['spot_date'] <= option_date)
    & (option_market['spot_date'] > option_date - pd.Timedelta(14, 'D'))
)

# Boolean indexing: DataFrame.query() only accepts a string expression.
option_features = option_market[mask]
option_features.to_csv('data/spot_price.csv', index=False)