In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
from tqdm.auto import tqdm

import yfinance as yf


In [2]:
# load S&P-500 tickers
table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
tickers_df = table[0]
print(tickers_df.Symbol.tolist())
tickers_df.head()

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ATVI', 'ADM', 'ADBE', 'ADP', 'AAP', 'AES', 'AFL', 'A', 'APD', 'AKAM', 'ALK', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AMD', 'AEE', 'AAL', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'ABC', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BBWI', 'BAX', 'BDX', 'WRB', 'BRK.B', 'BBY', 'BIO', 'TECH', 'BIIB', 'BLK', 'BK', 'BA', 'BKNG', 'BWA', 'BXP', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BG', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CTLT', 'CAT', 'CBOE', 'CBRE', 'CDW', 'CE', 'CNC', 'CNP', 'CDAY', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CMA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CCI', 'CSX', 'C

Unnamed: 0,Symbol,Security,GICS Sector,GICS Sub-Industry,Headquarters Location,Date added,CIK,Founded
0,MMM,3M,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1957-03-04,66740,1902
1,AOS,A. O. Smith,Industrials,Building Products,"Milwaukee, Wisconsin",2017-07-26,91142,1916
2,ABT,Abbott,Health Care,Health Care Equipment,"North Chicago, Illinois",1957-03-04,1800,1888
3,ABBV,AbbVie,Health Care,Pharmaceuticals,"North Chicago, Illinois",2012-12-31,1551152,2013 (1888)
4,ACN,Accenture,Information Technology,IT Consulting & Other Services,"Dublin, Ireland",2011-07-06,1467373,1989


In [3]:
def options_chain(symbol, type='call'):

    tk = yf.Ticker(symbol)
    stock_price = tk.history().iloc[-1,3]
    # Expiration dates
    exps = tk.options

    # Get options for each expiration
    options = pd.DataFrame()
    for e in exps:
        opt = tk.option_chain(e)
        opt_df = opt.calls if type=='call' else opt.puts
        opt_df['expir'] = e
        opt_df['stock_price'] = stock_price
        options = pd.concat([options, opt_df], ignore_index=True)

    # Bizarre error in yfinance that gives the wrong expiration date
    # Add 1 day to get the correct expiration date
    options['expir'] = pd.to_datetime(options['expir']) + timedelta(days = 1)
    options['maturity'] = (options['expir'] - datetime.today()).dt.days / 256
    
    options[['bid', 'ask', 'strike']] = options[['bid', 'ask', 'strike']].apply(pd.to_numeric)
    options['opt_price'] = (options['bid'] + options['ask']) / 2 # Calculate the midpoint of the bid-ask
    
    # Drop unnecessary and meaningless columns
    options = options.drop(columns=[
        'openInterest', 'inTheMoney', 'contractSize', 
        'currency', 'change', 'percentChange', 'lastTradeDate', 'lastPrice'
    ])
    options = options.rename(columns={'impliedVolatility' : 'vol'})
    
    return options

In [4]:
available_tickers = []
for tick in tqdm(tickers_df.Symbol):
    yftick = yf.Ticker(tick) 
    if len(yftick.history()) > 0 and len(yftick.options) > 0:
        available_tickers.append(tick)
    elif len(yftick.options) == 0:
        print(f'{tick} - no options')

len(available_tickers)

 13%|█▎        | 66/503 [00:24<02:37,  2.77it/s]

BRK.B: No data found, symbol may be delisted
BRK.B - no options


 16%|█▋        | 82/503 [00:30<02:28,  2.83it/s]

BF.B: No data found for this date range, symbol may be delisted
BF.B - no options


 70%|██████▉   | 352/503 [02:13<01:00,  2.51it/s]

NVR - no options


100%|██████████| 503/503 [03:08<00:00,  2.67it/s]


500

In [None]:
sp500_calls_options = pd.concat([options_chain(ticker, type='call') for ticker in tqdm(sorted(available_tickers))])
sp500_puts_options = pd.concat([options_chain(ticker, type='put') for ticker in tqdm(sorted(available_tickers))])

display(sp500_calls_options.info())
display(sp500_puts_options.info())

In [None]:
sp500_calls_options.to_pickle('data/sp500_calls.pkl')
sp500_puts_options.to_pickle('data/sp500_puts.pkl')