In [1]:
# import polygon
import polygon
from polygon import RESTClient

# import visualization packages
import plotly.graph_objects as go
from plotly.offline import plot
import plotly.express as px

# import yahoo finance api to get past earnings dates
import yfinance as yf

# import pandas and datetime
import statistics
import pandas_market_calendars as mcal
import pandas as pd
import datetime as dt
import numpy as np

# import time datetime
from datetime import datetime
import time

import warnings
warnings.filterwarnings('ignore')

# for exporting data
import pickle

# import API_KEY from config
from config import *

In [2]:
# Underlying symbols to analyse -- edit this list to change the universe.
tickers = [
    'FB', 'BABA', 'DIS', 'TSLA', 'GE', 'NFLX', 'ROKU', 'UBER',
    'MU', 'INTC', 'AMZN', 'SNAP', 'C', 'JD', 'JPM', 'WFC',
    'CSCO', 'XOM', 'SQ', 'BA', 'F', 'BIDU', 'MS', 'BMY',
    'PFE', 'SBUX', 'WMT', 'QCOM', 'GM', 'AAL', 'VZ', 'CAT',
    'V', 'GS', 'KO', 'TGT', 'BP', 'JNJ', 'GOOG', 'CVS',
    'ADBE', 'IBM', 'CVX', 'LYFT', 'MO', 'ATVI', 'SHOP', 'MCD',
    'NKE', 'COST', 'HPQ', 'LVS',
]

# REST client shared by every helper below; API_KEY comes from the config import.
client = RESTClient(API_KEY)

def get_net_pnl(call_df, put_df, start_date, end_date=None):
    """Percentage return of a call+put straddle opened on ``start_date``.

    Daily aggregates for the first contract in ``call_df`` and the first
    contract in ``put_df`` are downloaded through the module-level ``client``,
    aligned on the trading days returned by ``get_valid_dates`` and summed into
    a combined daily close.

    Parameters
    ----------
    call_df, put_df : pandas.DataFrame
        Frames with an ``'option ticker'`` column; only the first row is used.
    start_date :
        First day of the requested window (passed to the API as ``from_``).
    end_date : optional
        Last day of the requested window (passed to the API as ``to``).
        When omitted, the notebook-level ``earnings_date`` variable is used,
        which is what this function always did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``'<N> days'`` (N = number of rows) holding the percent
        change of the combined close relative to the first row, and ``'date'``.

    Raises
    ------
    IndexError
        If ``call_df`` or ``put_df`` is empty.
    """
    if end_date is None:
        # Backward-compatible fallback: read the loop variable of the calling cell.
        end_date = earnings_date

    def _daily_bars(option_ticker):
        # One request per leg; both legs share the same window and settings.
        bars = pd.DataFrame(client.get_aggs(
            ticker=option_ticker,
            limit=10000,
            multiplier=1,
            timespan='day',
            from_=start_date,
            to=end_date,
            adjusted=False,
        ))
        # 'timestamp' is scaled by 1e6 so to_datetime reads it as nanoseconds,
        # then truncated to midnight so it can be matched against calendar days.
        bars['date'] = pd.to_datetime(bars['timestamp'] * 1000000).dt.normalize()
        return bars

    call = _daily_bars(call_df['option ticker'].iloc[0])
    put = _daily_bars(put_df['option ticker'].iloc[0])

    # Outer-merge onto the trading calendar so days without a bar show up as NaN
    # rows instead of silently disappearing.
    valid_dates = pd.DataFrame(
        pd.to_datetime(get_valid_dates(start_date, end_date)), columns=['date']
    )
    call = pd.merge(valid_dates, call, how='outer', on='date')
    put = pd.merge(valid_dates, put, how='outer', on='date')

    # net close of the straddle (call leg + put leg)
    straddle_close = call['close'] + put['close']
    entry_close = straddle_close.iloc[0]

    # column name encodes how many rows (days) the straddle covers
    col_name = f'{len(straddle_close)} days'
    net_df = pd.DataFrame()
    net_df[col_name] = ((straddle_close - entry_close) / entry_close) * 100

    # just reuse the call leg's date column for the result
    net_df['date'] = call['date']
    return net_df

def get_contracts(search_date, earnings_date, symbol=None):
    """List the option contracts of an underlying as of 40 days before earnings.

    Parameters
    ----------
    search_date :
        Currently unused; the ``as_of`` date is derived from ``earnings_date``.
        Kept so existing callers keep working.
    earnings_date :
        Anything ``pd.to_datetime`` accepts. Contracts are listed as of
        ``earnings_date - 40 days``.
    symbol : str, optional
        Underlying symbol. When omitted, the notebook-level ``ticker`` variable
        is used, which is what this function always did before the parameter
        existed.

    Returns
    -------
    pandas.DataFrame
        Columns ``'expiration date'`` (datetime), ``'type'``, ``'strike price'``
        and ``'option ticker'``. The columns are present even when the API
        returns no contracts, so callers can filter an empty frame instead of
        hitting a ``KeyError``.
    """
    if symbol is None:
        # Backward-compatible fallback: read the loop variable of the calling cell.
        symbol = ticker

    start_date = (pd.to_datetime(earnings_date) - dt.timedelta(days=40)).date().strftime('%Y-%m-%d')

    records = [
        {
            'expiration date': c.expiration_date,
            'type': c.contract_type,
            'strike price': c.strike_price,
            'option ticker': c.ticker,
        }
        for c in client.list_options_contracts(symbol, limit=1000, as_of=start_date)
    ]

    # Explicit columns keep the schema stable for an empty listing.
    contracts_df = pd.DataFrame(
        records,
        columns=['expiration date', 'type', 'strike price', 'option ticker'],
    )
    contracts_df['expiration date'] = pd.to_datetime(contracts_df['expiration date'])
    return contracts_df

def get_valid_dates(search_date, earnings_date):
    """Return at most the last 22 NYSE sessions in the given window.

    The sessions between ``search_date`` and ``earnings_date`` are looked up on
    the 'NYSE' calendar, formatted as ``'YYYY-MM-DD'`` strings and trimmed to
    the final 22 entries (fewer if the window is shorter).
    """
    sessions = mcal.get_calendar('NYSE').valid_days(
        start_date=search_date,
        end_date=earnings_date,
    )

    formatted = []
    for session in sessions:
        formatted.append(session.date().strftime('%Y-%m-%d'))

    # keep only the tail of the window
    return formatted[-22:]

def get_underlying(start_date, earnings_date, ticker):
    """Fetch unadjusted daily aggregates for ``ticker`` between the two dates.

    Returns a DataFrame of the aggregates with an extra ``'date'`` column that
    holds ``datetime.date`` objects derived from the ``'timestamp'`` column.
    """
    bars = pd.DataFrame(
        client.get_aggs(
            ticker=ticker,
            multiplier=1,
            timespan='day',
            from_=start_date,
            to=earnings_date,
            adjusted=False,
        )
    )
    # scale the timestamp to nanoseconds before converting, then keep the date part
    timestamps_ns = bars['timestamp'] * 1000000
    bars['date'] = pd.to_datetime(timestamps_ns).dt.date
    return bars

def process_df(df_list, avg_days):
    """Aggregate per-event straddle tables into mean/median/max/min tables.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames whose index labels look like ``'<N> days'``. Rows that share the
        same ``N`` across frames are aggregated together.
    avg_days :
        Passed through unchanged under the ``'avg_days'`` key.

    Returns
    -------
    dict
        Keys ``'mean'``, ``'median'``, ``'max'``, ``'min'`` (each a DataFrame
        indexed by ``N`` as a string, ordered from the largest ``N`` to the
        smallest) and ``'avg_days'``.

    Raises
    ------
    ValueError
        If ``df_list`` is empty (raised by ``pd.concat``).
    """
    combined = pd.concat(df_list).reset_index()

    # '22 days' -> 22. Remove the literal suffix rather than using str.strip,
    # which treats its argument as a set of characters.
    combined['index'] = (
        combined['index'].str.replace(' days', '', regex=False).astype('int')
    )

    # Group once and reuse the grouping for every statistic.
    grouped = combined.groupby('index')

    def _summary(stat):
        # Sort numerically (descending) before turning the labels back into strings.
        table = getattr(grouped, stat)().sort_index(ascending=False)
        table.index = table.index.astype('str')
        return table

    return {
        'mean': _summary('mean'),
        'median': _summary('median'),
        'max': _summary('max'),
        'min': _summary('min'),
        'avg_days': avg_days,
    }

In [3]:
# For every ticker: collect earnings dates, price an at-the-money straddle opened
# on each of the preceding trading days for up to three expirations, pickle the
# per-event tables, then aggregate them per expiration bucket (near / med / far).
for ticker in tickers:
    errors = []
    stock = yf.Ticker(ticker)
    earnings_dates = stock.get_earnings_dates(limit=11).dropna(axis=0).reset_index()

    # if earnings is after market close, set earnings date to next day
    after_close = earnings_dates['Earnings Date'].dt.hour > 6
    earnings_dates.loc[after_close, 'Earnings Date'] = earnings_dates['Earnings Date'] + dt.timedelta(days=1)
    earnings_dates = earnings_dates['Earnings Date'].dt.date
    # The series holds datetime.date objects, so compare against a date.
    # Comparing dates with a Timestamp raises under pandas 2.
    earnings_dates = earnings_dates.loc[lambda x: x > dt.date(2021, 2, 1)]
    dates_export = [a_date.strftime('%Y-%m-%d') for a_date in earnings_dates]
    with open(f'data/{ticker}-earnings-dates.pickle', 'wb') as f:
        pickle.dump(dates_export, f)

    # define df lists that will be turned into dfs of aggregate functions
    near_aggregate_df = []
    med_aggregate_df = []
    far_aggregate_df = []

    # Days between earnings and expiry, collected across ALL earnings dates of
    # this ticker. These must live outside the earnings loop, otherwise only the
    # last earnings date contributes to the average.
    near_avg_days_after_earnings = []
    med_avg_days_after_earnings = []
    far_avg_days_after_earnings = []

    # Position of an expiration (0, 1, 2) selects the bucket it is stored in.
    buckets = [
        ('near', near_aggregate_df, near_avg_days_after_earnings),
        ('med', med_aggregate_df, med_avg_days_after_earnings),
        ('far', far_aggregate_df, far_avg_days_after_earnings),
    ]

    for earnings_date in earnings_dates:

        search_date = (pd.to_datetime(earnings_date) - dt.timedelta(days=40)).date().strftime('%Y-%m-%d')

        valid_dates = get_valid_dates(search_date, earnings_date)

        contracts_df = get_contracts(search_date, earnings_date)

        start_date = valid_dates[-22]

        underlying = get_underlying(start_date, earnings_date, ticker)

        valid_expirations_mask = contracts_df['expiration date'].dt.date >= earnings_date
        expirations = contracts_df[valid_expirations_mask]['expiration date'].unique()[0:3]
        for i, expiration_date in enumerate(expirations):
            try:
                expiration_mask = contracts_df['expiration date'] == expiration_date
                call_mask = contracts_df['type'] == 'call'
                put_mask = contracts_df['type'] == 'put'

                list_of_df = []
                for date in valid_dates:
                    # underlying['date'] holds datetime.date objects, so compare
                    # with a date. A Timestamp never equals a date under pandas 2.
                    date_mask = underlying['date'] == pd.to_datetime(date).date()
                    stock_price = underlying[date_mask]['close']
                    # IndexError here means there is no underlying bar for this day.
                    spot = stock_price.iloc[0]

                    # Closest strike to spot per leg; select rows by index label (.loc).
                    call_distance = (contracts_df[expiration_mask & call_mask]['strike price'] - spot).abs()
                    put_distance = (contracts_df[expiration_mask & put_mask]['strike price'] - spot).abs()
                    call = contracts_df.loc[call_distance.sort_values().head(1).index]
                    put = contracts_df.loc[put_distance.sort_values().head(1).index]

                    list_of_df.append(get_net_pnl(call, put, date))
                    time.sleep(0.10)

                all_df = pd.DataFrame(columns=['date'])
                for dframe in list_of_df:
                    all_df = pd.merge(all_df, dframe, how='outer', on='date')

                days_after_earnings = (pd.to_datetime(expiration_date).date() - pd.to_datetime(all_df['date'].iloc[-1]).date()).days

                # One row per calendar date, one column per straddle start day.
                # Built as a new frame so the two views below do not share state.
                by_date = all_df.set_index('date').sort_index()

                # create the dates value DF for the hovertemplate:
                # every cell holds the date of its own column
                date_df = by_date.transpose()
                for column in date_df:
                    date_df[column] = column
                new_column_names = list(range(date_df.shape[1] - 1, 0, -1))
                new_column_names.append('After Earnings')
                date_df.columns = new_column_names
                date_df = date_df.astype('str')

                # use this to make 'days remaining' the x-axis
                days_remaining = [str(n) for n in range(len(by_date) - 1, -1, -1)]
                days_remaining[-1] = 'After Earnings'
                days_df = by_date.copy()
                days_df['days remaining'] = days_remaining
                days_df = days_df.set_index('days remaining').transpose()

                # creates the heatmap (the figure is only built here; nothing in
                # this cell shows or saves it)
                fig = px.imshow(days_df.round(1), color_continuous_scale=[(0,'red'), (0.5,'white'), (1.0, 'green')], range_color=(-100 ,100), text_auto=True)
                fig.update_layout(
                    title=f'{ticker} ATM Straddle Performance in percent (expires {days_after_earnings} days after earnings) for {earnings_date}',
                    title_x=0.5,
                    yaxis_title='Straddle Initiated',
                    xaxis_title='Trading Days Remaining'
                )
                fig.update_xaxes(rangebreaks=[dict(bounds=["sat", "mon"])])  # hide weekends, eg. hide sat to before mon
                fig.update(data=[{'customdata': np.dstack((days_df.round(1), date_df)),
                    'hovertemplate': '<b>return: %{z:.1f}</b> <br>date: %{customdata[1]}'}])
                print(ticker + ' ' + str(earnings_date) + ' complete')

                # define dictionary for export pickling
                a_dict = {'days_df': days_df, 'date_df': date_df, 'days after earnings': days_after_earnings}

                label, bucket_frames, bucket_days = buckets[i]
                bucket_frames.append(days_df)  # append days_df for aggregate
                bucket_days.append(days_after_earnings)
                with open(f'data/{ticker}-{earnings_date}-{label}.pickle', 'wb') as f:
                    pickle.dump(a_dict, f)
            except polygon.exceptions.NoResultsError:
                print(f'There was a "no results" error! ticker: {ticker}, earnings date: {earnings_date}')
                errors.append(f'There was a "no results" error! ticker: {ticker}, earnings date: {earnings_date}')
            except IndexError as e:
                print(e)
                print(f'Missing Underlying Price History: {ticker}, earnings date: {earnings_date}')
                errors.append(f'Missing Underlying Price History: {ticker}, earnings date: {earnings_date}')

    # process the aggregate dfs, put them into a dictionary of dfs and save them.
    # A bucket without any successful straddle is skipped and logged, so one
    # sparse ticker does not abort the run for the remaining tickers.
    aggregates = {}
    for label, bucket_frames, bucket_days in buckets:
        if not bucket_frames:
            message = f'No {label} straddle data to aggregate: {ticker}'
            print(message)
            errors.append(message)
            continue
        aggregates[label] = process_df(bucket_frames, np.mean(bucket_days))
        with open(f'data/{ticker}-agg-{label}.pickle', 'wb') as f:
            pickle.dump(aggregates[label], f)

    # keep the original result names available (None when a bucket was skipped)
    near_dict = aggregates.get('near')
    med_dict = aggregates.get('med')
    far_dict = aggregates.get('far')

    # iterate over dictionaries for visualization
    list_of_aggregates = [agg for agg in (near_dict, med_dict, far_dict) if agg is not None]

    for agg in list_of_aggregates:
        avg_days = agg['avg_days']
        fig = px.imshow(agg['mean'].round(1), color_continuous_scale=[(0,'red'), (0.5,'white'), (1.0, 'green')], range_color=(-100 ,100), text_auto=True)
        fig.update_layout(
            title=f'{ticker} ATM Straddle Performance in percent<br>expires ~{avg_days} days after earnings',
            title_x=0.5,
            yaxis_title='Straddle Initiated',
            xaxis_title='Trading Days Remaining'
        )
        fig.update_xaxes(rangebreaks=[dict(bounds=["sat", "mon"])])  # hide weekends, eg. hide sat to before mon
        fig.update_yaxes(autorange='reversed')

        fig.update(data=[{'customdata': np.dstack((agg['median'], agg['max'], agg['min'])),
            'hovertemplate': '<b>mean:%{z:.1f}</b> <br>median: %{customdata[0]:.1f} <br>max: %{customdata[1]:.1f} <br>min: %{customdata[2]:.1f}'}])
        time.sleep(1)
    # export errors
    with open(f'data/{ticker}-errors.pickle', 'wb') as f:
        pickle.dump(errors, f)
    print(f'--------{ticker} is complete----------')

FB 2022-04-28 complete
FB 2022-04-28 complete
FB 2022-04-28 complete
FB 2022-02-03 complete
FB 2022-02-03 complete
FB 2022-02-03 complete
FB 2021-10-26 complete
FB 2021-10-26 complete
FB 2021-10-26 complete
FB 2021-07-29 complete
FB 2021-07-29 complete
FB 2021-07-29 complete
FB 2021-04-29 complete
FB 2021-04-29 complete
FB 2021-04-29 complete
--------FB is complete----------
BABA 2022-11-17 complete
BABA 2022-11-17 complete
BABA 2022-11-17 complete
BABA 2022-08-04 complete
BABA 2022-08-04 complete
BABA 2022-08-04 complete
BABA 2022-05-26 complete
BABA 2022-05-26 complete
BABA 2022-05-26 complete
BABA 2022-02-24 complete
BABA 2022-02-24 complete
BABA 2022-02-24 complete
BABA 2021-11-18 complete
BABA 2021-11-18 complete
BABA 2021-11-18 complete
BABA 2021-08-03 complete
BABA 2021-08-03 complete
BABA 2021-08-03 complete
BABA 2021-05-13 complete
BABA 2021-05-13 complete
BABA 2021-05-13 complete
--------BABA is complete----------
DIS 2022-11-09 complete
DIS 2022-11-09 complete
DIS 2022-11-09

XOM 2022-07-29 complete
XOM 2022-07-29 complete
XOM 2022-07-29 complete
XOM 2022-04-29 complete
XOM 2022-04-29 complete
XOM 2022-04-29 complete
XOM 2022-02-01 complete
XOM 2022-02-01 complete
XOM 2022-02-01 complete
XOM 2021-10-29 complete
XOM 2021-10-29 complete
XOM 2021-10-29 complete
XOM 2021-07-30 complete
XOM 2021-07-30 complete
XOM 2021-07-30 complete
--------XOM is complete----------
SQ 2022-11-04 complete
SQ 2022-11-04 complete
SQ 2022-11-04 complete
SQ 2022-08-05 complete
SQ 2022-08-05 complete
SQ 2022-08-05 complete
SQ 2022-05-06 complete
SQ 2022-05-06 complete
SQ 2022-05-06 complete
SQ 2022-02-25 complete
SQ 2022-02-25 complete
SQ 2022-02-25 complete
SQ 2021-11-05 complete
SQ 2021-11-05 complete
SQ 2021-11-05 complete
SQ 2021-08-02 complete
SQ 2021-08-02 complete
SQ 2021-08-02 complete
SQ 2021-05-07 complete
SQ 2021-05-07 complete
SQ 2021-05-07 complete
--------SQ is complete----------
BA 2022-10-26 complete
BA 2022-10-26 complete
BA 2022-10-26 complete
BA 2022-07-27 complet

KO 2021-10-27 complete
KO 2021-10-27 complete
KO 2021-10-27 complete
KO 2021-07-21 complete
KO 2021-07-21 complete
KO 2021-07-21 complete
--------KO is complete----------
TGT 2022-11-16 complete
TGT 2022-11-16 complete
TGT 2022-11-16 complete
TGT 2022-08-17 complete
TGT 2022-08-17 complete
TGT 2022-08-17 complete
TGT 2022-05-18 complete
TGT 2022-05-18 complete
TGT 2022-05-18 complete
TGT 2022-03-01 complete
TGT 2022-03-01 complete
TGT 2022-03-01 complete
TGT 2021-11-17 complete
TGT 2021-11-17 complete
TGT 2021-11-17 complete
TGT 2021-08-18 complete
TGT 2021-08-18 complete
TGT 2021-08-18 complete
--------TGT is complete----------
There was a "no results" error! ticker: BP, earnings date: 2022-11-01
BP 2022-11-01 complete
BP 2022-11-01 complete
BP 2022-08-02 complete
There was a "no results" error! ticker: BP, earnings date: 2022-08-02
BP 2022-08-02 complete
BP 2022-05-03 complete
BP 2022-05-03 complete
BP 2022-05-03 complete
BP 2022-02-08 complete
BP 2022-02-08 complete
BP 2022-02-08 co

NKE 2021-06-25 complete
NKE 2021-06-25 complete
NKE 2021-06-25 complete
--------NKE is complete----------
COST 2022-12-09 complete
COST 2022-12-09 complete
COST 2022-12-09 complete
COST 2022-09-23 complete
COST 2022-09-23 complete
COST 2022-09-23 complete
COST 2022-05-27 complete
COST 2022-05-27 complete
COST 2022-05-27 complete
COST 2022-03-04 complete
COST 2022-03-04 complete
COST 2022-03-04 complete
COST 2021-12-10 complete
COST 2021-12-10 complete
COST 2021-12-10 complete
COST 2021-09-24 complete
COST 2021-09-24 complete
COST 2021-09-24 complete
COST 2021-05-28 complete
COST 2021-05-28 complete
COST 2021-05-28 complete
COST 2021-03-05 complete
COST 2021-03-05 complete
COST 2021-03-05 complete
--------COST is complete----------
HPQ 2022-11-23 complete
HPQ 2022-11-23 complete
HPQ 2022-11-23 complete
HPQ 2022-08-31 complete
HPQ 2022-08-31 complete
HPQ 2022-08-31 complete
HPQ 2022-06-01 complete
HPQ 2022-06-01 complete
HPQ 2022-06-01 complete
HPQ 2022-03-01 complete
HPQ 2022-03-01 comp