### This notebook builds 2 csv files for any commodity that you specify in the variable `SYMBOL_TO_RESEARCH`

The csv files are then written as follows:
```
df_iv_final.to_csv(f'./temp_folder/df_iv_final_{SYMBOL_TO_RESEARCH}.csv',index=False)
df_iv_skew.to_csv(f'./temp_folder/df_iv_skew_{SYMBOL_TO_RESEARCH}.csv',index=False)
```

They can be copied to the volgrid project to be used by the Dash server that displays skew graphs for ES, CL, CB (Brent), and NG.

In [1]:
import zipfile
import glob
import pandas as pd
import numpy as np

from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
import sys
import os
if  not './' in sys.path:
    sys.path.append('./')
if  not '../' in sys.path:
    sys.path.append('../')

from barchartacs import build_db
from barchartacs import db_info
import plotly.graph_objs as go
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.tools as tls
from plotly.graph_objs.layout import Font,Margin
from IPython import display

import datetime
import io
from tqdm import tqdm,tqdm_notebook
from barchartacs import pg_pandas as pg
import mibian
import py_vollib
import importlib
from py_vollib import black
from py_vollib.black import implied_volatility
import ipdb
import traceback
import db_info
# Futures month letters in calendar order: position 1 is 'F', position 12 is 'Z'.
MONTH_CODES = 'FGHJKMNQUVXZ'
# Month letter -> 1-based month number.
DICT_MONTH_NUMS = dict(zip(MONTH_CODES, range(1, len(MONTH_CODES) + 1)))

# importlib.reload(db_info)

In [2]:
import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this also hides pandas/numpy deprecation warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

### important global variables

In [3]:

# Debug switch; not referenced by any code visible in this notebook.
DEBUG_IT=False
# Fully qualified table names used when building the options / underlying SQL queries.
opttab = 'sec_schema.options_table'
futtab = 'sec_schema.underlying_table'
# Database access object; the rest of the notebook runs SQL through pga.get_sql(...).
pga = db_info.get_db_info()
# Commodity root: contracts whose symbol starts with this value are processed.
SYMBOL_TO_RESEARCH = 'NG'
# Optional per-commodity strike divisor, looked up by SYMBOL_TO_RESEARCH in the main loop.
STRIKE_DIVISORS = {}

# df_expiry_dates_additions = pd.read_csv('df_expiry_dates_additions.csv')
# Expiration overrides; get_postgres_data reads its `symbol` and `yyyymmdd_option` columns.
df_expiry_dates_additions = pd.read_csv('live_option_expirations.csv')


  sec_db


### methods to build options

In [4]:
# When True, get_implieds uses py_vollib (lam_pyvol); otherwise it uses mibian (lam_mibian).
USE_PYVOL = True
def lam_pyvol(r):
    """
    Row-wise implied vol using py_vollib's Black model.

    :param r: a row with close_x (option price), close_y (underlying price),
              strike, dte (days to expiry) and pc ('C' or 'P').
    :return: the implied volatility, or -1 when py_vollib cannot solve for it.
    """
    try:
        # argument order: price, underlying, strike, rate (2%), time in years, 'c'/'p'
        return implied_volatility.implied_volatility(r.close_x,r.close_y,r.strike,.02,r.dte/365, r.pc.lower())
    except Exception:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still stops a long apply
        return -1
# lam_pyvol = lambda r:implied_volatility.implied_volatility(r.close_x,r.close_y,r.strike,.02,r.dte/365, r.pc.lower())
def lam_mibian(r):
    """
    Row-wise implied vol using mibian's Black-Scholes class.

    :param r: a row with close_x (option price), close_y (underlying price),
              strike, dte (days to expiry) and pc ('C' or 'P').
    :return: mibian's impliedVolatility for the option.
    """
    # [underlying, strike, interest rate (2), days to expiration]
    bs_args = [r.close_y,r.strike,2,r.dte]
    if str(r.pc).upper() == 'P':
        # a put's price must be supplied as putPrice; passing it as callPrice
        # solves for the vol of a call with that price instead
        return mibian.BS(bs_args, putPrice=r.close_x).impliedVolatility
    return mibian.BS(bs_args, callPrice=r.close_x).impliedVolatility

def get_implieds(df,df_expiry_dates,contract):
    """
    Compute implied vols for the out-of-the-money options of ONE contract.

    :param df: merged options/underlying rows; must contain symbol, contract_num, pc,
               settle_date, strike, close_x (option close) and close_y (underlying close).
    :param df_expiry_dates: one row per symbol with columns symbol and settle_date,
               where settle_date holds the option expiry as yyyymmdd.
    :param contract: option symbol to process.
    :return: DataFrame with columns symbol, settle_date, pc, contract_num, strike,
             close_x, close_y, dte and iv, indexed 0..n-1.
    """
    df2 = df[['symbol','contract_num','pc','settle_date','strike','close_x','close_y']]
    # keep calls struck at/above the underlying and puts struck below it
    is_otm_call = (df2.pc=='C') & (df2.strike>=df2.close_y)
    is_otm_put = (df2.pc=='P') & (df2.strike<df2.close_y)
    df2 = df2[(is_otm_call | is_otm_put) & (df2.symbol.str.contains(contract))]
    # use the looked-up contract number instead of a second hard-coded 2
    cnum = _get_contract_number_from_symbol(contract)
    df2 = df2[df2.contract_num==cnum]
    # restrict strikes to 70% of the lowest .. 130% of the highest underlying close
    # NOTE(review): round() makes these bounds whole numbers, which is coarse for
    # low-priced underlyings — confirm this is intended.
    phigh = df2.close_y.max()
    plow = df2.close_y.min()
    high_strike = round(phigh * 1.3)
    low_strike = round(plow * .7)
    df2 = df2[(df2.strike>=low_strike) & (df2.strike<=high_strike)]

    df9 = df2[df2.symbol==contract]
    df9 = df9.merge(df_expiry_dates.rename(columns={'settle_date':'expiry'}),on='symbol',how='inner')

    def _yyyymmdd_to_datetime(values):
        # only the first 8 characters (yyyymmdd) are parsed, as in the per-field slicing before
        return pd.to_datetime(values.astype(str).str.slice(0,8), format='%Y%m%d')

    # days to expiry = expiry date - settle date
    df9['dte'] = (_yyyymmdd_to_datetime(df9.expiry) - _yyyymmdd_to_datetime(df9.settle_date)).dt.days
    df10 = df9[['symbol','settle_date','pc','contract_num','strike','close_x','close_y','dte']].copy()
    df10.index = list(range(len(df10)))
    if len(df10) == 0:
        # nothing to price; still return the expected iv column
        df10['iv'] = np.nan
        return df10
    if USE_PYVOL:
        df10['iv'] = df10.apply(lam_pyvol,axis=1)
    else:
        # mibian is slow, so work in chunks of n rows to drive the progress bar;
        # range() covers every row, so no separate "remaining rows" pass is needed
        n = 100
        df10['iv'] = np.nan
        for start in tqdm_notebook(range(0,len(df10),n)):
            chunk = df10.iloc[start:start+n]
            df10.loc[chunk.index,'iv'] = chunk.apply(lam_mibian,axis=1)
    return df10



#### example of using mibian for options calcs (we use py_vollib instead)

In [5]:
def _test_mibian():
    """
    Print mibian Black-Scholes prices and greeks for a sample option, then
    build a second BS object from the resulting call price to back out the vol.
    """
    # [underlying, strike, interest, days]
    bs_inputs = [1.4565, 1.45, 1, 30]
    priced = mibian.BS(bs_inputs, volatility=20)
    printed_fields = (
        'callPrice', 'putPrice',
        'callDelta', 'putDelta',
        'callDelta2', 'putDelta2',
        'callTheta', 'putTheta',
        'callRho', 'putRho',
        'vega', 'gamma',
    )
    print(*[getattr(priced, field) for field in printed_fields])

    # implied vol recovered from the call price computed above (value is not returned)
    implied = mibian.BS(bs_inputs, callPrice=priced.callPrice)
    implied.impliedVolatility

#### Show simple example of using py_vol package

In [6]:
def _test_py_vollib():
    """
    Price one put with py_vollib's Black model and back out implied vol from a
    quoted price with both py_vollib and mibian.

    :return: tuple (model price from black.black, py_vollib implied vol, mibian implied vol).
             The function previously built this tuple and discarded it.
    """
    #CL,Q2019,560P,07/02/2019,0.6,1.61,0.54,1.54,1997,4465
    F = 56.25
    K = 56
    sigma = .366591539
    flag = 'p'
    t = 15/365.0
    r = .025
    discounted_price = black.black(flag, F, K, t, r, sigma)
    dcp = 1.54
    ivpy = implied_volatility.implied_volatility(dcp, F, K, r, t, flag)
    # the quote is a put (flag 'p'), so it must be given to mibian as putPrice
    ivmn = mibian.BS([F,K,2.5,15], putPrice=dcp).impliedVolatility
    return discounted_price,ivpy,ivmn


In [7]:
def plotly_plot(df_in,x_column,plot_title=None,
                y_left_label=None,y_right_label=None,
                bar_plot=False,figsize=(16,10),
                number_of_ticks_display=20,
                yaxis2_cols=None):
    """
    Build a plotly Figure with one trace per non-x column of df_in.

    :param df_in: DataFrame holding the x column and every y column to plot.
    :param x_column: name of the column used for the (categorical) x axis.
    :param plot_title: figure title.
    :param y_left_label: left axis title (defaults to 'y main').
    :param y_right_label: right axis title (defaults to 'y alt').
    :param bar_plot: draw go.Bar traces instead of go.Scatter.
    :param figsize: accepted for compatibility; not used by this function.
    :param number_of_ticks_display: approximate number of x tick labels to show.
    :param yaxis2_cols: columns drawn against the right-hand axis.
    :return: go.Figure (not displayed here; pass it to iplot).
    """
    ya2c = [] if yaxis2_cols is None else yaxis2_cols
    ycols = [c for c in df_in.columns.values if c != x_column]
    # create tdvals, which will have x axis labels
    td = list(df_in[x_column])
    nt = len(df_in)-1 if number_of_ticks_display > len(df_in) else number_of_ticks_display
    # guard against frames with 0 or 1 rows, where nt is <= 0 and the
    # division / slice step below would otherwise fail
    spacing = max(1, len(td)//nt) if nt > 0 else 1
    tdvals = td[::spacing]

    # one trace per y column, routed to the left or right axis
    data = []
    trace_type = go.Bar if bar_plot else go.Scatter
    for ycol in ycols:
        data.append(trace_type(x=td,y=df_in[ycol],name=ycol,yaxis='y' if ycol not in ya2c else 'y2'))

    # create a layout
    layout = go.Layout(
        title=plot_title,
        xaxis=dict(
            ticktext=tdvals,
            tickvals=tdvals,
            tickangle=45,
            type='category'),
        yaxis=dict(
            title='y main' if y_left_label is None else y_left_label
        ),
        yaxis2=dict(
            title='y alt' if y_right_label is None else y_right_label,
            overlaying='y',
            side='right'),
        margin=Margin(
            b=100
        )
    )

    fig = go.Figure(data=data,layout=layout)
    return fig


#### Define method to get a contract from postgres

In [8]:
def _next_monthyear_code(contract):
    """
    Return the symbol one month after `contract`.

    The symbol is read as <root><month letter><2-digit year>; the month letter
    advances through MONTH_CODES and wraps to the first letter of the next year.
    """
    root, month_letter, yy = contract[:-3], contract[-3], int(contract[-2:])
    month_num = DICT_MONTH_NUMS[month_letter]
    if month_num < 12:
        next_month_num, next_yy = month_num + 1, yy
    else:
        # past the last month letter: roll into the following year
        next_month_num, next_yy = 1, yy + 1
    return root + MONTH_CODES[next_month_num - 1] + '%02d' %(next_yy)

def get_postgres_data(contract,strike_divisor=None):
    '''
    Get options and underlying data for ONLY ONE CONTRACT

    :param contract: option symbol to load.
    :param strike_divisor: when not None, strikes in the merged frame are divided by it.
    :return: (df, df_expiry_dates) where df is the options rows inner-joined to the
             underlying rows on settle_date, and df_expiry_dates has one row per symbol
             (columns symbol, settle_date) holding the option expiry.
    :raises ValueError: when fewer than 10 option rows exist, or when no underlying
             contract with enough settle days is found.
    '''
    osql = f"select * from {opttab} where symbol='{contract}';"
    dfo = pga.get_sql(osql)
    if len(dfo)<10:
        e = f'''
        get_postgres_data ERROR: not enough option data for contract {contract} 
        '''
        raise ValueError(e)
    num_settle_days = len(dfo.settle_date.unique())
    # Find an underlying with at least as many rows as the option has settle days,
    # starting with the same symbol and stepping forward one month at a time.
    u_contract = contract
    for i in range(12):
        usql = f"select * from {futtab} where symbol='{u_contract}';"
        dfu = pga.get_sql(usql)
        if len(dfu) < num_settle_days:
            u_contract = _next_monthyear_code(u_contract)
            print(f'trying contract {u_contract}')
        else:
            break

    if len(dfu)< num_settle_days:
        e = f'''
        get_postgres_data ERROR: not enough underlying days found for options contract {contract} 
        where len(underlying) = {len(dfu)} and num_settle_days = {num_settle_days}
        '''
        raise ValueError(e)
    # Merge options and futures data
    dfu = dfu.rename(columns={'symbol':'u_symbol'})
    df = dfo.merge(dfu,how='inner',on=['settle_date'])
    # Get options expiration dates: default to the last settle_date seen per symbol ...
    df_expiry_dates = dfo[['symbol','settle_date']].groupby('symbol',as_index=False).max()
    # ... and let rows from df_expiry_dates_additions replace that default
    df_additions = df_expiry_dates_additions[df_expiry_dates_additions.symbol==contract]
    df_additions = df_additions[['symbol','yyyymmdd_option']].rename(columns={'yyyymmdd_option':'settle_date'})
    additional_symbols = df_additions.symbol.values
    df_expiry_dates = df_expiry_dates[~df_expiry_dates.symbol.isin(additional_symbols)]
    # pd.concat instead of DataFrame.append, which no longer exists in pandas 2.x
    df_expiry_dates = pd.concat([df_expiry_dates,df_additions]).sort_values('symbol').copy()
    if strike_divisor is not None:
        df.strike = df.strike/strike_divisor
    return df,df_expiry_dates

In [9]:
def _get_contract_number_from_symbol(symbol):
    c = symbol[0:2]
    if c in ['CL','CB','ES','GE','NG']:
        return 2
    return 2

### Use py_vol to get options skews by percent in/out of the money (moneyness)

#### Add in strikes at even amounts in/out of the money, and interpolate their implied vols and skews

In [10]:
import pdb

def get_even_moneyness_strikes(df10):
    """
    Add 'pseudo' strikes at even moneyness levels and interpolate their implied vols.

    For every (symbol, settle_date, contract_num, dte, close_y) group, strikes at
    0.70, 0.75, ... 1.35 times the underlying are added, their iv is filled by
    2nd-order polynomial interpolation over the real strikes, and only those
    pseudo-strike rows with an interpolated iv are returned.

    :param df10: output of get_implieds (needs symbol, settle_date, pc, contract_num,
                 dte, close_y, strike and iv).
    :return: DataFrame with the input columns plus moneyness (strike/close_y - 1),
             atm_iv (iv at moneyness 0 for that symbol/settle_date) and
             vol_skew (iv - atm_iv). The pc column is blanked to ''.
    """
    # define amounts around the money which will help create strikes to add
    moneyness = np.arange(.7,1.4,.05).round(6)
    # define columns on which to execute groupby
    gb_cols = ['symbol','settle_date','contract_num','dte','close_y']

    def _add_even_moneyness_strikes(df):
        # get underlying from first row (the groupby makes them all the same)
        underlying = df.iloc[0].close_y
        # one new row per moneyness level, built by repeating the group's first row,
        # so this also works when the group has fewer rows than moneyness levels
        df_pseudo = df[gb_cols].iloc[[0] * len(moneyness)].reset_index(drop=True)
        # add nan iv's !!!! MUST BE np.nan - NOT None
        df_pseudo['iv'] = np.nan
        df_pseudo['strike'] = moneyness * underlying
        # pd.concat instead of DataFrame.append, which no longer exists in pandas 2.x
        try:
            dfa = pd.concat([df,df_pseudo],ignore_index=True,sort=True)
        except TypeError:
            # pandas versions before 0.23 do not accept the sort argument
            dfa = pd.concat([df,df_pseudo],ignore_index=True)
        # pseudo rows have no pc, so they sort last and lose to a real row at the same strike
        df_ret2 = dfa.sort_values(['symbol','settle_date','pc','strike'])
        df_ret2 = df_ret2.drop_duplicates(subset='strike')
        # set the index to the strike so that interpolate works
        df_ret2.index = df_ret2.strike
        # a 2nd-order fit needs at least 3 known points; otherwise leave the
        # pseudo strikes as NaN so they are filtered out below
        if df_ret2.iv.notna().sum() > 2:
            df_ret2['iv'] = df_ret2.iv.interpolate(method='polynomial', order=2)
        # reset the index
        df_ret2.index = list(range(len(df_ret2)))
        return df_ret2

    # start here: process each group explicitly and stack the results in group-key order
    pieces = [_add_even_moneyness_strikes(grp) for _, grp in df10.groupby(gb_cols)]
    if pieces:
        df11 = pd.concat(pieces,ignore_index=True)
    else:
        df11 = df10.copy()
        df11.index = list(range(len(df11)))
    df11['moneyness'] = df11.strike / df11.close_y
    df11.moneyness = df11.moneyness.round(4)

    # keep only rows sitting exactly on the moneyness grid that received an iv
    df12 = df11[(df11.moneyness.isin(moneyness)) & (~df11.iv.isna())].copy()
    df12.moneyness  = df12.moneyness - 1
    df12.index = list(range(len(df12)))
    df12_atm = df12[df12.moneyness==0][['symbol','settle_date','pc','iv']]
    df12_atm = df12_atm.rename(columns={'iv':'atm_iv'})

    df12_atm = df12_atm.drop_duplicates().copy()
    # blank pc on both sides so the merge key does not depend on put/call
    df12_atm['pc'] = ''
    df12['pc'] = ''

    df12 = df12.merge(df12_atm,on=['symbol','settle_date','pc'],how='inner')

    df12.moneyness = df12.moneyness.round(4)
    df12['vol_skew'] = (df12.iv - df12.atm_iv).round(4)
    return df12



#### get all contracts in the options database

In [11]:
# Distinct option symbols that start with SYMBOL_TO_RESEARCH (the `~ '^...'` pattern match),
# sorted by symbol and flattened into a 1-d array.
all_contracts = pga.get_sql(f"select distinct symbol from {opttab} where symbol~'^{SYMBOL_TO_RESEARCH}'").sort_values('symbol').values.reshape(-1)
# cell output: number of contracts found
len(all_contracts)

118

#### show last dates

In [12]:
# Per symbol: number of option rows and the latest settle_date stored,
# for symbols whose first two characters equal SYMBOL_TO_RESEARCH.
sql = f'''
select symbol, count(settle_date), max(settle_date) last_date from {opttab} 
where substring(symbol,1,2)='{SYMBOL_TO_RESEARCH}'
group by symbol
order by symbol;
'''
display.display(pga.get_sql(sql))


Unnamed: 0,symbol,count,last_date
0,NGF12,187,20111020
1,NGF13,12465,20121226
2,NGF14,22126,20131226
3,NGF15,22744,20141226
4,NGF16,17591,20151228
...,...,...,...
113,NGZ16,21957,20161125
114,NGZ17,20054,20171127
115,NGZ18,13464,20181127
116,NGZ19,13089,20191125


In [13]:
def create_skew_per_date_df(df):
    '''
    Pivot the vol_skew values of ONE symbol into a moneyness x settle_date table.

    The moneyness rows are seeded from the first settle_date whose count of rows
    is equal to the max count of rows; every settle_date then contributes one
    column (named str(settle_date)) holding its vol_skew per moneyness.

    :param df: DataFrame with symbol, settle_date, moneyness and vol_skew columns.
               Only the first symbol found is used.
    :return: DataFrame with a moneyness column plus one column per settle_date.
             Missing (and exactly zero) skews are NaN, except the moneyness == 0
             row, which is all 0.0.
    '''
    # get the first symbol (which should be the only symbol)
    contract = df.symbol.unique()[0]
    # get just that symbol's data
    df12 = df[df.symbol==contract]
    df_counts = df12[['settle_date','moneyness']].groupby('settle_date',as_index=False).count()
    max_count = df_counts.moneyness.max()
    first_max_count_settle_date = df_counts[df_counts.moneyness==max_count].iloc[0].settle_date

    df_ret2 = df12[df12.settle_date==first_max_count_settle_date][['moneyness']]
    all_settle_dates = sorted(df_counts.settle_date.unique())
    # add one column per settle_date, keyed on moneyness
    for settle_date in all_settle_dates:
        df_temp = df12[df12.settle_date==settle_date][['moneyness','vol_skew']]
        df_ret2 = df_ret2.merge(df_temp,on='moneyness',how='outer')
        df_ret2 = df_ret2.rename(columns={'vol_skew':str(settle_date)})
    df_ret2 = df_ret2.sort_values('moneyness')
    df_ret2.index = list(range(len(df_ret2)))
    df_ret3 = df_ret2.fillna(0)
    # when a moneyness appears on several rows, keep the row(s) with the largest row sum
    df_ret3['csum'] = df_ret3.apply(lambda r: sum([r[c] for c in df_ret2.columns.values]),axis=1)
    df_csum = df_ret3[['moneyness','csum']].groupby('moneyness',as_index=False).max()
    df_ret4 = df_ret3.merge(df_csum,how='inner',on=['moneyness','csum']).drop_duplicates()
    df_ret4.index = list(range(len(df_ret4)))

    # eliminate csum column from returned DataFrame (copy so the assignments below
    # act on an independent frame)
    df_ret4 = df_ret4[[c for c in df_ret4.columns.values if 'csum' not in c]].copy()

    # convert zero values back to NaN (np.nan: the np.NaN alias was removed in NumPy 2.0)
    for col in [c for c in df_ret4.columns.values if 'moneyness' not in c]:
        df_ret4[col] = df_ret4[col].apply(lambda v: np.nan if v==0.0 else v)
    # the at-the-money row has zero skew by definition, so restore its zeros
    df_ret4[df_ret4.moneyness==0] = 0.0
    return df_ret4


### Skew per contract

In [14]:
def skew_per_symbol(symbol,strike_divisor=None):
    '''
    For a symbol like CLM16 or EZH19, create 2 Dataframes
      1. df_iv - contains rows of implied vols, for only the 'pseudo' strikes that are an even
                 percent away from the money for each settle_date
      2. df_skew - contains one row per day of skew data for 'pseudo' strikes that are an even
                 percent away from the money for each settle_date

    :param symbol: option contract symbol.
    :param strike_divisor: forwarded to get_postgres_data; when not None the strikes
                 are divided by it.
    :return: (df_iv, df_skew, exception message, stack trace). Exceptions are caught
             and reported through the last two items (both None on success). The two
             frames are None when an exception occurred or when there are no rows
             with contract_num == 2.
    '''
    _exception = None
    _stacktrace = None
    df_iv = None
    df_skew = None
    try:
        # strike_divisor was previously accepted but never passed on
        df,df_expiry_dates = get_postgres_data(symbol,strike_divisor=strike_divisor)
        if len(df[df.contract_num==2])>0:
            df10 = get_implieds(df,df_expiry_dates,symbol)
            df12 = get_even_moneyness_strikes(df10)
            df_sk = create_skew_per_date_df(df12)
            df_sk.index = list(range(len(df_sk)))
            # transpose: rows become settle dates, columns become moneyness levels
            df_skt = df_sk.T
            df_skt.columns = df_skt.loc['moneyness']
            # drop the first row, which holds the moneyness values just used as headers
            df_skt = df_skt.iloc[1:].copy()
            df_skt['symbol'] = symbol
            df_skt['settle_date'] = df_skt.index
            df_iv = df12.copy()
            df_skew = df_skt.copy()
    except Exception as e:
        _exception = str(e)
        _stacktrace = traceback.format_exc()
    return df_iv,df_skew,_exception,_stacktrace

### Show contracts

In [15]:
all_contracts

array(['NGF12', 'NGF13', 'NGF14', 'NGF15', 'NGF16', 'NGF17', 'NGF18',
       'NGF19', 'NGF20', 'NGF21', 'NGG12', 'NGG13', 'NGG14', 'NGG15',
       'NGG16', 'NGG17', 'NGG18', 'NGG19', 'NGG20', 'NGG21', 'NGH12',
       'NGH13', 'NGH14', 'NGH15', 'NGH16', 'NGH17', 'NGH18', 'NGH19',
       'NGH20', 'NGH21', 'NGJ12', 'NGJ13', 'NGJ14', 'NGJ15', 'NGJ16',
       'NGJ17', 'NGJ18', 'NGJ19', 'NGJ20', 'NGJ21', 'NGK12', 'NGK13',
       'NGK14', 'NGK15', 'NGK16', 'NGK17', 'NGK18', 'NGK19', 'NGK20',
       'NGM12', 'NGM13', 'NGM14', 'NGM15', 'NGM16', 'NGM17', 'NGM18',
       'NGM19', 'NGM20', 'NGN11', 'NGN12', 'NGN13', 'NGN14', 'NGN15',
       'NGN16', 'NGN17', 'NGN18', 'NGN19', 'NGN20', 'NGQ11', 'NGQ12',
       'NGQ13', 'NGQ14', 'NGQ15', 'NGQ16', 'NGQ17', 'NGQ18', 'NGQ19',
       'NGQ20', 'NGU11', 'NGU12', 'NGU13', 'NGU14', 'NGU15', 'NGU16',
       'NGU17', 'NGU18', 'NGU19', 'NGU20', 'NGV11', 'NGV12', 'NGV13',
       'NGV14', 'NGV15', 'NGV16', 'NGV17', 'NGV18', 'NGV19', 'NGV20',
       'NGX11', 'NGX

## MAIN LOOP
#### Loop through all contracts and create DataFrames for implied vol and skew (`df_iv_final` and `df_iv_skew`)

In [16]:
# Optional strike divisor for this commodity (None when not configured).
strike_div = STRIKE_DIVISORS.get(SYMBOL_TO_RESEARCH)
# Per-contract results are collected in lists and concatenated once after the loop;
# this avoids re-copying the growing frame on every iteration and does not depend on
# DataFrame.append, which no longer exists in pandas 2.x.
iv_frames = []
skew_frames = []
dict_exceptions = {}
dict_stacktraces = {}
contracts = all_contracts
if SYMBOL_TO_RESEARCH in ['ES','GE']:
    # only the H, M, U and Z month codes are processed for these two commodities
    contracts = [c for c in all_contracts if c[-3] in ['H','M','U','Z']]
for contract in tqdm_notebook(contracts):
    df12,df_skew,_exception,_stacktrace = skew_per_symbol(contract,strike_divisor=strike_div)
    if _exception is not None:
        dict_exceptions[contract] = _exception
        dict_stacktraces[contract] = _stacktrace
        continue

    no_iv = df12 is None or len(df12)<1
    no_skew = df_skew is None or len(df_skew)<1
    if no_iv or no_skew:
        # when both are missing, the df_skew message is the one that is kept
        if no_iv:
            dict_exceptions[contract] = "No data returned for df in skew_per_symbol"
        if no_skew:
            dict_exceptions[contract] = "No data returned for df_skew in skew_per_symbol"
        continue
    iv_frames.append(df12)
    skew_frames.append(df_skew)

if not iv_frames:
    # fail with a clear message instead of an AttributeError on None below
    raise ValueError(f'no implied vol data was built for {SYMBOL_TO_RESEARCH}; see dict_exceptions')
df_iv_final = pd.concat(iv_frames,ignore_index=True,sort=False)
df_iv_skew = pd.concat(skew_frames,ignore_index=True,sort=False)
df_iv_final = df_iv_final.sort_values(['settle_date','moneyness'])

HBox(children=(IntProgress(value=0, max=118), HTML(value='')))

2020-03-07 12:37:57,797 - numexpr.utils - INFO - NumExpr defaulting to 4 threads.





### save to csv and print any exceptions that might have occurred

In [17]:
# Show which contracts were skipped and why.
print(dict_exceptions)
# to_csv does not create missing directories, so make sure the target folder exists
os.makedirs('./temp_folder',exist_ok=True)
df_iv_final.to_csv(f'./temp_folder/df_iv_final_{SYMBOL_TO_RESEARCH}.csv',index=False)
df_iv_skew.to_csv(f'./temp_folder/df_iv_skew_{SYMBOL_TO_RESEARCH}.csv',index=False)

{'NGF12': 'No data returned for df_skew in skew_per_symbol', 'NGF21': 'No data returned for df_skew in skew_per_symbol', 'NGG12': 'No data returned for df_skew in skew_per_symbol', 'NGG21': 'No data returned for df_skew in skew_per_symbol', 'NGH12': 'No data returned for df_skew in skew_per_symbol', 'NGH21': 'No data returned for df_skew in skew_per_symbol', 'NGJ12': 'No data returned for df_skew in skew_per_symbol', 'NGJ21': 'No data returned for df_skew in skew_per_symbol', 'NGK12': 'No data returned for df_skew in skew_per_symbol', 'NGM12': 'No data returned for df_skew in skew_per_symbol', 'NGM20': 'No data returned for df_skew in skew_per_symbol', 'NGN11': 'No data returned for df_skew in skew_per_symbol', 'NGN20': 'No data returned for df_skew in skew_per_symbol', 'NGQ11': 'index 0 is out of bounds for axis 0 with size 0', 'NGQ20': 'No data returned for df_skew in skew_per_symbol', 'NGU11': 'No data returned for df_skew in skew_per_symbol', 'NGU20': 'No data returned for df_skew 

In [32]:
# NOTE(review): this rebinds SYMBOL_TO_RESEARCH (set to 'NG' in the config cell) and replaces
# the frames built by the main loop with previously saved ES files; every cell below
# therefore works on ES data when the notebook is run top to bottom.
SYMBOL_TO_RESEARCH = 'ES'
df_iv_final = pd.read_csv(f'./temp_folder/df_iv_final_{SYMBOL_TO_RESEARCH}.csv')
df_iv_skew = pd.read_csv(f'./temp_folder/df_iv_skew_{SYMBOL_TO_RESEARCH}.csv')

### graph skew

In [33]:
def graph_skew(df,do_plot=False):
    '''
    Graph skew for ONLY ONE symbol.
    If df contains more than one symbol, we will only graph the first symbol in the DataFrame.

    :param df: implied vol rows (symbol, settle_date, moneyness, vol_skew).
    :param do_plot: when True, each figure is also displayed with iplot.
    :return: list of figures, one per group of up to 5 consecutive settle dates.
    '''
    # get the first symbol (which should be the only symbol)
    contract = df.symbol.unique()[0]
    # use the frame that was passed in (this previously read the global df_iv_final)
    dfp = create_skew_per_date_df(df[df.symbol==contract])

    settle_dates = sorted([c for c in dfp.columns.values if c != 'moneyness'])
    group_size = 5
    settle_date_groups = [settle_dates[i:i+group_size] for i in range(0,len(settle_dates),group_size)]
    ret_figs = []
    for sdg in settle_date_groups:
        sdg_sorted = [str(c) for c in sorted(sdg)]
        cols = ['moneyness']+list(sdg_sorted)
        dfp_sub = dfp[cols]
        t = f"{contract} {sdg[0]} - {sdg[-1]}"
        f = plotly_plot(dfp_sub,x_column='moneyness',plot_title=t,y_left_label='vol skew')
        ret_figs.append(f)
        if do_plot:
            iplot(f)
    return ret_figs

#### Do the same plots as above, but using a grid

In [34]:
def graph_skew_subplots(df):
    """Display the figures from graph_skew(df) as grids of up to 4 subplots each."""
    figs = graph_skew(df)
    per_grid = 4
    for start in range(0, len(figs), per_grid):
        iplot(graph_skew_subplot_quad(figs[start:start + per_grid]))
    
def graph_skew_subplot_quad(rfs):
    '''
    Use subplots to output the results of the method graph_skew above

    :param rfs: list of figures (graph_skew_subplots passes at most 4), laid out
                on a 2 x 2 grid in row-major order.
    :return: the combined subplot figure. The traces of the input figures are
             re-pointed to their subplot axes in place.
    '''
    rows = 2
    f1 = tls.make_subplots(rows=rows, cols=2,
        shared_yaxes=False,
        subplot_titles=[rfs[i]['layout'].title for i in range(len(rfs))],
        horizontal_spacing=0.09,
        vertical_spacing=0.11,
        print_grid=False)

    pl_width=900
    pl_height=800
    title = 'Skew plots<br>'

    f1['layout'].update(title=title,
        font= Font(family="Open Sans, sans-serif"),
        showlegend=True,
        hovermode='x',
        autosize=True,
        width=pl_width,
        height=pl_height,
        plot_bgcolor='#EFECEA',
        bargap=0.05,
        margin=Margin(
                      l=45,
                      r=15,
                      b=55,
                      t=50
        )
    )
    for i in range(len(rfs)):
        # grid position (1-based row x, column y)
        x = int(i/2) + 1
        y = i % 2 + 1
        f = rfs[i]
        l = f.layout

        try:
            # the first subplot's axes carry no number: xaxis/yaxis/y, then
            # xaxis2/yaxis2/y2, ... (stripping the character '1' gave wrong names
            # for any index whose number contains a 1, e.g. 10)
            axis_suffix = '' if i == 0 else str(i+1)
            f1['layout'].update({f'xaxis{axis_suffix}':l.xaxis})
            f1['layout'].update({f'yaxis{axis_suffix}':l.yaxis})
            gname = f'c{x}-{y}'
            for d in f.data:
                d['yaxis'] = f'y{axis_suffix}'
                d['legendgroup'] =  gname
                f1.append_trace(d,x,y)
        except Exception as e:
            # report and continue with the next figure; the interactive
            # ipdb.set_trace() that used to sit here blocked unattended runs
            traceback.print_exc()
            print(f'graph_skew_subplots ERRORS: {str(e)}')
    return f1
# iplot(f1)


In [35]:
# Rows whose symbol contains the researched root; show the 5 latest by
# settle_date, then symbol, dte and strike (all descending).
symbol_matches = df_iv_final.symbol.str.contains(f'{SYMBOL_TO_RESEARCH}')
df_ivf2 = df_iv_final[symbol_matches]
df_ivf2.sort_values(by=['settle_date','symbol','dte','strike'],ascending=False).head(5)

Unnamed: 0,close_x,close_y,contract_num,dte,iv,pc,settle_date,strike,symbol,moneyness,atm_iv,vol_skew
31172,,2953.75,2,105,0.213087,,20200306,3987.5625,ESM20,0.35,0.315675,-0.1026
31171,,2953.75,2,105,0.203855,,20200306,3839.875,ESM20,0.3,0.315675,-0.1118
31170,,2953.75,2,105,0.197637,,20200306,3692.1875,ESM20,0.25,0.315675,-0.118
31169,,2953.75,2,105,0.196792,,20200306,3544.5,ESM20,0.2,0.315675,-0.1189
31168,,2953.75,2,105,0.206317,,20200306,3396.8125,ESM20,0.15,0.315675,-0.1094


In [36]:
# Month letter -> month number.
mcs = dict(zip('FGHJKMNQUVXZ', range(1, 13)))
# sorted(list(set(df_iv_final.symbol)))
# Each distinct symbol rewritten as <2-char root><yyyymm>, sorted for display.
sorted(
    f'{s[:2]}{(2000+int(s[-2:]))*100 + int(mcs[s[-3]])}'
    for s in set(df_iv_final.symbol)
)

['ES201106',
 'ES201109',
 'ES201112',
 'ES201203',
 'ES201206',
 'ES201209',
 'ES201212',
 'ES201303',
 'ES201306',
 'ES201309',
 'ES201312',
 'ES201403',
 'ES201406',
 'ES201409',
 'ES201412',
 'ES201503',
 'ES201506',
 'ES201509',
 'ES201512',
 'ES201603',
 'ES201606',
 'ES201609',
 'ES201612',
 'ES201703',
 'ES201706',
 'ES201709',
 'ES201712',
 'ES201803',
 'ES201806',
 'ES201809',
 'ES201812',
 'ES201903',
 'ES201906',
 'ES201909',
 'ES201912',
 'ES202003',
 'ES202006']

In [37]:
# True: 2x2 subplot grids; False: one figure per group of settle dates.
grid_plot=True
# clist = [c for c in all_contracts if (c[:2]==f'{SYMBOL_TO_RESEARCH}') & (int(c[-2:])>=19)]
# contracts of the researched root whose 2-digit year is 19 or later
clist = [
    sym for sym in df_iv_final.symbol.unique()
    if (sym[:2]==f'{SYMBOL_TO_RESEARCH}') & (int(sym[-2:])>=19)
]
for c in clist:
    dft = df_iv_final[df_iv_final.symbol==c]
    if len(dft)<=0:
        print(f'no data for symbol {c}')
    elif grid_plot:
        graph_skew_subplots(dft)
    else:
        rls = graph_skew(dft,do_plot=True)

### Create charts of Historical Vol Skew vs Atm Vol and Cash Price

In [23]:
# Rebinds the global pga used by the queries below.
# NOTE(review): presumably 'local' selects a local database configuration — confirm in db_info.
pga = db_info.get_db_info('local')

  sec_db


In [24]:
# Load the underlying rows stored under the symbol <SYMBOL_TO_RESEARCH>Z99
# (the plotting helpers below read its `close` column as the "close"/cash series),
# print the row count and save a csv copy next to the other outputs.
cash_sql = f"select * from sec_schema.underlying_table where symbol='{SYMBOL_TO_RESEARCH}Z99';"
df_cash_futures = pga.get_sql(cash_sql)
print(len(df_cash_futures))
df_cash_futures.to_csv(f'./temp_folder/df_cash_futures_{SYMBOL_TO_RESEARCH}.csv',index=False)

2378


In [25]:
# distinct (settle_date, atm_iv) pairs
df_iv_final.loc[:, ['settle_date','atm_iv']].drop_duplicates()

Unnamed: 0,settle_date,atm_iv
12220,20120502,0.609658
14211,20120612,0.519730
15973,20120629,0.562012
15986,20120716,0.570490
15999,20120719,0.533213
...,...,...
10318,20200228,1.223325
10328,20200302,1.594948
10337,20200303,1.869879
10345,20200304,2.164280


In [26]:
def plot_skew_vs_atm(df_iv_final,df_iv_skew,df_cash_futures,dist_from_zero=.1):
    """
    Plot the skew spread (skew at +dist_from_zero minus skew at -dist_from_zero)
    against atm vol, and against the <SYMBOL_TO_RESEARCH>Z99 close, per settle_date.

    NOTE(review): a later cell defines another function with this same name and a
    2-argument signature, which replaces this one once that cell has run.

    :param df_iv_final: frame with settle_date and atm_iv columns.
    :param df_iv_skew: frame with one column per moneyness level plus settle_date.
    :param df_cash_futures: frame with symbol, settle_date and close columns.
    :param dist_from_zero: moneyness distance (e.g. .1 for 10%) used for the spread.
    :raises KeyError: when df_iv_skew has no column for +/- dist_from_zero.
    """
    def _skew_column(frame, level):
        # Column labels are floats when the frame was built in memory, but strings
        # ('0.1', '-0.1') after a to_csv / read_csv round trip; accept both.
        if level in frame.columns:
            return level
        for col in frame.columns:
            try:
                if abs(float(col) - level) < 1e-9:
                    return col
            except (TypeError, ValueError):
                continue
        raise KeyError(level)

    # Step 01: create df_skew_2, which holds skew difference between
    #   positive dist_from_zero and negative dist_from_zero, for each settle_date
    df_skew_2 = df_iv_skew.copy()
    df_skew_2.index.name = None
    skew_range_col = 'iv_skew'
    up_col = _skew_column(df_skew_2, dist_from_zero)
    down_col = _skew_column(df_skew_2, -dist_from_zero)
    df_skew_2[skew_range_col] = df_skew_2[up_col] - df_skew_2[down_col]
    df_skew_2.settle_date = df_skew_2.settle_date.astype(int)
    df_skew_2 = df_skew_2[['settle_date',skew_range_col]]

    # Step 02: create atm implied vol table, that also has the cash price for each settle_date
    df_atmv = df_iv_final[['settle_date','atm_iv']].drop_duplicates()
    df_cf = df_cash_futures[df_cash_futures.symbol==f'{SYMBOL_TO_RESEARCH}Z99']
    df_atmv = df_atmv.merge(df_cf[['settle_date','close']],how='inner',on='settle_date')

    # Step 03: merge skew and atm vol/close tables
    df_ivs = df_skew_2.merge(df_atmv,how='inner',on='settle_date')
    df_ivs = df_ivs.sort_values('settle_date')

    # ':g' keeps the percentage readable (.1*100 would otherwise print as 10.000000000000002)
    pct_label = f'{dist_from_zero*100:g}'

    # Step 04: plot skew vs atm_iv
    chart_title = f'{SYMBOL_TO_RESEARCH} skew {pct_label}% up and down vs atm vol'
    df_ivs_skew_vs_atm_iv = df_ivs[['settle_date',skew_range_col,'atm_iv']]
    iplot(plotly_plot(df_ivs_skew_vs_atm_iv,x_column='settle_date',yaxis2_cols=['atm_iv'],
                      y_left_label='iv_skew',y_right_label='atm_iv',plot_title=chart_title))

    # Step 05: plot skew vs close
    chart_title = f'{SYMBOL_TO_RESEARCH} skew {pct_label}% up and down vs close'
    df_ivs_skew_vs_close = df_ivs[['settle_date',skew_range_col,'close']]
    iplot(plotly_plot(df_ivs_skew_vs_close,x_column='settle_date',yaxis2_cols=['close'],
                      y_left_label='iv_skew',y_right_label='close',plot_title=chart_title))

    
def plot_atm_vs_close(df_iv_final,df_cash_futures):
    """
    Plot atm implied vol (left axis) against the <SYMBOL_TO_RESEARCH>Z99 close
    (right axis) per settle_date, and return the plotted table.
    """
    # closes of the Z99 symbol, keyed by settle_date
    cash_symbol = f'{SYMBOL_TO_RESEARCH}Z99'
    cash_rows = df_cash_futures[df_cash_futures.symbol==cash_symbol]
    closes = cash_rows[['settle_date','close']]

    # one atm_iv per settle_date, joined to the close of the same date
    atm_by_date = df_iv_final[['settle_date','atm_iv']].drop_duplicates()
    joined = atm_by_date.merge(closes,how='inner',on='settle_date')
    df_atm_vs_close = joined[['settle_date','atm_iv','close']]

    fig = plotly_plot(df_atm_vs_close,x_column='settle_date',yaxis2_cols=['close'],
                      y_left_label='atm_iv',y_right_label='close',
                      plot_title=f'{SYMBOL_TO_RESEARCH} atm vol vs close')
    iplot(fig)
    return df_atm_vs_close
dfff = plot_atm_vs_close(df_iv_final,df_cash_futures)

# for d in [.05,.1,.2]:
#     plot_skew_vs_atm(df_iv_final,df_iv_skew,df_cash_futures,d)

### Plot all skews vs atm vol on one chart

In [27]:
def plot_skew_vs_atm(df_iv_final,df_iv_skew):
    """
    Plot atm vol (left axis) together with the skew spreads at 5%, 10% and 25%
    from the money (right axis) on one chart, per settle_date.

    NOTE(review): this redefines plot_skew_vs_atm from an earlier cell (which took
    4 arguments); after this cell runs, only this 2-argument version is callable.

    :param df_iv_final: frame with settle_date and atm_iv columns.
    :param df_iv_skew: frame with one column per moneyness level plus settle_date.
    :return: the plotted table (settle_date, atm_iv, iv_skew_0.05, iv_skew_0.10, iv_skew_0.25).
    :raises KeyError: when df_iv_skew lacks a column for one of the +/- levels.
    """
    def _skew_column(frame, level):
        # Column labels are floats when the frame was built in memory, but strings
        # ('0.1', '-0.1') after a to_csv / read_csv round trip; accept both.
        if level in frame.columns:
            return level
        for col in frame.columns:
            try:
                if abs(float(col) - level) < 1e-9:
                    return col
            except (TypeError, ValueError):
                continue
        raise KeyError(level)

    df_skew_2 = df_iv_skew.copy()
    df_skew_2.index.name = None
    df_skew_2.settle_date = df_skew_2.settle_date.astype(int)
    skew_range_cols = []
    for dist_from_zero in [.05,.1,.25]:
        dfz_str = "{0:.2f}".format( dist_from_zero )
        skew_range_col = f'iv_skew_{dfz_str}'
        up_col = _skew_column(df_skew_2, dist_from_zero)
        down_col = _skew_column(df_skew_2, -dist_from_zero)
        # spread = skew above the money minus skew below the money
        df_skew_2[skew_range_col] = df_skew_2[up_col] - df_skew_2[down_col]
        skew_range_cols.append(skew_range_col)

    df_skew_2 = df_skew_2[['settle_date'] + skew_range_cols]
    df_atmv = df_iv_final[['settle_date','atm_iv']].drop_duplicates()

    df_ivs = df_skew_2.merge(df_atmv,how='inner',on='settle_date')
    df_ivs = df_ivs[['settle_date','atm_iv'] + skew_range_cols]
    df_ivs = df_ivs.sort_values('settle_date')

    chart_title = f'{SYMBOL_TO_RESEARCH} skew vs atm vol'
    iplot(plotly_plot(df_ivs,x_column='settle_date',yaxis2_cols=skew_range_cols,
                      y_left_label='atm_iv',y_right_label='iv_skew',plot_title=chart_title))
    return df_ivs

plot_skew_vs_atm(df_iv_final,df_iv_skew)

Unnamed: 0,settle_date,atm_iv,iv_skew_0.05,iv_skew_0.10,iv_skew_0.25
949,20120502,0.609658,-0.0053,-0.0153,
1100,20120612,0.519730,-0.0071,-0.0149,
1249,20120629,0.562012,-0.0099,-0.0196,-0.0390
1250,20120716,0.570490,-0.0094,-0.0183,-0.0486
1251,20120719,0.533213,-0.0132,-0.0270,-0.0732
...,...,...,...,...,...
798,20200228,1.223325,-0.0814,-0.1476,
799,20200302,1.594948,-0.1176,-0.2138,
800,20200303,1.869879,-0.1306,,
801,20200304,2.164280,-0.1595,,


In [28]:
# cash rows with settle_date from 20150915 through 20151115 (both ends included)
in_window = df_cash_futures.settle_date.between(20150915, 20151115)
df_cash_futures[in_window]

Unnamed: 0,symbol,settle_date,contract_num,open,high,low,close,adj_close,volume,open_interest
1264,NGZ99,20150915,120,0.0,2.73,2.73,2.73,2.73,0,0
1265,NGZ99,20150916,120,0.0,2.68,2.68,2.68,2.68,0,0
1266,NGZ99,20150917,120,0.0,2.68,2.68,2.68,2.68,0,0
1267,NGZ99,20150918,120,0.0,2.63,2.63,2.63,2.63,0,0
1268,NGZ99,20150921,120,0.0,2.59,2.59,2.59,2.59,0,0
1269,NGZ99,20150922,120,0.0,2.59,2.59,2.59,2.59,0,0
1270,NGZ99,20150923,120,0.0,2.59,2.59,2.59,2.59,0,0
1271,NGZ99,20150924,120,0.0,2.56,2.56,2.56,2.56,0,0
1272,NGZ99,20150925,120,0.0,2.54,2.54,2.54,2.54,0,0
1273,NGZ99,20150928,120,0.0,2.63,2.63,2.63,2.63,0,0


## END

In [39]:
!pip list|grep cuff


You should consider upgrading via the 'pip install --upgrade pip' command.[0m
