In [2]:

import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format ='retina'

import warnings
# Ignore every FutureWarning raised from this point on in the session.
warnings.simplefilter(action='ignore', category=FutureWarning)

## Import data

In [8]:
# Read the BTC and ETH options CSVs (comma-separated, relative to the notebook).
# Files are read in order: BTC first, then ETH.
btc_data, eth_data = (
    pd.read_csv(csv_path, sep=",")
    for csv_path in ('data/btc_data.csv', 'data/eth_data.csv')
)

* best_bid_amount, best_ask_amount: The order size (quantity) available at the best bid price and at the best ask price, i.e. how much can be traded at the top of the order book. These are sizes, not prices.

* bid_iv, ask_iv: The implied volatility (IV) on the bid and ask side. IV is a measure of the market's expectation of the future volatility of the option's underlying asset.

* underlying_index: The identifier of the underlying instrument the option is priced against (e.g. `BTC-2FEB24` or `SYN.ETH-28JAN24`); it encodes the underlying asset and the expiry date.

* underlying_price: The current price of the underlying (Bitcoin for the BTC data, Ether for the ETH data).

* best_bid_price, best_ask_price: The best bid and ask prices available for the option.

* mark_iv: The implied volatility that corresponds to the option's mark price. It is not simply an average of bid_iv and ask_iv: rows where both bid_iv and ask_iv are 0 still carry a non-zero mark_iv.

* interest_rate: The interest rate, which can affect the option's pricing model.

* greeks: Metrics that represent the sensitivity of the option's price to various factors: delta (change in option price per unit change in the underlying asset's price), gamma (rate of change of delta), vega (sensitivity to implied volatility), theta (time decay), and rho (sensitivity to the interest rate).

* stats: Various statistics related to the option, including volume (in USD and unit), price change, and the low/high price within a certain period.

* settlement_period: The settlement cycle of the option series, e.g. `day` or `week`.

### EDA - BTC

In [9]:
# Preview the first rows of the BTC options frame.
btc_data.head()

Unnamed: 0,best_bid_amount,best_ask_amount,bid_iv,ask_iv,underlying_index,underlying_price,mark_iv,interest_rate,best_bid_price,best_ask_price,...,greeks.vega,greeks.gamma,greeks.delta,stats.volume_usd,stats.volume,stats.price_change,stats.low,stats.high,last_trade_date,settlement_period
0,0.2,0.0,0.0,0.0,BTC-2FEB24,43091.53,100.0,0.0,0.4065,0.0,...,0.0,0.0,1.0,0.0,0.0,,,,,week
1,0.0,0.0,0.0,0.0,BTC-2FEB24,43091.53,100.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,,,,,week
2,1.0,1.0,0.0,499.41,BTC-2FEB24,43091.53,100.0,0.0,0.0001,0.305,...,0.0,0.0,1.0,60021.03,5.0,0.0,0.2848,0.2848,,week
3,0.0,12.1,0.0,367.7,BTC-2FEB24,43091.85,100.0,0.0,0.0,0.0001,...,0.0,0.0,0.0,0.0,0.0,,,,,week
4,1.0,0.1,0.0,0.0,BTC-2FEB24,43091.85,100.0,0.0,0.0001,0.178,...,0.0,0.0,1.0,186875.21,25.0,0.0,0.1764,0.1764,,week


In [11]:
# Print each column's dtype and non-null count for the BTC frame.
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 674 entries, 0 to 673
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   best_bid_amount     674 non-null    float64
 1   best_ask_amount     674 non-null    float64
 2   bid_iv              674 non-null    float64
 3   ask_iv              674 non-null    float64
 4   underlying_index    674 non-null    object 
 5   underlying_price    674 non-null    float64
 6   mark_iv             674 non-null    float64
 7   interest_rate       674 non-null    float64
 8   best_bid_price      674 non-null    float64
 9   best_ask_price      674 non-null    float64
 10  open_interest       674 non-null    float64
 11  max_price           674 non-null    float64
 12  min_price           674 non-null    float64
 13  last_price          546 non-null    float64
 14  asks                674 non-null    object 
 15  bids                674 non-null    object 
 16  settleme

In [10]:
# Count missing values per column in the BTC frame.
btc_data.isna().sum()

best_bid_amount         0
best_ask_amount         0
bid_iv                  0
ask_iv                  0
underlying_index        0
underlying_price        0
mark_iv                 0
interest_rate           0
best_bid_price          0
best_ask_price          0
open_interest           0
max_price               0
min_price               0
last_price            128
asks                    0
bids                    0
settlement_price       88
instrument_name         0
mark_price              0
index_price             0
change_id               0
timestamp               0
greeks.rho              0
greeks.theta            0
greeks.vega             0
greeks.gamma            0
greeks.delta            0
stats.volume_usd        0
stats.volume            0
stats.price_change    204
stats.low             204
stats.high            204
last_trade_date       674
settlement_period       0
dtype: int64

In [7]:
# Columns summarised in this cell: quoted prices, implied volatilities and the Greeks.
key_metrics = ['best_bid_price', 'best_ask_price', 'bid_iv', 'ask_iv', 'mark_iv',
               'greeks.theta', 'greeks.vega', 'greeks.gamma', 'greeks.delta']

# NOTE: btc_data_key is an alias of btc_data (the same object, not a copy).
# The name is kept in case other cells refer to it.
btc_data_key = btc_data

# Select the key columns once so describe() and the quartiles are guaranteed
# to be computed from exactly the same data.
btc_key_metrics = btc_data_key[key_metrics]

descriptive_stats = btc_key_metrics.describe()

# Interquartile range (Q3 - Q1) per metric, used to identify potential outliers.
quartiles = btc_key_metrics.quantile([0.25, 0.75])
Q1 = quartiles.loc[0.25]
Q3 = quartiles.loc[0.75]
IQR = Q3 - Q1

# NOTE(review): the saved output of this cell reports count=638, while
# btc_data.info() reports 674 rows. Re-run with Restart & Run All to refresh it.
descriptive_stats, IQR


(       best_bid_price  best_ask_price      bid_iv      ask_iv     mark_iv  \
 count      638.000000      638.000000  638.000000  638.000000  638.000000   
 mean         0.073421        0.151175   25.336473  102.006677   68.733135   
 std          0.182739        0.257666   37.512781  115.036749   19.864182   
 min          0.000000        0.000000    0.000000    0.000000   20.540000   
 25%          0.000000        0.001000    0.000000   49.355000   54.962500   
 50%          0.000100        0.050000    0.000000   66.760000   62.710000   
 75%          0.050875        0.176750   51.632500  134.367500   82.500000   
 max          1.550000        1.434000  583.450000  999.000000  100.000000   
 
        greeks.theta  greeks.vega  greeks.gamma  greeks.delta  
 count    638.000000   638.000000    638.000000    638.000000  
 mean     -35.413328    42.516739      0.000053     -0.007673  
 std       46.174621    45.257127      0.000086      0.608578  
 min     -311.328770     0.000000      0

In [None]:
# Preview the first rows of the ETH options frame.
eth_data.head()

Unnamed: 0,best_bid_amount,best_ask_amount,bid_iv,ask_iv,underlying_index,underlying_price,mark_iv,interest_rate,best_bid_price,best_ask_price,...,greeks.theta,greeks.vega,greeks.gamma,greeks.delta,stats.volume_usd,stats.volume,stats.price_change,stats.low,stats.high,settlement_period
0,0.0,0.0,0.0,0.0,SYN.ETH-28JAN24,2276.8832,222.73,0.0,0.0,0.0,...,-2.21171,0.01986,0.00025,0.98721,0.0,0.0,,,,day
1,0.0,0.0,0.0,0.0,SYN.ETH-28JAN24,2276.9332,222.73,0.0,0.0,0.0,...,-0.60758,0.01984,0.00025,-0.01278,0.0,0.0,,,,day
2,0.0,0.0,0.0,0.0,SYN.ETH-28JAN24,2276.9332,222.73,0.0,0.0,0.0,...,-5.16421,0.04637,0.00058,0.96511,0.0,0.0,,,,day
3,0.0,0.0,0.0,0.0,SYN.ETH-28JAN24,2276.9332,222.73,0.0,0.0,0.0,...,-1.8895,0.04637,0.00058,-0.03489,45.27,10.0,0.0,0.002,0.002,day
4,0.0,0.0,0.0,0.0,SYN.ETH-28JAN24,2276.9332,222.73,0.0,0.0,0.0,...,-9.97799,0.0896,0.00111,0.91982,0.0,0.0,,,,day


In [None]:
# Print each column's dtype and non-null count for the ETH frame.
eth_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 606 entries, 0 to 605
Data columns (total 33 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   best_bid_amount     606 non-null    float64
 1   best_ask_amount     606 non-null    float64
 2   bid_iv              606 non-null    float64
 3   ask_iv              606 non-null    float64
 4   underlying_index    606 non-null    object 
 5   underlying_price    606 non-null    float64
 6   mark_iv             606 non-null    float64
 7   interest_rate       606 non-null    float64
 8   best_bid_price      606 non-null    float64
 9   best_ask_price      606 non-null    float64
 10  mark_price          606 non-null    float64
 11  open_interest       606 non-null    float64
 12  max_price           606 non-null    float64
 13  min_price           606 non-null    float64
 14  settlement_price    582 non-null    float64
 15  last_price          373 non-null    float64
 16  asks    