In [13]:
#import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error



### deribit API Request

In [14]:
""" Download CC-Option Data from Deribit via public API """

"""
Matteo Bottacini -- matteo.bottacini@usi.ch
"""

# import modules
import json
import requests
import pandas as pd
from tqdm import tqdm
import sqlite3
import datetime


# functions
def get_option_name_and_settlement(coin):
    """
    :param coin: crypto-currency coin name ('BTC', 'ETH')
    :return: 2 lists:
                        1.  list of traded options for the selected coin;
                        2.  list of settlement period for the selected coin.
    """

    # requests public API
    r = requests.get("https://test.deribit.com/api/v2/public/get_instruments?currency=" + coin + "&kind=option")
    result = json.loads(r.text)

    # get option name
    name = pd.json_normalize(result['result'])['instrument_name']
    name = list(name)

    # get option settlement period
    settlement_period = pd.json_normalize(result['result'])['settlement_period']
    settlement_period = list(settlement_period)

    return name, settlement_period


def get_option_data(coin):
    """
    :param coin: crypto-currency coin name ('BTC', 'ETH')
    :return: pandas data frame with all option data for a given coin
    """

    # get option name and settlement
    coin_name = get_option_name_and_settlement(coin)[0]
    settlement_period = get_option_name_and_settlement(coin)[1]

    # initialize data frame
    coin_df = []

    # initialize progress bar
    pbar = tqdm(total=len(coin_name))

    # loop to download data for each Option Name
    for i in range(len(coin_name)):
        # download option data -- requests and convert json to pandas
        r = requests.get('https://test.deribit.com/api/v2/public/get_order_book?instrument_name=' + coin_name[i])
        result = json.loads(r.text)
        df = pd.json_normalize(result['result'])

        # add settlement period
        df['settlement_period'] = settlement_period[i]

        # append data to data frame
        coin_df.append(df)

        # update progress bar
        pbar.update(1)

    # finalize data frame
    coin_df = pd.concat(coin_df)

    # remove useless columns from coin_df
    columns = ['state', 'estimated_delivery_price']
    coin_df.drop(columns, inplace=True, axis=1)

    # close the progress bar
    pbar.close()

    return coin_df




In [16]:
# print data and time for log
print('Date and time: ' +  datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S") + ' , format: dd/mm/yyyy hh:mm:ss')

# download data -- BTC and ETH Options
btc_data = get_option_data('BTC')
eth_data = get_option_data('ETH')

# The data is now stored in Pandas DataFrames 'btc_data' and 'eth_data'
print('BTC data collected')
print(btc_data.head()) # Display first few rows of BTC data
print('ETH data collected')
print(eth_data.head()) # Display first few rows of ETH data

Date and time: 01/02/2024 16:18:18 , format: dd/mm/yyyy hh:mm:ss


  coin_df = pd.concat(coin_df)
100%|██████████| 674/674 [03:30<00:00,  3.20it/s]
  coin_df = pd.concat(coin_df)
100%|██████████| 616/616 [03:10<00:00,  3.23it/s]

BTC data collected
   best_bid_amount  best_ask_amount  bid_iv  ask_iv underlying_index  \
0              0.2              0.0     0.0    0.00       BTC-2FEB24   
0              0.0              0.0     0.0    0.00       BTC-2FEB24   
0              1.0              1.0     0.0  507.30       BTC-2FEB24   
0              0.0             11.1     0.0  377.41       BTC-2FEB24   
0              1.0             10.0     0.0  370.25       BTC-2FEB24   

   underlying_price  mark_iv  interest_rate  best_bid_price  best_ask_price  \
0          43098.65    100.0            0.0          0.4065          0.0000   
0          43096.35    100.0            0.0          0.0000          0.0000   
0          43096.35    100.0            0.0          0.0001          0.3050   
0          43096.35    100.0            0.0          0.0000          0.0001   
0          43095.16    100.0            0.0          0.0001          0.1905   

   ...  greeks.theta  greeks.vega  greeks.gamma  greeks.delta  \
0  ...  




In [None]:
btc_data.to_csv('data/btc_data.csv', index=False)
eth_data.to_csv('data/eth_data.csv', index=False)

In [None]:
btc_data.head()

Unnamed: 0,best_bid_amount,best_ask_amount,bid_iv,ask_iv,underlying_index,underlying_price,mark_iv,interest_rate,best_bid_price,best_ask_price,...,greeks.vega,greeks.gamma,greeks.delta,stats.volume_usd,stats.volume,stats.price_change,stats.low,stats.high,last_trade_date,settlement_period
0,0.2,0.0,0.0,0.0,BTC-2FEB24,43091.53,100.0,0.0,0.4065,0.0,...,0.0,0.0,1.0,0.0,0.0,,,,,week
0,0.0,0.0,0.0,0.0,BTC-2FEB24,43091.53,100.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,,,,,week
0,1.0,1.0,0.0,499.41,BTC-2FEB24,43091.53,100.0,0.0,0.0001,0.305,...,0.0,0.0,1.0,60021.03,5.0,0.0,0.2848,0.2848,,week
0,0.0,12.1,0.0,367.7,BTC-2FEB24,43091.85,100.0,0.0,0.0,0.0001,...,0.0,0.0,0.0,0.0,0.0,,,,,week
0,1.0,0.1,0.0,0.0,BTC-2FEB24,43091.85,100.0,0.0,0.0001,0.178,...,0.0,0.0,1.0,186875.21,25.0,0.0,0.1764,0.1764,,week


In [None]:
btc_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 674 entries, 0 to 0
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   best_bid_amount     674 non-null    float64
 1   best_ask_amount     674 non-null    float64
 2   bid_iv              674 non-null    float64
 3   ask_iv              674 non-null    float64
 4   underlying_index    674 non-null    object 
 5   underlying_price    674 non-null    float64
 6   mark_iv             674 non-null    float64
 7   interest_rate       674 non-null    float64
 8   best_bid_price      674 non-null    float64
 9   best_ask_price      674 non-null    float64
 10  open_interest       674 non-null    float64
 11  max_price           674 non-null    float64
 12  min_price           674 non-null    float64
 13  last_price          546 non-null    float64
 14  asks                674 non-null    object 
 15  bids                674 non-null    object 
 16  settlement_pric

In [None]:
eth_data.head()

Unnamed: 0,best_bid_amount,best_ask_amount,bid_iv,ask_iv,underlying_index,underlying_price,mark_iv,interest_rate,best_bid_price,best_ask_price,...,greeks.vega,greeks.gamma,greeks.delta,stats.volume_usd,stats.volume,stats.price_change,stats.low,stats.high,last_trade_date,settlement_period
0,19.0,18.0,0.0,511.52,ETH-2FEB24,2301.04,100.0,0.0,0.0001,0.306,...,0.0,0.0,1.0,0.0,0.0,,,,,week
0,0.0,120.0,0.0,397.78,ETH-2FEB24,2301.04,100.0,0.0,0.0,0.0002,...,0.0,0.0,0.0,0.0,0.0,,,,,week
0,0.0,0.0,0.0,0.0,ETH-2FEB24,2301.04,100.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,,,,,week
0,0.0,0.0,0.0,0.0,ETH-2FEB24,2301.04,100.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,,,,,week
0,17.0,16.0,0.0,502.8,ETH-2FEB24,2301.01,100.0,0.0,0.0001,0.224,...,0.0,0.0,1.0,0.0,0.0,,,,,week


In [None]:
eth_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 616 entries, 0 to 0
Data columns (total 34 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   best_bid_amount     616 non-null    float64
 1   best_ask_amount     616 non-null    float64
 2   bid_iv              616 non-null    float64
 3   ask_iv              616 non-null    float64
 4   underlying_index    616 non-null    object 
 5   underlying_price    616 non-null    float64
 6   mark_iv             616 non-null    float64
 7   interest_rate       616 non-null    float64
 8   best_bid_price      616 non-null    float64
 9   best_ask_price      616 non-null    float64
 10  open_interest       616 non-null    float64
 11  max_price           616 non-null    float64
 12  min_price           616 non-null    float64
 13  last_price          466 non-null    float64
 14  asks                616 non-null    object 
 15  bids                616 non-null    object 
 16  settlement_pric