## Deribit API Request  - For BTC and ETH Options Data
#### ALL Options data 

### Specfic Settlement Period ( Day, Week, Month) 

In [13]:
import pandas as pd
import json
import requests
import re
from datetime import datetime
from tqdm import tqdm
import concurrent.futures
import time 

# Functions
def get_option_name_and_settlement(coin):
    """
    :param coin: crypto-currency coin name ('BTC', 'ETH')
    :return: 2 lists:
                        1. list of traded options for the selected coin;
                        2. list of settlement period for the selected coin.
    """
    r = requests.get("https://test.deribit.com/api/v2/public/get_instruments?currency=" + coin + "&kind=option")
    result = json.loads(r.text)
    # Get option name
    name = pd.json_normalize(result['result'])['instrument_name']
    name = list(name)

    # Get option settlement period
    settlement_period = pd.json_normalize(result['result'])['settlement_period']
    settlement_period = list(settlement_period)

    return name, settlement_period 

def extract_details(instrument_name, coin):
    """
    Extract expiration date, strike price, and option type (call or put) from instrument name.
    Adjusts to include selected coin (e.g., 'BTC' or 'ETH').
    """
    match = re.match(fr"{coin}-(\d+[A-Z]{{3}}\d+)-(\d+)-([CP])", instrument_name)
    if match:
        expiration_date = match.group(1)
        strike_price = match.group(2)
        option_type = 'Call' if match.group(3) == 'C' else 'Put'
        return expiration_date, strike_price, option_type
    return None, None, None

def fetch_option_data(option_name):
    """Fetch the option data for a given option name with a small delay to avoid rate limiting, and select only specific columns."""
    time.sleep(0.1)  # Add a short delay to avoid hitting rate limits
    r = requests.get(f'https://test.deribit.com/api/v2/public/get_order_book?instrument_name={option_name}')
    result = json.loads(r.text)
    
    # Normalize the JSON data and filter for required columns
    df = pd.json_normalize(result['result'])
    selected_columns = ["instrument_name", "mark_price", "underlying_price", "mark_iv", "greeks.vega"]
    return df[selected_columns]

def get_option_data(coin, settlement_per):
    # Get option name and settlement
    coin_name, settlement_period = get_option_name_and_settlement(coin)
    # Filter options that have the specified settlement period
    coin_name_filtered = [coin_name[i] for i in range(len(coin_name)) if settlement_period[i] == settlement_per]
    # Initialize progress bar
    pbar = tqdm(total=len(coin_name_filtered))

    # Fetch data concurrently using ThreadPoolExecutor
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_option = {executor.submit(fetch_option_data, name): name for name in coin_name_filtered}
        coin_df = []
        for future in concurrent.futures.as_completed(future_to_option):
            try:
                data = future.result()
                data['settlement_period'] = settlement_per
                coin_df.append(data)
            except Exception as exc:
                print(f'Error fetching data: {exc}')
            pbar.update(1)

    # Finalize DataFrame
    if len(coin_df) > 0:
        coin_df = pd.concat(coin_df)
    # Extract expiration date, strike price, and option type
    coin_df['Expiration Date'], coin_df['Strike Price'], coin_df['Option Type'] = zip(*coin_df['instrument_name'].apply(lambda x: extract_details(x, coin)))
    # Calculate time to expiration
    today = datetime.today()
    coin_df['Time to Expiration'] = coin_df['Expiration Date'].apply(lambda x: (datetime.strptime(x, '%d%b%y') - today).days / 365 if x else None)
    # Select the final columns
    final_columns = ["instrument_name", "Option Type", 'mark_price', 'underlying_price', 'mark_iv', 'greeks.vega', 'Expiration Date', 'Strike Price', 'Time to Expiration']
    coin_df = coin_df[final_columns]
    coin_df.to_csv('data/data.csv', index=False)
    pbar.close()
    return coin_df


In [14]:
data = get_option_data('BTC','month')

data['Strike Price'] = pd.to_numeric(data['Strike Price'], errors='coerce').astype('float64')

 63%|██████▎   | 402/634 [00:52<00:20, 11.23it/s]

Error fetching data: 'result'


100%|██████████| 634/634 [01:21<00:00,  7.81it/s]


In [19]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 633 entries, 0 to 0
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   instrument_name     633 non-null    object 
 1   Option Type         633 non-null    object 
 2   mark_price          633 non-null    float64
 3   underlying_price    633 non-null    float64
 4   mark_iv             633 non-null    float64
 5   greeks.vega         633 non-null    float64
 6   Expiration Date     633 non-null    object 
 7   Strike Price        633 non-null    float64
 8   Time to Expiration  633 non-null    float64
dtypes: float64(6), object(3)
memory usage: 49.5+ KB


In [20]:
data.isnull().sum()

instrument_name       0
Option Type           0
mark_price            0
underlying_price      0
mark_iv               0
greeks.vega           0
Expiration Date       0
Strike Price          0
Time to Expiration    0
dtype: int64

In [17]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 633 entries, 0 to 0
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   instrument_name     633 non-null    object 
 1   Option Type         633 non-null    object 
 2   mark_price          633 non-null    float64
 3   underlying_price    633 non-null    float64
 4   mark_iv             633 non-null    float64
 5   greeks.vega         633 non-null    float64
 6   Expiration Date     633 non-null    object 
 7   Strike Price        633 non-null    float64
 8   Time to Expiration  633 non-null    float64
dtypes: float64(6), object(3)
memory usage: 49.5+ KB


In [18]:
data.to_csv('data/data.csv', index=False)