In [119]:
import requests
import json
import pandas as pd
from datetime import datetime, timedelta
from tqdm.auto import tqdm

# Base URL for historical data
BASE_URL = "https://history.deribit.com/api/v2/public"


# Finding all instruments

In [114]:
def get_instruments(currency, type):
    url = f"{BASE_URL}/get_instruments"
    params = {
        "currency": currency,
        "kind": type  # Options: future, option, future_combo, option_combo
    }
    response = requests.get(url, params=params)
    data = response.json()
    status = response.status_code
    return data, status

In [115]:
# Get all instruments
currencies = ["BTC", "ETH"]
# kinds = ["future", "option", "spot", "future_combo", "option_combo"] spot always return error 400, and we will focus on futures and options
types = ["future", "option"]

for currency in currencies:
    for type in types:
        results, status = get_instruments(currency, type)

        print(f"{currency} {type} request returns status {status}")
        if 'result' in results:
            print(f"{len(results['result'])} instruments")


BTC future request returns status 200
8 instruments
BTC option request returns status 200
777 instruments
ETH future request returns status 200
8 instruments
ETH option request returns status 200
844 instruments


In [150]:
results, status = get_instruments('BTC', 'future')

earliest_expiry = None
latest_expiry = None
print(f"{len(results['result'])} results")
for instrument in results['result']:
    print(instrument['instrument_name'])
    print("test")
    expiry = instrument['expiration_timestamp']
    if earliest_expiry is None or expiry < earliest_expiry:
        earliest_expiry = expiry
    if latest_expiry is None or expiry > latest_expiry:
        latest_expiry = expiry
# Convert timestampts to dates and print
print(f"Earliest expiry: {datetime.fromtimestamp(earliest_expiry / 1000)}")
print(f"Latest expiry: {datetime.fromtimestamp(latest_expiry / 1000)}")


8 results
BTC-24OCT25
test
BTC-31OCT25
test
BTC-28NOV25
test
BTC-26DEC25
test
BTC-27MAR26
test
BTC-26JUN26
test
BTC-25SEP26
test
BTC-PERPETUAL
test
Earliest expiry: 2025-10-24 09:00:00
Latest expiry: 3000-01-01 08:00:00


Here we see an issue - the get_instruments endpoint does not return all instruments, only instruments which are currently traded, although we have tested and 'get_last_trades_by_instrument_and_time' endpoint does work for historically traded instruments.

There are two methods we could use to remedy this:
1. Test a range of dates and strike prices with get_last_trades_by_instrument_and_time to create a list of instruments which were traded historically
2. Use get_last_trades_by_currency_and_time endpoint to get all trades for BTC/ETH and infer from hat a list of traded instruments

The second method is preferable

In [117]:
def get_trades(currency, type, start_timestamp=None, end_timestamp=None, sorting=None):
    url = f"{BASE_URL}/get_last_trades_by_currency"
    params = {
        "currency": currency,
        "kind": type,
        **({"start_timestamp": start_timestamp} if start_timestamp else {}),
        **({"end_timestamp": end_timestamp} if end_timestamp else {}),
        **({"sorting": sorting} if sorting else {}),
        "count": 10000
    }
    response = requests.get(url, params=params)
    data = response.json()
    status = response.status_code
    return data, status

In [146]:
start_time = int(datetime(2025, 1, 1).timestamp() * 1000)
end_time = int(datetime(2025, 1, 3).timestamp() * 1000)
data, status = get_trades("BTC", "option", start_timestamp=start_time, end_timestamp=end_time, sorting="asc")
print(f"status: {status}")
print(f"{len(data['result']['trades'])} trades")
print(f"date of first trade: {datetime.fromtimestamp(data['result']['trades'][0]['timestamp']/1000)}")
print(f"date of last trade: {datetime.fromtimestamp(data['result']['trades'][-1]['timestamp']/1000)}")

for trade in data['result']['trades'][0:100:10]:
    print(trade)


status: 200
10000 trades
date of first trade: 2025-01-01 00:00:00.017000
date of last trade: 2025-01-02 08:04:18.517000
{'trade_seq': 738, 'trade_id': '338110691', 'timestamp': 1735689600017, 'tick_direction': 0, 'price': 0.004, 'mark_price': 0.00376973, 'iv': 71.19, 'instrument_name': 'BTC-31JAN25-135000-C', 'index_price': 93369.56, 'direction': 'buy', 'contracts': 0.1, 'amount': 0.1}
{'trade_seq': 372, 'trade_id': '338110740', 'timestamp': 1735689642010, 'tick_direction': 1, 'price': 0.012, 'mark_price': 0.01163879, 'iv': 55.44, 'instrument_name': 'BTC-10JAN25-88000-P', 'index_price': 93374.45, 'direction': 'buy', 'contracts': 2.9, 'amount': 2.9}
{'trade_seq': 1026, 'trade_id': '338110819', 'timestamp': 1735689720337, 'tick_direction': 2, 'price': 0.0185, 'mark_price': 0.01922577, 'iv': 50.93, 'instrument_name': 'BTC-3JAN25-94000-P', 'index_price': 93430.09, 'direction': 'sell', 'contracts': 0.1, 'amount': 0.1}
{'trade_seq': 429, 'trade_id': '338110933', 'timestamp': 1735689783404, '

Let's make a function which includes pagination, and returns a df of trades:

In [120]:
def get_trades_df(currency, type, start_timestamp, end_timestamp=None):
    url = f"{BASE_URL}/get_last_trades_by_currency"
    params = {
        "currency": currency,
        "kind": type,
        "start_timestamp": start_timestamp,
        **({"end_timestamp": end_timestamp} if end_timestamp else {}),
        "sorting": "asc",
        "count": 10000  
    }
    response = requests.get(url, params=params)
    fetch_count = 1
    
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None
    
    df = pd.DataFrame(response.json()['result']['trades'])
    
    # Use current time if no end_timestamp provided
    effective_end_timestamp = end_timestamp if end_timestamp else int(datetime.now().timestamp() * 1000)
    time_range = effective_end_timestamp - start_timestamp
    
    # Progress bar based on timestamp
    with tqdm(total=time_range, desc=f"Fetching {currency} {type} trades", unit_scale=True) as pbar:
        pbar.update(0)
        
        while response.json()['result']['has_more']:
            current_timestamp = response.json()['result']['trades'][-1]['timestamp']
            progress = current_timestamp - start_timestamp
            pbar.update(progress - pbar.n)
            
            params['start_timestamp'] = current_timestamp
            response = requests.get(url, params=params)
            fetch_count += 1
            df = pd.concat([df, pd.DataFrame(response.json()['result']['trades'])])
        
        # Complete the progress bar
        pbar.update(time_range - pbar.n)
    
    # Drop duplicates and report statistics
    original_count = len(df)
    df = df.drop_duplicates(subset=['trade_id'])
    duplicates_dropped = original_count - len(df)
    
    print(f"✓ Completed {fetch_count} API fetches")
    print(f"✓ Dropped {duplicates_dropped} duplicate trade(s)")
    
    return df

In [149]:
start_time = int(datetime(2025, 1, 1).timestamp() * 1000)
end_time = int(datetime(2025, 1, 3).timestamp() * 1000)
df = get_trades_df("BTC", "option", start_timestamp=start_time, end_timestamp=end_time)
print(f"{len(df)} trades")
print(f"date of first trade: {datetime.fromtimestamp(df['timestamp'].iloc[0]/1000)}")
print(f"date of last trade: {datetime.fromtimestamp(df['timestamp'].iloc[-1]/1000)}")

print(f"\nUnique instruments ({df['instrument_name'].nunique()}):")
for instrument in sorted(df['instrument_name'].unique()):
    print(f"  - {instrument}")

Fetching BTC option trades:   0%|          | 0.00/173M [00:00<?, ?it/s]

✓ Completed 2 API fetches
✓ Dropped 1 duplicate trade(s)
19844 trades
date of first trade: 2025-01-01 00:00:00.017000
date of last trade: 2025-01-02 23:59:57.072000

Unique instruments (634):
  - BTC-10JAN25-100000-C
  - BTC-10JAN25-100000-P
  - BTC-10JAN25-101000-C
  - BTC-10JAN25-101000-P
  - BTC-10JAN25-102000-C
  - BTC-10JAN25-102000-P
  - BTC-10JAN25-103000-C
  - BTC-10JAN25-103000-P
  - BTC-10JAN25-104000-C
  - BTC-10JAN25-105000-C
  - BTC-10JAN25-105000-P
  - BTC-10JAN25-106000-C
  - BTC-10JAN25-107000-C
  - BTC-10JAN25-108000-C
  - BTC-10JAN25-109000-C
  - BTC-10JAN25-110000-C
  - BTC-10JAN25-111000-C
  - BTC-10JAN25-112000-C
  - BTC-10JAN25-114000-C
  - BTC-10JAN25-115000-C
  - BTC-10JAN25-120000-C
  - BTC-10JAN25-125000-C
  - BTC-10JAN25-125000-P
  - BTC-10JAN25-130000-C
  - BTC-10JAN25-135000-C
  - BTC-10JAN25-140000-C
  - BTC-10JAN25-75000-P
  - BTC-10JAN25-80000-C
  - BTC-10JAN25-80000-P
  - BTC-10JAN25-85000-P
  - BTC-10JAN25-86000-C
  - BTC-10JAN25-86000-P
  - BTC-10JAN2

Now let's mock up some functionality for extracting strike dates and prices:

In [135]:
# Extract date and strike price from instrument names
instrument_info = df['instrument_name'].str.extract(r'([A-Z]+)-(\d+[A-Z]+\d+)-(\d+)-([CP])')
instrument_info.columns = ['currency', 'expiry_date', 'strike', 'option_type']

# Get unique date and strike combinations
unique_combos = instrument_info[['expiry_date', 'strike']].drop_duplicates().sort_values(['expiry_date', 'strike'])

print(f"\nUnique expiry dates: {instrument_info['expiry_date'].nunique()}")
print(f"Date range: {unique_combos['expiry_date'].min()} to {unique_combos['expiry_date'].max()}")
print(f"\nUnique strike prices: {instrument_info['strike'].nunique()}")
print(f"Strike range: {unique_combos['strike'].min()} to {unique_combos['strike'].max()}")

# Display grouped by expiry date
print("\n" + "="*50)
print("Strikes by Expiry Date:")
print("="*50)
for date in sorted(unique_combos['expiry_date'].unique()):
    strikes = unique_combos[unique_combos['expiry_date'] == date]['strike'].astype(int).tolist()
    print(f"\n{date}: {len(strikes)} strikes")
    print(f"  Range: ${min(strikes):,} - ${max(strikes):,}")
    # Optionally show all strikes for that date (commented out to avoid clutter)
    # print(f"  Strikes: {', '.join([f'${s:,}' for s in strikes])}")


Unique expiry dates: 14
Date range: 10JAN25 to 7JAN25

Unique strike prices: 94
Strike range: 100000 to 99500

Strikes by Expiry Date:

10JAN25: 34 strikes
  Range: $75,000 - $130,000

17JAN25: 30 strikes
  Range: $75,000 - $135,000

24JAN25: 27 strikes
  Range: $80,000 - $135,000

26DEC25: 22 strikes
  Range: $30,000 - $400,000

26SEP25: 28 strikes
  Range: $20,000 - $400,000

27JUN25: 34 strikes
  Range: $20,000 - $400,000

28FEB25: 36 strikes
  Range: $60,000 - $200,000

28MAR25: 31 strikes
  Range: $20,000 - $300,000

31JAN25: 69 strikes
  Range: $25,000 - $200,000

3JAN25: 20 strikes
  Range: $90,000 - $118,000

4JAN25: 26 strikes
  Range: $82,000 - $108,000

5JAN25: 25 strikes
  Range: $84,000 - $110,000

6JAN25: 20 strikes
  Range: $86,000 - $112,000

7JAN25: 20 strikes
  Range: $86,000 - $110,000


In [143]:
def get_trades_by_instrument(instrument_name, start_timestamp, end_timestamp):
    url = f"{BASE_URL}/get_last_trades_by_instrument_and_time"
    params = {
        "instrument_name": instrument_name,
        "start_timestamp": start_timestamp,
        "end_timestamp": end_timestamp,
        "sorting": "asc",
        "count": 10000
    }
    response = requests.get(url, params=params)
    fetch_count = 1
    
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None
    
    df = pd.DataFrame(response.json()['result']['trades'])
    
    # Use current time if no end_timestamp provided
    effective_end_timestamp = end_timestamp if end_timestamp else int(datetime.now().timestamp() * 1000)
    time_range = effective_end_timestamp - start_timestamp
    
    # Progress bar based on timestamp
    with tqdm(total=time_range, desc=f"Fetching {instrument_name} trades", unit_scale=True) as pbar:
        pbar.update(0)
        
        while response.json()['result']['has_more']:
            current_timestamp = response.json()['result']['trades'][-1]['timestamp']
            progress = current_timestamp - start_timestamp
            pbar.update(progress - pbar.n)
            
            params['start_timestamp'] = current_timestamp
            response = requests.get(url, params=params)
            fetch_count += 1
            df = pd.concat([df, pd.DataFrame(response.json()['result']['trades'])])
        
        # Complete the progress bar
        pbar.update(time_range - pbar.n)
    
    # Drop duplicates and report statistics
    original_count = len(df)
    df = df.drop_duplicates(subset=['trade_id'])
    duplicates_dropped = original_count - len(df)
    
    print(f"✓ Completed {fetch_count} API fetches")
    print(f"✓ Dropped {duplicates_dropped} duplicate trade(s)")
    
    return df

In [145]:
start_time = int(datetime(2025, 1, 1).timestamp() * 1000)
end_time = int(datetime(2025, 1, 5).timestamp() * 1000)
df = get_trades_by_instrument("BTC-PERPETUAL", start_timestamp=start_time, end_timestamp=end_time)
print(f"{len(df)} trades")
print(f"date of first trade: {datetime.fromtimestamp(df['timestamp'].iloc[0]/1000)}")
print(f"date of last trade: {datetime.fromtimestamp(df['timestamp'].iloc[-1]/1000)}")
print(df.head())
print(df.info())
print(df.describe())

Fetching BTC-PERPETUAL trades:   0%|          | 0.00/346M [00:00<?, ?it/s]

✓ Completed 32 API fetches
✓ Dropped 92 duplicate trade(s)
313825 trades
date of first trade: 2025-01-01 00:00:01.635000
date of last trade: 2025-01-04 23:59:58.072000
   trade_seq   trade_id      timestamp  tick_direction    price  mark_price  \
0  230579801  338110696  1735689601635               2  93426.0    93434.62   
1  230579802  338110697  1735689601635               2  93423.0    93434.62   
2  230579797  338110692  1735689601635               2  93445.5    93434.62   
3  230579798  338110693  1735689601635               2  93444.5    93434.62   
4  230579799  338110694  1735689601635               2  93442.5    93434.62   

  instrument_name  index_price direction  contracts  amount combo_trade_id  \
0   BTC-PERPETUAL     93369.38      sell      351.0  3510.0            NaN   
1   BTC-PERPETUAL     93369.38      sell      170.0  1700.0            NaN   
2   BTC-PERPETUAL     93369.38      sell      132.0  1320.0            NaN   
3   BTC-PERPETUAL     93369.38      sell     