In [None]:
import json
import requests
import pandas as pd
from datetime import datetime as dt
from datetime import date, timedelta

# Define helper functions for converting between datetimes and timestamps
def datetime_to_timestamp(datetime_obj): 
    """Convert a datetime to an integer Unix timestamp in milliseconds.

    Naive datetimes are interpreted in the local timezone (the behaviour of
    ``datetime.timestamp``); aware datetimes use their own UTC offset. Any
    sub-millisecond part of ``datetime_obj`` is dropped.
    """
    # Multiplying the float timestamp by 1000 and truncating can land one
    # millisecond low when the product is not exactly representable. The
    # timestamp of a whole-second datetime is an exact float, so the result is
    # assembled from integers instead.
    whole_seconds = int(dt.timestamp(datetime_obj.replace(microsecond=0)))
    return whole_seconds * 1000 + datetime_obj.microsecond // 1000

def timestamp_to_datetime(timestamp, tz=None):
    """Convert a Unix timestamp in milliseconds to a datetime.

    Args:
        timestamp: Milliseconds since the Unix epoch.
        tz: Optional ``tzinfo``. When omitted the result is a naive datetime
            expressed in the machine's local timezone (the original
            behaviour). Pass e.g. ``timezone.utc`` to get an aware datetime
            that does not depend on where the code runs.

    Returns:
        The corresponding ``datetime``.
    """
    return dt.fromtimestamp(timestamp / 1000, tz)

def date_to_timestamp(date_obj): 
    """Return the millisecond timestamp for 00:00 on ``date_obj``.

    The date is combined with the earliest time of day and the resulting
    datetime is converted by ``datetime_to_timestamp``.
    """
    start_of_day = dt.min.time()
    midnight = dt.combine(date_obj, start_of_day)
    return datetime_to_timestamp(midnight)

# Initialize a list to hold option trade data
option_list = []

# Define query parameters for the API endpoint
params = {
    "currency": "ETH", 
    "kind": "option",
    "count": 10000,
    "include_old": True,
    "start_id": 0
}
url = 'https://history.deribit.com/api/v2/public/get_last_trades_by_currency'

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the download indefinitely.
request_timeout = 30

# Use a session object to make requests to the API endpoint in a loop, paging
# through results until all data has been retrieved. Each page restarts from
# the trade_id of the last trade received on the previous page.
with requests.Session() as session:
    while True:
        response = session.get(url, params=params, timeout=request_timeout)
        # Fail loudly on HTTP errors instead of tripping over a missing key below
        response.raise_for_status()
        response_data = response.json()
        if "result" not in response_data:
            raise RuntimeError(f"Unexpected API response: {response_data}")

        trades = response_data["result"]["trades"]
        option_list.extend(trades)

        # An empty page has no last trade to continue from, so stop even if
        # the server still reports more data.
        if not trades or not response_data["result"]["has_more"]:
            break

        next_start_id = trades[-1]["trade_id"]
        # If the cursor did not move, the next request would return the same
        # page again and the loop would never finish.
        if next_start_id == params["start_id"]:
            break
        params["start_id"] = next_start_id

# Create a pandas dataframe from the option trade data
option_data = pd.DataFrame(option_list)

# Paging restarts from the last trade_id already received, so page boundaries
# can repeat a trade. De-duplicate on trade_id rather than on the derived
# columns, so that distinct trades which happen to share timestamp, price and
# size are not collapsed into one row.
option_data = option_data.drop_duplicates(subset="trade_id")

# Select relevant columns from the dataframe (copy so the column assignments
# below operate on an independent frame)
option_data = option_data[["timestamp", "price", "instrument_name", "index_price", "direction", "amount", "iv"]].copy()

# Parse the instrument_name column to extract additional features.
# The name is split once on "-"; the code reads part 1 as the maturity date,
# part 2 as the strike and part 3 as the option type.
name_parts = option_data["instrument_name"].str.split("-")
option_data["maturity_date"] = pd.to_datetime(name_parts.str[1], format="%d%b%y", errors="coerce")
option_data["strike_price"] = name_parts.str[2].astype(int)
option_data["moneyness"] = (option_data["index_price"] / option_data["strike_price"]).round(3)
option_data["option_type"] = name_parts.str[3].str.lower()
option_data["option_price"] = (option_data["price"] * option_data["index_price"]).round(2)

# Convert the epoch-millisecond timestamps to datetimes. unit="ms" yields naive
# UTC values, so the result no longer depends on the timezone of the machine
# running the script.
option_data["date_time"] = pd.to_datetime(option_data["timestamp"], unit="ms")

# Time to maturity: expressed in 365-day years rounded to 3 decimals, floored
# at 1e-04 years, then scaled to days.
# NOTE(review): maturity_date is midnight of the expiry date; if the contract
# expires later in the day, add that offset here — confirm against the
# exchange's contract specification.
seconds_per_year = 31536000
remaining = option_data["maturity_date"] - option_data["date_time"]
years_to_maturity = (remaining.dt.total_seconds() / seconds_per_year).round(3).clip(lower=1e-04)
option_data["time_to_maturity (days)"] = years_to_maturity * 365

# Convert iv from percent to decimal form
option_data["iv"] = (option_data["iv"] / 100).round(3)

# Rows were removed by the trade_id de-duplication above, so rebuild a
# contiguous index
option_data = option_data.reset_index(drop=True)

# Reorder columns and select final subset of features
option_data = option_data[["date_time", "instrument_name", "option_price", "direction", "option_type", "amount", "maturity_date", "strike_price", "index_price", "iv", "moneyness", "time_to_maturity (days)"]]