In [1]:
import warnings
import pandas as pd
from IPython.display import Markdown, display
from openbb import obb

In [None]:
'''
HDF5 (Hierarchical Data Format): An open-source file format designed for handling large, complex datasets in a hierarchical structure.

Development: Created at the U.S. National Center for Supercomputing Applications.

Structure:

- Uses a directory-like system for data organization.
- Contains two main object types:
  - Datasets: Typed, multidimensional arrays.
  - Groups: Container structures for datasets and other groups.

Python Libraries:

- h5py: Provides high- and low-level access to HDF5 data structures.
- PyTables: A higher-level interface offering advanced indexing and querying capabilities.
- pandas: Uses PyTables to read and write data to HDF5 files.

Advantages:

- Efficiently manages large, hierarchical datasets.
- Well-suited for high-performance, data-intensive applications.

Use Cases:

- Ideal for storing related data in hierarchies, such as:
  - Fundamental stock data.
  - Futures expirations.
  - Options chains.
'''


In [3]:
# Notebook-wide setup. The blanket "ignore" filter hides every warning raised
# by later cells, so remove it temporarily when debugging.
warnings.filterwarnings("ignore")

# Have OpenBB calls hand back DataFrames.
obb.user.preferences.output_type = "dataframe"

# Symbols used by the download cells.
ticker = "SPY"
root = "ES"

# HDF5 files written and re-read further down.
STOCKS_DATA_STORE = "stocks.h5"
FUTURES_DATA_STORE = "futures.h5"

In [4]:
# Daily price history for the ticker from the start of 2021, via yfinance.
spy_equity = obb.equity.price.historical(
    ticker,
    start_date="2021-01-01",
    provider="yfinance",
)

In [5]:
# Options chain for the ticker, sourced from CBOE.
spy_chains = obb.derivatives.options.chains(
    ticker,
    provider="cboe",
)


In [6]:
# Distinct expiration values from the chain, as strings, in order of first
# appearance in the chain.
expiration_strings = spy_chains["expiration"].astype(str)
spy_expirations = expiration_strings.unique().tolist()


In [7]:
# Build the contract symbol from the tenth-from-last expiration: strip the
# dashes, drop the first two characters, then append "C" and "00400000".
# NOTE(review): presumably an OCC-style symbol for a call struck at 400 -
# confirm. Raises IndexError when the chain has fewer than ten expirations.
expiration_digits = spy_expirations[-10].replace("-", "")[2:]
contract_symbol = "".join([ticker, expiration_digits, "C", "00400000"])

spy_historic = obb.equity.price.historical(
    contract_symbol,
    start_date="2021-01-01",
    provider="yfinance",
)

In [8]:
# Each SPY frame is written to its own node under equities/spy.
spy_frames = {
    "equities/spy/stock_prices": spy_equity,
    "equities/spy/options_prices": spy_historic,
    "equities/spy/chains": spy_chains,
}

with pd.HDFStore(STOCKS_DATA_STORE) as store:
    for node_key, frame in spy_frames.items():
        store.put(node_key, frame)

In [9]:
# Store one close-price series per expiry ("2024-12" through "2030-12"),
# each under futures/<root>/<expiry>.
# NOTE(review): the saved output for the 2024-12 node starts at 2000-09-18
# although start_date is 2021-01-01 - confirm the provider honours
# start_date and expiry.
with pd.HDFStore(FUTURES_DATA_STORE) as store:
    for year in range(2024, 2031):
        expiry = f"{year}-12"
        contract_history = obb.derivatives.futures.historical(
            symbol=[root],
            expiry=expiry,
            start_date="2021-01-01",
        )
        # Name the close series after its expiry before writing it out.
        closes = contract_history["close"].rename(expiry)
        store.put(f"futures/{root}/{expiry}", closes)

In [10]:
# Load the three SPY nodes back from the stocks store. Note that spy_chains
# is rebound here to the copy read from disk.
with pd.HDFStore(STOCKS_DATA_STORE) as store:
    spy_prices = store.get("equities/spy/stock_prices")
    spy_options = store.get("equities/spy/options_prices")
    spy_chains = store.get("equities/spy/chains")

In [11]:
# Load the series stored under the 2024-12 expiry for the futures root.
with pd.HDFStore(FUTURES_DATA_STORE) as store:
    es_prices = store.get(f"futures/{root}/2024-12")

In [12]:
# Render the reloaded equity prices, option prices and futures closes in order.
display(spy_prices, spy_options, es_prices)

Unnamed: 0_level_0,open,high,low,close,volume,split_ratio,dividend,capital_gains
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-01-04,375.309998,375.450012,364.820007,368.790009,110210800,0.0,0.0,0.0
2021-01-05,368.100006,372.500000,368.049988,371.329987,66426200,0.0,0.0,0.0
2021-01-06,369.709991,376.980011,369.119995,373.549988,107997700,0.0,0.0,0.0
2021-01-07,376.100006,379.899994,375.910004,379.100006,68766800,0.0,0.0,0.0
2021-01-08,380.589996,381.489990,377.100006,381.260010,71677200,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
2024-09-04,550.200012,554.429993,549.460022,550.950012,47224900,0.0,0.0,0.0
2024-09-05,550.890015,553.799988,547.099976,549.609985,44264300,0.0,0.0,0.0
2024-09-06,549.940002,551.599976,539.440002,540.359985,68493800,0.0,0.0,0.0
2024-09-09,544.650024,547.710022,542.679993,546.409973,40362400,0.0,0.0,0.0


Unnamed: 0_level_0,open,high,low,close,volume,split_ratio,dividend
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-03-25,141.259995,141.259995,141.259995,141.259995,2,0.0,0.0
2024-04-01,142.5,142.5,142.5,142.5,1,0.0,0.0
2024-04-03,141.360001,141.360001,140.990005,140.990005,2,0.0,0.0
2024-04-05,137.0,137.0,137.0,137.0,2,0.0,0.0
2024-04-25,119.790001,124.099998,119.230003,124.099998,9,0.0,0.0
2024-05-15,145.220001,145.220001,145.220001,145.220001,1,0.0,0.0
2024-05-17,145.350006,145.350006,145.350006,145.350006,3,0.0,0.0
2024-05-21,146.800003,146.800003,146.800003,146.800003,1,0.0,0.0
2024-05-23,142.100006,142.520004,142.100006,142.520004,2,0.0,0.0
2024-05-31,140.5,140.5,140.5,140.5,1,0.0,0.0


date
2000-09-18    1467.50
2000-09-19    1478.50
2000-09-20    1469.50
2000-09-21    1469.50
2000-09-22    1468.50
               ...   
2024-09-04    5530.00
2024-09-05    5512.25
2024-09-06    5419.50
2024-09-09    5479.50
2024-09-10    5475.50
Name: 2024-12, Length: 6056, dtype: float64