# Download Historical Data

In [1]:
import pfeed as pe

# Show the installed pfeed version so the outputs below can be matched to a release.
pe.__version__

'0.0.2'

In [2]:
# Create one feed object per data source; each feed is configured independently.
# Delta Lake format is enabled when use_deltalake=True
# Bybit feed: polars as the data tool, Ray enabled, Delta Lake enabled.
bybit_feed = pe.BybitFeed(data_tool='polars', use_ray=True, use_deltalake=True)
# Yahoo Finance feed: dask as the data tool, Ray disabled, Delta Lake disabled.
yfinance_feed = pe.YahooFinanceFeed(data_tool='dask', use_ray=False, use_deltalake=False)

## Download Historical Data from Bybit

> If MinIO is not running, start it with `pfeed docker-compose up -d minio`.

In [3]:
# Download tick data for the BTC/USDT perpetual from Bybit and load it into MinIO.
# The result is kept for the preview in the next cell (it is later `.collect()`-ed).
polars_lf = bybit_feed.download(
    product='BTC_USDT_PERP',
    resolution='1tick',
    start_date='2025-01-01',
    # The recorded log output shows data loaded for both 2025-01-01 and 2025-01-02,
    # i.e. end_date is included in the download.
    end_date='2025-01-02',
    to_storage='minio'
)

2025-02-04 18:13:11,171	INFO worker.py:1841 -- Started a local Ray instance.
Running BYBIT dataflows:   0%|          | 0/1 [00:00<?, ?it/s]
2025-02-04T18:13:22+0800.105 | INFO | bybit_data | loaded BYBIT:2025-01-01:CRYPTO:BYBIT:BTC_USDT_PERP:1_TICK data to MINIO | dataflow.py fn:_load ln:149
2025-02-04T18:13:23+0800.385 | INFO | bybit_data | loaded BYBIT:2025-01-02:CRYPTO:BYBIT:BTC_USDT_PERP:1_TICK data to MINIO | dataflow.py fn:_load ln:149
Running BYBIT dataflows: 100%|██████████| 1/1 [00:12<00:00, 12.81s/it]


In [4]:
# Preview a single row. The limit is applied to the lazy query *before*
# collecting, so the full two days of tick data are not materialized in
# memory just to display one row.
polars_lf.head(1).collect()

ts,resolution,product,symbol,side,volume,price,tickDirection,trdMatchID,grossValue,homeNotional,foreignNotional
datetime[ns],str,str,str,i64,f64,f64,str,str,f64,f64,f64
2025-01-01 00:00:00.097400,"""1t""","""BTC_USDT_PERP""","""BTCUSDT""",-1,0.003,93530.0,"""ZeroMinusTick""","""e807b277-6d22-5f60-a520-768350…",28059000000.0,0.003,280.59


## Download Historical Data from Yahoo Finance

You can use `pfeed` to download Yahoo Finance data and also store it, a feature that `yfinance` itself does not provide.

In [5]:
# Download 1-minute TSLA bars from Yahoo Finance and load them into local storage.
# The result is kept for the preview in the next cell (it is later `.compute()`-ed).
dask_df = yfinance_feed.download(
    product='TSLA_USD_STK',  # STK = stock
    # NOTE: minute data is only available for the past 8 days
    resolution='1m',  # 1 minute data
    # 'max' requests the longest available history; in the recorded run this
    # resolved to 2025-01-27 through 2025-02-03 (see the log output below).
    rollback_period='max',
    to_storage='local'
)

Running YAHOO_FINANCE dataflows:   0%|          | 0/1 [00:00<?, ?it/s]
2025-02-04T18:13:27+0800.578 | INFO | yahoo_finance_data | loaded YAHOO_FINANCE:(from)2025-01-27:(to)2025-02-03:YAHOO_FINANCE:TSLA_USD_STK:1_MINUTE data to LOCAL | dataflow.py fn:_load ln:149
Running YAHOO_FINANCE dataflows: 100%|██████████| 1/1 [00:01<00:00,  1.49s/it]


In [6]:
# Preview a single row. Dask's `head` computes only what it needs for the
# first rows and returns the result directly, instead of computing the
# entire DataFrame and then slicing it.
# NOTE(review): `head` reads from the first partition by default; confirm
# that partition is non-empty for this dataset.
dask_df.head(1)

Unnamed: 0,ts,resolution,product,symbol,open,high,low,close,volume,dividends,splits
0,2025-01-27 14:30:00,1m,TSLA_USD_STK,TSLA,394.532013,397.089996,393.53009,396.513306,6467196.0,0.0,0.0


```{hint}
If Ray appears to be running **sequentially rather than in parallel**, it may be due to **insufficient network bandwidth** for parallel downloads.
```