# Download Historical Data

In [1]:
import pfeed as pe

# Bare last expression so the notebook displays the installed pfeed version.
pe.__version__

'0.0.2'

In [3]:
# Create the Bybit feed once; the cells below reuse `bybit_feed` for downloading and reading data.
# NOTE(review): presumably `data_tool` selects the dataframe backend, `use_ray` enables Ray-based
# parallelism and `use_deltalake` stores data as Delta Lake tables — confirm against the pfeed docs.
bybit_feed = pe.BybitFeed(data_tool='polars', use_ray=True, use_deltalake=True)

## Download Historical Data from Bybit

> If MinIO is not running, you can start it with `pfeed docker-compose up -d minio`.

In [4]:
# Download BTC_USDT_PERP data at '1tick' resolution and store it in MinIO.
# NOTE(review): the captured output of this cell shows the write failing
# ("Failed to write data ... to MINIO") — confirm MinIO is reachable and re-run before publishing.
bybit_feed.download(
    product='BTC_USDT_PERP',
    resolution='1tick',
    start_date='2025-01-01',
    end_date='2025-01-02',
    to_storage='minio'
)

2025-02-03 14:44:39,243	INFO worker.py:1841 -- Started a local Ray instance.
Running BYBIT dataflows:   0%|[38;2;191;97;51m          [0m| 0/1 [00:00<?, ?it/s][1;91m2025-02-03T14:44:50+0800.123 | ERROR | bybit_data | Failed to write data (type=<class 'polars.lazyframe.frame.LazyFrame'>) to MINIO
Traceback (most recent call last):
  File "/Users/stephenyau/pfund.ai/pfeed/pfeed/storages/base_storage.py", line 140, in write_data
    self.data_handler.write(data)
  File "/Users/stephenyau/pfund.ai/pfeed/pfeed/data_handlers/market_data_handler.py", line 68, in write
    self._io.write(
  File "/Users/stephenyau/pfund.ai/pfeed/pfeed/io/tabular_io.py", line 34, in write
    dl.write_deltalake(file_path_without_filename, data, mode='overwrite', storage_options=self._storage_options)
  File "/Users/stephenyau/micromamba/envs/pfeed/lib/python3.11/site-packages/deltalake/writer.py", line 298, in write_deltalake
    table, table_uri = try_get_table_and_table_uri(table_or_uri, storage_options)
  

```{note}
1. You can also download data in different ways:
   - `pe.download(data_source='bybit', ...)`
   - `pe.BybitFeed().download(...)`

2. If you do not need the raw data, you can download the resampled data directly by specifying the `dtypes` argument. For example, `dtypes=['minute']` will download 1-minute data.
```

Now you can load the downloaded raw data locally using the Feed object created above:
> If the data has not been downloaded yet, it will be downloaded on the fly automatically.

In [3]:
# Fetch raw BTC_USDT_PERP data and preview the first three rows.
# NOTE(review): this date range (2024-03-01 to 2024-03-02) differs from the range downloaded
# earlier (2025-01-01 to 2025-01-02), and the download used resolution='1tick' rather than
# 'raw' — confirm which values are intended.
raw_df = bybit_feed.get_historical_data(
    product='BTC_USDT_PERP',
    resolution='raw',
    start_date='2024-03-01',
    end_date='2024-03-02',
)
raw_df.head(3)

Unnamed: 0,ts,symbol,side,volume,price,tickDirection,trdMatchID,grossValue,homeNotional,foreignNotional
0,2024-03-01 00:00:00.097599983,BTCUSDT,1,0.003,61184.1,ZeroMinusTick,79ac9a21-0249-5985-b042-906ec7604794,18355230000.0,0.003,183.5523
1,2024-03-01 00:00:00.098299980,BTCUSDT,1,0.078,61184.9,PlusTick,2af4e516-8ff4-5955-bb9c-38aa385b7b44,477242200000.0,0.078,4772.4222
2,2024-03-01 00:00:00.098299980,BTCUSDT,1,0.07,61185.2,PlusTick,e4ce9cf1-c32b-5a1a-a211-1a16ea160d67,428296400000.0,0.07,4282.964


## Resample Data

You can resample the downloaded raw data to other resolutions by specifying the `resolution` argument, e.g. `'1m'` or `'1minute'` for 1-minute data.

In [4]:
# Request the same product and date range as the raw-data cell, but at 1-minute
# resolution, and preview the first three rows.
minute_df = bybit_feed.get_historical_data(
    product='BTC_USDT_PERP',
    resolution='1m',
    start_date='2024-03-01',
    end_date='2024-03-02',
)
minute_df.head(3)

Unnamed: 0,ts,product,resolution,open,high,low,close,volume
0,2024-03-01 00:00:00,BTC_USDT_PERP,1m,61184.1,61244.5,61175.8,61244.5,159.142
1,2024-03-01 00:01:00,BTC_USDT_PERP,1m,61245.3,61276.5,61200.7,61232.2,227.242
2,2024-03-01 00:02:00,BTC_USDT_PERP,1m,61232.2,61249.0,61180.0,61184.2,91.446


```{hint}
If Ray appears to be running **sequentially rather than in parallel**, it may be due to **insufficient network bandwidth** for parallel downloads.
```