In [31]:
import pandas as pd
import vectorbtpro as vbt

### Acquiring Forex Data from Dukascopy
To acquire historical market data from Dukascopy, I used the Node.js package [`dukascopy-node`](https://github.com/Leo4815162342/dukascopy-node).
<br>These are the commands I used to download `M1` (1-minute) data for the following symbols:<br>
```javascript
npx dukascopy-node -i audnzd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i audnzd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv

npx dukascopy-node -i eurgbp -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i eurgbp -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv

npx dukascopy-node -i gbpjpy -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i gbpjpy -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv

npx dukascopy-node -i usdjpy -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i usdjpy -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv

npx dukascopy-node -i usdcad -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i usdcad -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv

npx dukascopy-node -i eurusd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i eurusd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv

npx dukascopy-node -i audusd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i audusd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv

npx dukascopy-node -i gbpusd -p ask -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
npx dukascopy-node -i gbpusd -p bid -from 2019-01-01 -to 2022-12-31 -t m1 -v true -f csv
```
The free `1m` data provided by Dukascopy has some gaps, so it should be validated against other,
preferably paid, data sources as part of a data-quality audit.

In [32]:
def read_bid_ask_data(ask_file : str, bid_file : str, lowercase_columns = False, set_time_index = False) -> pd.DataFrame:
    """Read Dukascopy bid and ask CSV files and combine them into one averaged (mid) OHLCV dataframe.

    Two CSV layouts are recognised by the name of their time column:

    * ``timestamp`` - epoch milliseconds, as written by the ``dukascopy-node`` package.
    * ``Local time`` - ``dd.mm.YYYY HH:MM:SS.fff GMT+zzzz`` strings, as downloaded from the website.

    Bid and ask rows are paired by position, so both files are expected to cover
    the same bars in the same order.

    Args:
        ask_file: Path of the CSV file holding the ask prices.
        bid_file: Path of the CSV file holding the bid prices.
        lowercase_columns: If True, lowercase all column names of the result.
        set_time_index: If True, parse the ``time`` column and use it as the index.

    Returns:
        Dataframe with the averaged ``Open``, ``High``, ``Low``, ``Close`` and ``Volume``
        columns plus a ``time`` column (or index), restricted to rows whose averaged
        volume is greater than zero.

    Raises:
        KeyError: If ``set_time_index`` is True but neither file has a recognised time column.
    """
    df_ask = pd.read_csv(ask_file)
    df_bid = pd.read_csv(bid_file)

    ## Remember the raw header names; they identify which layout we are reading
    raw_columns = set(df_ask.columns) | set(df_bid.columns)
    df_ask.columns = df_ask.columns.str.title()
    df_bid.columns = df_bid.columns.str.title()

    def _time_source(titled_name : str) -> pd.Series:
        """Return the time column from the ask file, falling back to the bid file."""
        return df_ask[titled_name] if titled_name in df_ask.columns else df_bid[titled_name]

    ## Average OHLCV columns for bid and ask data
    ohlcv_cols = ["Open", "High", "Low", "Close", "Volume"]
    df_avg = (df_bid[ohlcv_cols] + df_ask[ohlcv_cols]) / 2.0

    ## The time column is attached BEFORE the zero-volume filter below, while df_avg
    ## still shares its row index with the source files. Attaching it afterwards would
    ## align the filtered rows against the unfiltered ones and shift the timestamps.

    ## Case when we downloaded Dukascopy historical market data from node package: dukascopy-node
    if "timestamp" in raw_columns:
        df_avg["time"] = pd.to_datetime(_time_source("Timestamp"), unit = "ms")

    ## Case when we downloaded Dukascopy historical market data from website
    if "Local time" in raw_columns:
        ## Strip ms and GMT TZ in time column
        df_avg["time"] = _time_source("Local Time").str.replace(r"\.\d{3} GMT[+-]\d\d\d\d", '', regex = True)

    ## Keep only bars that actually traded
    df_avg = df_avg[df_avg["Volume"] > 0.0].reset_index(drop = True)

    if lowercase_columns:
        df_avg.columns = df_avg.columns.str.lower()

    if set_time_index:
        if "time" not in df_avg.columns:
            raise KeyError("Neither a 'timestamp' nor a 'Local time' column was found; cannot build the time index")
        df_avg["time"] = pd.to_datetime(df_avg["time"], format = '%d.%m.%Y %H:%M:%S')
        df_avg = df_avg.set_index("time")
    return df_avg

In [33]:
### DataFrame Slicing based on nr. of rows on 1m dataframe
def slice_df_by_1m_rows(df : pd.DataFrame, nr_days_to_slice : int):
    """Return the most recent `nr_days_to_slice` days of a 1-minute dataframe.

    The slice is taken by row count (24 * 60 rows per day), not by calendar time,
    so it only spans exactly that many days when the data has one row per minute.

    Args:
        df: Dataframe ordered from oldest to most recent row.
        nr_days_to_slice: Number of days to keep, counted back from the last row.

    Returns:
        The trailing rows of `df` with a fresh 0-based index. If fewer rows exist
        than requested, the whole dataframe is returned.

    Raises:
        ValueError: If `nr_days_to_slice` is negative.
    """
    if nr_days_to_slice < 0:
        raise ValueError("nr_days_to_slice must be non-negative")
    mins_per_day = 24 * 60
    nr_rows_to_slice = int(nr_days_to_slice * mins_per_day)
    # An explicit start position avoids `iloc[-0:]`, which would return every row
    first_row = max(len(df) - nr_rows_to_slice, 0)
    return df.iloc[first_row:].reset_index(drop = True)

In [37]:
## Bid / Ask CSV file names downloaded from Dukascopy, keyed by symbol.
## Every file follows "<symbol>-m1-<side>-2019-01-01-<download end date>.csv".
bid_ask_files = {
    symbol: {
        side: f"{symbol.lower()}-m1-{side.lower()}-2019-01-01-{end_date}.csv"
        for side in ("Bid", "Ask")
    }
    for symbol, end_date in (
        ("GBPUSD", "2023-01-13"),
        ("EURUSD", "2023-01-13"),
        ("AUDUSD", "2023-01-13"),
        ("USDCAD", "2023-01-13"),
        ("USDJPY", "2023-01-13"),
        ("GBPJPY", "2023-01-13"),
        ("EURGBP", "2023-01-16"),
        ("GBPAUD", "2023-01-16"),
    )
}

In [39]:
## Combine each symbol's bid / ask CSVs and store the result in one shared HDF5 file,
## using the symbol name as the key inside that file.
folder_path = "/Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/"
output_file_path = "/Users/dilip.rajkumar/Documents/vbtpro_tuts_private/data/MultiAsset_OHLCV_3Y_m1.h5"
for symbol in bid_ask_files:
    ask_csv_file = f"{folder_path}{bid_ask_files[symbol]['Ask']}"
    bid_csv_file = f"{folder_path}{bid_ask_files[symbol]['Bid']}"
    print(f'\n{symbol}')
    print(f"ASK File PATH: {ask_csv_file} \nBID File PATH: {bid_csv_file}")
    df = read_bid_ask_data(ask_file = ask_csv_file, bid_file = bid_csv_file, set_time_index = True)
    df.to_hdf(output_file_path, key=symbol)


GBPUSD
ASK File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/gbpusd-m1-ask-2019-01-01-2023-01-13.csv 
BID File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/gbpusd-m1-bid-2019-01-01-2023-01-13.csv

EURUSD
ASK File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/eurusd-m1-ask-2019-01-01-2023-01-13.csv 
BID File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/eurusd-m1-bid-2019-01-01-2023-01-13.csv

AUDUSD
ASK File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/audusd-m1-ask-2019-01-01-2023-01-13.csv 
BID File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/audusd-m1-bid-2019-01-01-2023-01-13.csv

USDCAD
ASK File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/usdcad-m1-ask-2019-01-01-2023-01-13.csv 
BID File PATH: /Users/dilip.rajkumar/Documents/Dukascopy_Historical_Data/usdcad-m1-bid-2019-01-01-2023-01-13.csv

USDJPY
ASK File PATH: /Users/dilip.rajkumar/Documents/Dukas

### Acquiring Crypto Data

In [40]:
## Acquire multi-asset 1m crypto data from Binance using vbt Wrapper
## Requests the five USDT pairs listed below on the "1m" timeframe,
## from 2019-01-01 UTC up to 2022-12-01 UTC.
## NOTE(review): presumably this downloads over the network and can take a long time - confirm

data = vbt.BinanceData.fetch(
    ["BTCUSDT", "ETHUSDT", "BNBUSDT", "XRPUSDT", "ADAUSDT"], 
    start="2019-01-01 UTC", 
    end="2022-12-01 UTC",
    timeframe="1m"
    )

## Save acquired data locally for persistence, so it does not have to be fetched again
data.to_hdf("/Users/dilip.rajkumar/Documents/vbtpro_tuts_private/data/Binance_MultiAsset_OHLCV_3Y_m1.h5")