# Dukascopy からデータ取得

[dukascopy-node](https://github.com/Leo4815162342/dukascopy-node) のインストールが必要  
```
npm install -g dukascopy-node
```

In [1]:
import numpy as np
import pandas as pd
import datetime
import subprocess
import os
import glob

In [2]:
def execute_command(cmd):
    """Run ``cmd`` through the shell and raise if it fails.

    The command's stdout and stderr are captured; on success they are
    discarded and nothing is returned.

    Args:
        cmd: Command line to execute, given as a single string that is
            interpreted by the shell.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            message contains the exit status, the command itself and the
            captured stderr (or stdout when stderr is empty).
    """
    # NOTE: shell=True hands ``cmd`` to the shell verbatim, so only pass
    # command strings that are built from trusted values.
    result = subprocess.run(cmd, shell=True, text=True, capture_output=True)
    if result.returncode != 0:
        # Some tools report failures on stdout only; fall back to it so the
        # exception never ends up with an empty, undiagnosable message.
        details = result.stderr.strip() or result.stdout.strip()
        raise RuntimeError(
            f"Command failed with exit code {result.returncode}: {cmd}\n{details}"
        )

In [14]:
# Currency pairs to process; each entry is passed to dukascopy-node via `-i`.
symbols = [
    "usdjpy",
    "eurusd",
]
# Price sides handled for every pair (passed to dukascopy-node via `-p`).
price_types = [
    "bid",
    "ask",
]

# Directory that holds the downloaded CSV files.
DATA_DIRECTORY = "./raw"

In [4]:
# Make sure the download directory exists; exist_ok=True keeps this from
# failing when the directory is already there.
os.makedirs(DATA_DIRECTORY, exist_ok=True)

## 既存の最新ファイルを削除

最新ファイルは不完全なデータで作成された可能性が高いため、削除して作り直す。

In [28]:
# For every symbol / price side, remove the most recent CSV so that it gets
# downloaded again (it may have been written from incomplete data).
for symbol in symbols:
    for price_type in price_types:
        pattern = f"{DATA_DIRECTORY}/{symbol}-m1-{price_type}-*.csv"
        # Paths differ only in their trailing YYYY-MM-DD dates, so plain
        # string ordering puts the newest file last.
        data_file_paths = sorted(glob.glob(pattern))
        if not data_file_paths:
            continue
        latest_file_path = data_file_paths[-1]
        print(f"Delete {latest_file_path}")
        os.remove(latest_file_path)

Delete ./raw/usdjpy-m1-bid-2022-07-01-2022-07-02.csv
Delete ./raw/usdjpy-m1-ask-2022-07-01-2022-07-02.csv
Delete ./raw/eurusd-m1-bid-2022-07-01-2022-07-02.csv
Delete ./raw/eurusd-m1-ask-2022-07-01-2022-07-02.csv


## データをダウンロード

In [29]:
# Earliest date-time for which Dukascopy provides data, per symbol.
# Values are ISO 8601 strings with an explicit UTC offset; the download loop
# parses them with datetime.datetime.fromisoformat and never requests data
# older than this.
forex_earliest_datetime = {
    "usdjpy": "2003-05-04T21:00:00+00:00",
    "eurusd": "2003-05-04T21:00:00+00:00",
}

In [30]:
# Download m1 candles for every symbol, one calendar month per request,
# skipping any month whose CSV file is already present.
for symbol in symbols:
    earliest_datetime = forex_earliest_datetime[symbol]
    data_collect_start = datetime.datetime.fromisoformat(earliest_datetime)
    end = datetime.datetime.now(tz=datetime.timezone.utc)

    # Fetch month by month, starting from the newest data and going back.
    while True:
        # Midnight (UTC) on the first day of the month that contains `end`.
        month_start = datetime.datetime(end.year, end.month, 1, tzinfo=datetime.timezone.utc)
        # Never request anything older than the first available data point.
        if month_start >= data_collect_start:
            start = month_start
        else:
            start = data_collect_start

        start_date = f"{start:%Y-%m-%d}"
        end_date = f"{end:%Y-%m-%d}"
        print(f"{symbol}: {start_date} to {end_date}")

        for price_type in price_types:
            file_name = f"{symbol}-m1-{price_type}-{start_date}-{end_date}.csv"
            file_path = os.path.join(DATA_DIRECTORY, file_name)
            if os.path.exists(file_path):
                # This range was already downloaded by an earlier run.
                continue

            command = " ".join([
                "npx dukascopy-node",
                f"-i {symbol}",
                f"-from {start.isoformat()}",
                f"-to {end.isoformat()}",
                "-t m1",
                "-f csv",
                f"-p {price_type}",
                f"-dir {DATA_DIRECTORY}",
            ])
            execute_command(command)

        # The oldest month has been handled once `start` was clamped to
        # (or coincides with) the first available data point.
        if start == data_collect_start:
            break

        # Next iteration covers the previous month, ending one second
        # before the start of the month just processed.
        end = start - datetime.timedelta(seconds=1)

usdjpy: 2022-08-01 to 2022-08-12
usdjpy: 2022-07-01 to 2022-07-31
usdjpy: 2022-06-01 to 2022-06-30
usdjpy: 2022-05-01 to 2022-05-31
usdjpy: 2022-04-01 to 2022-04-30
usdjpy: 2022-03-01 to 2022-03-31
usdjpy: 2022-02-01 to 2022-02-28
usdjpy: 2022-01-01 to 2022-01-31
usdjpy: 2021-12-01 to 2021-12-31
usdjpy: 2021-11-01 to 2021-11-30
usdjpy: 2021-10-01 to 2021-10-31
usdjpy: 2021-09-01 to 2021-09-30
usdjpy: 2021-08-01 to 2021-08-31
usdjpy: 2021-07-01 to 2021-07-31
usdjpy: 2021-06-01 to 2021-06-30
usdjpy: 2021-05-01 to 2021-05-31
usdjpy: 2021-04-01 to 2021-04-30
usdjpy: 2021-03-01 to 2021-03-31
usdjpy: 2021-02-01 to 2021-02-28
usdjpy: 2021-01-01 to 2021-01-31
usdjpy: 2020-12-01 to 2020-12-31
usdjpy: 2020-11-01 to 2020-11-30
usdjpy: 2020-10-01 to 2020-10-31
usdjpy: 2020-09-01 to 2020-09-30
usdjpy: 2020-08-01 to 2020-08-31
usdjpy: 2020-07-01 to 2020-07-31
usdjpy: 2020-06-01 to 2020-06-30
usdjpy: 2020-05-01 to 2020-05-31
usdjpy: 2020-04-01 to 2020-04-30
usdjpy: 2020-03-01 to 2020-03-31
usdjpy: 20