In [1]:
import pandas as pd
from datetime import datetime as dt, timedelta
from dateutil import parser
import requests
from constants.definitions import API_KEY, ACCOUNT_ID, OANDA_URL
from infrastructure.exceptions import NullCandles, RequestError, InvalidFunctionArguments

In [2]:
# initialise a session and set headers
session = requests.Session()
session.headers.update({
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
})

url = f"{OANDA_URL}/accounts/{ACCOUNT_ID}/instruments"

In [136]:
# get a list of all tradeable instruments for the account
response = session.get(url)
response.raise_for_status()
instruments = response.json()["instruments"] if "instruments" in response.json() else []

In [142]:
instruments_names = [i["name"] for i in instruments]
instruments_names

['USD_CNH',
 'NZD_CAD',
 'EUR_CHF',
 'SGD_CHF',
 'XAU_AUD',
 'WHEAT_USD',
 'EUR_CZK',
 'XAG_USD',
 'NZD_JPY',
 'CHF_HKD',
 'EUR_GBP',
 'US30_USD',
 'EUR_NOK',
 'JP225Y_JPY',
 'USD_CZK',
 'CN50_USD',
 'GBP_NZD',
 'XAU_USD',
 'EUR_SEK',
 'USD_SGD',
 'GBP_PLN',
 'CH20_CHF',
 'CAD_HKD',
 'XAG_CHF',
 'USD_CHF',
 'XAG_HKD',
 'AUD_HKD',
 'ESPIX_EUR',
 'NZD_CHF',
 'AUD_CHF',
 'GBP_CHF',
 'USD_THB',
 'XAU_JPY',
 'XAU_HKD',
 'GBP_CAD',
 'EUR_HKD',
 'CHF_JPY',
 'GBP_HKD',
 'EUR_NZD',
 'XAG_AUD',
 'WTICO_USD',
 'XAG_NZD',
 'AUD_SGD',
 'EUR_JPY',
 'EUR_TRY',
 'USD_JPY',
 'SGD_JPY',
 'GBP_ZAR',
 'XAG_JPY',
 'ZAR_JPY',
 'NZD_SGD',
 'GBP_JPY',
 'USD_TRY',
 'TRY_JPY',
 'XAU_NZD',
 'HK33_HKD',
 'XAG_GBP',
 'EUR_SGD',
 'EUR_AUD',
 'XAU_SGD',
 'USD_CAD',
 'SPX500_USD',
 'USB10Y_USD',
 'EU50_EUR',
 'CAD_CHF',
 'USD_NOK',
 'USB02Y_USD',
 'XCU_USD',
 'AUD_USD',
 'XAG_EUR',
 'AUD_JPY',
 'EUR_ZAR',
 'CHF_ZAR',
 'USD_HKD',
 'CHINAH_HKD',
 'USD_PLN',
 'GBP_AUD',
 'NAS100_USD',
 'XAU_GBP',
 'XAU_XAG',
 'AU200_AUD

In [6]:
instruments_df = pd.DataFrame(instruments)

In [7]:
instruments_df.to_csv("../data/instruments/tradeable_instruments.csv")

In [8]:
instruments_names = instruments_df["name"].tolist()

In [52]:
CANDLE_COUNT = 4000
INCREMENTS = {
    "M5": 5 * CANDLE_COUNT,
    "M1": CANDLE_COUNT,
    "M30": 30 * CANDLE_COUNT,
    "M15": 15 *  CANDLE_COUNT,
    "H1": 60 * CANDLE_COUNT,
    "H4": 60 * 4 * CANDLE_COUNT,
    "D": 60 * 24 * CANDLE_COUNT,
}
granularity_list = list(INCREMENTS.keys())

In [97]:
def calculate_candles_between(from_date: str, to_date: str, granularity: str) -> int:
    """
    Calculate the number of candles between two dates based on the specified granularity.

    :param from_date: Start date in 'YYYY-MM-DDTHH:MM:SSZ' or 'YYYY-MM-DD' format.
    :param to_date: End date in 'YYYY-MM-DDTHH:MM:SSZ' or 'YYYY-MM-DD' format.
    :param granularity: Granularity (timeframe) as a string, e.g., 'M1', 'M5', 'M30', 'H1', 'H2', 'H4', 'D', 'W'.
    :return: Number of candles between the two dates.
    """

    # Convert string dates to datetime objects
    date_format = "%Y-%m-%dT%H:%M:%SZ" if "T" in from_date else "%Y-%m-%d"
    dt1 = dt.strptime(from_date, date_format)
    dt2 = dt.strptime(to_date, date_format)

    if dt1 >= dt2:
        raise ValueError("date1 must be earlier than date2")

    # Match the granularity and calculate the time delta in minutes
    match granularity:
        case "M1":
            delta_minutes = 1
        case "M5":
            delta_minutes = 5
        case "M30":
            delta_minutes = 30
        case "H1":
            delta_minutes = 60
        case "H2":
            delta_minutes = 120
        case "H4":
            delta_minutes = 240
        case "D":
            delta_minutes = 1440  # 24 hours
        case "W":
            delta_minutes = 10080  # 7 days
        case _:
            raise ValueError(f"Unsupported granularity: {granularity}")

    # Calculate the time difference in minutes
    total_minutes = int((dt2 - dt1).total_seconds() / 60)

    # Calculate the number of candles
    candles_count = total_minutes // delta_minutes

    return candles_count

# Example usage
date1 = "2020-01-01T00:00:00Z"
date2 = "2020-01-02T00:00:00Z"
granularity = "M30"

candles = calculate_candles_between(date1, date2, granularity)
print(f"Number of {granularity} candles between {date1} and {date2}: {candles}")


Number of M30 candles between 2020-01-01T00:00:00Z and 2020-01-02T00:00:00Z: 48


In [104]:
def fetch_candles(pair_name: str, start: str | dt, end: str | dt | None = None, granularity: str = 'H1', count: int = 10, price: str = 'MBA', use_count: bool = False) -> list[dict]:
    """
    Fetches candles for a given pair name. Note that this function assumes that 'requests' library is installed and that
    tradeable instruments are available in its scope
    :param pair_name: pair to request, e.g. 'EUR_USD'
    :param start: date to start fetching candles for, e.g. from '2020-01-01'
    :param end: date to end fetching candles for, e.g. from '2020-01-01' 
    :param granularity: e.g. 'M1', 'M5', 'H1', 'H4'
    :param count: Number of candles to fetch from the date specified in 'start'
    :param price: string representation of price, e.g. 'MBA' or 'B' for bid
    :param use_count: whether to use count or end date in 'end'
    :return returns a dict with candles data as per the specification in the API documentation.
    """
    
    # --------------------- parameter checking ---------------------------------------------------------------------
    if pair_name not in instruments_names:  # NOTE: instruments_names is a list, and it is assumed to be available in the scope
        raise InvalidFunctionArguments(function_name="fetch_candles", arguments=pair_name, message="Pair name must be in 'tradeable_instruments' list")
    if granularity not in INCREMENTS:
        raise InvalidFunctionArguments(function_name="fetch_candles", arguments=granularity, message=f"Granularity must be one of {list(INCREMENTS.keys())}")
    try:
        date_format = "%Y-%m-%dT%H:%M:%SZ"
        
        # if type(start) != str:
        #     print("The error occurs here")
        #     start = dt.strptime(start, date_format)
        #     print("The error occurs here..")
        # else:
        #     start = dt.strftime(parser.parse(start), date_format)
        # if type(end) != str:
        #     end = dt.strptime(end, date_format)
        # else:
        #     end = dt.strftime(parser.parse(end), date_format)

        start = dt.strftime(parser.parse(start), date_format)
        end = dt.strftime(parser.parse(end), date_format) if end is not None else None

    except TypeError:
        raise InvalidFunctionArguments(function_name="fetch_candles", arguments=[start, end], message="Start date must be in 'YYYY-MM-DD' or 'yyyy-mm-ddTh:m:s' format")
    
    if price not in ["MBA", "B", "A", "M"]:
        raise InvalidFunctionArguments(function_name="fetch_candles", arguments=price, message="Price must be one of ['MBA', 'B', 'A', 'M']")
    
    if count not in range(1, CANDLE_COUNT + 1):
        raise InvalidFunctionArguments(function_name="fetch_candles", arguments=count, message="Count must be in range(1, 5000)")
    
    # --------------------- end of parameter checking ----------------------------------------------------------------------
    # if we reach here, all should be good with the provided parameters, proceed with the function operation
    
    request_url = f"{OANDA_URL}/instruments/{pair_name}/candles"
    params = { 'granularity': granularity, 'price': price, 'from': start}
    if use_count and end is None:
        params["count"] = count
    else:
        params["to"] = end
    res = session.get(request_url, params=params)
    # sometimes the data can return with success but will not have 'candles' data in it. We will check for this
    if not res.ok:
        raise RequestError(request_obj=res, message=res.text)
    elif not 'candles' in res.json():
        raise NullCandles(res)
    
    print(params)
    return res.json()['candles']

In [98]:
temp_df = pd.DataFrame(fetch_candles(
    pair_name='GBP_JPY',
    start="2024-08-01T00:00:00Z",
    end="2024-08-02T00:00:00Z",
    granularity='M30',
    price='MBA',
    use_count=False
))

{'granularity': 'M30', 'price': 'MBA', 'from': '2024-08-01T00:00:00Z', 'to': '2024-08-02T00:00:00Z'}


In [99]:
temp_df.shape[0]

48

In [96]:
temp_df

Unnamed: 0,complete,volume,time,bid,mid,ask
0,True,28399,2024-07-31T21:00:00.000000000Z,"{'o': '192.680', 'h': '193.235', 'l': '191.958...","{'o': '192.786', 'h': '193.255', 'l': '191.980...","{'o': '192.892', 'h': '193.275', 'l': '192.003..."
1,True,49253,2024-08-01T01:00:00.000000000Z,"{'o': '192.064', 'h': '192.499', 'l': '190.953...","{'o': '192.083', 'h': '192.517', 'l': '190.972...","{'o': '192.102', 'h': '192.535', 'l': '190.989..."
2,True,72019,2024-08-01T05:00:00.000000000Z,"{'o': '192.335', 'h': '192.730', 'l': '190.971...","{'o': '192.354', 'h': '192.750', 'l': '190.991...","{'o': '192.373', 'h': '192.770', 'l': '191.010..."
3,True,62336,2024-08-01T09:00:00.000000000Z,"{'o': '191.293', 'h': '193.245', 'l': '190.998...","{'o': '191.312', 'h': '193.260', 'l': '191.012...","{'o': '191.331', 'h': '193.278', 'l': '191.024..."
4,True,80031,2024-08-01T13:00:00.000000000Z,"{'o': '193.246', 'h': '193.246', 'l': '190.986...","{'o': '193.260', 'h': '193.260', 'l': '191.003...","{'o': '193.274', 'h': '193.275', 'l': '191.020..."
5,True,40788,2024-08-01T17:00:00.000000000Z,"{'o': '191.165', 'h': '191.222', 'l': '190.154...","{'o': '191.181', 'h': '191.240', 'l': '190.178...","{'o': '191.197', 'h': '191.257', 'l': '190.201..."
6,True,38622,2024-08-01T21:00:00.000000000Z,"{'o': '190.072', 'h': '190.296', 'l': '189.513...","{'o': '190.269', 'h': '190.390', 'l': '189.526...","{'o': '190.466', 'h': '190.615', 'l': '189.536..."


In [11]:
dt.strftime(parser.parse("2022-01-01"), "%Y-%m-%dT%H:%M:%SZ")

'2022-01-01T00:00:00Z'

In [22]:
t = "2020-01-01"

datetime.datetime(2020, 1, 1, 0, 0)

In [44]:
def collect_large_candle_data(pair: str, granularity: str, date_from: str, date_to: str, price: str = 'MBA') -> list[dict]:
    """
    Collects large candle data for a given pair and granularity.
    :param pair: pair to request, e.g. 'EUR_USD'
    :param granularity: granularity of candles to fetch
    :param date_from: date from which to start fetching candles for, e.g. from '2020-01-01'
    :param date_to: date to end fetching candles for, e.g. from '2020-01-01'
    :param price: price representation of price, e.g. 'MBA' or 'B' for bid
    :return: list of candles data as per the specification in the API documentation.
    """
    
    # --------------- checking parameters --------------------
    start_date: dt
    end_date: dt
    if pair not in instruments_names:  # NOTE: instruments_names is a list, and it is assumed to be available in the scope
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=pair, message="Pair must be in 'tradeable_instruments' list")
    if granularity not in INCREMENTS:
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=granularity, message=f"Granularity must be one of {list(INCREMENTS.keys())}")
    try:
        start_date = parser.parse(date_from)
        end_date = parser.parse(date_to)
    except TypeError:
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=[date_from, date_to], message="dates must be in 'YYYY-MM-DD' or 'yyyy-mm-ddTh:m:s' format")
    if price not in ["MBA", "B", "A", "M"]:
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=price, message="Price must be one of ['MBA', 'B', 'A', 'M']")
    
    
    candles: list = []
    # TODO: the data collected maybe very large and my consume the memory, update the function to workaround this
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    time_step = INCREMENTS[granularity]
    to_date = start_date + timedelta(seconds=time_step)
    
    
    
    while to_date < end_date:
        print(f"start_date: {start_date}")
        print(f"to_date: {to_date}")
        try:
            fetched_candles = fetch_candles(
                pair_name=pair,
                start=dt.strftime(start_date, date_format),
                granularity=granularity,
                end=dt.strftime(to_date, date_format),
                price=price,
                use_count=False
            )
            
            candles.extend(fetched_candles) 
            start_date =  start_date + timedelta(minutes=time_step)
            to_date += timedelta(minutes=time_step)
            to_date = min(to_date, end_date)
            print("------------------------------------------")
            print(f"start_date: {start_date}")
            print(f"to_date: {to_date}")
        except InvalidFunctionArguments as e:
            print(f"Invalid arguments: {e}")
        except RequestError as e:
            print(f"Request failed: {e}")
        except NullCandles as e:
            print(f"No candle data returned: {e}")
            
            
    return candles  


In [133]:
def collect_large_candle_data(pair: str, granularity: str, date_from: str, date_to: str, price: str = 'MBA') -> list[dict]:
    """
    Collects large candle data for a given pair and granularity.
    :param pair: pair to request, e.g. 'EUR_USD'
    :param granularity: granularity of candles to fetch
    :param date_from: date from which to start fetching candles for, e.g. from '2020-01-01'
    :param date_to: date to end fetching candles for, e.g. from '2020-01-01'
    :param price: price representation of price, e.g. 'MBA' or 'B' for bid
    :return: list of candles data as per the specification in the API documentation.
    """

    # --------------- checking parameters --------------------
    from_date: dt
    to_date: dt
    if pair not in instruments_names:  # NOTE: instruments_names is a list, and it is assumed to be available in the scope
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=pair, message="Pair must be in 'tradeable_instruments' list")
    if granularity not in INCREMENTS:
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=granularity, message=f"Granularity must be one of {list(INCREMENTS.keys())}")
    try:
        from_date = parser.parse(date_from)
        to_date = parser.parse(date_to)
    except TypeError:
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=[date_from, date_to], message="dates must be in 'YYYY-MM-DD' or 'yyyy-mm-ddTh:m:s' format")
    if price not in ["MBA", "B", "A", "M"]:
        raise InvalidFunctionArguments(function_name="collect_large_candle_data", arguments=price, message="Price must be one of ['MBA', 'B', 'A', 'M']")
    
    # ------------- subroutines ------------------------------------
    def get_candles(p_count: int, from_date: dt) -> list:
        try:
            return fetch_candles(
                pair_name=pair,
                start=dt.strftime(from_date, date_format),
                granularity=granularity,
                price=price,
                use_count=True,
                count=p_count
            )
        except InvalidFunctionArguments as e:
            print(f"Invalid arguments: {e}")
        except RequestError as e:
            print(f"Request failed: {e}")
        except NullCandles as e:
            print(f"No candle data returned: {e}")
            
    # ------------ end subroutines --------------------------------


    candles: list = []
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    
    count = calculate_candles_between(
        from_date=dt.strftime(from_date, date_format), 
        to_date=dt.strftime(to_date, date_format),
        granularity=granularity,
    ) + 1
    
    if count <= CANDLE_COUNT: 
        candles.extend(get_candles(p_count=count, from_date=from_date))
    else:
        while count > 0: # 4000 <= 4500? -> T
            current_count = min(count, CANDLE_COUNT)
            candles.extend(get_candles(p_count=current_count, from_date=from_date)) # fetch 4000 candles
            # decrease count by current count -> 4500 - 4000 = 500 
            count -= current_count
            from_date = parser.parse(candles[-1]["time"])
            if from_date >= to_date:
                break

    return candles


In [134]:
df = pd.DataFrame(collect_large_candle_data(pair='EUR_USD', granularity='M1', date_from='2016-08-01T00:00:00Z', date_to='2024-08-02T00:00:00Z'))

{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-01T00:00:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-04T00:40:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-09T04:33:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-12T06:35:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-17T08:47:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-22T06:16:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-25T06:17:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-08-30T06:42:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-09-02T07:05:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-09-07T09:47:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-09-12T09:07:00Z', 'count': 4000}
{'granularity': 'M1', 'price': 'MBA', 'from': '2016-09-15T08:27:0

In [135]:
df.tail(10)

Unnamed: 0,complete,volume,time,bid,mid,ask
2875990,True,40,2024-08-02T01:06:00.000000000Z,"{'o': '1.07894', 'h': '1.07897', 'l': '1.07893...","{'o': '1.07902', 'h': '1.07904', 'l': '1.07900...","{'o': '1.07910', 'h': '1.07912', 'l': '1.07908..."
2875991,True,39,2024-08-02T01:07:00.000000000Z,"{'o': '1.07896', 'h': '1.07896', 'l': '1.07892...","{'o': '1.07904', 'h': '1.07904', 'l': '1.07900...","{'o': '1.07912', 'h': '1.07912', 'l': '1.07907..."
2875992,True,58,2024-08-02T01:08:00.000000000Z,"{'o': '1.07892', 'h': '1.07902', 'l': '1.07892...","{'o': '1.07900', 'h': '1.07910', 'l': '1.07900...","{'o': '1.07908', 'h': '1.07917', 'l': '1.07907..."
2875993,True,66,2024-08-02T01:09:00.000000000Z,"{'o': '1.07900', 'h': '1.07912', 'l': '1.07899...","{'o': '1.07908', 'h': '1.07919', 'l': '1.07907...","{'o': '1.07916', 'h': '1.07927', 'l': '1.07915..."
2875994,True,45,2024-08-02T01:10:00.000000000Z,"{'o': '1.07912', 'h': '1.07914', 'l': '1.07908...","{'o': '1.07919', 'h': '1.07922', 'l': '1.07916...","{'o': '1.07926', 'h': '1.07929', 'l': '1.07924..."
2875995,True,28,2024-08-02T01:11:00.000000000Z,"{'o': '1.07912', 'h': '1.07912', 'l': '1.07903...","{'o': '1.07919', 'h': '1.07919', 'l': '1.07911...","{'o': '1.07926', 'h': '1.07926', 'l': '1.07919..."
2875996,True,34,2024-08-02T01:12:00.000000000Z,"{'o': '1.07905', 'h': '1.07905', 'l': '1.07902...","{'o': '1.07912', 'h': '1.07913', 'l': '1.07910...","{'o': '1.07920', 'h': '1.07921', 'l': '1.07917..."
2875997,True,64,2024-08-02T01:13:00.000000000Z,"{'o': '1.07899', 'h': '1.07904', 'l': '1.07895...","{'o': '1.07907', 'h': '1.07912', 'l': '1.07902...","{'o': '1.07915', 'h': '1.07919', 'l': '1.07910..."
2875998,True,63,2024-08-02T01:14:00.000000000Z,"{'o': '1.07902', 'h': '1.07910', 'l': '1.07900...","{'o': '1.07910', 'h': '1.07918', 'l': '1.07908...","{'o': '1.07918', 'h': '1.07926', 'l': '1.07916..."
2875999,True,72,2024-08-02T01:15:00.000000000Z,"{'o': '1.07904', 'h': '1.07905', 'l': '1.07893...","{'o': '1.07911', 'h': '1.07912', 'l': '1.07900...","{'o': '1.07918', 'h': '1.07919', 'l': '1.07908..."


In [113]:
c_list[-1]["time"]

'2023-09-15T16:00:00.000000000Z'

In [41]:
c_list = []
c1 = fetch_candles(
    pair_name='EUR_USD',
    start="2020-01-01",
    granularity='M5',
    count=1,
    price='MBA'
)
c2 = fetch_candles(
    pair_name='EUR_USD',
    start="2020-01-01",
    granularity='H1',
    count=1,
    price='MBA'
)

In [46]:
d1 = parser.parse("2020-01-01")

d2 = parser.parse("2020-01-02")

True