In [2]:
import pandas as pd
import os
import re
import betfairlightweight
from betfairlightweight import StreamListener
import logging
import requests
import tarfile
import bz2
from unittest.mock import patch

import logging
from typing import List, Set, Dict, Tuple, Optional

from unittest.mock import patch
from itertools import zip_longest
import functools

import os
import tarfile
import zipfile
import bz2
import glob

# importing data types
import betfairlightweight
from betfairlightweight.resources.bettingresources import (
    PriceSize,
    MarketBook
)


data_path = [
#     "./data/2021_01_JanRacingPro.tar",
    "./data/2021_02_FebRacingPro.tar"
]

# Betfair Lightweight Boilerplate

# create trading instance (don't need username/password)
trading = betfairlightweight.APIClient("username", "password")

# create listener
listener = StreamListener(max_latency=None)

# rounding to 2 decimal places or returning '' if blank
def as_str(v: float) -> str:
    return '%.2f' % v if v is not None else ''

# splitting race name and returning the parts 
def split_anz_horse_market_name(market_name: str) -> (str, str, str):
    # return race no, length, race type
    # input sample: R6 1400m Grp1
    parts = market_name.split(' ')
    race_no = parts[0] # return example R6
    race_len = parts[1] # return example 1400m
    race_type = parts[2].lower() # return example grp1, trot, pace

    return (race_no, race_len, race_type)

# filtering markets to those that fit the following criteria
def filter_market(market: MarketBook) -> bool: 
    d = market.market_definition
    return (d.country_code == 'AU' 
        and d.market_type == 'WIN' 
        and (c := split_anz_horse_market_name(d.name)[2]) != 'trot' and c != 'pace')


# loading from tar and extracting files
def load_markets(file_paths):
    for file_path in file_paths:
        print(file_path)
        if os.path.isdir(file_path):
            for path in glob.iglob(file_path + '**/**/*.bz2', recursive=True):
                f = bz2.BZ2File(path, 'rb')
                yield f
                f.close()
        elif os.path.isfile(file_path):
            ext = os.path.splitext(file_path)[1]
            # iterate through a tar archive
            if ext == '.tar':
                with tarfile.TarFile(file_path) as archive:
                    for file in archive:
                        yield bz2.open(archive.extractfile(file))
            # or a zip archive
            elif ext == '.zip':
                with zipfile.ZipFile(file_path) as archive:
                    for file in archive.namelist():
                        yield bz2.open(archive.open(file))

    return None

In [13]:
# Extract Components From Generated Stream
def extract_components_from_stream(s):
    
    with patch("builtins.open", lambda f, _: f):   
    
        # Will return t-5mins market book, t-1mins marketbook, and final market book
        evaluate_market = False
        final_market = None
        final_traded = None
        t1m_market = None
        t5m_market = None
        t5m = False
        t1m = False

        gen = stream.get_generator()

        for market_books in gen():

            for market_book in market_books:

                # skipping markets that don't meet the filter
                if evaluate_market == False and filter_market(market_book) == False:
                    continue
                else:
                    evaluate_market = True

                # final market view at the conclusion of the market
                if final_market is not None and final_market.status == "OPEN" and market_book.status != final_market.status:
                    final_traded = [ (r.last_price_traded, r.ex.traded_volume.copy()) for r in market_book.runners ]
                final_market = market_book   

                seconds_to_start = (market_book.market_definition.market_time - market_book.publish_time).total_seconds()

                # Best Available To Back 5m
                if not t5m:
                    if seconds_to_start < 5*60:
                        t5m_market = market_book
                        t5m = True

                # Best Available To Back 1m
                if not t1m:
                    if seconds_to_start < 1*60:
                        t1m_market = market_book
                        t1m = True

        return (t1m_market, t5m_market, final_market, final_traded)

In [16]:
# record prices to a file
with open("outputs/tho-odds-feb-test.csv", "w+") as output:
    # defining column headers\
    
    # Column Headers
    output.write("market_id,event_date,country,track,market_name,selection_id,selection_name,result,bsp,matched_volume, best_back_1m, best_back_5m \n")

    for file_obj in load_markets(data_path):

        # Instantiate a "stream" object
        stream = trading.streaming.create_historical_generator_stream(
            file_path=file_obj,
            listener=listener,
        )

        # Extract Components From Stream
        (t1m_market, t5m_market, final_market, final_traded) = extract_components_from_stream(stream)
                    
        # no price data for market
        if final_traded is None:
            continue; 

        # Runner Metadata
        runner_data = [
            {
                'selection_id': r.selection_id,
                'selection_name': next((rd.name for rd in final_market.market_definition.runners if rd.selection_id == r.selection_id), None),
                'selection_status': r.status,
                'sp': r.sp.actual_sp
            }
            for r in final_market.runners 
        ]

        # Total Matched Volume  
        # _____________________

        def ladder_traded_volume(ladder):
            return(sum([rung.size for rung in ladder]))

        selection_traded_volume = [ ladder_traded_volume(runner[1]) for runner in final_traded ]


        # Best Available To Back
        # ______________________

        def best_back(availableLadder):
            if len(availableLadder) == 0:
                return(None)
            else:
                return(availableLadder[0].price)

        bestBack5m = [ best_back(runner.ex.available_to_back) for runner in t5m_market.runners]

        bestBack1m = [ best_back(runner.ex.available_to_back) for runner in t1m_market.runners]

        # Writing To CSV
        # ______________________

        for (runnerMeta, runnerTradedVolume, bb5m, bb1m) in zip(runner_data, selection_traded_volume, bestBack5m, bestBack1m):

            print("current time: ", datetime.now(), "m:" , final_market.market_id[0])
            
            if runnerMeta['selection_status'] != 'REMOVED':

                print("current time: ", datetime.now(), "m:" , final_market.market_id)
                
                output.write(
                    "{},{},{},{},{},{},{},{},{},{},{},{} \n".format(
                        final_market.market_id,
                        final_market.market_definition.market_time,
                        final_market.market_definition.country_code,
                        final_market.market_definition.venue,
                        final_market.market_definition.name,
                        runnerMeta['selection_id'],
                        runnerMeta['selection_name'],
                        runnerMeta['selection_status'],
                        runnerMeta['sp'],
                        runnerTradedVolume,
                        bb5m,
                        bb1m
                    )
                )

./data/2021_02_FebRacingPro.tar
current time:  2021-04-02 12:34:38.666065 m: 1
current time:  2021-04-02 12:34:38.666065 m: 1.178629182
current time:  2021-04-02 12:34:38.666065 m: 1
current time:  2021-04-02 12:34:38.666065 m: 1.178629182
current time:  2021-04-02 12:34:38.666065 m: 1
current time:  2021-04-02 12:34:38.666065 m: 1.178629182
current time:  2021-04-02 12:34:38.666065 m: 1
current time:  2021-04-02 12:34:38.666065 m: 1.178629182
current time:  2021-04-02 12:34:38.666065 m: 1
current time:  2021-04-02 12:34:38.666065 m: 1.178629182
current time:  2021-04-02 12:34:38.666065 m: 1
current time:  2021-04-02 12:34:38.667067 m: 1.178629182
current time:  2021-04-02 12:34:38.667067 m: 1
current time:  2021-04-02 12:35:00.388976 m: 1
current time:  2021-04-02 12:35:00.389976 m: 1.178629242
current time:  2021-04-02 12:35:00.389976 m: 1
current time:  2021-04-02 12:35:00.389976 m: 1.178629242
current time:  2021-04-02 12:35:00.389976 m: 1
current time:  2021-04-02 12:35:00.389976 

current time:  2021-04-02 12:35:40.412238 m: 1
current time:  2021-04-02 12:35:40.412238 m: 1.178629246
current time:  2021-04-02 12:35:40.412238 m: 1
current time:  2021-04-02 12:35:40.412238 m: 1.178629246
current time:  2021-04-02 12:35:40.412238 m: 1
current time:  2021-04-02 12:35:40.412238 m: 1.178629246
current time:  2021-04-02 12:35:40.412238 m: 1
current time:  2021-04-02 12:35:40.412238 m: 1.178629246
current time:  2021-04-02 12:35:40.412238 m: 1
current time:  2021-04-02 12:35:40.412238 m: 1.178629246
current time:  2021-04-02 12:35:40.412238 m: 1
current time:  2021-04-02 12:35:40.412238 m: 1.178629246
current time:  2021-04-02 12:35:40.415237 m: 1
current time:  2021-04-02 12:35:40.415237 m: 1.178629246
current time:  2021-04-02 12:35:40.416236 m: 1
current time:  2021-04-02 12:35:40.416236 m: 1.178629246
current time:  2021-04-02 12:35:40.416236 m: 1
current time:  2021-04-02 12:35:46.280864 m: 1
current time:  2021-04-02 12:35:46.280864 m: 1.178629998
current time:  20

current time:  2021-04-02 12:36:20.254408 m: 1
current time:  2021-04-02 12:36:20.254408 m: 1.178630002
current time:  2021-04-02 12:36:20.254408 m: 1
current time:  2021-04-02 12:36:20.254408 m: 1.178630002
current time:  2021-04-02 12:36:20.254408 m: 1
current time:  2021-04-02 12:36:20.255379 m: 1.178630002
current time:  2021-04-02 12:36:20.256378 m: 1
current time:  2021-04-02 12:36:20.256378 m: 1.178630002
current time:  2021-04-02 12:36:20.256378 m: 1
current time:  2021-04-02 12:36:20.256378 m: 1.178630002
current time:  2021-04-02 12:36:20.256378 m: 1
current time:  2021-04-02 12:36:20.256378 m: 1.178630002
current time:  2021-04-02 12:36:20.257380 m: 1
current time:  2021-04-02 12:36:20.257380 m: 1.178630002
current time:  2021-04-02 12:36:20.258378 m: 1
current time:  2021-04-02 12:36:20.258378 m: 1.178630002
current time:  2021-04-02 12:36:20.258378 m: 1
current time:  2021-04-02 12:36:20.258378 m: 1.178630002
current time:  2021-04-02 12:36:24.274237 m: 1
current time:  20

current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:51.921114 m: 1
current time:  2021-04-02 12:48:51.921114 m: 1.178676388
current time:  2021-04-02 12:48:54.687531 m: 1
current time:  2021-04-02 12:48:54.688532 m: 1.178676390
current time:  2021-04-02 12:48:54.688532 m: 1
current time:  20

current time:  2021-04-02 12:50:01.551296 m: 1
current time:  2021-04-02 12:50:01.552297 m: 1.178676400
current time:  2021-04-02 12:50:01.552297 m: 1
current time:  2021-04-02 12:50:01.552297 m: 1.178676400
current time:  2021-04-02 12:50:01.552297 m: 1
current time:  2021-04-02 12:50:01.552297 m: 1.178676400
current time:  2021-04-02 12:50:01.552297 m: 1
current time:  2021-04-02 12:50:01.552297 m: 1.178676400
current time:  2021-04-02 12:50:01.552297 m: 1
current time:  2021-04-02 12:50:01.552297 m: 1.178676400
current time:  2021-04-02 12:50:01.553260 m: 1
current time:  2021-04-02 12:50:01.553260 m: 1.178676400
current time:  2021-04-02 12:50:01.553260 m: 1
current time:  2021-04-02 12:50:01.553260 m: 1.178676400
current time:  2021-04-02 12:50:01.553260 m: 1
current time:  2021-04-02 12:50:12.970963 m: 1
current time:  2021-04-02 12:50:12.970963 m: 1.178680209
current time:  2021-04-02 12:50:12.970963 m: 1
current time:  2021-04-02 12:50:12.970963 m: 1.178680209
current time:  20

current time:  2021-04-02 12:58:34.515413 m: 1
current time:  2021-04-02 12:58:34.516414 m: 1.178721772
current time:  2021-04-02 12:58:34.516414 m: 1
current time:  2021-04-02 12:58:34.516414 m: 1.178721772
current time:  2021-04-02 12:58:34.516414 m: 1
current time:  2021-04-02 12:58:34.516414 m: 1.178721772
current time:  2021-04-02 12:58:34.516414 m: 1
current time:  2021-04-02 12:58:34.516414 m: 1.178721772
current time:  2021-04-02 12:58:34.516414 m: 1
current time:  2021-04-02 12:58:34.516414 m: 1.178721772
current time:  2021-04-02 12:58:34.517384 m: 1
current time:  2021-04-02 12:58:34.517384 m: 1.178721772
current time:  2021-04-02 12:58:34.517384 m: 1
current time:  2021-04-02 12:58:34.517384 m: 1.178721772
current time:  2021-04-02 12:58:34.517384 m: 1
current time:  2021-04-02 12:58:34.517384 m: 1.178721772
current time:  2021-04-02 12:58:38.980819 m: 1
current time:  2021-04-02 12:58:38.980819 m: 1.178722320
current time:  2021-04-02 12:58:38.980819 m: 1
current time:  20

current time:  2021-04-02 12:59:29.220935 m: 1
current time:  2021-04-02 12:59:29.220935 m: 1.178722041
current time:  2021-04-02 12:59:29.220935 m: 1
current time:  2021-04-02 12:59:29.220935 m: 1.178722041
current time:  2021-04-02 12:59:29.220935 m: 1
current time:  2021-04-02 12:59:29.220935 m: 1.178722041
current time:  2021-04-02 12:59:29.220935 m: 1
current time:  2021-04-02 12:59:29.220935 m: 1.178722041
current time:  2021-04-02 12:59:29.220935 m: 1
current time:  2021-04-02 12:59:29.220935 m: 1.178722041
current time:  2021-04-02 12:59:29.220935 m: 1
current time:  2021-04-02 12:59:29.220935 m: 1.178722041
current time:  2021-04-02 12:59:29.220935 m: 1
current time:  2021-04-02 12:59:29.222936 m: 1.178722041
current time:  2021-04-02 12:59:29.224935 m: 1
current time:  2021-04-02 12:59:29.224935 m: 1.178722041
current time:  2021-04-02 12:59:32.264835 m: 1
current time:  2021-04-02 12:59:32.264835 m: 1.178721778
current time:  2021-04-02 12:59:32.264835 m: 1
current time:  20

current time:  2021-04-02 13:00:10.302930 m: 1
current time:  2021-04-02 13:00:10.302930 m: 1.178777701
current time:  2021-04-02 13:00:10.302930 m: 1
current time:  2021-04-02 13:00:10.302930 m: 1.178777701
current time:  2021-04-02 13:00:10.302930 m: 1
current time:  2021-04-02 13:00:10.302930 m: 1.178777701
current time:  2021-04-02 13:00:10.303930 m: 1
current time:  2021-04-02 13:00:10.303930 m: 1.178777701
current time:  2021-04-02 13:00:10.303930 m: 1
current time:  2021-04-02 13:00:10.303930 m: 1.178777701
current time:  2021-04-02 13:00:10.303930 m: 1
current time:  2021-04-02 13:00:10.303930 m: 1.178777701
current time:  2021-04-02 13:00:10.303930 m: 1
current time:  2021-04-02 13:00:10.303930 m: 1.178777701
current time:  2021-04-02 13:00:10.303930 m: 1
current time:  2021-04-02 13:00:10.303930 m: 1.178777701
current time:  2021-04-02 13:00:10.303930 m: 1
current time:  2021-04-02 13:00:10.303930 m: 1.178777701
current time:  2021-04-02 13:00:10.303930 m: 1
current time:  20

current time:  2021-04-02 13:11:34.724540 m: 1
current time:  2021-04-02 13:11:34.724540 m: 1.178772560
current time:  2021-04-02 13:11:34.724540 m: 1
current time:  2021-04-02 13:11:34.724540 m: 1.178772560
current time:  2021-04-02 13:11:34.724540 m: 1
current time:  2021-04-02 13:11:34.725539 m: 1.178772560
current time:  2021-04-02 13:11:34.725539 m: 1
current time:  2021-04-02 13:11:34.726540 m: 1.178772560
current time:  2021-04-02 13:11:34.726540 m: 1
current time:  2021-04-02 13:11:34.726540 m: 1.178772560
current time:  2021-04-02 13:11:34.728540 m: 1
current time:  2021-04-02 13:11:34.728540 m: 1.178772560
current time:  2021-04-02 13:11:34.728540 m: 1
current time:  2021-04-02 13:11:34.728540 m: 1.178772560
current time:  2021-04-02 13:11:34.728540 m: 1
current time:  2021-04-02 13:11:34.728540 m: 1.178772560
current time:  2021-04-02 13:11:34.728540 m: 1
current time:  2021-04-02 13:11:34.728540 m: 1.178772560
current time:  2021-04-02 13:11:34.729539 m: 1
current time:  20

current time:  2021-04-02 13:12:14.775147 m: 1
current time:  2021-04-02 13:12:14.775147 m: 1.178772812
current time:  2021-04-02 13:12:14.775147 m: 1
current time:  2021-04-02 13:12:14.775147 m: 1
current time:  2021-04-02 13:12:14.775147 m: 1.178772812
current time:  2021-04-02 13:12:14.775147 m: 1
current time:  2021-04-02 13:12:14.775147 m: 1.178772812
current time:  2021-04-02 13:12:14.775147 m: 1
current time:  2021-04-02 13:12:14.776149 m: 1.178772812
current time:  2021-04-02 13:12:14.776149 m: 1
current time:  2021-04-02 13:12:14.776149 m: 1.178772812
current time:  2021-04-02 13:12:14.776149 m: 1
current time:  2021-04-02 13:12:14.776149 m: 1
current time:  2021-04-02 13:12:14.776149 m: 1.178772812
current time:  2021-04-02 13:12:14.776149 m: 1
current time:  2021-04-02 13:12:14.776149 m: 1.178772812
current time:  2021-04-02 13:12:14.776149 m: 1
current time:  2021-04-02 13:12:14.776149 m: 1.178772812
current time:  2021-04-02 13:12:14.776149 m: 1
current time:  2021-04-02 1

AttributeError: 'NoneType' object has no attribute 'runners'