In [4]:
import pandas as pd
import os
import re
import betfairlightweight
from betfairlightweight import StreamListener
import logging
import requests
import tarfile
import bz2
from unittest.mock import patch

import logging
from typing import List, Set, Dict, Tuple, Optional

from unittest.mock import patch
from itertools import zip_longest
import functools

import os
import tarfile
import zipfile
import bz2
import glob

# importing data types
import betfairlightweight
from betfairlightweight.resources.bettingresources import (
    PriceSize,
    MarketBook
)


# Archives (or directories) handed to load_markets(). Each entry is processed
# once per occurrence, so a path must not be repeated here: a duplicate would
# make every market in it be parsed and written to the output twice.
data_path = [
    "./data/2021_01_JanRacingPro.tar",
]

# loading from tar and extracting files
def load_markets(file_paths):
    """Yield a bz2-decompressing file object for every market file found.

    Each entry of ``file_paths`` is printed and then handled by kind:

    * directory - searched recursively for ``*.bz2`` files;
    * ``.tar`` file - every member that ``extractfile`` can open is yielded;
    * ``.zip`` file - every non-directory entry is yielded.

    Paths that do not exist, and files with any other extension, produce
    nothing.

    A yielded handle is closed as soon as the generator is resumed (or
    closed), so the caller must finish reading it before asking for the
    next one.
    """
    for file_path in file_paths:
        print(file_path)
        if os.path.isdir(file_path):
            # Join with a real path separator: plain concatenation turned
            # "dir" into "dir**", which also matched siblings such as "dir2".
            # escape() keeps glob metacharacters in the directory name literal.
            pattern = os.path.join(glob.escape(file_path), '**', '*.bz2')
            for path in glob.iglob(pattern, recursive=True):
                with bz2.BZ2File(path, 'rb') as f:
                    yield f
        elif os.path.isfile(file_path):
            ext = os.path.splitext(file_path)[1]
            # iterate through a tar archive
            if ext == '.tar':
                with tarfile.TarFile(file_path) as archive:
                    for member in archive:
                        raw = archive.extractfile(member)
                        # extractfile() returns None for members that are
                        # not files or links (e.g. directories)
                        if raw is None:
                            continue
                        with raw, bz2.open(raw) as f:
                            yield f
            # or a zip archive
            elif ext == '.zip':
                with zipfile.ZipFile(file_path) as archive:
                    for info in archive.infolist():
                        if info.is_dir():
                            continue
                        with archive.open(info) as raw, bz2.open(raw) as f:
                            yield f


# Betfair Lightweight setup

# API client used only as the entry point to the streaming helpers; the
# credentials are placeholders (a real username/password is not needed here)
trading = betfairlightweight.APIClient(username="username", password="password")

# listener that receives the stream updates; max_latency is set to None
listener = StreamListener(max_latency=None)

# format a number to 2 decimal places, or give back '' when there is no value
def as_str(v: float) -> str:
    """Return ``v`` formatted with two decimals; ``None`` becomes ``''``."""
    if v is None:
        return ''
    return '%.2f' % v

# splitting race name and returning the parts
def split_anz_horse_market_name(market_name: str) -> Tuple[str, str, str]:
    """Split a market name into ``(race_no, race_len, race_type)``.

    The name is split on whitespace and the first three tokens are used,
    e.g. ``"R6 1400m Grp1"`` -> ``("R6", "1400m", "grp1")``. The race type
    is lower-cased (grp1, trot, pace, ...); the other parts are returned
    unchanged. Tokens after the third are ignored.

    A name with fewer than three tokens yields ``''`` for each missing part
    instead of raising ``IndexError``.
    """
    # split() without an argument collapses runs of whitespace, so a doubled
    # space cannot shift the fields the way split(' ') did
    parts = market_name.split()
    # pad so unpacking always has three values to take
    race_no, race_len, race_type = (parts + ['', '', ''])[:3]

    return (race_no, race_len, race_type.lower())

# keep only the markets that satisfy every criterion below
def filter_market(market: MarketBook) -> bool:
    """Return True for AU WIN markets whose race type is not trot or pace.

    The race type is the third part returned by
    ``split_anz_horse_market_name`` for the market definition's name; it is
    only computed once the country and market-type checks have passed.
    """
    definition = market.market_definition
    if definition.country_code != 'AU':
        return False
    if definition.market_type != 'WIN':
        return False
    race_type = split_anz_horse_market_name(definition.name)[2]
    return race_type not in ('trot', 'pace')

In [11]:
# record prices to a file
#
# For every market file produced by load_markets(data_path) this replays the
# historical stream and writes one CSV row per runner containing: market
# metadata, runner id/name/status, BSP, total traded volume at market
# conclusion, and the best available back price from the first update seen
# inside 5 minutes and inside 1 minute of the scheduled market time.


def ladder_traded_volume(ladder):
    """Return the sum of ``size`` over every rung of a traded-volume ladder."""
    return sum(rung.size for rung in ladder)


def best_back(availableLadder):
    """Return the price of the first rung of the ladder, or None if it is empty."""
    if not availableLadder:
        return None
    return availableLadder[0].price


with open("outputs/example-2.csv", "w+") as output:
    # Column headers. The last two names are in the order the rows are
    # written (5m first, then 1m); they were previously swapped, which
    # mislabelled both columns. The spacing inside the header is kept as it
    # was so the remaining column names are unchanged for existing readers.
    output.write("market_id,event_date,country,track,market_name,selection_id,selection_name,result,bsp,matched_volume, best_back_5m, best_back_1m \n")

    for file_obj in load_markets(data_path):

        # Instantiate a "stream" object for this market file
        stream = trading.streaming.create_historical_generator_stream(
            file_path=file_obj,
            listener=listener,
        )

        # Per-file state. Everything is reset here, including the two
        # snapshot variables, so nothing from a previous file can leak into
        # this one.
        evaluate_market = False
        preplay_market = None
        postplay_market = None
        preplay_traded = None   # captured below but not written to the CSV
        postplay_traded = None
        t5m = False
        t1m = False
        t5m_market = None
        t1m_market = None

        # While the stream is being read, builtins.open is replaced by a
        # function that returns its first argument, so the already-open
        # file object passed as file_path is used as-is. The patch is
        # limited to the read loop; nothing after it needs open().
        with patch("builtins.open", lambda f, _: f):
            gen = stream.get_generator()
            for market_books in gen():
                for market_book in market_books:

                    # skip books until one passes the filter; from then on
                    # every book of this file is evaluated
                    if not evaluate_market and not filter_market(market_book):
                        continue
                    evaluate_market = True

                    # final market view before the in-play flag changes
                    if preplay_market is not None and preplay_market.inplay != market_book.inplay:
                        preplay_traded = [
                            (r.last_price_traded, r.ex.traded_volume.copy())
                            for r in preplay_market.runners
                        ]
                    preplay_market = market_book

                    # market view when the status moves away from OPEN; if
                    # that happens more than once the latest one wins
                    if (
                        postplay_market is not None
                        and postplay_market.status == "OPEN"
                        and market_book.status != postplay_market.status
                    ):
                        postplay_traded = [
                            (r.last_price_traded, r.ex.traded_volume.copy())
                            for r in market_book.runners
                        ]
                    postplay_market = market_book

                    # seconds from this update to the scheduled market time
                    seconds_to_start = (
                        market_book.market_definition.market_time - market_book.publish_time
                    ).total_seconds()

                    # first update seen inside 5 minutes of market time
                    if not t5m and seconds_to_start < 5 * 60:
                        t5m_market = market_book
                        t5m = True

                    # first update seen inside 1 minute of market time
                    if not t1m and seconds_to_start < 1 * 60:
                        t1m_market = market_book
                        t1m = True

        # no price data for market
        if postplay_traded is None:
            continue

        # Runner Metadata
        runner_names = {
            rd.selection_id: rd.name
            for rd in reversed(postplay_market.market_definition.runners)
        }  # reversed so that the first definition wins on a duplicate id
        runner_data = [
            {
                'selection_id': r.selection_id,
                'selection_name': runner_names.get(r.selection_id),
                'selection_status': r.status,
                'sp': as_str(r.sp.actual_sp),
            }
            for r in postplay_market.runners
        ]

        # Total Matched Volume
        selection_traded_volume = [
            ladder_traded_volume(traded_ladder)
            for (_last_price, traded_ladder) in postplay_traded
        ]

        # Best Available To Back
        # A snapshot can be missing when the file never produced an update
        # inside the window; the column is then None for every runner.
        no_price = [None] * len(runner_data)
        bestBack5m = (
            [best_back(runner.ex.available_to_back) for runner in t5m_market.runners]
            if t5m_market is not None else no_price
        )
        bestBack1m = (
            [best_back(runner.ex.available_to_back) for runner in t1m_market.runners]
            if t1m_market is not None else no_price
        )

        # Writing To CSV
        # NOTE(review): the four lists are combined by position, which
        # assumes every snapshot lists the runners in the same order -
        # TODO confirm. A None best-back price is written as the text "None".
        for (runnerMeta, runnerTradedVolume, bb5m, bb1m) in zip(
            runner_data, selection_traded_volume, bestBack5m, bestBack1m
        ):
            output.write(
                "{},{},{},{},{},{},{},{},{},{},{},{} \n".format(
                    postplay_market.market_id,
                    postplay_market.market_definition.market_time,
                    postplay_market.market_definition.country_code,
                    postplay_market.market_definition.venue,
                    postplay_market.market_definition.name,
                    runnerMeta['selection_id'],
                    runnerMeta['selection_name'],
                    runnerMeta['selection_status'],
                    runnerMeta['sp'],
                    runnerTradedVolume,
                    bb5m,
                    bb1m
                )
            )

./data/2021_01_JanRacingPro.tar
[8.6, 75, 130, 16.5, 60, 5.3, 10.5, 7.2, 60, 3.85, 8, 27, None]
[1927.55, 158.48999999999998, 52.25, 1599.3399999999997, 222.52999999999994, 3617.4099999999994, 1562.7600000000002, 2341.9300000000007, 162.17, 17769.130000000005, 2618.2800000000007, 1424.8000000000004, 0]
[11.5, 10.5, 4.8, 6.2, 14.5, 15.5, 80, 18, 8.8, None, 8.4, 29, 32, 75, 170, None]
[1478.97, 1590.38, 6609.990000000001, 6479.78, 1860.5399999999997, 5259.17, 470.7199999999998, 557.84, 2416.58, 0, 5386.529999999999, 938.2800000000001, 425.0, 505.99000000000007, 179.4599999999999, 0]
[20, 3.65, 36, 13, 5.5, 85, 36, 2.7, None, None, None, None, None]
[694.67, 9763.349999999999, 631.1, 3270.420000000001, 4310.670000000002, 120.17999999999998, 319.0000000000001, 18692.06999999999, 0, 0, 0, 0, 0]
[9, 60, 44, 25, 8.4, 5.4, 5.4, None, 5.2, 50, 38, 44, 16, 50]
[8952.71, 590.2900000000002, 1058.8999999999999, 1360.6399999999999, 2700.4500000000003, 7162.890000000004, 7707.909999999998, 0, 13871.5

KeyboardInterrupt: 