In [1]:
import json
import os

import pandas as pd


# Path of the input file that the parsing cell opens and decodes as JSON;
# being relative, it is resolved against the current working directory.
filename = "place_data.json"

In [2]:
# Accumulator for the flattened rows: the parsing loop in this cell adds one
# dict per runner change, and the list is later turned into a DataFrame.
records = []


def try_parse_json(string):
    """Attempt to decode ``string`` as a single JSON document.

    Returns a ``(value, error)`` pair instead of raising on malformed JSON:

    * on success: ``(decoded_value, None)``
    * on failure: ``(None, exc)`` where ``exc`` is the ``json.JSONDecodeError``
      raised by ``json.loads``

    Only ``json.JSONDecodeError`` is caught; any other exception propagates.
    """
    try:
        decoded = json.loads(string)
    except json.JSONDecodeError as exc:
        return None, exc
    return decoded, None


def _runner_change_records(message):
    """Flatten one decoded message into a list of per-runner record dicts.

    Only dict messages whose "op" field equals "mcm" produce records; any
    other value yields an empty list. For an "mcm" message, one record is
    built for every entry of every market change's "rc" list, stamped with
    the message's "pt" value and the market change's "id".

    Raises KeyError if an "mcm" message lacks "pt", a market change lacks
    "id", or a runner change lacks "id".
    """
    if not isinstance(message, dict) or message.get("op") != "mcm":
        return []

    pt = message["pt"]
    rows = []
    for mc_item in message.get("mc", []):
        market_id = mc_item["id"]
        for rc in mc_item.get("rc", []):
            # NOTE(review): this reads index [1][1], i.e. the second element
            # of the *second* "batb" entry, and therefore needs at least two
            # entries. Presumably entries are [level, price, size] triples, in
            # which case this is not necessarily the level-0 (best) price --
            # confirm that this is intended.
            if "batb" in rc and len(rc["batb"]) > 1:
                best_available_to_back = rc["batb"][1][1]
            else:
                best_available_to_back = None
            rows.append(
                {
                    "timestamp_unix": pt,
                    "market_id": market_id,
                    "horse_id": rc["id"],
                    "best_available_to_back": best_available_to_back,
                    "last_traded_price": rc.get("ltp"),
                    "traded_volume": rc.get("tv"),
                }
            )
    return rows


# Read the file line by line, accumulating stripped lines in `buffer` until it
# holds at least one complete JSON document. Every complete document is turned
# into records; an incomplete tail stays in the buffer for the next line.
with open(filename, "r", encoding="utf-8") as file:
    buffer = ""
    for line in file:
        buffer += line.strip()
        while buffer:
            parsed_json, error = try_parse_json(buffer)
            if error is None:
                # Test the error, not the value's truthiness, so that valid but
                # falsy documents such as {} still clear the buffer.
                records.extend(_runner_change_records(parsed_json))
                buffer = ""
            elif error.msg == "Extra data":
                # Several documents are concatenated. `error.pos` is the offset
                # where the first complete document ends, so split there, keep
                # the first document's records, and loop on the remainder.
                first_document, _ = try_parse_json(buffer[: error.pos])
                records.extend(_runner_change_records(first_document))
                buffer = buffer[error.pos :]
            else:
                # Incomplete (or invalid) document: wait for more input.
                break

    if buffer:
        # Surface leftover data instead of dropping it silently.
        print(
            f"Warning: {len(buffer)} trailing characters in {filename!r} "
            "could not be parsed as JSON and were ignored"
        )

In [3]:
# Create a DataFrame from the records. The columns are listed explicitly (in
# the same order as the record keys) so the frame keeps its expected columns
# even when `records` is empty.
df = pd.DataFrame(
    records,
    columns=[
        "timestamp_unix",
        "market_id",
        "horse_id",
        "best_available_to_back",
        "last_traded_price",
        "traded_volume",
    ],
)

In [4]:
# Show the final 1000 rows of the frame; the rendered table below is
# abbreviated in the middle.
df.tail(1000)

Unnamed: 0,timestamp_unix,market_id,horse_id,best_available_to_back,last_traded_price,traded_volume
22151,1580573797519,1.168096597,23405261,,11.00,1007.59
22152,1580573797519,1.168096597,21848159,2.54,3.10,9996.87
22153,1580573797519,1.168096597,17596196,1.49,1.78,9070.90
22154,1580573798479,1.168096597,22023487,1.70,3.40,3270.98
22155,1580573798479,1.168096597,22046306,,2.66,3217.77
...,...,...,...,...,...,...
23146,1580573984225,1.168096597,14765349,1.27,5.80,6824.50
23147,1580573984225,1.168096597,22972635,1.14,4.50,1890.92
23148,1580573984225,1.168096597,22101589,,1.03,11795.21
23149,1580573984225,1.168096597,23405261,1.10,3.70,1039.41


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) per column; the
# output below covers the numeric columns only, so market_id is absent.

df.describe()

Unnamed: 0,timestamp_unix,horse_id,best_available_to_back,last_traded_price,traded_volume
count,23151.0,23151.0,9942.0,23151.0,23151.0
mean,1580558000000.0,20187520.0,3.108045,3.394366,1236.235645
std,18093090.0,3020924.0,1.683398,2.005571,2129.182236
min,1580495000000.0,14765350.0,1.01,0.0,0.0
25%,1580550000000.0,17596200.0,1.95,2.12,102.65
50%,1580565000000.0,22023490.0,2.68,2.84,420.18
75%,1580573000000.0,22101590.0,3.75,4.1,1186.69
max,1580574000000.0,23405260.0,13.5,16.0,11795.21


In [6]:
# (number of rows, number of columns) of the parsed frame.
df.shape

(23151, 6)

In [8]:
# Save the DataFrame to a CSV file without the index column. The target
# directory is created first because `to_csv` does not create missing
# directories and would otherwise fail on a fresh checkout.
output_csv = "csv_files/place_data.csv"
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

df.to_csv(output_csv, index=False)