In [42]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bz2
import json

In [43]:
# Index of the downloaded market files (presumably one row per file — TODO confirm).
# The cells below read its "name", "mkt_type", "time" and "filepath" columns.
file_df = pd.read_csv(filepath_or_buffer="../refined_csvs/1_full_file_df.csv")

In [44]:
# Keep only markets whose name contains " @ " and split the name around it.
# A missing name is treated as an empty string, so it never matches.
has_at_sign = file_df["name"].fillna("").str.contains(" @ ")
ats = file_df[has_at_sign].copy()
name_parts = ats["name"].str.split(" @ ")
ats["home"] = name_parts.str[1]  # text after the " @ "
ats["away"] = name_parts.str[0]  # text before the " @ "

In [47]:
# Load one CLI csv per season (file names 2020-21 through 2024-25) and stack them,
# tagging every row with a zero-based season number.
dfs = [
    pd.read_csv(f"data/20{num}-{num + 1}_season_cli_FIX.csv").assign(season_num=num - 20)
    for num in range(20, 25)
]
cli_df = pd.concat(dfs)

# Each *_cli column is the difference between a "...g" column and its matching "...b"
# column (presumably good-outcome vs bad-outcome probabilities — TODO confirm).
cli_spreads = {
    "home_playoff_cli": ("hpg", "hpb"),
    "away_playoff_cli": ("apg", "apb"),
    "home_champ_cli": ("hcg", "hcb"),
    "away_champ_cli": ("acg", "acb"),
}
for cli_col, (g_col, b_col) in cli_spreads.items():
    cli_df[cli_col] = cli_df[g_col] - cli_df[b_col]

In [48]:
# Every home-team name seen in the CLI data, plus the long-form Clippers name
# (a later cell rewrites "Los Angeles Clippers" to "LA Clippers" before merging).
nba_teams = [*cli_df["home"].unique(), "Los Angeles Clippers"]

In [53]:
# Restrict to match-odds markets whose home side is a known NBA team name.
is_nba_home = ats["home"].isin(nba_teams)
is_match_odds = ats["mkt_type"] == "MATCH_ODDS"
nba_bets = ats[is_nba_home & is_match_odds].copy()

In [54]:
# Convert the market timestamp to US Central time, then take the first 10 characters
# of its string form as the local calendar date used for joining with the CLI data.
central_time = pd.to_datetime(nba_bets["time"]).dt.tz_convert('America/Chicago')
nba_bets["us_time"] = central_time
nba_bets["date"] = central_time.astype(str).str[:10]

In [55]:
# Same helpers as in the MLB notebook, copied here (the only change is the relative path inside process_game)
def bz2_to_json(filepath):
    """Read a bz2-compressed file of back-to-back JSON objects into a list.

    The file is expected to hold one JSON object after another, separated by a
    single newline. The text is rewritten into one JSON array and parsed once.

    Args:
        filepath: Path to the ``.bz2`` file (decoded as UTF-8 text).

    Returns:
        A list with one parsed object per JSON object in the file.

    Raises:
        OSError: If the file cannot be opened or decompressed.
        json.JSONDecodeError: If the reshaped text is not valid JSON.
    """
    # Context manager so the file handle is released even if reading fails
    # (previously the handle returned by bz2.open was never closed).
    with bz2.open(filepath, "rt", encoding="utf-8") as compressed_file:
        content_string = compressed_file.read()
    # Instead of a bunch of dicts printed one after the other
    # Reshape so it's a list of dicts
    fixed_string = "[" + content_string.replace("}\n{", "},{") + "]"
    return json.loads(fixed_string)

def _implied_quotes(trades, lookback, min_trades):
    # Turn the last `lookback` traded prices into an (ask, bid) pair of 1/price values.
    if not trades or len(trades) < min_trades:
        return None, None
    recent = trades[-lookback:]
    # The lowest recent price gives the highest 1/price (ask); the highest gives the bid.
    return 1 / min(recent), 1 / max(recent)


def process_game(filename, lookback, min_trades=5):
    """Extract quotes, trade history and the winner from one market file.

    Args:
        filename: Path of the bz2 market file, relative to the parent directory
            of this notebook ("../" is prepended).
        lookback: Number of most recent last-traded prices (per runner) used to
            derive the bid/ask. Note that ``0`` selects the whole history,
            because ``trades[-0:]`` is the full list.
        min_trades: Minimum number of recorded trades a runner needs before a
            bid/ask is produced; below that both are ``None``. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        dict with keys ``home_ask``, ``home_bid``, ``away_ask``, ``away_bid``
        (1 / price, or ``None`` when there are too few trades), ``home_trades``
        and ``away_trades`` (lists of last-traded prices), ``winner`` (runner
        name or ``None``) and ``game_time`` (the market's ``suspendTime``).

    Raises:
        KeyError / IndexError: If a message lacks the expected fields.
    """
    data = bz2_to_json("../" + filename)  # CHANGE vs. the MLB notebook: files live one directory up

    # Static market info comes from the first message.
    market_def = data[0]["mc"][0]["marketDefinition"]
    game_time = market_def["suspendTime"]
    runners = market_def["runners"]
    # Runner order is taken as [away, home] — presumably matching "<away> @ <home>"; verify.
    away_name, away_id = runners[0]["name"], runners[0]["id"]
    home_name, home_id = runners[1]["name"], runners[1]["id"]

    # Collect last-traded prices per runner. The first two messages are skipped.
    home_trades = []
    away_trades = []
    for line in data[2:]:
        change = line["mc"][0]
        for trade in change.get("rc", ()):
            if trade["id"] == home_id:
                home_trades.append(trade["ltp"])
            elif trade["id"] == away_id:
                away_trades.append(trade["ltp"])
            else:
                # A price change for a runner that is neither home nor away.
                print("Questionable")
        # Stop at the first market-definition update whose turnInPlayEnabled flag is truthy.
        # NOTE(review): if the intent is "stop once the game goes in-play", confirm
        # whether the `inPlay` flag was meant here instead.
        market_update = change.get("marketDefinition")
        if market_update is not None and market_update.get("turnInPlayEnabled"):
            break

    # The final message carries the settled runner statuses.
    end_runners = data[-1]["mc"][0]["marketDefinition"]["runners"]
    if end_runners[0]["status"] == "WINNER":
        winner = away_name
    elif end_runners[1]["status"] == "WINNER":
        winner = home_name
    else:
        winner = None

    home_ask, home_bid = _implied_quotes(home_trades, lookback, min_trades)
    away_ask, away_bid = _implied_quotes(away_trades, lookback, min_trades)

    return {
        "home_ask": home_ask,
        "home_bid": home_bid,
        "away_ask": away_ask,
        "away_bid": away_bid,
        "home_trades": home_trades,
        "away_trades": away_trades,
        "winner": winner,
        "game_time": game_time
    }

In [56]:
# Parse every market file (one bz2 read per row) using the last 5 trades per runner,
# and keep the result aligned to nba_bets by reusing its index.
game_records = [process_game(path, 5) for path in nba_bets["filepath"]]
new_df = pd.DataFrame(game_records, index=nba_bets.index)

Questionable
Questionable
Questionable
Questionable
Questionable
Questionable
Questionable
Questionable
Questionable


In [57]:
# Put the parsed per-game fields side by side with the market metadata (aligned on index).
full_bet_df = pd.concat(objs=[nba_bets, new_df], axis="columns")

In [58]:
# Use the short Clippers name in every team-name column so the join on
# "home"/"away" with the CLI data can match.
old_name, new_name = "Los Angeles Clippers", "LA Clippers"
for column in ["name", "home", "away", "winner"]:
    renamed = full_bet_df[column].str.replace(old_name, new_name)
    full_bet_df[column] = renamed

In [59]:
# Join bets to CLI rows on (date, home, away) — merge defaults to an inner join —
# then drop every row that has a missing value in any column.
merged_games = full_bet_df.merge(cli_df, on=["date", "home", "away"])
full_df = merged_games.dropna()

In [60]:
# The home side can be quoted directly, or implied from the away side as 1 - away quote.
direct_bid, direct_ask = full_df["home_bid"], full_df["home_ask"]
implied_bid = 1 - full_df["away_ask"]
implied_ask = 1 - full_df["away_bid"]

# Synthetic bid: the higher of the direct and implied bids, row by row.
full_df["synth_home_bid"] = pd.DataFrame({0: direct_bid, 1: implied_bid}).max(axis=1)
# NOTE(review): the ask also takes the row-wise max, i.e. the higher (worse) of the
# two asks, whereas the bid above takes the better one — confirm this is intended
# rather than a min.
full_df["synth_home_ask"] = pd.DataFrame({0: direct_ask, 1: implied_ask}).max(axis=1)
full_df["synth_home_mid"] = (full_df["synth_home_bid"] + full_df["synth_home_ask"]) / 2

# PnL per game of selling home at the bid / buying home at the ask
# (home_win is presumably a 0/1 outcome column from the CLI data — TODO confirm).
full_df["anti_home_pnl"] = full_df["synth_home_bid"] - full_df["home_win"] # bid
full_df["pro_home_pnl"] = full_df["home_win"] - full_df["synth_home_ask"] # ask

# Same timestamp with its first four characters (the year) replaced by "2000".
year_2000_time = "2000" + full_df["time"].str[4:]
full_df["date_mod_year"] = pd.to_datetime(year_2000_time)

In [61]:
# Sanity check: mean per-game PnL of always buying the home side at the synthetic ask
# (pro_home) versus always selling it at the synthetic bid (anti_home).
# A negative pro_home mean is consistent with the home team usually being a little overpriced.
mean_pnl = full_df[["pro_home_pnl", "anti_home_pnl"]].mean()
mean_pnl

pro_home_pnl    -0.013024
anti_home_pnl    0.000790
dtype: float64

In [62]:
def get_sharpe(s):
    """Return sum(s) divided by the square root of the sum of squares of s.

    Note that this is an uncentred ratio (no mean is subtracted in the
    denominator), not a mean/standard-deviation ratio. For a DataFrame the
    computation is done per column.
    """
    total = s.sum()
    root_sum_sq = (s ** 2).sum() ** 0.5
    return total / root_sum_sq
# Per-strategy ratio for the two home-side PnL columns.
home_pnl = full_df[["pro_home_pnl", "anti_home_pnl"]]
get_sharpe(home_pnl)

pro_home_pnl    -1.725885
anti_home_pnl    0.104805
dtype: float64

In [63]:
# Persist the merged bets + CLI table for downstream notebooks (row index not written).
full_df.to_csv(path_or_buf="data/df_bets_plus_cli.csv", index=False)