In [254]:
import pandas as pd
import numpy as np
from nba_api.stats import endpoints as nba_endpoints
from nba_api.live.nba import endpoints as nba_live_endpoints
import plotly.graph_objects as go
import plotly.io as pio
from tqdm.notebook import tqdm, trange
from pprint import pprint
from math import floor, ceil

In [2]:
# Notebook-wide display settings: widen the pandas row/column display limits
# and set Plotly's default renderer to 'iframe'.
PANDAS_DISPLAY_OPTIONS = {
    "display.max_rows": 200,
    "display.max_columns": 500,
}
for option_name, option_value in PANDAS_DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

pio.renderers.default = 'iframe'

# Data Ingestion

In [3]:
# Season labels in "YYYY-YY" form (e.g. "2023-24"), one per season start year.
# The two-digit suffix is taken modulo 100 and zero-padded so that start years
# before 2009 ("2008-09") or before 2000 ("1999-00") still format correctly.
seasons = [
    f"{start_year}-{(start_year + 1) % 100:02d}"
    for start_year in range(2023, 2025)
]

## Teams

In [4]:
# Team lookup: keep only the ID and abbreviation columns from the first
# data frame returned by the CommonTeamYears endpoint.
team_years_endpoint = nba_endpoints.commonteamyears.CommonTeamYears()
team_years_frame = team_years_endpoint.get_data_frames()[0]
teams = team_years_frame[["TEAM_ID", "ABBREVIATION"]]

## Games

In [6]:
teamlogs = nba_endpoints.teamgamelogs

# Fetch one team-game-log frame per season, then concatenate once.
# Growing a DataFrame with pd.concat inside the loop re-copies all previously
# collected rows on every iteration and starts from an empty frame.
season_logs = [
    teamlogs.TeamGameLogs(season_nullable=s).get_data_frames()[0]
    for s in tqdm(seasons)
]
games = pd.concat(season_logs)

# GAME_ID column of the combined logs; it is de-duplicated in a later cell.
game_ids = games["GAME_ID"]

  0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
# De-duplicate the game IDs. Sorting (instead of relying on set iteration
# order, which is arbitrary) keeps the fetch order reproducible across runs.
game_ids = sorted(set(game_ids))

In [8]:
# Sanity check: number of unique game IDs collected for the requested seasons.
len(game_ids)

2460

## All Plays

In [250]:
# Download the play-by-play actions for every game and stack them into one frame.
# Each game's frame is collected in a list and concatenated once at the end;
# tqdm already reports progress, so no manual counter is printed.
game_frames = []
for game_number, g in enumerate(tqdm(game_ids), start=1):
    try:
        df = pd.DataFrame(
            nba_endpoints.PlayByPlayV3(game_id=g).get_dict().get("game").get("actions")
        )
        # Empty-string scores are mapped to NaN so the columns can be cast to
        # float. np.nan is used because the np.NaN alias is gone in NumPy 2.0.
        for score_col in ("scoreHome", "scoreAway"):
            df[score_col] = np.where(df[score_col] == '', np.nan, df[score_col]).astype(float)
        game_frames.append(
            df.assign(
                game_id=g,
                point_diff=lambda plays: plays['scoreHome'] - plays['scoreAway']
            )
        )
    except Exception as e:
        # Best effort: report the failing game and continue with the rest.
        print(f"ERROR: Game {game_number} out of {len(game_ids)} -> {e}")

# pd.concat raises on an empty list, so fall back to an empty frame if every
# request failed.
all_plays = pd.concat(game_frames, axis=0) if game_frames else pd.DataFrame()

  0%|          | 0/2460 [00:00<?, ?it/s]

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400


In [251]:
# Sanity check: number of distinct games present in the collected plays.
all_plays.game_id.nunique()

2460

In [252]:
# Persist the collected plays to a local Parquet file.
all_plays.to_parquet("all_plays.parquet")

In [5]:
# Load the plays from the local Parquet file (the same path the to_parquet cell writes).
all_plays = pd.read_parquet("all_plays.parquet")

# Feature Engineering

## Home & Away Teams

In [10]:
# Parse the MATCHUP string (e.g. "SAS vs. DET" or "UTA @ GSW").
matchup = games["MATCHUP"]

# "vs." is matched literally: with the default regex=True the "." would match
# any character.
games["home"] = matchup.str.contains("vs.", regex=False)

# expand=False returns a Series (not a one-column DataFrame) for a single group.
games["team_1"] = matchup.str.extract(r"(^[A-Z]{3})", expand=False)  # leading tricode
games["team_2"] = matchup.str.extract(r"([A-Z]{3}$)", expand=False)  # trailing tricode

In [11]:
# Rows flagged as home list the home team first in MATCHUP; otherwise the
# home team is the second tricode.
is_home_row = games["home"].eq(True)
first_team = games["team_1"]
second_team = games["team_2"]

games["home_team"] = np.where(is_home_row, first_team, second_team)
games["away_team"] = np.where(is_home_row, second_team, first_team)

In [12]:
# Visual check that home/away were derived correctly from MATCHUP.
games[["MATCHUP", "home_team", "away_team"]]

Unnamed: 0,MATCHUP,home_team,away_team
0,SAS vs. DET,SAS,DET
1,UTA @ GSW,GSW,UTA
2,WAS @ BOS,BOS,WAS
3,PHX @ MIN,MIN,PHX
4,LAL @ NOP,NOP,LAL
...,...,...,...
2455,GSW @ POR,POR,GSW
2456,LAL vs. MIN,LAL,MIN
2457,MIN @ LAL,LAL,MIN
2458,NYK @ BOS,BOS,NYK


## Seconds Elapsed

In [13]:
# Convert the clock string (e.g. "PT11M43.00S" = time remaining in the period)
# into whole minutes and seconds, then into seconds elapsed since tip-off.
REGULATION_PERIODS = 4
REGULATION_PERIOD_SECONDS = 12 * 60
OVERTIME_PERIOD_SECONDS = 5 * 60

# Run the regex once; group 0 = minutes, group 1 = whole seconds
# (the fractional part of the seconds is discarded).
clock_parts = all_plays["clock"].str.extract(
    "[PT]{2}([0-9]{2})M([0-9]{2}).[0-9]{2}S")
all_plays["minutes"] = clock_parts[0].astype(int)
all_plays["seconds"] = clock_parts[1].astype(int)

period = all_plays["period"].astype(int)
is_overtime = period > REGULATION_PERIODS

# Every period after the fourth is a 5-minute overtime. Treating only period 5
# as overtime misplaces plays in a second or later overtime.
period_length = np.where(is_overtime, OVERTIME_PERIOD_SECONDS, REGULATION_PERIOD_SECONDS)
elapsed_before_period = np.where(
    is_overtime,
    REGULATION_PERIODS * REGULATION_PERIOD_SECONDS
    + (period - REGULATION_PERIODS - 1) * OVERTIME_PERIOD_SECONDS,
    (period - 1) * REGULATION_PERIOD_SECONDS,
)
remaining_in_period = all_plays["minutes"] * 60 + all_plays["seconds"]
all_plays["seconds_elapsed"] = elapsed_before_period + period_length - remaining_in_period

In [14]:
# Visual check of the clock parsing and the derived elapsed seconds.
all_plays[["clock", "minutes", "seconds", "seconds_elapsed"]]

Unnamed: 0,clock,minutes,seconds,seconds_elapsed
0,PT12M00.00S,12,0,0
1,PT12M00.00S,12,0,0
2,PT11M43.00S,11,43,17
3,PT11M33.00S,11,33,27
4,PT11M23.00S,11,23,37
...,...,...,...,...
468,PT00M07.00S,0,7,2873
469,PT00M07.00S,0,7,2873
470,PT00M01.90S,0,1,2879
471,PT00M01.90S,0,1,2879


## Turnovers
Specifically, we capture the play immediately preceding each turnover.

In [160]:
# Attach attributes of the following play to each row.
# The shift is done within each game so the last play of one game never picks
# up the first play of the next game stored below it.
# Assumes rows within a game are already in chronological order - TODO confirm.
NEXT_PLAY_COLUMNS = {
    "actionType": "next_actionType",
    "subType": "next_subType",
    "actionId": "next_actionId",
    "seconds_elapsed": "next_seconds_elapsed",
    "description": "next_description",
    "teamTricode": "next_team",
}
plays_by_game = all_plays.groupby("game_id", sort=False)
for source_col, next_col in NEXT_PLAY_COLUMNS.items():
    all_plays[next_col] = plays_by_game[source_col].shift(-1)

# Seconds between this play and the following play. Despite the name, it is
# computed for every row, not only rows followed by a turnover.
all_plays["time_to_turnover"] = all_plays["next_seconds_elapsed"] - all_plays["seconds_elapsed"]

In [161]:
# Select rows where this play or the next one is a Turnover, and this play's
# or the next one's subType is "Bad Pass".
touches_turnover = (
    all_plays["actionType"].eq("Turnover")
    | all_plays["next_actionType"].eq("Turnover")
)
touches_bad_pass = (
    all_plays["subType"].eq("Bad Pass")
    | all_plays["next_subType"].eq("Bad Pass")
)

turnovers = all_plays.loc[
    touches_turnover & touches_bad_pass,
    ["game_id", "actionId", "next_actionId"]
]
# "<game_id>-<actionId>" string identifying each selected row.
turnovers = turnovers.assign(
    key=turnovers["game_id"].astype(str) + "-" + turnovers["actionId"].astype(str)
)

In [162]:
# Tag plays with the turnover key twice: once matched on the play's own
# actionId and once on its next_actionId. pandas suffixes the two "key"
# columns as key_x and key_y.
keys_by_action = turnovers[["game_id", "actionId", "key"]]
keys_by_next_action = turnovers[["game_id", "next_actionId", "key"]]

tagged_plays = (
    all_plays
    .merge(keys_by_action, how="left", on=["game_id", "actionId"])
    .merge(keys_by_next_action, how="left", on=["game_id", "next_actionId"])
)

# Keep only plays that matched a turnover key through either merge.
has_turnover_key = tagged_plays["key_x"].notna() | tagged_plays["key_y"].notna()
with_tos = tagged_plays.loc[has_turnover_key]

# Narrow to the play directly before a bad-pass turnover: the next play is a
# Bad Pass, this play is neither a turnover nor a rebound, its description
# does not mention a steal, and the next play belongs to a different team.
next_is_bad_pass = with_tos["next_subType"] == "Bad Pass"
not_turnover_or_rebound = ~with_tos["actionType"].isin(["Turnover", "Rebound"])
not_a_steal = ~with_tos["description"].str.contains("STEAL")
possession_changes = with_tos["teamTricode"] != with_tos["next_team"]

with_tos = with_tos.loc[
    next_is_bad_pass & not_turnover_or_rebound & not_a_steal & possession_changes
].sort_values("time_to_turnover")

In [154]:
# Per-team counts of bad-pass turnovers, split by whether the turnover came
# within 1 second of the previous play. Clock values are truncated to whole
# seconds upstream, so "<= 1" is the same as "less than 2 seconds".
# Output columns: next_team, "False" (> 1 second), "True" (<= 1 second), perc_tos.
tos_team_agg = (
    with_tos
    .assign(less_than_2_secs=lambda plays: plays["time_to_turnover"] <= 1)
    .groupby(["next_team", "less_than_2_secs"])
    .agg({"key_x": "count"})
    .reset_index()
    .pivot_table(
        index="next_team",
        columns="less_than_2_secs",
        values="key_x",
        # A team with no turnovers in one bucket would otherwise get NaN there,
        # which turns its rate into NaN.
        fill_value=0,
    )
    # Guarantee both buckets exist even if one never occurs in the data.
    .reindex(columns=[False, True], fill_value=0)
    .reset_index()
    .rename(columns={False: "False", True: "True"})
    .assign(
        perc_tos=lambda counts: counts["True"] / (counts["True"] + counts["False"])
    )
)

In [252]:
# Presentation table: teams ordered by rate (highest first), with
# reader-friendly column names and the rate shown as a percentage string.
DISPLAY_COLUMN_NAMES = {
    "next_team": "Team",
    "False": ">1 Second",
    "True": "<=1 Second",
    "perc_tos": "Rate"
}
ranked_rates = (
    tos_team_agg
    .sort_values("perc_tos", ascending=False)
    .rename(columns=DISPLAY_COLUMN_NAMES)
)
ranked_rates.assign(
    Rate=ranked_rates["Rate"].mul(100).round(2).astype(str) + "%"
)

less_than_2_secs,Team,>1 Second,<=1 Second,Rate
2,BOS,289.0,9.0,3.02%
19,NYK,311.0,9.0,2.81%
16,MIL,357.0,9.0,2.46%
18,NOP,385.0,9.0,2.28%
17,MIN,394.0,9.0,2.23%
13,LAL,427.0,9.0,2.06%
29,WAS,433.0,9.0,2.04%
8,DET,445.0,9.0,1.98%
26,SAS,451.0,9.0,1.96%
9,GSW,402.0,8.0,1.95%


# Viz

### Bad Pass Turnovers

#### % TOs < 2 Seconds By Team

In [247]:
# League-wide share of bad-pass turnovers in the "<= 1 second" bucket.
quick_turnovers = tos_team_agg["True"].sum()
slower_turnovers = tos_team_agg["False"].sum()
overall = quick_turnovers / (quick_turnovers + slower_turnovers)

# One bar per team; BOS is drawn in green, every other team in light blue.
bar_colors = np.where(tos_team_agg["next_team"]=='BOS', 'green', 'lightblue')
team_rate_bars = go.Bar(
    x=tos_team_agg["next_team"],
    y=tos_team_agg["perc_tos"],
    text=tos_team_agg["perc_tos"],
    marker={"color": bar_colors},
    texttemplate="%{y:0.2p}",
    textposition="outside",
    textfont={"size": 14},
)

fig = go.Figure(data=[team_rate_bars])

# Label for the league-rate line, placed just above it.
fig.add_annotation(
    xref="paper",
    x=0.8,
    y=overall+.001,
    text=f"League Rate: {round(overall*100,2)}%",
    font={"size": 10},
    showarrow=False,
)
# Dotted horizontal line at the league-wide rate.
fig.add_hline(
    y=overall,
    line={"dash": "dot"},
    opacity=0.5,
)
fig.update_layout(
    # Bars ordered from highest to lowest rate; the y axis is hidden because
    # each bar carries its own value label.
    xaxis={
        "categoryorder": 'total descending',
        "tickfont": {"color": "gray", "size": 10},
    },
    yaxis={"visible": False},
    paper_bgcolor="#FFFAF0",
    plot_bgcolor="#FFFAF0",
    title=dict(
        text="""
            % of <b>Bad Pass</b> Turnovers Within 1 Second of Inbound<br><sup>Celtics turn it over on inbounds plays at twice the league rate</sup>
            """,
        font=dict(size=16)
    )
)
fig.show()

#### Distribution

In [246]:
# Histogram of the seconds between the preceding play and the bad-pass
# turnover, normalised so bar heights are shares of all such turnovers.
time_to_turnover_hist = go.Histogram(
    x=with_tos["time_to_turnover"],
    histnorm="probability",
    hovertemplate="%{y:.2%}",
    marker={"opacity": 0.8},
)

fig = go.Figure(data=[time_to_turnover_hist])
fig.update_layout(
    plot_bgcolor="#FFFAF0",
    paper_bgcolor="#FFFAF0",
    title={
        "text": "Distribution of <b>Bad Pass</b> Turnovers Within <em>N</em> Seconds of Inbound",
        "font": {"size": 16},
    },
    # One tick per second on the x axis; y axis shown as whole percentages.
    xaxis={"dtick": 1, "title": "Seconds From Deadball"},
    yaxis={"tickformat": ".0%"},
    bargap=0.02,
)
fig.show()

#### Distribution per Team

In [204]:
# Distinct next_team values among the filtered plays; one subplot is drawn per value.
to_teams = with_tos["next_team"].unique()

In [217]:
from plotly.subplots import make_subplots

In [253]:
# Small-multiples grid: one violin of time_to_turnover per team, filled
# column by column (5 rows per column).
N_ROWS = 5
# Size the grid from the number of teams (6 columns for 30 teams) so an extra
# or missing next_team value cannot point at a cell outside the grid.
n_cols = max(1, ceil(len(to_teams) / N_ROWS))

fig = make_subplots(
    rows=N_ROWS,
    cols=n_cols
)
for i, t in enumerate(to_teams):
    # Zero-based column/row of the i-th team in column-major order.
    col_idx, row_idx = divmod(i, N_ROWS)
    df = with_tos.loc[with_tos["next_team"] == t]
    # add_trace with row/col replaces the deprecated append_trace.
    fig.add_trace(
        go.Violin(
            x=df["next_team"],
            y=df["time_to_turnover"],
        ),
        row=row_idx + 1,
        col=col_idx + 1
    )
fig.update_layout(
    showlegend=False,
    plot_bgcolor="#FFFAF0",
    paper_bgcolor="#FFFAF0",
)
fig.show()