In [1]:
import pandas as pd
import polars
from nba_api.stats import endpoints as nba_endpoints
from nba_api.live.nba import endpoints as nba_live_endpoints
import plotly.graph_objects as go
import multiprocessing as mp
from tqdm.notebook import tqdm
import asyncio
import nest_asyncio
from parallel_pandas import ParallelPandas
nest_asyncio.apply()

In [2]:
pd.set_option("display.max_rows", 100)

# GW or Tying Shots Analysis

In [3]:
playerindex = nba_endpoints.playerindex.PlayerIndex()
data = playerindex.data_sets[0].get_dict().get("PlayerIndex").get("data")
headers = playerindex.data_sets[0].get_dict().get("PlayerIndex").get("headers")
players = pd.DataFrame(
    data, columns=headers
)

In [44]:
nba_endpoints.playercareerstats.PlayerCareerStats("1628369").get_data_frames()[0].transpose()

Unnamed: 0,0,1,2,3,4,5,6
PLAYER_ID,1628369,1628369,1628369,1628369,1628369,1628369,1628369
SEASON_ID,2017-18,2018-19,2019-20,2020-21,2021-22,2022-23,2023-24
LEAGUE_ID,00,00,00,00,00,00,00
TEAM_ID,1610612738,1610612738,1610612738,1610612738,1610612738,1610612738,1610612738
TEAM_ABBREVIATION,BOS,BOS,BOS,BOS,BOS,BOS,BOS
PLAYER_AGE,20.0,21.0,22.0,23.0,24.0,25.0,25.0
GP,80,79,66,64,76,74,38
GS,80,79,66,64,76,74,38
MIN,2443.0,2455.0,2265.0,2290.0,2731.0,2732.0,1360.0
FGM,397,466,552,605,708,727,348


In [8]:
start_year = 2018

In [9]:
seasons = [
    f"{x}-{(x+1)-2000}" for x in range(int(start_year),2024)
]
print(seasons)
# ['2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24']

['2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24']


In [10]:
game = nba_live_endpoints.playbyplay

In [11]:
box = nba_endpoints.boxscoresummaryv2.BoxScoreSummaryV2("0022300421").get_data_frames()[0][
    ["GAME_ID", "HOME_TEAM_ID", "VISITOR_TEAM_ID"]
]

In [12]:
players = pd.DataFrame()
for s in seasons:
    data = playerindex.data_sets[0].get_dict().get("PlayerIndex").get("data")
    headers = playerindex.data_sets[0].get_dict().get("PlayerIndex").get("headers")
    players = pd.concat([players,
        pd.DataFrame(
            data, columns=headers
        ).assign(season=s)[
            ["PERSON_ID", "PLAYER_FIRST_NAME", "PLAYER_LAST_NAME", "TEAM_ID", "TEAM_ABBREVIATION", "season"]
        ]
    ], axis=0)

In [13]:
teamlogs = nba_endpoints.teamgamelogs

game_logs = pd.DataFrame()
for s in seasons:
    game_log = teamlogs.TeamGameLogs(
        season_nullable=s
    ).get_data_frames()[0][["GAME_ID", "MATCHUP"]].assign(
        first_team=lambda row: row["MATCHUP"].str.extract("(^[A-Z]+)"),
        second_team=lambda row: row["MATCHUP"].str.extract("([A-Z]+$)"),
        home_away=lambda row: row["MATCHUP"].str.extract("(@)"),
        home=lambda row: np.where(row["home_away"] == "@", row["second_team"], row["first_team"]),
        away=lambda row: np.where(row["home_away"] == "@", row["first_team"], row["second_team"]),
        season=s
    ).drop(["first_team", "second_team", "home_away"], axis=1)

    game_logs = pd.concat([game_logs, game_log], axis=0)

In [14]:
teams = teamlogs.TeamGameLogs(season_nullable=s).get_data_frames()[0]["TEAM_ID"].unique()

team_info = pd.DataFrame()
for t in teams:
    info = nba_endpoints.teaminfocommon.TeamInfoCommon(team_id=t).get_data_frames()[0][["TEAM_ID", "TEAM_ABBREVIATION"]]
    team_info = pd.concat([team_info, info], axis=0)

In [15]:
game_logs = game_logs.merge(
    team_info,
    left_on="home",
    right_on="TEAM_ABBREVIATION"
).rename(
    columns={"TEAM_ID": "HOME_ID", "TEAM_ABBREVIATION": "HOME_ABBREVIATION"}
).merge(
    team_info,
    left_on="away",
    right_on="TEAM_ABBREVIATION"
).rename(
    columns={"TEAM_ID": "AWAY_ID", "TEAM_ABBREVIATION": "AWAY_ABBREVIATION"}
).drop(["MATCHUP", "home", "away"], axis=1).drop_duplicates().sort_values("GAME_ID")

In [16]:
game_df = teamlogs.TeamGameLogs(season_nullable="2023-24").get_data_frames()[0]

In [17]:
game_df.head().transpose()

Unnamed: 0,0,1,2,3,4
SEASON_YEAR,2023-24,2023-24,2023-24,2023-24,2023-24
TEAM_ID,1610612741,1610612758,1610612766,1610612765,1610612738
TEAM_ABBREVIATION,CHI,SAC,CHA,DET,BOS
TEAM_NAME,Chicago Bulls,Sacramento Kings,Charlotte Hornets,Detroit Pistons,Boston Celtics
GAME_ID,0022300523,0022300518,0022300518,0022300519,0022300517
GAME_DATE,2024-01-10T00:00:00,2024-01-10T00:00:00,2024-01-10T00:00:00,2024-01-10T00:00:00,2024-01-10T00:00:00
MATCHUP,CHI vs. HOU,SAC @ CHA,CHA vs. SAC,DET vs. SAS,BOS vs. MIN
WL,W,W,L,L,W
MIN,53.0,48.0,48.0,48.0,53.0
FGM,42,46,35,44,39


## Get Data

In [3]:
ParallelPandas.initialize(n_cpu=8, split_factor=4, disable_pr_bar=True)

In [5]:
def get_final_shots_pbp(g):

    df = pd.DataFrame(game.PlayByPlay(game_id=g[0]).get_dict().get("game").get("actions")).assign(
        clock_minutes=lambda row: row["clock"].str.extract(
            "PT(\d{2})").astype(int),
        clock_seconds=lambda row: row["clock"].str.extract(
            "M(\d{2}.\d{2})S").astype(float),
        game_id=g[0],
        home_id=g[2],
        away_id=g[4],
        home_team=g[3],
        away_team=g[5],
        season=g[1]
    )

    dff = df.loc[
        (df["period"].isin([4, 5])) &
        (df["personId"] != 0),
        [
            "season", "game_id", "home_id", "away_id", "home_team", "away_team",
            "personId", "actionNumber", "period", "clock", "clock_minutes", "clock_seconds",
            "actionType", "subType", "shotResult", "scoreHome", "scoreAway",
            "x", "y", "shotDistance"
        ]
    ]
    
    player_team = nba_endpoints.boxscorescoringv2.BoxScoreScoringV2(g[0]).get_data_frames()[0][
        ["GAME_ID", "TEAM_ABBREVIATION", "PLAYER_ID", "PLAYER_NAME"]
    ]
    
    return dff.merge(
        player_team, left_on=["game_id", "personId"], right_on=["GAME_ID", "PLAYER_ID"]
    ).drop(["GAME_ID", "PLAYER_ID"], axis=1)
    
def get_final_shots_task(game):
    try:
        return get_final_shots_pbp(game)
    except Exception as e:
        print(e)
        return []

In [29]:
results = pd.DataFrame()
for g in tqdm(game_logs.values.tolist()[::-1]):
    shots = get_final_shots_task(g)
    if not isinstance(shots, list):
        if shots.shape[0] > 0:
            results = pd.concat([results, shots])
        else:
            pass
    else:
        pass

  0%|          | 0/6385 [00:00<?, ?it/s]

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (

Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)
Expecting value: line 1 column 1 (char 0)


In [35]:
results.columns = [
    "season", "game_id", "home_id", "away_id", "home_team", "away_team",
    "personId", "actionNumber", "period", "clock", "clock_minutes", "clock_seconds",
    "actionType", "subType", "shotResult", "scoreHome", "scoreAway",
    "x", "y", "shotDistance", "player_team", "player_name"
]

In [37]:
results

Unnamed: 0,season,game_id,home_id,away_id,home_team,away_team,personId,actionNumber,period,clock,clock_minutes,clock_seconds,actionType,subType,shotResult,scoreHome,scoreAway,x,y,shotDistance,player_team,player_name
0,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1629629,504,4,PT12M00.00S,12,0.0,substitution,out,,110,71,,,,LAL,Cam Reddish
1,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1629627,505,4,PT12M00.00S,12,0.0,substitution,out,,110,71,,,,NOP,Zion Williamson
2,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1630230,506,4,PT12M00.00S,12,0.0,substitution,out,,110,71,,,,NOP,Naji Marshall
3,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1627742,507,4,PT12M00.00S,12,0.0,substitution,out,,110,71,,,,NOP,Brandon Ingram
4,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1631108,508,4,PT12M00.00S,12,0.0,substitution,in,,110,71,,,,LAL,Max Christie
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1628366,776,5,PT02M06.00S,2,6.0,foul,personal,,122,122,,,,NOP,Lonzo Ball
188,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1628366,785,5,PT01M27.00S,1,27.0,substitution,out,,127,122,,,,NOP,Lonzo Ball
189,2019-20,0021900001,1610612761,1610612740,TOR,NOP,202324,766,5,PT03M08.00S,3,8.0,substitution,in,,119,120,,,,NOP,Derrick Favors
190,2019-20,0021900001,1610612761,1610612740,TOR,NOP,202324,772,5,PT02M37.00S,2,37.0,foul,personal,,121,120,,,,NOP,Derrick Favors


In [38]:
results[[
            "season", "game_id", "home_id", "away_id", "home_team", "away_team",
            "personId", "player_name", "player_team", "actionNumber", "period", 
            "clock", "clock_minutes", "clock_seconds", "actionType", "subType", 
            "shotResult", "scoreHome", "scoreAway", "x", "y", "shotDistance"
        ]].to_parquet(path="4Q_OT_Shots.parquet", index=False)

In [40]:
del results

In [4]:
shot_df = pd.read_parquet("4Q_OT_Shots.parquet")

In [5]:
shot_df.assign(
    shotResult_int=lambda row: np.where(row["shotResult"]=="Made", 1, 0)
).groupby("actionType").agg({
    "game_id": "count",
    "shotResult_int": "sum"
})

Unnamed: 0_level_0,game_id,shotResult_int
actionType,Unnamed: 1_level_1,Unnamed: 2_level_1
2pt,125337,65666
3pt,84222,28821
block,11530,0
ejection,142,0
foul,56382,0
freethrow,64604,49099
jumpball,1372,0
rebound,107347,0
steal,17001,0
substitution,152620,0


In [6]:
shot_df["home_player"] = (shot_df["home_team"] == shot_df["player_team"]).astype(int)
shot_df["away_player"] = (shot_df["away_team"] == shot_df["player_team"]).astype(int)
shot_df["home_lead"] = (shot_df["scoreHome"] > shot_df["scoreAway"]).astype(int)
shot_df["away_lead"] = (shot_df["scoreHome"] < shot_df["scoreAway"]).astype(int)
shot_df["tied"] = (shot_df["scoreHome"] == shot_df["scoreAway"]).astype(int)
shot_df["shotResult_int"] = np.where(shot_df["shotResult"]=="Made", 1, 0)

In [7]:
shot_df.loc[shot_df["actionType"] != "substitution"]

Unnamed: 0,season,game_id,home_id,away_id,home_team,away_team,personId,player_name,player_team,actionNumber,...,scoreAway,x,y,shotDistance,home_player,away_player,home_lead,away_lead,tied,shotResult_int
5,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1631108,Max Christie,LAL,545,...,79,33.722076,59.803922,26.90,1,0,0,1,0,0
6,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1631108,Max Christie,LAL,560,...,81,,,,1,0,0,1,0,0
7,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1631108,Max Christie,LAL,565,...,81,,,,1,0,0,1,0,0
8,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1631108,Max Christie,LAL,567,...,81,,,,1,0,0,1,0,0
9,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1631108,Max Christie,LAL,578,...,81,,,,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
653763,2019-20,0021900001,1610612761,1610612740,TOR,NOP,200755,JJ Redick,NOP,804,...,122,,,,0,1,1,0,0,0
653765,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1628366,Lonzo Ball,NOP,759,...,120,91.672142,8.157169,21.15,0,1,0,1,0,0
653766,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1628366,Lonzo Ball,NOP,762,...,120,,,,0,1,0,1,0,0
653767,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1628366,Lonzo Ball,NOP,776,...,122,,,,0,1,0,0,1,0


In [39]:
len(shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
#     & (shot_df["player_name"] == "Derrick White")
, "game_id"
].unique())

1323

## Analysis

### Jump Shots

#### Shots with <= 30 seconds left in 4th or OT, one possession game

In [59]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
#     & (shot_df["player_name"] == "Derrick White")
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("fgm", "")] + (0.5 * row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(
    ("fga",""), ascending=False
).head(50).sort_values(("efg_perc",""), ascending=False).reset_index()

Unnamed: 0_level_0,player_name,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,Unnamed: 1_level_1,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,Kyle Kuzma,10.0,17.0,4.923,28.006471,7.0,8.0,15.0,27.0,19.457037,0.555556,0.703704
1,Paul George,5.0,19.0,10.296,25.922105,3.0,9.0,12.0,24.0,22.666667,0.5,0.6875
2,Zach LaVine,26.0,15.0,6.687692,27.985333,13.0,9.0,22.0,41.0,14.479512,0.536585,0.646341
3,Devonte' Graham,5.0,21.0,4.868,29.865714,1.0,10.0,11.0,26.0,25.058462,0.423077,0.615385
4,Buddy Hield,11.0,13.0,6.936364,28.418462,7.0,5.0,12.0,24.0,18.5725,0.5,0.604167
5,Damian Lillard,14.0,20.0,9.005714,31.0155,7.0,9.0,16.0,34.0,21.952647,0.470588,0.602941
6,D'Angelo Russell,9.0,16.0,6.186667,26.694375,6.0,6.0,12.0,25.0,19.3116,0.48,0.6
7,Kyrie Irving,5.0,16.0,7.244,27.731875,2.0,7.0,9.0,21.0,22.85381,0.428571,0.595238
8,Nikola Jokic,33.0,11.0,7.357879,29.639091,20.0,4.0,24.0,44.0,12.928182,0.545455,0.590909
9,Shai Gilgeous-Alexander,31.0,17.0,6.723226,27.593529,19.0,6.0,25.0,48.0,14.114792,0.520833,0.583333


#### Shots with <= 60 seconds left in 4th or OT, one possession game

In [70]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
#     & (shot_df["player_name"] == "Derrick White")
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("fgm", "")] + (0.5 * row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(
    ("fga",""), 
    ascending=False
).head(50).sort_values(("efg_perc",""), ascending=False).reset_index()

Unnamed: 0_level_0,player_name,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,Unnamed: 1_level_1,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,Paul George,19.0,27.0,5.875263,25.745556,12.0,13.0,25.0,46.0,17.538261,0.543478,0.684783
1,Jaylen Brown,27.0,15.0,8.395185,26.914667,14.0,8.0,22.0,42.0,15.009286,0.52381,0.619048
2,Devonte' Graham,8.0,25.0,6.54375,29.5452,2.0,12.0,14.0,33.0,23.969091,0.424242,0.606061
3,Kyle Kuzma,19.0,27.0,5.619474,27.711852,12.0,10.0,22.0,46.0,18.586739,0.478261,0.586957
4,Dejounte Murray,33.0,11.0,8.738485,27.073636,15.0,7.0,22.0,44.0,13.322273,0.5,0.579545
5,Damian Lillard,33.0,32.0,5.875758,29.979063,18.0,13.0,31.0,65.0,17.742,0.476923,0.576923
6,Stephen Curry,31.0,35.0,6.037419,27.202857,20.0,12.0,32.0,66.0,17.261515,0.484848,0.575758
7,Giannis Antetokounmpo,31.0,3.0,5.143226,25.773333,18.0,1.0,19.0,34.0,6.963529,0.558824,0.573529
8,D'Angelo Russell,13.0,22.0,8.319231,26.457273,8.0,8.0,16.0,35.0,19.720286,0.457143,0.571429
9,Shai Gilgeous-Alexander,40.0,21.0,6.37925,27.780952,25.0,6.0,31.0,61.0,13.747049,0.508197,0.557377


#### Shots with <= 30 seconds left in 4th or OT, one possession lead, leading team player has ball

In [61]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 1)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 1)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("fgm", "")] + (0.5 * row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(("fga",""), ascending=False).head(50).sort_values(("efg_perc",""), ascending=False)

Unnamed: 0_level_0,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
player_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
D'Angelo Russell,3.0,3.0,4.956667,25.42,2.0,3.0,5.0,6.0,15.188333,0.833333,1.083333
Karl-Anthony Towns,2.0,4.0,6.205,26.4325,2.0,3.0,5.0,6.0,19.69,0.833333,1.083333
Kyle Kuzma,3.0,4.0,8.31,25.23,1.0,4.0,5.0,7.0,17.978571,0.714286,1.0
Shai Gilgeous-Alexander,6.0,5.0,8.965,27.702,6.0,3.0,9.0,11.0,17.481818,0.818182,0.954545
Luka Doncic,3.0,7.0,7.083333,28.834286,2.0,5.0,7.0,10.0,22.309,0.7,0.95
Russell Westbrook,6.0,3.0,6.228333,23.02,5.0,2.0,7.0,9.0,11.825556,0.777778,0.888889
Jrue Holiday,5.0,2.0,12.028,25.62,3.0,2.0,5.0,7.0,15.911429,0.714286,0.857143
Chris Paul,4.0,1.0,11.1,27.07,4.0,0.0,4.0,5.0,14.294,0.8,0.8
Malik Monk,3.0,2.0,15.503333,26.975,1.0,2.0,3.0,5.0,20.092,0.6,0.8
Bradley Beal,6.0,1.0,11.16,24.68,4.0,1.0,5.0,7.0,13.091429,0.714286,0.785714


#### Shots with <= 60 seconds left in 4th or OT, one possession lead, leading team player has ball

In [67]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 1)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 1)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("shotResult_int", "2pt")] + (0.5 + row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(
    ("fga",""), ascending=False
).head(50).sort_values(("efg_perc",""), ascending=False).reset_index()

Unnamed: 0_level_0,player_name,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,Unnamed: 1_level_1,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,Lauri Markkanen,7.0,6.0,7.63,26.708333,5.0,4.0,9.0,13.0,16.435385,0.692308,0.730769
1,Shai Gilgeous-Alexander,9.0,7.0,7.735556,27.25,8.0,3.0,11.0,16.0,16.273125,0.6875,0.71875
2,DeMar DeRozan,25.0,3.0,12.6748,24.863333,17.0,2.0,19.0,28.0,13.980714,0.678571,0.696429
3,Bradley Beal,9.0,2.0,10.57,25.48,6.0,1.0,7.0,11.0,13.280909,0.636364,0.681818
4,Tobias Harris,9.0,5.0,7.865556,23.97,4.0,5.0,9.0,14.0,13.617143,0.642857,0.678571
5,Damian Lillard,16.0,7.0,4.31,26.881429,10.0,5.0,15.0,23.0,11.179565,0.652174,0.673913
6,Jamal Murray,10.0,3.0,14.234,25.846667,6.0,2.0,8.0,13.0,16.913846,0.615385,0.653846
7,Harrison Barnes,8.0,4.0,5.3575,25.2875,5.0,2.0,7.0,12.0,12.000833,0.583333,0.625
8,Paul George,7.0,7.0,5.285714,26.111429,5.0,3.0,8.0,14.0,15.698571,0.571429,0.607143
9,Jrue Holiday,10.0,6.0,9.723,26.85,5.0,4.0,9.0,16.0,16.145625,0.5625,0.59375


#### Shots with <= 30 seconds left in 4th or OT, one possession lead, trailing team player has ball

In [62]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 0)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 0)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("fgm", "")] + (0.5 * row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(("fga",""), ascending=False).head(50).sort_values(("efg_perc",""), ascending=False)

Unnamed: 0_level_0,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
player_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Dejounte Murray,5.0,7.0,2.292,28.354286,3.0,5.0,8.0,12.0,17.495,0.666667,0.875
Paul George,2.0,10.0,2.73,25.611,2.0,5.0,7.0,12.0,21.7975,0.583333,0.791667
Stephen Curry,4.0,13.0,2.145,27.643077,3.0,6.0,9.0,17.0,21.643529,0.529412,0.705882
Zach LaVine,16.0,10.0,7.105,28.79,8.0,6.0,14.0,26.0,15.445385,0.538462,0.653846
Devonte' Graham,1.0,11.0,3.06,26.321818,0.0,5.0,5.0,12.0,24.383333,0.416667,0.625
Chris Paul,9.0,3.0,8.044444,26.986667,3.0,3.0,6.0,12.0,12.78,0.5,0.625
Nikola Jokic,16.0,6.0,5.41625,30.266667,10.0,2.0,12.0,22.0,12.193636,0.545455,0.590909
Kyle Kuzma,7.0,12.0,3.471429,29.299167,6.0,3.0,9.0,19.0,19.783684,0.473684,0.552632
Giannis Antetokounmpo,10.0,1.0,5.654,26.49,6.0,0.0,6.0,11.0,7.548182,0.545455,0.545455
Anfernee Simons,7.0,5.0,4.27,28.976,5.0,1.0,6.0,12.0,14.564167,0.5,0.541667


#### Shots with <= 60 seconds left in 4th or OT, one possession lead, trailing team player has ball

In [68]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 0)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 0)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("shotResult_int", "2pt")] + (0.5 + row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(
    ("fga",""), ascending=False
).head(50).sort_values(("efg_perc",""), ascending=False).reset_index()

Unnamed: 0_level_0,player_name,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,Unnamed: 1_level_1,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,Dejounte Murray,13.0,7.0,2.944615,28.354286,8.0,5.0,13.0,20.0,11.838,0.65,0.675
1,Paul George,8.0,15.0,2.48125,25.569333,7.0,7.0,14.0,23.0,17.538696,0.608696,0.630435
2,Anthony Edwards,14.0,2.0,4.871429,24.615,7.0,2.0,9.0,16.0,7.339375,0.5625,0.59375
3,Kyrie Irving,10.0,9.0,7.245,28.651111,6.0,4.0,10.0,19.0,17.384737,0.526316,0.552632
4,Stephen Curry,12.0,18.0,3.439167,27.075,9.0,7.0,16.0,30.0,17.620667,0.533333,0.55
5,Jaylen Brown,16.0,7.0,7.184375,25.99,8.0,4.0,12.0,23.0,12.907826,0.521739,0.543478
6,Franz Wagner,6.0,10.0,2.556667,26.236,5.0,3.0,8.0,16.0,17.35625,0.5,0.53125
7,Joel Embiid,15.0,10.0,7.616667,32.917,6.0,6.0,12.0,25.0,17.7368,0.48,0.5
8,Malcolm Brogdon,10.0,7.0,4.042,30.037143,6.0,2.0,8.0,17.0,14.745882,0.470588,0.5
9,Trae Young,25.0,21.0,6.614,30.744286,16.0,6.0,22.0,46.0,17.63,0.478261,0.48913


#### Shots with <= 30 seconds left in 4th or OT, tie game

In [71]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (shot_df["tied"]==1)
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("fgm", "")] + (0.5 * row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(
    ("fga",""
    ), ascending=False).head(50).sort_values(("efg_perc",""), ascending=False).reset_index()

Unnamed: 0_level_0,player_name,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,Unnamed: 1_level_1,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,Buddy Hield,3.0,2.0,11.1,26.165,2.0,2.0,4.0,5.0,17.126,0.8,1.0
1,Malik Monk,2.0,3.0,0.97,24.563333,2.0,2.0,4.0,5.0,15.126,0.8,1.0
2,RJ Barrett,3.0,1.0,1.66,25.72,2.0,1.0,3.0,4.0,7.675,0.75,0.875
3,Bogdan Bogdanovic,1.0,3.0,1.97,24.626667,1.0,1.0,2.0,4.0,18.9625,0.5,0.625
4,De'Aaron Fox,7.0,1.0,9.238571,25.31,5.0,0.0,5.0,8.0,11.2475,0.625,0.625
5,Russell Westbrook,2.0,2.0,11.235,26.27,1.0,1.0,2.0,4.0,18.7525,0.5,0.625
6,Reggie Bullock Jr.,1.0,3.0,2.41,22.963333,1.0,1.0,2.0,4.0,17.825,0.5,0.625
7,Jamal Murray,5.0,4.0,15.164,27.7825,1.0,3.0,4.0,9.0,20.772222,0.444444,0.611111
8,Alec Burks,2.0,3.0,9.35,27.953333,0.0,2.0,2.0,5.0,20.512,0.4,0.6
9,Zach LaVine,3.0,3.0,7.063333,26.213333,2.0,1.0,3.0,6.0,16.638333,0.5,0.583333


#### Shots with <= 60 seconds left in 4th or OT, tie game

In [64]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (shot_df["tied"]==1)
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("fgm", "")] + (0.5 * row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(("fga",""), ascending=False).head(50).sort_values(("efg_perc",""), ascending=False)

Unnamed: 0_level_0,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
player_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Malik Monk,3.0,3.0,0.993333,24.563333,3.0,2.0,5.0,6.0,12.778333,0.833333,1.0
RJ Barrett,4.0,2.0,1.4275,26.595,3.0,2.0,5.0,6.0,9.816667,0.833333,1.0
Luka Doncic,4.0,5.0,14.3275,27.26,3.0,2.0,5.0,9.0,21.512222,0.555556,0.666667
Miles Bridges,1.0,5.0,9.47,35.114,1.0,2.0,3.0,6.0,30.84,0.5,0.666667
Bogdan Bogdanovic,2.0,4.0,2.49,24.735,1.0,2.0,3.0,6.0,17.32,0.5,0.666667
Seth Curry,3.0,3.0,10.24,26.273333,1.0,2.0,3.0,6.0,18.256667,0.5,0.666667
Damian Lillard,4.0,7.0,9.665,30.367143,2.0,3.0,5.0,11.0,22.839091,0.454545,0.590909
Jamal Murray,7.0,5.0,16.511429,28.032,1.0,4.0,5.0,12.0,21.311667,0.416667,0.583333
D'Angelo Russell,4.0,9.0,9.6325,27.101111,3.0,3.0,6.0,13.0,21.726154,0.461538,0.576923
Devonte' Graham,2.0,5.0,5.3,26.886,1.0,2.0,3.0,7.0,20.718571,0.428571,0.571429


### Free Throws

#### FTs with <= 30 seconds left in 4th or OT, one possession game

In [54]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count"
}).reset_index().assign(
    ft_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("ft_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,ft_perc
175,1628374,Lauri Markkanen,16,17,0.941176
103,203924,Jerami Grant,20,22,0.909091
43,202681,Kyrie Irving,20,23,0.869565
60,203078,Bradley Beal,26,30,0.866667
2,101108,Chris Paul,19,22,0.863636
28,201939,Stephen Curry,19,22,0.863636
8,201142,Kevin Durant,27,32,0.84375
38,202331,Paul George,21,25,0.84
112,203954,Joel Embiid,25,30,0.833333
98,203897,Zach LaVine,29,35,0.828571


#### FTs with <= 60 seconds left in 4th or OT, one possession game

In [17]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count"
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("fg_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,fg_perc
47,202681,Kyrie Irving,26,29,0.896552
31,201939,Stephen Curry,25,28,0.892857
111,203924,Jerami Grant,23,26,0.884615
2,101108,Chris Paul,22,25,0.88
106,203897,Zach LaVine,38,44,0.863636
9,201142,Kevin Durant,30,35,0.857143
42,202331,Paul George,24,28,0.857143
67,203078,Bradley Beal,36,42,0.857143
221,1628983,Shai Gilgeous-Alexander,46,54,0.851852
68,203081,Damian Lillard,43,51,0.843137


#### FTs with <= 30 seconds left in 4th or OT, one possession lead, leading team player has ball

In [18]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 1)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 1)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("fg_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,fg_perc
248,1630175,Cole Anthony,10,10,1.0
2,101108,Chris Paul,16,17,0.941176
7,201142,Kevin Durant,21,23,0.913043
56,203078,Bradley Beal,19,21,0.904762
57,203081,Damian Lillard,23,26,0.884615
151,1628368,De'Aaron Fox,15,17,0.882353
90,203897,Zach LaVine,12,14,0.857143
94,203924,Jerami Grant,11,13,0.846154
192,1629001,De'Anthony Melton,11,13,0.846154
25,201939,Stephen Curry,16,19,0.842105


#### FTs with <= 60 seconds left in 4th or OT, one possession lead, leading team player has ball

In [19]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 1)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 1)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("fg_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,fg_perc
2,101108,Chris Paul,18,19,0.947368
60,203078,Bradley Beal,26,28,0.928571
8,201142,Kevin Durant,23,25,0.92
96,203897,Zach LaVine,18,20,0.9
61,203081,Damian Lillard,23,26,0.884615
161,1628368,De'Aaron Fox,15,17,0.882353
100,203924,Jerami Grant,12,14,0.857143
27,201939,Stephen Curry,18,21,0.857143
215,1629027,Trae Young,29,34,0.852941
202,1629001,De'Anthony Melton,11,13,0.846154


#### FTs with <= 30 seconds left in 4th or OT, one possession lead, trailing team player has ball

In [20]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 0)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 0)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count"
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("fg_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,fg_perc
20,202681,Kyrie Irving,5,5,1.0
59,203924,Jerami Grant,7,7,1.0
128,1629014,Anfernee Simons,5,5,1.0
107,1628374,Lauri Markkanen,10,10,1.0
158,1630169,Tyrese Haliburton,7,7,1.0
124,1628983,Shai Gilgeous-Alexander,15,16,0.9375
19,202331,Paul George,8,9,0.888889
24,202692,Alec Burks,7,8,0.875
86,1627742,Brandon Ingram,6,7,0.857143
145,1629636,Darius Garland,6,7,0.857143


#### FTs with <= 60 seconds left in 4th or OT, one possession lead, trailing team player has ball

In [21]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (((shot_df["home_lead"] == 1) & (shot_df["home_player"] == 0)) | ((shot_df["away_lead"] == 1) & (shot_df["away_player"] == 0)))
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 3)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count"
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("fg_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,fg_perc
28,202681,Kyrie Irving,10,10,1.0
126,1628374,Lauri Markkanen,12,12,1.0
190,1630169,Tyrese Haliburton,8,8,1.0
148,1628983,Shai Gilgeous-Alexander,20,21,0.952381
169,1629628,RJ Barrett,11,12,0.916667
72,203924,Jerami Grant,9,10,0.9
68,203903,Jordan Clarkson,9,10,0.9
33,202692,Alec Burks,9,10,0.9
101,1627742,Brandon Ingram,9,10,0.9
173,1629636,Darius Garland,9,10,0.9


#### FTs with <= 30 seconds left in 4th or OT, tie game

In [22]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (shot_df["tied"]==1)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count"
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("fg_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,fg_perc
41,1626164,Devin Booker,2,2,1.0
44,1627734,Domantas Sabonis,3,3,1.0
12,201937,Ricky Rubio,2,2,1.0
85,1629636,Darius Garland,2,2,1.0
77,1629014,Anfernee Simons,2,2,1.0
51,1627759,Jaylen Brown,2,2,1.0
24,203081,Damian Lillard,2,2,1.0
25,203114,Khris Middleton,2,2,1.0
96,1631095,Jabari Smith Jr.,1,1,1.0
4,201566,Russell Westbrook,3,3,1.0


#### FTs with <= 60 seconds left in 4th or OT, tie game

In [23]:
shot_df.loc[
    (shot_df["actionType"].isin(["freethrow"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 60)
    & (shot_df["tied"]==1)
].groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count"
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"]
).sort_values("game_id", ascending=False).head(50).sort_values("fg_perc", ascending=False)

Unnamed: 0,personId,player_name,shotResult_int,game_id,fg_perc
126,1630567,Scottie Barnes,3,3,1.0
30,203081,Damian Lillard,3,3,1.0
55,1626179,Terry Rozier,3,3,1.0
127,1630578,Alperen Sengun,2,2,1.0
67,1627759,Jaylen Brown,2,2,1.0
128,1630595,Cade Cunningham,2,2,1.0
16,202331,Paul George,4,4,1.0
78,1628374,Lauri Markkanen,4,4,1.0
5,201566,Russell Westbrook,3,3,1.0
129,1631094,Paolo Banchero,3,3,1.0


### Last Shot

#### Last shot of the game %

In [65]:
shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
    & (shot_df["tied"]==1)
].groupby(["personId", "player_name", "actionType"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean"
}).reset_index().pivot_table(
    index=["player_name"],
    columns=["actionType"],
    values=["shotDistance", "shotResult_int", "game_id"]
).assign(
    fgm=lambda row: (row[("shotResult_int", "2pt")] + row[("shotResult_int", "3pt")]),
    fga=lambda row: (row[("game_id", "2pt")] + row[("game_id", "3pt")]),
    avg_shotDistance=lambda row: ((row[("shotDistance", "2pt")]*row[("game_id", "2pt")])+(row[("shotDistance", "3pt")]*row[("game_id", "3pt")]))/row[("fga", "")],
    fg_perc=lambda row: row[("fgm", "")]/row[("fga", "")],
    efg_perc=lambda row: (row[("fgm", "")] + (0.5 * row[("shotResult_int", "3pt")]))/row[("fga", "")],
).sort_values(("fga",""), ascending=False).head(50)#.sort_values(("efg_perc",""), ascending=False)

Unnamed: 0_level_0,game_id,game_id,shotDistance,shotDistance,shotResult_int,shotResult_int,fgm,fga,avg_shotDistance,fg_perc,efg_perc
actionType,2pt,3pt,2pt,3pt,2pt,3pt,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
player_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Jayson Tatum,18.0,1.0,14.351111,25.11,7.0,1.0,8.0,19.0,14.917368,0.421053,0.447368
DeMar DeRozan,16.0,2.0,12.45375,26.595,6.0,0.0,6.0,18.0,14.025,0.333333,0.333333
LeBron James,9.0,7.0,4.663333,28.015714,3.0,1.0,4.0,16.0,14.88,0.25,0.28125
Jimmy Butler,7.0,8.0,11.062857,30.755,2.0,2.0,4.0,15.0,21.565333,0.266667,0.333333
D'Angelo Russell,4.0,8.0,9.6325,27.1575,3.0,2.0,5.0,12.0,21.315833,0.416667,0.5
Devin Booker,5.0,7.0,11.406,26.371429,1.0,1.0,2.0,12.0,20.135833,0.166667,0.208333
Joel Embiid,10.0,1.0,17.261,26.47,2.0,1.0,3.0,11.0,18.098182,0.272727,0.318182
Jaylen Brown,4.0,6.0,13.825,28.993333,1.0,2.0,3.0,10.0,22.926,0.3,0.4
James Harden,5.0,4.0,5.934,27.0175,0.0,0.0,0.0,9.0,15.304444,0.0,0.0
Jamal Murray,5.0,4.0,15.164,27.7825,1.0,3.0,4.0,9.0,20.772222,0.444444,0.611111


In [25]:
last_shot_base = shot_df.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (shot_df["clock_minutes"] == 0)
    & (shot_df["clock_seconds"] <= 30)
].sort_values(["game_id", "period", "actionNumber"])

In [26]:
last_shot_base["shot_rev_order"] = last_shot_base.groupby(["game_id", "period"])["actionNumber"].rank(ascending=False)

In [27]:
last_shot_base["scoreHome_prev"] = last_shot_base.groupby(["game_id", "period"])["scoreHome"].shift(1)
last_shot_base["scoreAway_prev"] = last_shot_base.groupby(["game_id", "period"])["scoreAway"].shift(1)

In [28]:
last_shot_base

Unnamed: 0,season,game_id,home_id,away_id,home_team,away_team,personId,player_name,player_team,actionNumber,...,shotDistance,home_player,away_player,home_lead,away_lead,tied,shotResult_int,shot_rev_order,scoreHome_prev,scoreAway_prev
653638,2019-20,0021900001,1610612761,1610612740,TOR,NOP,201950,Jrue Holiday,NOP,727,...,24.88,0,1,0,0,1,0,2.0,,
653721,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1626181,Norman Powell,TOR,734,...,28.37,1,0,0,0,1,0,1.0,117,117
653733,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1627832,Fred VanVleet,TOR,798,...,1.96,1,0,1,0,0,0,2.0,,
653641,2019-20,0021900001,1610612761,1610612740,TOR,NOP,201950,Jrue Holiday,NOP,802,...,27.72,0,1,1,0,0,0,1.0,130,122
653545,2019-20,0021900002,1610612746,1610612747,LAC,LAL,203584,Troy Daniels,LAL,677,...,23.87,0,1,1,0,0,0,1.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,2023-24,0022301227,1610612738,1610612752,BOS,NYK,203944,Julius Randle,NYK,646,...,1.82,0,1,1,0,0,1,1.0,,
374,2023-24,0022301228,1610612756,1610612758,PHX,SAC,1631165,Keon Ellis,SAC,663,...,26.76,0,1,0,1,0,0,1.0,,
157,2023-24,0022301229,1610612749,1610612754,MIL,IND,203114,Khris Middleton,MIL,693,...,23.63,1,0,0,1,0,1,2.0,,
158,2023-24,0022301229,1610612749,1610612754,MIL,IND,203114,Khris Middleton,MIL,699,...,2.87,1,0,0,1,0,0,1.0,119,126


In [109]:
last_shot_base.loc[
    (shot_df["actionType"].isin(["2pt", "3pt"]))
    & (last_shot_base["shot_rev_order"] <= 2)
    & (np.abs(last_shot_base["scoreHome"].astype(int) - last_shot_base["scoreAway"].astype(int)) <= 3)
#     & (last_shot_base["tied"]==1)
].assign(
    tied_or_took_lead=lambda row: (((row["scoreHome_prev"] <= row["scoreAway_prev"]) & (row["scoreHome"] >= row["scoreAway"]) & (row["home_player"] == 1)) | (
        (row["scoreAway_prev"] <= row["scoreHome_prev"]) & (row["scoreAway"] >= row["scoreHome"]) & (row["away_player"]==1))),
    tied_or_took_lead_made=lambda row: (((row["scoreHome_prev"] <= row["scoreAway_prev"]) & (row["scoreHome"] >= row["scoreAway"])) | (
        (row["scoreAway_prev"] <= row["scoreHome_prev"]) & (row["scoreAway"] >= row["scoreHome"]))) & (row["shotResult_int"]==1)
).groupby(["personId", "player_name"]).agg({
    "shotResult_int": "sum",
    "game_id": "count",
    "shotDistance": "mean",
    "tied_or_took_lead_made": "sum",
    "tied_or_took_lead": "sum",
}).reset_index().assign(
    fg_perc=lambda row: row["shotResult_int"]/row["game_id"],
    fg_perc_tie_or_take_lead=lambda row: row["tied_or_took_lead_made"]/row["tied_or_took_lead"],
).sort_values(
    "game_id", ascending=False
).head(23).sort_values("fg_perc", ascending=False).reset_index()


Unnamed: 0,index,personId,player_name,shotResult_int,game_id,shotDistance,tied_or_took_lead_made,tied_or_took_lead,fg_perc,fg_perc_tie_or_take_lead
0,186,1628368,De'Aaron Fox,13,25,16.8008,4,6,0.52,0.666667
1,228,1628983,Shai Gilgeous-Alexander,18,35,14.958286,8,9,0.514286,0.888889
2,167,1627759,Jaylen Brown,11,23,16.98,3,8,0.478261,0.375
3,106,203897,Zach LaVine,13,28,15.455,4,5,0.464286,0.8
4,124,203999,Nikola Jokic,15,33,15.236364,6,8,0.454545,0.75
5,157,1627741,Buddy Hield,9,20,17.0005,2,3,0.45,0.666667
6,251,1629029,Luka Doncic,16,38,21.266842,5,9,0.421053,0.555556
7,70,203081,Damian Lillard,9,22,24.503636,4,6,0.409091,0.666667
8,187,1628369,Jayson Tatum,15,38,17.451316,6,14,0.394737,0.428571
9,32,201942,DeMar DeRozan,18,47,14.678723,6,14,0.382979,0.428571


### Redefining "Clutch" Games

In [30]:
clutch_games_base = shot_df.loc[
    (shot_df["period"] == 4)
    & (np.abs(shot_df["scoreHome"].astype(int) - shot_df["scoreAway"].astype(int)) <= 5)
    & (shot_df["clock_minutes"] < 5)
].sort_values(["game_id", "actionNumber"])
clutch_games_base["action_rank_asc"] = clutch_games_base.groupby(["game_id", "period"])["actionNumber"].rank(ascending=True)
# clutch_games_base["action_rank_desc"] = clutch_games_base.groupby(["game_id", "period"])["actionNumber"].rank(ascending=False)
clutch_games_base

Unnamed: 0,season,game_id,home_id,away_id,home_team,away_team,personId,player_name,player_team,actionNumber,...,x,y,shotDistance,home_player,away_player,home_lead,away_lead,tied,shotResult_int,action_rank_asc
653600,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1627783,Pascal Siakam,TOR,649,...,8.492116,47.372855,3.53,1,0,0,1,0,0,1.0
653710,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1626143,Jahlil Okafor,NOP,650,...,,,,0,1,0,1,0,0,2.0
653601,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1627783,Pascal Siakam,TOR,651,...,,,,1,0,0,1,0,0,3.0
653602,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1627783,Pascal Siakam,TOR,652,...,5.000000,50.000000,0.00,1,0,0,1,0,0,4.0
653603,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1627783,Pascal Siakam,TOR,653,...,,,,1,0,0,1,0,0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214,2023-24,0022301229,1610612749,1610612754,MIL,IND,201572,Brook Lopez,MIL,672,...,66.310775,66.176471,27.63,1,0,0,1,0,0,41.0
273,2023-24,0022301229,1610612749,1610612754,MIL,IND,1626167,Myles Turner,IND,673,...,,,,0,1,0,1,0,0,42.0
215,2023-24,0022301229,1610612749,1610612754,MIL,IND,201572,Brook Lopez,MIL,674,...,,,,1,0,0,1,0,0,43.0
140,2023-24,0022301229,1610612749,1610612754,MIL,IND,1630169,Tyrese Haliburton,IND,676,...,37.270039,47.303922,29.81,0,1,0,1,0,0,44.0


In [31]:
clutch_games_base_end = shot_df.loc[
    (shot_df["period"] == 4)
].sort_values(["game_id", "actionNumber"])
clutch_games_base_end["action_rank_desc"] = clutch_games_base_end.groupby(["game_id", "period"])["actionNumber"].rank(ascending=False)
clutch_games_base_end

Unnamed: 0,season,game_id,home_id,away_id,home_team,away_team,personId,player_name,player_team,actionNumber,...,x,y,shotDistance,home_player,away_player,home_lead,away_lead,tied,shotResult_int,action_rank_desc
653580,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1628384,OG Anunoby,TOR,546,...,,,,1,0,1,0,0,0,139.0
653586,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1629638,Nickeil Alexander-Walker,NOP,547,...,,,,0,1,1,0,0,0,138.0
653587,2019-20,0021900001,1610612761,1610612740,TOR,NOP,202734,E'Twaun Moore,NOP,548,...,,,,0,1,1,0,0,0,137.0
653588,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1627783,Pascal Siakam,TOR,549,...,,,,1,0,1,0,0,0,136.0
653614,2019-20,0021900001,1610612761,1610612740,TOR,NOP,1628402,Frank Jackson,NOP,550,...,,,,0,1,1,0,0,0,135.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1631108,Max Christie,LAL,648,...,17.821945,52.941176,11.59,1,0,0,1,0,1,5.0
38,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1641722,Jordan Hawkins,NOP,649,...,75.377792,43.627451,18.17,0,1,0,1,0,0,4.0
111,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1641721,Maxwell Lewis,LAL,650,...,,,,1,0,0,1,0,0,3.0
26,2023-24,0022301230,1610612747,1610612740,LAL,NOP,1630700,Dyson Daniels,NOP,652,...,69.333114,80.882353,28.19,0,1,0,1,0,0,2.0


In [32]:
clutch_games = clutch_games_base.loc[
    (clutch_games_base["action_rank_asc"]==1)
].rename(columns={
    "scoreHome": "scoreHome_at5m",
    "scoreAway": "scoreAway_at5m"
}).merge(
    clutch_games_base_end.loc[
        clutch_games_base_end["action_rank_desc"]==1,
        ["game_id", "scoreHome", "scoreAway"]
    ].rename(columns={
        "scoreHome": "scoreHome_at0m",
        "scoreAway": "scoreAway_at0m"
    }),
    on="game_id"
)

In [33]:
clutch_games.assign(
    score_diff_at5m = lambda row: row["scoreHome_at5m"].astype(int) - row["scoreAway_at5m"].astype(int),
    score_diff_at0m = lambda row: row["scoreHome_at0m"].astype(int) - row["scoreAway_at0m"].astype(int)
).pivot_table(
    index="score_diff_at5m",
    columns="score_diff_at0m",
    values="game_id",
    aggfunc="nunique"
).to_clipboard()

In [34]:
fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=back_end_front_end["player_name"],
        y=back_end_front_end["1 of 2 diff from 2 of 2"],
        hovertemplate="<b>%{x}</b><br>"
                      "<b>% Diff</b>: %{y: .1%}"
                      "<extra></extra>",
#         marker_color=["red" if p in ["Andre Drummond", "Mitchell Robinson"] else "blue" for p in ft_data_pivot["player_name"]]
    )
)
fig.update_layout(
    xaxis=dict(
        categoryorder="total ascending",
        tickangle=-45,
        tickfont=dict(size=8)
    ),
    yaxis=dict(tickformat="0%"),
    plot_bgcolor="white",
)
fig.add_hline(
    y=back_front_league_average_diff,
    line=dict(dash="dot", width=1)
)
fig.add_annotation(
    xref="paper",
    x=0.65,
    y=back_front_league_average_diff+(back_front_league_average_diff*.15),
    showarrow=False,
    text=f"<b>Group Average Diff: {round(back_front_league_average_diff*100,1)}%</b>",
    font=dict(size=12)
)

NameError: name 'back_end_front_end' is not defined