In [26]:
from pprint import pprint

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import polars
from nba_api.live.nba import endpoints as nba_live_endpoints
from nba_api.stats import endpoints as nba_endpoints
from scipy.stats import lognorm
from scipy.stats import shapiro

In [30]:
# Raise the pandas display limits so long and wide frames render in full.
for option_name, option_value in (
    ("display.max_rows", 200),
    ("display.max_columns", 500),
):
    pd.set_option(option_name, option_value)

# FT Analysis

## Tatum's first FT of the game %

In [7]:
# Request the player index and load its first data set into a DataFrame,
# using the data set's own headers as column names.
playerindex = nba_endpoints.playerindex.PlayerIndex()
player_index_payload = playerindex.data_sets[0].get_dict()
data = player_index_payload.get("data")
headers = player_index_payload.get("headers")
players = pd.DataFrame(data, columns=headers)

In [8]:
# Display the player index frame.
players

Unnamed: 0,PERSON_ID,PLAYER_LAST_NAME,PLAYER_FIRST_NAME,PLAYER_SLUG,TEAM_ID,TEAM_SLUG,IS_DEFUNCT,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,...,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,ROSTER_STATUS,FROM_YEAR,TO_YEAR,PTS,REB,AST,STATS_TIMEFRAME
0,1630173,Achiuwa,Precious,precious-achiuwa,1610612752,knicks,0,New York,Knicks,NYK,...,2020.0,1.0,20.0,1.0,2020,2024,,,,Season
1,203500,Adams,Steven,steven-adams,1610612745,rockets,0,Houston,Rockets,HOU,...,2013.0,1.0,12.0,1.0,2013,2024,2.7,2.9,1.1,Season
2,1628389,Adebayo,Bam,bam-adebayo,1610612748,heat,0,Miami,Heat,MIA,...,2017.0,1.0,14.0,1.0,2017,2024,16.3,9.5,3.9,Season
3,1630534,Agbaji,Ochai,ochai-agbaji,1610612761,raptors,0,Toronto,Raptors,TOR,...,2022.0,1.0,14.0,1.0,2022,2024,13.0,4.9,2.2,Season
4,1630583,Aldama,Santi,santi-aldama,1610612763,grizzlies,0,Memphis,Grizzlies,MEM,...,2021.0,1.0,30.0,1.0,2021,2024,12.9,7.1,3.2,Season
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525,203469,Zeller,Cody,cody-zeller,1610612737,hawks,0,Atlanta,Hawks,ATL,...,2013.0,1.0,4.0,1.0,2013,2024,,,,Season
526,1627826,Zubac,Ivica,ivica-zubac,1610612746,clippers,0,LA,Clippers,LAC,...,2016.0,2.0,32.0,1.0,2016,2024,15.3,12.4,2.3,Season
527,1641783,da Silva,Tristan,tristan-da-silva,1610612753,magic,0,Orlando,Magic,ORL,...,2024.0,1.0,18.0,1.0,2024,2024,6.6,3.9,2.1,Season
528,1628427,Čančar,Vlatko,vlatko-čančar,1610612743,nuggets,0,Denver,Nuggets,DEN,...,2017.0,2.0,49.0,1.0,2019,2024,2.3,2.0,0.0,Season


In [9]:
# Player of interest, identified by first name, last name and team name.
player_first_name = "Jayson"
player_last_name = "Tatum"
player_team_name = "Celtics"

# Build the row filter once so both lookups below use exactly the same criteria.
selected_player_mask = (
    (players["PLAYER_FIRST_NAME"] == player_first_name)
    & (players["PLAYER_LAST_NAME"] == player_last_name)
    & (players["TEAM_NAME"] == player_team_name)
)
selected_player = players.loc[selected_player_mask]
if selected_player.empty:
    # Same exception type `.values[0]` would raise on an empty match, but
    # with a message that says what was being looked up.
    raise IndexError(
        f"No player found for {player_first_name} {player_last_name} "
        f"({player_team_name})"
    )

# First matching row supplies the player id and the draft year.
tatum_id = selected_player["PERSON_ID"].values[0]
start_year = selected_player["DRAFT_YEAR"].values[0]

In [10]:
# Show the complete index row(s) matching the selected first name, last name and team.
is_selected_player = (
    (players["PLAYER_FIRST_NAME"] == player_first_name)
    & (players["PLAYER_LAST_NAME"] == player_last_name)
    & (players["TEAM_NAME"] == player_team_name)
)
players.loc[is_selected_player]

Unnamed: 0,PERSON_ID,PLAYER_LAST_NAME,PLAYER_FIRST_NAME,PLAYER_SLUG,TEAM_ID,TEAM_SLUG,IS_DEFUNCT,TEAM_CITY,TEAM_NAME,TEAM_ABBREVIATION,...,DRAFT_YEAR,DRAFT_ROUND,DRAFT_NUMBER,ROSTER_STATUS,FROM_YEAR,TO_YEAR,PTS,REB,AST,STATS_TIMEFRAME
446,1628369,Tatum,Jayson,jayson-tatum,1610612738,celtics,0,Boston,Celtics,BOS,...,2017.0,1.0,3.0,1.0,2017,2024,29.9,8.1,5.9,Season


In [11]:
# Print the looked-up player id and the draft year read from DRAFT_YEAR.
print(tatum_id, start_year)

1628369 2017.0


In [12]:
def season_label(season_start_year):
    """Return the season label for a season starting in `season_start_year`.

    The label is the four-digit starting year, a hyphen, and the last two
    digits of the following year, zero-padded so every label has the same
    width (2010 -> "2010-11", 2008 -> "2008-09", 1999 -> "1999-00").
    """
    return f"{season_start_year}-{(season_start_year + 1) % 100:02d}"


# Season labels from 2010-11 through 2024-25 (the range end is exclusive).
seasons = [season_label(x) for x in range(2010, 2025)]
print(seasons)

['2010-11', '2011-12', '2012-13', '2013-14', '2014-15', '2015-16', '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23', '2023-24', '2024-25']


In [13]:
playerlogs = nba_endpoints.playergamelogs

# One request per season label; the GAME_ID column of the first returned
# frame is appended to a single running list.
tatum_games = []
for s in seasons:
    season_log = playerlogs.PlayerGameLogs(
        player_id_nullable=tatum_id,
        season_nullable=s,
    ).get_data_frames()[0]
    tatum_games += season_log["GAME_ID"].to_list()

print(len(tatum_games))

528


In [14]:
# Number of game ids collected for the player.
len(tatum_games)

528

In [32]:
# Alias the stats play-by-play v3 endpoint module.
# NOTE(review): `game` is rebound to the live play-by-play module in a later
# cell, so what this name refers to depends on which cell ran last.
game = nba_endpoints.playbyplayv3

In [33]:
# Fetch the v3 play-by-play for the first collected game id and tabulate its action list.
first_game_payload = game.PlayByPlayV3(game_id=tatum_games[0]).get_dict()
first_game_actions = first_game_payload.get("game").get("actions")
pd.DataFrame(first_game_actions)

Unnamed: 0,actionNumber,clock,period,teamId,teamTricode,personId,playerName,playerNameI,xLegacy,yLegacy,shotDistance,shotResult,isFieldGoal,scoreHome,scoreAway,pointsTotal,location,description,actionType,subType,videoAvailable,shotValue,actionId
0,2,PT12M00.00S,1,0,,0,,,0,0,0,,0,0,0,0,,Start of 1st Period (8:16 PM EST),period,start,0,0,1
1,4,PT12M00.00S,1,1610612764,WAS,101162,Gortat,M. Gortat,0,0,0,,0,,,0,h,Jump Ball Gortat vs. Baynes: Tip to Morris,Jump Ball,,1,0,2
2,7,PT11M42.00S,1,1610612764,WAS,203490,Porter Jr.,O. Porter Jr.,-128,145,19,Missed,1,,,0,h,MISS Porter Jr. 19' Step Back Jump Shot,Missed Shot,Step Back Jump shot,1,0,3
3,8,PT11M39.00S,1,1610612738,BOS,1626179,Rozier,T. Rozier,0,0,0,,0,,,0,v,Rozier REBOUND (Off:0 Def:1),Rebound,Unknown,1,0,4
4,10,PT11M17.00S,1,1610612738,BOS,1628369,Tatum,J. Tatum,-192,20,19,Missed,1,,,0,v,MISS Tatum 19' Fadeaway Jumper,Missed Shot,Fadeaway Jump Shot,1,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,670,PT00M23.80S,4,1610612764,WAS,201196,Sessions,R. Sessions,0,0,0,,0,113,99,212,h,Sessions Free Throw 1 of 1 (3 PTS),Free Throw,Free Throw 1 of 1,0,0,498
498,671,PT00M17.20S,4,1610612738,BOS,1628400,Ojeleye,S. Ojeleye,17,35,4,Missed,1,,,0,v,MISS Ojeleye 4' Driving Layup,Missed Shot,Driving Layup Shot,1,0,499
499,672,PT00M16.90S,4,1610612738,BOS,1628443,Allen,K. Allen,0,0,0,,0,,,0,v,Allen REBOUND (Off:1 Def:2),Rebound,Unknown,1,0,500
500,673,PT00M16.20S,4,1610612738,BOS,1628443,Allen,K. Allen,0,-6,1,Made,1,113,101,214,v,Allen 1' Putback Layup (6 PTS),Made Shot,Putback Layup Shot,1,0,501


In [24]:
# Alias the live play-by-play endpoint module; the later loops call
# `game.PlayByPlay(...)` through this name.
game = nba_live_endpoints.playbyplay

In [28]:
# Live play-by-play for one sample game (position 200 in the collected ids),
# tabulated from the response's action list.
sample_game_payload = game.PlayByPlay(game_id=tatum_games[200]).get_dict()
sample_game_actions = sample_game_payload.get("game").get("actions")
df = pd.DataFrame(sample_game_actions)

In [29]:
# Display the sample game's play-by-play frame.
df

Unnamed: 0,actionNumber,clock,timeActual,period,periodType,actionType,subType,qualifiers,personId,x,...,assistTotal,stealPlayerName,stealPersonId,foulPersonalTotal,foulTechnicalTotal,foulDrawnPlayerName,foulDrawnPersonId,blockPlayerName,blockPersonId,value
0,2,PT12M00.00S,2019-12-19T02:40:40.4Z,1,REGULAR,period,start,[],0,,...,,,,,,,,,,
1,4,PT11M57.00S,2019-12-19T02:41:10.6Z,1,REGULAR,jumpball,recovered,[],203939,,...,,,,,,,,,,
2,7,PT11M46.00S,2019-12-19T02:41:22.4Z,1,REGULAR,3pt,jumpshot,[],204001,29.385677,...,,,,,,,,,,
3,8,PT11M43.00S,2019-12-19T02:41:25.4Z,1,REGULAR,rebound,defensive,[],1628400,,...,,,,,,,,,,
4,9,PT11M33.00S,2019-12-19T02:41:34.0Z,1,REGULAR,2pt,jumpshot,[],1628369,79.582786,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
611,698,PT00M16.20S,2019-12-19T05:05:11.2Z,4,REGULAR,freethrow,2of2,[],1627759,,...,,,,,,,,,,
612,699,PT00M10.50S,2019-12-19T05:05:25.2Z,4,REGULAR,3pt,jumpshot,[],203552,74.326544,...,,,,,,,,,,
613,700,PT00M07.60S,2019-12-19T05:05:28.1Z,4,REGULAR,rebound,defensive,[],1628369,,...,,,,,,,,,,
614,701,PT00M00.00S,2019-12-19T05:05:39.7Z,4,REGULAR,period,end,[],0,,...,,,,,,,,,,


In [None]:
# Re-apply the display limits; this sets the column limit to 100.
for option_name, option_value in (
    ("display.max_rows", 200),
    ("display.max_columns", 100),
):
    pd.set_option(option_name, option_value)

In [None]:
# Show the action recorded immediately after each technical foul.
# Action numbers in this feed are not consecutive, so matching on
# `actionNumber + 1` can miss the following row. Shifting the foul mask down
# by one row selects the next recorded action however it is numbered.
# NOTE(review): assumes the rows of `df` are in play order — confirm.
is_technical_foul = (df["actionType"] == "foul") & (df["subType"] == "technical")
df.loc[is_technical_foul.shift(1, fill_value=False)]

In [None]:
# Number of columns in the play-by-play frame.
len(df.columns)

In [None]:
# Show the first 100 rows of the play-by-play frame.
df.head(100)

In [None]:
# Distinct (actionType, shotResult) combinations, ordered by action type.
(
    df.loc[:, ["actionType", "shotResult"]]
    .drop_duplicates()
    .sort_values(by="actionType")
)

In [None]:
# Collect the selected player's free throws from each game's play-by-play.
# Per-game frames are gathered in a list and concatenated once at the end;
# growing a DataFrame with concat inside the loop re-copies every earlier row
# on each iteration.
tatum_ft_frames = []
for g in tatum_games:
    try:
        # A loop-local name is used so the sample frame `df` is not overwritten.
        actions = pd.DataFrame(
            game.PlayByPlay(game_id=g).get_dict().get("game").get("actions")
        )
        tatum_ft_frames.append(
            actions.loc[
                (actions["actionType"] == "freethrow")
                & (actions["personId"] == tatum_id),
                ["actionNumber", "subType", "shotResult"],
            ].assign(gameid=g)
        )
    except Exception as e:
        # Best effort: skip games that cannot be fetched or parsed, but say why.
        print(f"Game ID {g} didn't work -> {e}")

# An empty frame is kept as the result when no game produced any rows.
tatum_fts = pd.concat(tatum_ft_frames, axis=0) if tatum_ft_frames else pd.DataFrame()

In [None]:
# Display the collected free-throw rows.
tatum_fts

In [None]:
# Flag makes as 1 (everything else as 0), then rank each free throw within
# its game by action number.
made_flag = np.where(tatum_fts["shotResult"] == "Made", 1, 0)
tatum_fts["shotResult_int"] = made_flag
game_action_numbers = tatum_fts.groupby("gameid")["actionNumber"]
tatum_fts["ft_order"] = game_action_numbers.rank()

In [None]:
# Number of distinct games that contributed free-throw rows.
len(tatum_fts.gameid.unique())

In [None]:
# Overall make rate across every free throw in the sample: makes / attempts.
made_count = tatum_fts["shotResult_int"].sum()
attempt_count = tatum_fts["shotResult_int"].count()
tatum_in_sample_avg = made_count / attempt_count
print(tatum_in_sample_avg)
# 0.8228882833787466

In [None]:
# Number of distinct game ids in the free-throw sample.
len(tatum_fts["gameid"].unique())

In [None]:
# Display the free-throw rows, now including shotResult_int and ft_order.
tatum_fts

In [None]:
# Make rate for each of the first ten free throws of a game, next to the
# overall in-sample rate and the difference between the two.
first_ten_fts = tatum_fts.loc[tatum_fts["ft_order"] <= 10]
tatum_ft_order_perc = (
    first_ten_fts
    .groupby("ft_order")
    .agg({"shotResult_int": "sum", "actionNumber": "count"})
    .reset_index()
    .assign(
        ft_perc=lambda frame: frame["shotResult_int"] / frame["actionNumber"],
        in_sample_perc=tatum_in_sample_avg,
        diff_from_overall=lambda frame: frame["ft_perc"] - tatum_in_sample_avg,
    )
)
tatum_ft_order_perc

In [None]:
# Same per-free-throw-number summary with no cap on ft_order; this rebinds
# `tatum_ft_order_perc`.
tatum_ft_order_perc = (
    tatum_fts
    .groupby("ft_order")
    .agg({"shotResult_int": "sum", "actionNumber": "count"})
    .reset_index()
    .assign(
        ft_perc=lambda frame: frame["shotResult_int"] / frame["actionNumber"],
        in_sample_perc=tatum_in_sample_avg,
        diff_from_overall=lambda frame: frame["ft_perc"] - tatum_in_sample_avg,
    )
)
tatum_ft_order_perc

## Top FT Shooters

In [None]:
# Ask the LeagueLeaders endpoint which data it exposes.
# NOTE(review): this is called on the class, not on an instance — confirm
# that `get_available_data` can be called without first constructing the endpoint.
nba_endpoints.LeagueLeaders.get_available_data()

## Rest of the league: first FT of the game %

In [None]:
# Display the season labels used for the league-wide pull below.
seasons

In [None]:
teamlogs = nba_endpoints.teamgamelogs

# One team-game-log request per season; every GAME_ID from the first returned
# frame is added to a single list.
game_ids = []
for s in seasons:
    season_team_log = teamlogs.TeamGameLogs(season_nullable=s).get_data_frames()[0]
    game_ids.extend(season_team_log["GAME_ID"])

In [None]:
# Collapse repeated game ids; `game_ids` is a set from here on.
game_ids = set(game_ids)

In [None]:
# Display the de-duplicated game ids.
game_ids

In [None]:
# Number of distinct game ids.
len(game_ids)

In [None]:
# Stats play-by-play (v2) for one sample game.
# The endpoint is requested once and both the rows and the column headers are
# read from that single response, instead of issuing the same request twice.
pbp_v2_result_set = (
    nba_endpoints.playbyplayv2.PlayByPlayV2(game_id='0021700469')
    .get_dict()
    .get("resultSets")[0]
)
df = pd.DataFrame(
    pbp_v2_result_set.get("rowSet"),
    columns=pbp_v2_result_set.get("headers"),
)

In [None]:
# Evaluate the `playbyplay` attribute of the stats endpoints package so the
# notebook displays it.
nba_endpoints.playbyplay

In [None]:
# Display the v2 play-by-play frame for the sample game.
df

In [None]:
# Stats play-by-play (original endpoint) for the same sample game.
# The endpoint is requested once; rows and headers come from the same
# response rather than from two identical requests.
pbp_result_set = (
    nba_endpoints.playbyplay.PlayByPlay(game_id='0021700469')
    .get_dict()
    .get("resultSets")[0]
)
df = pd.DataFrame(
    pbp_result_set.get('rowSet'),
    columns=pbp_result_set.get('headers'),
)

In [None]:
# Display the play-by-play frame for the sample game.
df

In [None]:
# Collect every free throw from every game id.
# Per-game frames are gathered in a list and concatenated once at the end;
# concatenating onto a growing DataFrame inside the loop re-copies all earlier
# rows on every iteration.
all_ft_frames = []
total_games = len(game_ids)
for i, g in enumerate(game_ids):
    try:
        # A loop-local name is used so the global `df` is not overwritten.
        actions = pd.DataFrame(
            game.PlayByPlay(game_id=g).get_dict().get("game").get("actions")
        )
        all_ft_frames.append(
            actions.loc[
                (actions["actionType"] == "freethrow"),
                ["actionNumber", "period", "subType", "shotResult", "personId"],
            ].assign(gameid=g)
        )
        print(f"Game {i+1} out of {total_games} -> COMPLETE")
    except Exception as e:
        # Best effort: report the failure and continue with the next game.
        print(f"Game {i+1} out of {total_games} -> DIDN'T WORK -> {e}")

# An empty frame is kept as the result when no game produced any rows.
all_fts = pd.concat(all_ft_frames, axis=0) if all_ft_frames else pd.DataFrame()

In [None]:
# Number of distinct games that contributed free-throw rows.
all_fts["gameid"].nunique()

In [None]:
# Rank each shooter's free throws within a game by action number, and flag
# makes as 1 (everything else as 0).
shooter_action_numbers = all_fts.groupby(["gameid", "personId"])["actionNumber"]
all_fts["ft_order"] = shooter_action_numbers.rank()
made_flag = np.where(all_fts["shotResult"] == "Made", 1, 0)
all_fts["shotResult_int"] = made_flag

In [None]:
# Number of distinct game ids in the league-wide sample.
len(all_fts.gameid.unique())

In [None]:
# Number of distinct shooters (personId values) in the league-wide sample.
len(all_fts.personId.unique())

In [None]:
# Per-shooter totals: makes (sum of the made flag) and attempts (row count),
# then the overall make rate.
ft_totals = all_fts.groupby("personId").agg({"shotResult_int": ["sum", "count"]})
player_ft_perc = ft_totals.reset_index()
# Flatten the two-level column labels produced by the list-valued aggregation.
player_ft_perc.columns = ["personID", "makes", "attempts"]
player_ft_perc["overall_ft_perc"] = (
    player_ft_perc["makes"] / player_ft_perc["attempts"]
)

In [None]:
# Attach first and last names from the player index (left join, so shooters
# missing from the index are kept) and order by attempts, highest first.
# Note: this rebinds `player_ft_perc` from itself, so running the cell a
# second time merges the name columns in again.
player_names = players[["PERSON_ID", "PLAYER_FIRST_NAME", "PLAYER_LAST_NAME"]]
player_ft_perc = (
    player_ft_perc
    .merge(player_names, left_on="personID", right_on="PERSON_ID", how="left")
    .drop("PERSON_ID", axis=1)
    .sort_values("attempts", ascending=False)
    .reset_index()
)

In [None]:
# Show the first 100 rows of the per-player free-throw summary.
player_ft_perc.head(100)

In [None]:
# Makes and attempts per shooter for each of the first fifteen free throws of a game.
first_fifteen_fts = all_fts.loc[all_fts["ft_order"] <= 15]
per_player_per_order = (
    first_fifteen_fts
    .groupby(["personId", "ft_order"])
    .agg({"shotResult_int": "sum", "actionNumber": "count"})
    .reset_index()
)

# Long table: one row per (shooter, free-throw number) with that slot's make
# rate, the shooter's overall rate, the difference, and the shooter's name.
overall_rates = player_ft_perc[["personID", "overall_ft_perc"]]
player_names = players[["PERSON_ID", "PLAYER_FIRST_NAME", "PLAYER_LAST_NAME"]]
all_fts_long = (
    per_player_per_order
    .merge(overall_rates, left_on="personId", right_on="personID")
    .assign(
        ft_perc=lambda frame: frame["shotResult_int"] / frame["actionNumber"],
        diff_from_overall=lambda frame: frame["ft_perc"] - frame["overall_ft_perc"],
    )
    .merge(player_names, left_on="personId", right_on="PERSON_ID", how="left")
    .drop("PERSON_ID", axis=1)
)

In [None]:
# Display the long table ordered by actionNumber (the per-slot attempt count), largest first.
all_fts_long.sort_values("actionNumber", ascending=False)

In [None]:
# Keep only shooters who have a known first name and at least 150 recorded
# first free throws (rows with ft_order == 1); all of their rows are retained.
qualifying_rows = (
    (all_fts_long["ft_order"] == 1)
    & (all_fts_long["actionNumber"] >= 150)
    & all_fts_long["PLAYER_FIRST_NAME"].notna()
)
qualifying_player_ids = all_fts_long.loc[qualifying_rows, "personId"]
ft_data = all_fts_long.loc[all_fts_long["personId"].isin(qualifying_player_ids)]

In [None]:
# Display the filtered per-player, per-free-throw-number table.
ft_data

In [None]:
# Pooled make rate over every row of the filtered table: total makes / total attempts.
total_makes = ft_data["shotResult_int"].sum()
total_attempts = ft_data["actionNumber"].sum()
league_in_sample_average = total_makes / total_attempts
print(league_in_sample_average)

In [None]:
# Number of distinct shooters in the filtered table.
len(ft_data["personId"].unique())

In [None]:
# Pooled make rate on the first free throw of a game.
first_ft_rows = ft_data.loc[ft_data["ft_order"] == 1]
first_ft_rows["shotResult_int"].sum() / first_ft_rows["actionNumber"].sum()

In [None]:
# Difference between the pooled make rate at each free-throw number (1-15)
# and the pooled overall rate, with the number of attempts at that slot.
# The overall rate does not depend on the loop variable, so it is computed once.
overall_make_rate = ft_data["shotResult_int"].sum() / ft_data["actionNumber"].sum()
for i in range(1, 16):
    nth_ft_rows = ft_data.loc[ft_data["ft_order"] == i]
    nth_ft_attempts = nth_ft_rows["actionNumber"].sum()
    # A loop-local name is used so `league_average_diff`, which the charts
    # read, is not left holding the value for free throw #15.
    nth_ft_diff = nth_ft_rows["shotResult_int"].sum() / nth_ft_attempts - overall_make_rate
    print(f"FT #{i} Diff: {round(nth_ft_diff,4)}; Total FTs: {nth_ft_attempts}")

In [None]:
# Pooled first-free-throw make rate minus the pooled overall rate; the charts
# below draw their reference line at this value.
first_ft_rows = ft_data.loc[ft_data["ft_order"] == 1]
first_ft_make_rate = (
    first_ft_rows["shotResult_int"].sum() / first_ft_rows["actionNumber"].sum()
)
league_average_diff = first_ft_make_rate - league_in_sample_average

In [None]:
# Wide table: one row per player name, one column per free-throw number,
# holding diff_from_overall.
named_ft_data = ft_data.assign(
    player_name=ft_data["PLAYER_FIRST_NAME"] + " " + ft_data["PLAYER_LAST_NAME"]
)
ft_data_pivot = named_ft_data.pivot_table(
    values="diff_from_overall",
    index=["player_name"],
    columns="ft_order",
).reset_index()

In [None]:
# Histogram of the per-player first-free-throw differences in bins of width
# 0.01, with a dotted vertical line at the pooled difference.
first_ft_histogram = go.Histogram(
    x=ft_data_pivot[1.0],
    xbins={"size": .01},
)
fig = go.Figure()
fig.add_trace(first_ft_histogram)
fig.update_layout(plot_bgcolor="white")
fig.add_vline(
    x=league_average_diff,
    line={"dash": "dot", "width": 1},
)

In [None]:
# Bar chart of each player's first-free-throw difference from their own
# average, bars ordered ascending, with a dotted line and a label at the
# pooled difference.
first_ft_bars = go.Bar(
    x=ft_data_pivot["player_name"],
    y=ft_data_pivot[1.0],
    hovertemplate=(
        "<b>%{x}</b><br>"
        "<b>% Diff</b>: %{y: .1%}"
        "<extra></extra>"
    ),
)
fig = go.Figure()
fig.add_trace(first_ft_bars)
fig.update_layout(
    title="NBA Player 'First FT' % vs. Player Average",
    plot_bgcolor="white",
    xaxis={
        "categoryorder": "total ascending",
        "tickangle": -45,
        "tickfont": {"size": 8},
    },
    yaxis={"tickformat": "0%"},
)
fig.add_hline(
    y=league_average_diff,
    line={"dash": "dot", "width": 1},
)
# The label sits 15% further from zero than the reference line itself.
annotation_y = league_average_diff+(league_average_diff*.15)
annotation_text = f"<b>NBA Overall Diff: {round(league_average_diff*100,1)}%</b>"
fig.add_annotation(
    xref="paper",
    x=0.65,
    y=annotation_y,
    showarrow=False,
    text=annotation_text,
    font={"size": 12},
)

In [None]:
# First six free-throw slots for two individual players.
# Both first and last name are matched: filtering on last name alone would
# fold any other qualifying player with the same surname into the selection
# and into the chart titled for a single player.
first_six_fts = ft_data["ft_order"] <= 6
drummond = ft_data.loc[
    (ft_data["PLAYER_FIRST_NAME"] == "Andre")
    & (ft_data["PLAYER_LAST_NAME"] == "Drummond")
    & first_six_fts
]
robinson = ft_data.loc[
    (ft_data["PLAYER_FIRST_NAME"] == "Mitchell")
    & (ft_data["PLAYER_LAST_NAME"] == "Robinson")
    & first_six_fts
]

# Bars show the difference from the player's own overall rate at each
# free-throw number; the red dotted line marks zero difference.
fig = go.Figure()
fig.add_trace(
    go.Bar(
        y=robinson["diff_from_overall"],
        x=robinson["ft_order"],
        name="Mitchell Robinson"
    )
)
fig.add_hline(
    y=0,
    line=dict(dash="dot", width=2, color="red")
)
fig.update_layout(
    yaxis=dict(tickformat="0%"),
    plot_bgcolor="white",
    title="Mitchell Robinson FT% by FT Number in Game"
)

In [None]:
# Ten players with the largest value in the 1.0 (first free throw) column.
ft_data_pivot.sort_values(1.0, ascending=False).head(10)

In [None]:
# Ten lowest overall free-throw percentages among the filtered players.
(
    ft_data.loc[:, ["PLAYER_FIRST_NAME", "PLAYER_LAST_NAME", "overall_ft_perc"]]
    .drop_duplicates()
    .sort_values("overall_ft_perc")
    .head(10)
)

In [None]:
# First-free-throw rows for every filtered player whose last name is in the list.
surnames_of_interest = ["Drummond", "Robinson", "Gilgeous-Alexander", "Adams"]
ft_data.loc[
    ft_data["PLAYER_LAST_NAME"].isin(surnames_of_interest)
    & (ft_data["ft_order"] == 1)
]

## FT Consistency

In [None]:
# Absolute difference from each player's overall rate.
# `ft_data` was produced by a `.loc` selection from another frame, so writing
# a column into it in place triggers pandas' SettingWithCopyWarning; `assign`
# returns a new frame with the column added and is safe to re-run.
ft_data = ft_data.assign(diff_abs_val=ft_data["diff_from_overall"].abs())

In [None]:
# Per-player mean absolute difference over the first five free-throw numbers
# (column "MAE"), joined back onto every row for that player.
player_mae = (
    ft_data.loc[ft_data["ft_order"] <= 5]
    .groupby("personId")
    .agg({"diff_abs_val": "mean"})
    .reset_index()
    .rename(columns={"diff_abs_val": "MAE"})
)
ft_consistency = ft_data.merge(player_mae, on="personId")

In [None]:
# Ten players with the smallest MAE, shown with their overall percentage.
(
    ft_consistency.loc[
        :, ["PLAYER_FIRST_NAME", "PLAYER_LAST_NAME", "MAE", "overall_ft_perc"]
    ]
    .drop_duplicates()
    .sort_values("MAE", ascending=True)
    .head(10)
)

## Front end and back end of a pair of FTs

In [None]:
# Keep free throws whose sub-type label ends in "2", then count makes and
# attempts per shooter and sub-type.
is_two_shot_trip = all_fts["subType"].str.contains("2$", regex=True)
two_shot_counts = (
    all_fts.loc[is_two_shot_trip]
    .groupby(["personId", "subType"])
    .agg({"shotResult_int": "sum", "actionNumber": "count"})
    .reset_index()
)

# Normalise the label: sub-types starting with "1" become "1 of 2", all
# others "2 of 2".
two_shot_counts = two_shot_counts.assign(
    subType=np.where(
        two_shot_counts["subType"].str.contains("^1", regex=True),
        "1 of 2",
        "2 of 2",
    )
)

# Re-aggregate under the normalised labels and compute the make rate.
back_end_front_end_fts = (
    two_shot_counts
    .groupby(["personId", "subType"])
    .agg({"shotResult_int": "sum", "actionNumber": "sum"})
    .reset_index()
    .assign(ft_perc=lambda frame: frame["shotResult_int"] / frame["actionNumber"])
)

In [None]:
# Pooled makes, attempts and make rate for each of the two labels.
(
    back_end_front_end_fts
    .groupby("subType")
    .agg({"shotResult_int": "sum", "actionNumber": "sum"})
    .reset_index()
    .assign(ft_perc=lambda totals: totals["shotResult_int"] / totals["actionNumber"])
)

In [None]:
# One row per shooter with a column of make rates for each label.
back_end_front_end_pivot = (
    back_end_front_end_fts
    .pivot_table(
        values="ft_perc",
        index="personId",
        columns="subType",
        aggfunc="max",
    )
    .reset_index()
)

In [None]:
# Per-shooter totals over every free throw (attempts are row counts).
overall_by_player = (
    all_fts.groupby("personId")
    .agg({"shotResult_int": "sum", "actionNumber": "count"})
    .reset_index()
    .assign(ft_perc=lambda frame: frame["shotResult_int"] / frame["actionNumber"])
)

# Per-shooter totals restricted to the "1 of 2" / "2 of 2" rows.
pairs_by_player = (
    back_end_front_end_fts.groupby("personId")
    .agg({"shotResult_int": "sum", "actionNumber": "sum"})
    .reset_index()
    .assign(
        ft_perc_when_shooting_two=lambda frame: frame["shotResult_int"] / frame["actionNumber"],
    )
)

player_names = players[["PERSON_ID", "PLAYER_FIRST_NAME", "PLAYER_LAST_NAME"]]
output_columns = [
    "personId", "player_name", "ft_perc", "ft_perc_when_shooting_two",
    "1 of 2", "2 of 2", "FTM", "FTA", "FTM_when_shooting_two", "FTA_when_shooting_two",
]

# Join the pivot with both sets of totals, renaming the shared column names
# after each join, then add the display name and select the output columns.
back_end_front_end = (
    back_end_front_end_pivot
    .merge(overall_by_player, on="personId")
    .rename(columns={"shotResult_int": "FTM", "actionNumber": "FTA"})
    .merge(pairs_by_player, on="personId")
    .rename(columns={
        "shotResult_int": "FTM_when_shooting_two",
        "actionNumber": "FTA_when_shooting_two",
    })
    .merge(player_names, left_on="personId", right_on="PERSON_ID", how="left")
    .drop("PERSON_ID", axis=1)
    .assign(
        player_name=lambda frame: frame["PLAYER_FIRST_NAME"] + " " + frame["PLAYER_LAST_NAME"]
    )
    .drop(["PLAYER_FIRST_NAME", "PLAYER_LAST_NAME"], axis=1)
    [output_columns]
)

In [None]:
# Keep named players only, then the 200 with the most attempts.
has_player_name = back_end_front_end["player_name"].notna()
back_end_front_end = (
    back_end_front_end.loc[has_player_name]
    .sort_values("FTA", ascending=False)
    .head(200)
)

In [None]:
# Front-end vs back-end differences, and each one's difference from the
# player's rate on two-shot trips.
# `back_end_front_end` comes from `.head(200)` on a filtered frame, so writing
# columns into it in place can raise SettingWithCopyWarning; `assign` builds a
# new frame with the three columns added.
back_end_front_end = back_end_front_end.assign(**{
    "1 of 2 diff from 2 of 2": back_end_front_end["1 of 2"] - back_end_front_end["2 of 2"],
    "1 of 2 diff from overall": back_end_front_end["1 of 2"] - back_end_front_end["ft_perc_when_shooting_two"],
    "2 of 2 diff from overall": back_end_front_end["2 of 2"] - back_end_front_end["ft_perc_when_shooting_two"],
})

In [None]:
# Five rows with the smallest "1 of 2 diff from 2 of 2" value.
back_end_front_end.sort_values("1 of 2 diff from 2 of 2").head()

In [None]:
# Pooled make rate for each label, computed once and indexed by label.
# Looking the two rates up by name states which one is subtracted from which,
# instead of relying on the row order of the grouped result.
pair_ft_perc = (
    back_end_front_end_fts.groupby("subType")
    .agg({"shotResult_int": "sum", "actionNumber": "sum"})
    .assign(ft_perc=lambda frame: frame["shotResult_int"] / frame["actionNumber"])
)["ft_perc"]

# Front-end rate minus back-end rate across all shooters.
back_front_league_average_diff = pair_ft_perc.loc["1 of 2"] - pair_ft_perc.loc["2 of 2"]

In [None]:
# Bar chart of each player's front-end minus back-end make rate, bars ordered
# ascending, with a dotted line and a label at the pooled difference.
pair_diff_bars = go.Bar(
    x=back_end_front_end["player_name"],
    y=back_end_front_end["1 of 2 diff from 2 of 2"],
    hovertemplate=(
        "<b>%{x}</b><br>"
        "<b>% Diff</b>: %{y: .1%}"
        "<extra></extra>"
    ),
)
fig = go.Figure()
fig.add_trace(pair_diff_bars)
fig.update_layout(
    plot_bgcolor="white",
    xaxis={
        "categoryorder": "total ascending",
        "tickangle": -45,
        "tickfont": {"size": 8},
    },
    yaxis={"tickformat": "0%"},
)
fig.add_hline(
    y=back_front_league_average_diff,
    line={"dash": "dot", "width": 1},
)
# The label sits 15% further from zero than the reference line itself.
annotation_y = back_front_league_average_diff+(back_front_league_average_diff*.15)
annotation_text = f"<b>Group Average Diff: {round(back_front_league_average_diff*100,1)}%</b>"
fig.add_annotation(
    xref="paper",
    x=0.65,
    y=annotation_y,
    showarrow=False,
    text=annotation_text,
    font={"size": 12},
)