In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

In [3]:
league = "mens"
prior = "_team_prior"
year = 2023

In [4]:
player_ratings = pd.read_csv(f"./data/{league}_player_ratings{prior}.csv", index_col=0)
# https://www.espn.com/womens-college-basketball/player/_/id/5174285/milaysia-fulwiley
# is not in player_info
player_info = (
    pd.read_csv(f"./data/{league}_player_info.csv", index_col="player_id")
    .assign(team_id=lambda _: pd.to_numeric(_.team_id, errors="coerce"))
    .dropna(subset=["team_id"])
    .assign(team_id=lambda _: _.team_id.astype(int))
)
defense_ratings = pd.read_csv(  
    f"./data/{league}_player_ratings_defense{prior}.csv", index_col=0
)

player_ratings = (
    player_ratings.merge(player_info, left_index=True, right_index=True, how="left")
    .assign(team_id=lambda _: pd.to_numeric(_.team_id, errors="coerce"))
    .dropna(subset=["team_id"])
    .merge(defense_ratings, left_index=True, right_index=True, suffixes=("", "_defense"))
    .assign(net_vpp=lambda _: _.vpp - _.vpp_defense)
)
teams = (
    player_info.reset_index()[["team_name", "team_id"]]
    .drop_duplicates()
    .set_index("team_id")
)

In [5]:
performances = pd.read_csv(f"./data/all_performances_{league}_{year}.csv")
most_recent_games = (
    performances.groupby("team_id")
    .agg({"game_id": "max"})
    .rename(columns={"game_id": "most_recent_game"})
)
performances = (
    performances.merge(most_recent_games, left_on="team_id", right_index=True)
    .assign(is_most_recent=lambda _: _.most_recent_game == _.game_id)
)
in_most_recent = set(performances[performances.is_most_recent].player_id)

In [36]:
weighted_players = pd.concat([(
        team.groupby("player_id")
            .agg({"n_possessions": "sum"})
            .reset_index()
            .assign(pct_possessions=lambda df: 5 * df.n_possessions / df.n_possessions.sum())
            .sort_values("pct_possessions", ascending=False)
            .assign(team_id=team_id)
    )
    for team_id, team in performances.groupby("team_id")
]).set_index("player_id")
weighted_players = (
    weighted_players.merge(player_ratings, left_index=True, right_index=True, suffixes=("", "_right"))
        .assign(played_in_latest=lambda _: [pid in in_most_recent for pid in _.index])
)


In [42]:
# re-scale up to full net_impacts for each team
weighted_players = weighted_players.assign(pct_possessions=lambda _: _.pct_possessions * _.played_in_latest)
multipliers = (
    weighted_players.groupby("team_id")
    .agg({"pct_possessions": "sum"})
    .assign(multiplier=lambda _: 5 / _.pct_possessions)
    .drop(columns=["pct_possessions"])
)
weighted_players = (
    weighted_players.merge(multipliers, on="team_id")
    .assign(player_impact=lambda _: _.net_vpp * _.pct_possessions * _.multiplier)
)

In [43]:
team_ratings = (
    weighted_players
        .groupby("team_id")
        .agg({"player_impact": "sum"})
)

In [44]:
pretty_players = (
    weighted_players.sort_values("net_vpp", ascending=False)[["player_name", "team_name", "net_vpp", "player_impact", "vpp", "vpp_defense", "n_possessions", "Class"]]
    .assign(rank=lambda _: _.net_vpp.rank(ascending=False))
)

In [45]:
# TODO: get the team names directly
# Some team ids have multiple team names
# I think it's because things can get weird with transfers
# so I'm taking the "mode", hopefully that's better
team_names = weighted_players.groupby("team_id").agg({"team_name": pd.Series.mode})
team_ratings = (
    team_ratings.merge(team_names, left_index=True, right_index=True)
        .sort_values("player_impact", ascending=False)
        .assign(rank=lambda _: _.player_impact.rank(ascending=False))
)

In [46]:
team_ratings.head(25)

Unnamed: 0_level_0,player_impact,team_name,rank
team_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
57,0.714846,florida-gators,1.0
167,0.706704,new-mexico-lobos,2.0
245,0.702745,texas-am-aggies,3.0
2509,0.699548,purdue-boilermakers,4.0
2599,0.685956,st-johns-red-storm,5.0
265,0.679691,washington-state-cougars,6.0
2,0.675386,auburn-tigers,7.0
2294,0.664356,iowa-hawkeyes,8.0
333,0.65714,alabama-crimson-tide,9.0
41,0.65351,uconn-huskies,10.0


In [47]:
pretty_players.head(25)

Unnamed: 0,player_name,team_name,net_vpp,player_impact,vpp,vpp_defense,n_possessions,Class,rank
3925,Zach Edey,purdue-boilermakers,0.313449,0.247815,0.320055,0.006606,1796.901106,Senior,1.0
253,Donovan Clingan,uconn-huskies,0.243756,0.125992,0.269305,0.025549,1072.752187,Sophomore,2.0
2112,Greg Gordon,iona-gaels,0.23675,0.0,0.258161,0.021411,1060.243125,Junior,3.0
3051,Braden Huff,gonzaga-bulldogs,0.234433,0.083488,0.28449,0.050057,732.636875,Freshman,4.0
3320,Dame Adelekun,loyola-chicago-ramblers,0.230341,0.100117,0.23657,0.006229,926.45025,Senior,5.0
1072,Ryan Kalkbrenner,creighton-bluejays,0.228165,0.208248,0.230172,0.002007,1857.692412,Senior,6.0
3540,Drew Pember,unc-asheville-bulldogs,0.226843,0.174978,0.238625,0.011782,1677.704097,Senior,7.0
0,Johni Broome,auburn-tigers,0.225616,0.143512,0.230781,0.005165,1507.026875,Junior,8.0
3836,N'Faly Dante,oregon-ducks,0.224659,0.111978,0.229586,0.004927,1051.888438,Senior,9.0
4240,Collin Murray-Boyles,south-carolina-gamecocks,0.22438,0.117293,0.239532,0.015152,1007.329479,Freshman,10.0


In [48]:
def _team_summary(team: str):
    print(team_ratings[team_ratings.team_name == team])
    return pretty_players[pretty_players.team_name == team]

In [51]:
_team_summary("tennessee-volunteers")

         player_impact             team_name  rank
team_id                                           
2633          0.624078  tennessee-volunteers  16.0


Unnamed: 0,player_name,team_name,net_vpp,player_impact,vpp,vpp_defense,n_possessions,Class,rank
4415,Dalton Knecht,tennessee-volunteers,0.169036,0.130039,0.180175,0.011139,1714.4275,Senior,117.0
4418,Jonas Aidoo,tennessee-volunteers,0.156197,0.101462,0.171762,0.015565,1447.622813,Junior,214.0
4421,Tobe Awaka,tennessee-volunteers,0.14986,0.04777,0.164638,0.014777,710.389688,Sophomore,272.0
4414,Zakai Zeigler,tennessee-volunteers,0.133357,0.105199,0.143854,0.010496,1758.003438,Junior,511.0
873,Olivier Nkamhoua,tennessee-volunteers,0.119763,0.0,0.132738,0.012975,1535.519072,Senior,836.0
4419,Jordan Gainey,tennessee-volunteers,0.104474,0.049447,0.121937,0.017463,1054.771563,Junior,1316.0
4420,Jahmai Mashack,tennessee-volunteers,0.103407,0.04658,0.12084,0.017432,1003.8675,Junior,1353.0
4417,Santiago Vescovi,tennessee-volunteers,0.10157,0.06672,0.11008,0.008509,1463.910938,Senior,1426.0
4416,Josiah-Jordan James,tennessee-volunteers,0.098762,0.073437,0.108066,0.009303,1657.090312,Senior,1530.0
4422,J.P. Estrella,tennessee-volunteers,0.033284,0.002627,0.043584,0.010301,175.925,Freshman,3631.0


In [54]:
_team_summary("saint-peters-peacocks")

         player_impact              team_name   rank
team_id                                             
2612          0.405021  saint-peters-peacocks  303.0


Unnamed: 0,player_name,team_name,net_vpp,player_impact,vpp,vpp_defense,n_possessions,Class,rank
4324,Corey Washington,saint-peters-peacocks,0.175076,0.101919,0.194884,0.019808,1134.994688,Sophomore,81.0
4327,Michael Houge,saint-peters-peacocks,0.1702,0.057898,0.165519,-0.004681,663.241389,Junior,110.0
4326,Armoni Zeigler,saint-peters-peacocks,0.103109,0.05077,0.123601,0.020492,960.003229,Freshman,1364.0
4328,Stephon Roberts,saint-peters-peacocks,0.080115,0.026401,0.103916,0.023801,642.486007,Senior,2251.0
4323,Mouhamed Sow,saint-peters-peacocks,0.064208,0.038609,0.078114,0.013906,1172.36066,Junior,2832.0
4325,Marcus Randolph,saint-peters-peacocks,0.063916,0.033202,0.077578,0.013662,1012.786042,Junior,2842.0
4329,Elijah Wood,saint-peters-peacocks,0.054795,0.0,0.070182,0.015387,288.068472,Junior,3119.0
4321,Roy Clarke,saint-peters-peacocks,0.053621,0.034833,0.073022,0.019401,1266.537361,Senior,3156.0
4320,Latrell Reid,saint-peters-peacocks,0.044818,0.038788,0.062296,0.017478,1687.387292,Senior,3382.0
4322,Brent Bland,saint-peters-peacocks,0.036457,0.0226,0.055818,0.019361,1208.607396,Freshman,3561.0
