#### Final Season Rankings

In [5]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

In [6]:
awards = pd.read_csv("project_data/awards_players.csv")
coaches = pd.read_csv(Path("project_data", "coaches.csv"))
players_teams = pd.read_csv(Path("project_data", "players_teams.csv"))
players = pd.read_csv(Path("project_data", "players.csv"))
series_post = pd.read_csv(Path("project_data", "series_post.csv"))
teams_post = pd.read_csv(Path("project_data", "teams_post.csv"))
teams = pd.read_csv(Path("project_data", "teams.csv"))

In [8]:
def get_players_for_team(players_teams_df, year, team_id):
    """
    Return all players who played for a specific team in a given year.

    Parameters:
        players_teams_df (pd.DataFrame): The full players_teams dataset.
        year (int): The season year.
        team_id (str): The team ID (e.g., "LAL", "BOS").

    Returns:
        pd.DataFrame: Filtered DataFrame of players for that team and year.
    """
    df_team = players_teams_df[
        (players_teams_df["year"] == year) &
        (players_teams_df["tmID"] == team_id)
    ].copy()
    return df_team

In [10]:

# Choose the year to analyze
year_to_analyze = 4 # change this as needed
# Chooses a team to analyze
team_to_analyze = "LAS"

team_players = get_players_for_team(players_teams, year_to_analyze, team_to_analyze)
print(team_players.head())

       playerID  year  stint tmID  lgID  GP  GS  minutes  points  oRebounds  \
233  byearla01w     4      0  LAS  WNBA   5   0       72      28         12   
388  dixonta01w     4      0  LAS  WNBA  30  30     1042     412         41   
549  gilloje01w     4      0  LAS  WNBA  33  10      397     103         18   
767  johnsch01w     4      0  LAS  WNBA   8   0       45       6          2   
906  leslili01w     4      0  LAS  WNBA  23  23      792     424         76   

     ...  PostBlocks  PostTurnovers  PostPF  PostfgAttempted  PostfgMade  \
233  ...           0              0       0                0           0   
388  ...           2             11      27               94          40   
549  ...           1              0       5                5           0   
767  ...           0              0       0                0           0   
906  ...          28             24      39              137          74   

     PostftAttempted  PostftMade  PostthreeAttempted  PostthreeMade 

In [16]:
def get_player_game_score(players_teams_df, player_id, year):
    """
    Compute a player's Game Score metrics for a given year and return native Python types.

    Parameters:
        players_teams_df (pd.DataFrame): The players_teams dataset.
        player_id (str): The player ID.
        year (int): The season year.

    Returns:
        dict: {
            'playerID': str,
            'year': int,
            'tmID': str,
            'Game_Score_Total': float,
            'Game_Score_Per_Game': float,
            'Game_Score_Per_Minute': float
        }
    """
    df_player = players_teams_df[
        (players_teams_df["playerID"] == player_id) &
        (players_teams_df["year"] == year)
    ].copy()

    if df_player.empty:
        return {
            "playerID": player_id,
            "year": year,
            "tmID": None,
            "Game_Score_Total": None,
            "Game_Score_Per_Game": None,
            "Game_Score_Per_Minute": None
        }

    # Apply Game Score formula
    base_score = (
        df_player["points"]
        + 0.4 * df_player["fgMade"]
        - 0.7 * df_player["fgAttempted"]
        - 0.4 * (df_player["ftAttempted"] - df_player["ftMade"])
        + 0.7 * df_player["oRebounds"]
        + 0.3 * df_player["dRebounds"]
        + df_player["steals"]
        + 0.7 * df_player["assists"]
        + 0.7 * df_player["blocks"]
        - 0.4 * df_player["PF"]
        - df_player["turnovers"]
    )

    df_player["Game_Score_Total"] = base_score
    df_player["Game_Score_Per_Game"] = base_score / df_player["GP"].replace(0, np.nan)
    df_player["Game_Score_Per_Minute"] = base_score / df_player["minutes"].replace(0, np.nan)

    # If the player switched teams mid-season, aggregate
    result = {
        "playerID": player_id,
        "year": int(year),
        "tmID": ",".join(df_player["tmID"].unique()),
        "Game_Score_Total": float(df_player["Game_Score_Total"].sum(skipna=True)),
        "Game_Score_Per_Game": float(df_player["Game_Score_Per_Game"].mean(skipna=True)),
        "Game_Score_Per_Minute": float(df_player["Game_Score_Per_Minute"].mean(skipna=True)),
    }

    return result

In [47]:
# Example usage
player_score = get_player_game_score(players_teams, player_id="starbka01w", year=3)
print(player_score)

{'playerID': 'starbka01w', 'year': 3, 'tmID': 'SEA,UTA', 'Game_Score_Total': 56.99999999999999, 'Game_Score_Per_Game': 2.684444444444444, 'Game_Score_Per_Minute': 0.21818792766373407}


In [43]:
def get_team_weighted_avg_score(players_teams_df, year, team_id):
    """
    Calculate the weighted average Game Score per minute for a team in a given year.
    Uses player-level Game Scores weighted by each player's total Game Score.

    Parameters:
        players_teams_df (pd.DataFrame): The full players_teams dataset.
        year (int): The season year.
        team_id (str): The team ID.

    Returns:
        dict: {
            'tmID': str,
            'year': int,
            'Team_Weighted_Avg_Per_Minute': float,
            'Num_Players': int
        }
    """
    # Get all players for that team and year
    df_team = get_players_for_team(players_teams_df, year, team_id)
    if df_team.empty:
        return {
            "tmID": team_id,
            "year": int(year),
            "Team_Weighted_Avg_Per_Minute": None,
            "Num_Players": 0
        }

    # Compute game scores for each player
    player_scores = []
    for player_id in df_team["playerID"].unique():
        player_result = get_player_game_score(players_teams_df, player_id, year - 1)
        player_scores.append(player_result)

    df_scores = pd.DataFrame(player_scores)

    # Weighted average: weight = Game_Score_Total
    total_weight = df_scores["Game_Score_Total"].sum(skipna=True)
    if total_weight == 0 or np.isnan(total_weight):
        weighted_avg = None
    else:
        weighted_avg = (
            (df_scores["Game_Score_Per_Minute"] * df_scores["Game_Score_Total"]).sum(skipna=True)
            / total_weight
        )

    return {
        "tmID": team_id,
        "year": int(year),
        "Team_Weighted_Avg_Per_Minute": float(weighted_avg) if weighted_avg is not None else None,
        "Num_Players": int(len(df_scores))
    }

In [44]:
# Example usage
team_score = get_team_weighted_avg_score(players_teams, year_to_analyze, team_to_analyze)
print(team_score)

{'tmID': 'LAS', 'year': 4, 'Team_Weighted_Avg_Per_Minute': 0.29988575294697356, 'Num_Players': 15}


In [45]:
def get_conference_rankings(players_teams_df, teams_df, year):
    """
    Compute predicted conference rankings for all teams in a given year.

    Parameters:
        players_teams_df (pd.DataFrame): The full players_teams dataset.
        teams_df (pd.DataFrame): The teams dataset (must contain confID, rank, name, etc.)
        year (int): The season year.

    Returns:
        pd.DataFrame: DataFrame containing each team's predicted and actual conference rank.
    """
    # --- Get list of teams for that year ---
    teams_year = teams_df[teams_df["year"] == year][["tmID", "confID", "name", "rank", "won", "lost"]].copy()
    teams_year.rename(columns={"rank": "Actual_Rank"}, inplace=True)

    if teams_year.empty:
        print(f"No team data found for year {year}.")
        return pd.DataFrame()

    # --- Compute predicted weighted average for each team ---
    team_results = []
    for team_id in teams_year["tmID"].unique():
        result = get_team_weighted_avg_score(players_teams_df, year, team_id)
        team_results.append(result)

    df_team_scores = pd.DataFrame(team_results)

    # --- Merge with conference info ---
    df_team_scores = pd.merge(df_team_scores, teams_year, on="tmID", how="left")

    # --- Compute predicted ranks within each conference ---
    df_team_scores["Predicted_Rank_Conf"] = (
        df_team_scores.groupby("confID")["Team_Weighted_Avg_Per_Minute"]
        .rank(ascending=False, method="first")
    )

    # --- Compute difference from actual rank ---
    df_team_scores["Rank_Diff"] = df_team_scores["Predicted_Rank_Conf"] - df_team_scores["Actual_Rank"]

   # --- Split into two separate conference DataFrames ---
    conf_ids = df_team_scores["confID"].dropna().unique()
    conf_dfs = {}
    for conf in conf_ids:
        conf_df = df_team_scores[df_team_scores["confID"] == conf].sort_values("Predicted_Rank_Conf").reset_index(drop=True)
        conf_dfs[conf] = conf_df[[
            "confID", "tmID", "name", "year",
            "Team_Weighted_Avg_Per_Minute", "Predicted_Rank_Conf",
            "Actual_Rank", "Rank_Diff", "won", "lost", "Num_Players"
        ]]

    # Return a tuple of two DataFrames
    # If the conference IDs are standard "EAST" and "WEST" we return in that order
    east_df = conf_dfs.get("EA", pd.DataFrame())
    west_df = conf_dfs.get("WE", pd.DataFrame())
    
    return east_df, west_df

In [56]:
# Example Usage
east_rankings, west_rankings = get_conference_rankings(players_teams, teams, year=10)

print("🏀 Eastern Conference Rankings")
print(east_rankings)

print("\n🏀 Western Conference Rankings")
print(west_rankings)

🏀 Eastern Conference Rankings
  confID tmID                name  year  Team_Weighted_Avg_Per_Minute  \
0     EA  CON     Connecticut Sun    10                      0.318042   
1     EA  DET       Detroit Shock    10                      0.301598   
2     EA  CHI         Chicago Sky    10                      0.291838   
3     EA  NYL    New York Liberty    10                      0.288212   
4     EA  ATL       Atlanta Dream    10                      0.281187   
5     EA  IND       Indiana Fever    10                      0.273435   
6     EA  WAS  Washington Mystics    10                      0.232280   

   Predicted_Rank_Conf  Actual_Rank  Rank_Diff  won  lost  Num_Players  
0                  1.0            6       -5.0   16    18           14  
1                  2.0            3       -1.0   18    16           18  
2                  3.0            5       -2.0   16    18           12  
3                  4.0            7       -3.0   13    21           11  
4                  5