In [1]:
import pandas as pd
from collections import defaultdict

def calculate_team_ratings(df: pd.DataFrame) -> pd.Series:
    """
    Rate teams by points per game (Win=3, Draw=1, Loss=0), scaled to 1-100.

    Each row of ``df`` is one game. Every game contributes one appearance to
    both the home and the away team; a team's points per game (PPG) is the
    mean of the points it earned over its appearances. PPG is then min-max
    scaled so the lowest PPG maps to 1 and the highest to 100.

    Games with a missing team name, or with a missing or non-numeric score,
    are skipped. (Comparing a missing score with ``>`` / ``<`` is always
    False, so such a game would otherwise be silently scored as a draw.)

    Args:
        df: DataFrame with game records including columns
            'home_team', 'home_team_score', 'away_team', 'away_team_score'.

    Returns:
        A pandas Series named "Rating" with team names as index and ratings
        (1-100) as float values, sorted from highest to lowest rating.
        If every team has the same PPG, every team is rated 50.0.
        Returns an empty float Series (after printing a message) if the input
        is empty, lacks a required column, or contains no usable game.
    """
    required_cols = ['home_team', 'home_team_score', 'away_team', 'away_team_score']
    if df.empty or not set(required_cols).issubset(df.columns):
        print("Error: DataFrame is missing required columns or is empty.")
        return pd.Series(dtype=float)

    # Work on a copy of just the needed columns so the caller's frame is untouched.
    games = df[required_cols].copy()
    for score_col in ('home_team_score', 'away_team_score'):
        # Coerce so that text scores compare numerically; unparseable values become NaN.
        games[score_col] = pd.to_numeric(games[score_col], errors='coerce')

    # Drop incomplete games; reset the index so the Series below align positionally.
    games = games.dropna().reset_index(drop=True)
    if games.empty:
        print("No teams found after processing games.")
        return pd.Series(dtype=float)

    # --- Assign points for every game at once ---
    score_diff = games['home_team_score'] - games['away_team_score']
    is_draw = score_diff == 0
    home_points = (score_diff > 0) * 3 + is_draw * 1
    away_points = (score_diff < 0) * 3 + is_draw * 1

    # One row per (team, game) appearance: home rows first, then away rows.
    appearances = pd.concat(
        [
            pd.DataFrame({'team': games['home_team'], 'points': home_points}),
            pd.DataFrame({'team': games['away_team'], 'points': away_points}),
        ],
        ignore_index=True,
    )

    # --- Calculate Points Per Game (PPG) ---
    # Mean points per appearance == total points / games played.
    ppg_series = (
        appearances.groupby('team', sort=False)['points']
        .mean()
        .rename("PPG")
        .rename_axis(None)  # keep the result's index unnamed
    )

    # --- Scale PPG to 1-100 ---
    min_ppg = ppg_series.min()
    max_ppg = ppg_series.max()

    if max_ppg == min_ppg:
        # All teams are level: min-max scaling would divide by zero, so use a
        # neutral middle rating for everyone.
        scaled_ratings = pd.Series(50.0, index=ppg_series.index, name="Rating")
    else:
        # Min-max scaling: Rating = 1 + (PPG - min_PPG) * 99 / (max_PPG - min_PPG)
        scaled_ratings = (
            1 + (ppg_series - min_ppg) * 99 / (max_ppg - min_ppg)
        ).rename("Rating")

    return scaled_ratings.sort_values(ascending=False)

# Load the game records from the local scores CSV.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
games_df = pd.read_csv(
    r"C:\Users\maxwe\Player and Team Ratings\NCAA\ncaa_mens_scores_2024.csv"
)

# Convert the game records into 1-100 team ratings.
team_ratings = calculate_team_ratings(games_df)

# Emit a heading line followed by the ratings Series.
print("Team Ratings (1-100 Scale):", team_ratings, sep="\n")

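# --- To show intermediate PPG values (optional) ---
# NOTE: team_points and team_games_played are not defined at notebook scope, so this
# snippet raises NameError as written; the per-team totals must first be computed
# here or returned from calculate_team_ratings().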
# temp_points = pd.Series(team_points)
# temp_games = pd.Series(team_games_played)
# temp_df = pd.DataFrame({'Points': temp_points, 'Games': temp_games})
# temp_df['PPG'] = temp_df['Points'] / temp_df['Games']
# print("\nIntermediate PPG:")
# print(temp_df.sort_values('PPG', ascending=False))

Team Ratings (1-100 Scale):
Ohio St.     100.000000
San Diego     99.428846
Penn          95.791498
Vermont       95.240385
Dayton        95.240385
                ...    
Blackburn      1.000000
Suffolk        1.000000
CCNY           1.000000
Neumann        1.000000
Warner         1.000000
Name: Rating, Length: 258, dtype: float64


In [3]:
# Echo the team_ratings Series (assigned in an earlier cell) as this cell's output.
team_ratings

Ohio St.     100.000000
San Diego     99.428846
Penn          95.791498
Vermont       95.240385
Dayton        95.240385
                ...    
Blackburn      1.000000
Suffolk        1.000000
CCNY           1.000000
Neumann        1.000000
Warner         1.000000
Name: Rating, Length: 258, dtype: float64