In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from scipy.optimize import minimize

Load Data

In [2]:
# Load drafted players list
with open('mls_2025_drafted_players.txt', 'r') as file:
    mls_2025_drafted_players = [line.strip() for line in file]

# Load player and team data
player_df = pd.read_csv('d1_player_stats.csv')
team_df = pd.read_csv('ncaa_ratings.csv')

# Clean up data
player_df = player_df.drop(columns=['Unnamed: 0'])
team_df = team_df.drop(columns=['Unnamed: 0'])
player_df = player_df[player_df['Position'] != 'Goalkeeper']
#player_df = player_df[player_df['Position'] != 'Defender']


# Merge player and team data
merged_df = player_df.merge(team_df, on='Team', suffixes=('_player', '_team'))


Data Normalization

In [3]:
# Normalize statistics
merged_df['Norm_Goals'] = merged_df['Goals'] / merged_df['Goals'].max()
merged_df['Norm_Assists'] = merged_df['Assists'] / merged_df['Assists'].max()
merged_df['Norm_Shots'] = merged_df['Shots'] / merged_df['Shots'].max()
merged_df['Norm_Fouls_Won'] = merged_df.get('Fouls Won', 0) / merged_df.get('Fouls Won', 1).max()
merged_df['Norm_Minutes_Played'] = merged_df['Minutes Played'] / merged_df['Minutes Played'].max()
merged_df['Norm_ATT'] = merged_df['ATT'] / merged_df['ATT'].max()
merged_df['Norm_DEF'] = merged_df['DEF'] / merged_df['DEF'].max()

# Calculate the percentage of team minutes played
team_total_minutes = merged_df.groupby('Team')['Minutes Played'].transform('sum')
merged_df['Team_Minutes_Played_Percentage'] = merged_df['Minutes Played'] / team_total_minutes


Objective Function and Optimization

In [4]:
# Initial weights and constraints
initial_weights = [
    0.3, 0.2, 0.1, 0.25, 0.15,  # Forward weights
    0.15, 0.25, 0.05, 0.3, 0.25,  # Midfielder weights
    0.05, 0.1, 0.05, 0.6, 0.2   # Defender weights
]
bounds = [(0, 1)] * len(initial_weights)
constraints = [
    {'type': 'eq', 'fun': lambda w: sum(w[0:5]) - 1},  # Forward
    {'type': 'eq', 'fun': lambda w: sum(w[5:10]) - 1},  # Midfielder
    {'type': 'eq', 'fun': lambda w: sum(w[10:15]) - 1}  # Defender
]

# Objective function
def objective(weights):
    updated_weights = {
        'Forward': {'Goals': weights[0], 'Assists': weights[1], 'Shots': weights[2], 'Team_Att': weights[3], 'Fouls_Won': weights[4]},
        'Midfielder': {'Goals': weights[5], 'Assists': weights[6], 'Shots': weights[7], 'Team_Att_Def': weights[8], 'Fouls_Won': weights[9]},
        'Defender': {'Goals': weights[10], 'Assists': weights[11], 'Shots': weights[12], 'Team_Def': weights[13], 'Fouls_Won': weights[14]}
    }

    def calculate_rating(row):
        if row['Position'] == 'Forward':
            return (
                row['Norm_Goals'] * updated_weights['Forward']['Goals'] +
                row['Norm_Assists'] * updated_weights['Forward']['Assists'] +
                row['Norm_Shots'] * updated_weights['Forward']['Shots'] +
                row['Norm_Fouls_Won'] * updated_weights['Forward']['Fouls_Won'] +
                row['Norm_ATT'] * row['Team_Minutes_Played_Percentage'] * updated_weights['Forward']['Team_Att']
            ) * 100
        elif row['Position'] == 'Midfielder':
            return (
                row['Norm_Goals'] * updated_weights['Midfielder']['Goals'] +
                row['Norm_Assists'] * updated_weights['Midfielder']['Assists'] +
                row['Norm_Shots'] * updated_weights['Midfielder']['Shots'] +
                row['Norm_Fouls_Won'] * updated_weights['Midfielder']['Fouls_Won'] +
                ((row['Norm_ATT'] + row['Norm_DEF']) / 2) * row['Team_Minutes_Played_Percentage'] * updated_weights['Midfielder']['Team_Att_Def']
            ) * 100
        elif row['Position'] == 'Defender':
            return (
                row['Norm_Goals'] * updated_weights['Defender']['Goals'] +
                row['Norm_Assists'] * updated_weights['Defender']['Assists'] +
                row['Norm_Shots'] * updated_weights['Defender']['Shots'] +
                row['Norm_Fouls_Won'] * updated_weights['Defender']['Fouls_Won'] +
                row['Norm_DEF'] * row['Team_Minutes_Played_Percentage'] * updated_weights['Defender']['Team_Def']
            ) * 100
        else:
            return 0

    merged_df['Overall_Rating'] = merged_df.apply(calculate_rating, axis=1)
    df = merged_df.copy()
    df['MAX'] = ((df['Overall_Rating'] - df['Overall_Rating'].min()) /
                 (df['Overall_Rating'].max() - df['Overall_Rating'].min()) * 100).fillna(0)
    drafted_players_df = df[df['Name'].isin(mls_2025_drafted_players)]
    return -drafted_players_df['MAX'].mean()

# Optimize weights
result = minimize(objective, initial_weights, bounds=bounds, constraints=constraints, method='SLSQP')
optimized_weights = result.x
print("Optimized Weights:", optimized_weights)


Optimized Weights: [2.52483157e-11 3.44579486e-01 2.55522534e-01 3.99897980e-01
 3.63546646e-12 3.36480389e-02 2.34372407e-11 2.35507735e-01
 3.67132606e-01 3.63711620e-01 1.54920626e-02 1.29879516e-11
 5.54153156e-01 6.80610926e-02 3.62293688e-01]


Calculate and Scale the Ratings

In [5]:
# Organize optimized weights into a dictionary
normalized_values = optimized_weights  # Get optimized weights from the minimize() result

weights = {
    'Forward': {
        'Goals': normalized_values[0],
        'Assists': normalized_values[1],
        'Shots': normalized_values[2],
        'Team_Att': normalized_values[3],
        'Fouls_Won': normalized_values[4]
    },
    'Midfielder': {
        'Goals': normalized_values[5],
        'Assists': normalized_values[6],
        'Shots': normalized_values[7],
        'Team_Att_Def': normalized_values[8],
        'Fouls_Won': normalized_values[9]
    },
    'Defender': {
        'Goals': normalized_values[10],
        'Assists': normalized_values[11],
        'Shots': normalized_values[12],
        'Team_Def': normalized_values[13],
        'Fouls_Won': normalized_values[14]
    }
}

# Define function to calculate player rating based on position
def calculate_rating(row):
    if row['Position'] == 'Forward':
        return (
            row['Norm_Goals'] * weights['Forward']['Goals'] +
            row['Norm_Assists'] * weights['Forward']['Assists'] +
            row['Norm_Shots'] * weights['Forward']['Shots'] +
            row['Norm_Fouls_Won'] * weights['Forward']['Fouls_Won'] +
            row['Norm_ATT'] * row['Team_Minutes_Played_Percentage'] * weights['Forward']['Team_Att']
        ) * 100
    elif row['Position'] == 'Midfielder':
        return (
            row['Norm_Goals'] * weights['Midfielder']['Goals'] +
            row['Norm_Assists'] * weights['Midfielder']['Assists'] +
            row['Norm_Shots'] * weights['Midfielder']['Shots'] +
            row['Norm_Fouls_Won'] * weights['Midfielder']['Fouls_Won'] +
            ((row['Norm_ATT'] + row['Norm_DEF']) / 2) * row['Team_Minutes_Played_Percentage'] * weights['Midfielder']['Team_Att_Def']
        ) * 100
    elif row['Position'] == 'Defender':
        return (
            row['Norm_Goals'] * weights['Defender']['Goals'] +
            row['Norm_Assists'] * weights['Defender']['Assists'] +
            row['Norm_Shots'] * weights['Defender']['Shots'] +
            row['Norm_Fouls_Won'] * weights['Defender']['Fouls_Won'] +
            row['Norm_DEF'] * row['Team_Minutes_Played_Percentage'] * weights['Defender']['Team_Def']
        ) * 100
    else:
        return 0  # Default for unknown positions


# Apply the rating calculation
merged_df['Overall_Rating'] = merged_df.apply(calculate_rating, axis=1)

# Apply logarithmic scaling to compress rating range
merged_df['Log_Rating'] = np.log1p(merged_df['Overall_Rating'])

# Normalize ratings to a 0-100 range
log_rating_min = merged_df['Log_Rating'].min()
log_rating_max = merged_df['Log_Rating'].max()
merged_df['MAX'] = (
    (merged_df['Log_Rating'] - log_rating_min) / (log_rating_max - log_rating_min) * 100
).astype(int)


Save the data

In [6]:
df = merged_df[['Name', 'Team', 'Position', 'Goals', 'Assists', 'Shots', 'Fouls Won', 
                'Minutes Played', 'Overall_Rating', 'MAX']]
df = df.sort_values(by='MAX', ascending=False).reset_index(drop=True)
df['Rank'] = df.index + 1  # Create a Rank column starting from 1
#df.to_csv(r"PlayerRatings.csv", index=False)


In [7]:
df[df['Position']=='Forward'].head(25)

Unnamed: 0,Name,Team,Position,Goals,Assists,Shots,Fouls Won,Minutes Played,Overall_Rating,MAX,Rank
3,Emil Jaaskelainen,Akron,Forward,23,7,92,52,1702,42.225962,99,4
6,Sydney Wathuta,Vermont,Forward,1,13,49,19,1745,42.27375,99,7
9,Elie Kisoka,Kansas City,Forward,11,9,67,29,1684,40.187214,98,10
14,Alex Harris,Cornell,Forward,19,5,86,20,1464,36.607232,96,15
16,Bailey Sparks,SMU,Forward,9,10,49,29,1804,37.074574,96,17
36,Jesus Barea,Missouri St.,Forward,15,4,81,34,1382,33.148481,93,37
39,Noeh Hernandez,DePaul,Forward,8,9,43,27,1227,32.368991,93,40
42,Denis Krioutchenkov,Davidson,Forward,5,7,61,32,1126,32.416375,93,43
44,Samuel Sarver,Indiana,Forward,5,8,47,40,1632,31.625616,92,45
51,Ethan Blake,Lindenwood,Forward,8,9,44,20,1282,32.08404,92,52
