In [146]:
import pandas as pd
import scipy
data = pd.read_excel('playerratingsNCAAB.xlsx')
data = data[~data['PlayerName'].isin(['Tyler Bey', 'Devin Carter','Isaac Jones','Tre Jones','Mason Jones','Isaiah Jackson',
                                     'Jalen Johnson','Marcus Garrett','Andre Jackson','David Jones','Jalen Smith'
                                     ,'Jalen Williams','Jaylin Williams','Jalen Harris','Cameron Johnson','Chris Smith',
                                     'Donovan Williams','Tyler Harris','Grant Williams'])]

# Filter out G Webb and AL A&M teams
data = data[~data['PlayerTeam'].isin(['G WEBB', 'AL A&M','E MICH'])]
def calculate_potential_score(position, height, player_class, athlete, 
                             position_avg_heights, position_height_stds=None,
                             height_weight=5, class_weights=None, athleticism_weight=15):
    """
    Calculate potential score with position-adjusted height importance.
    """
    # Default class weights if not provided
    if class_weights is None:
        class_weights = {
            "FR": 30,    # Freshmen get highest weight
            "SO": 27,    # Sophomores 
            "JR": 25,    # Juniors
            "SR": 25     # Seniors
        }
    
    # Position-specific height weight modifiers
    height_importance_by_position = {
        # Pure positions
        "PG": 1.0,  # Point guards - height matters most
        "PG/SG": 1,
        "SG": 0.9,
        "SG/SF": .9,# Shooting guards
        "SF": 0.9, 
        "SF/PF": .9,# Small forwards
        "PF": 0.9,
        "PF/C": 0.9,# Power forwards
        "C": 0.7,   # Centers - height matters least
     
    }
    
    # Get position-specific height weight (default to 1.0 if position not found)
    position_height_modifier = height_importance_by_position.get(position, 1.0)
    
    # Apply the modifier to the height weight
    adjusted_height_weight = height_weight * position_height_modifier
    
    if position not in position_avg_heights:
        return 50  # Default score
    
    avg_height = position_avg_heights[position]
    
    # Height component with adjusted weight
    if position_height_stds and position in position_height_stds:
        std_dev = position_height_stds[position]
        height_z_score = (height - avg_height) / std_dev if std_dev > 0 else 0
        height_component = height_z_score * adjusted_height_weight
    else:
        height_diff = height - avg_height
        height_component = height_diff * adjusted_height_weight
    
    # Class component
    class_component = class_weights.get(player_class, 10)
    
    # Athleticism component
    athleticism_component = 0
    if athlete == 6:
        athleticism_component = -3  # Big penalty
    elif athlete == 7:
        athleticism_component = -1   # No effect
    elif athlete == 8:
        athleticism_component = 3   # Small boost
    elif athlete == 9:
        athleticism_component = 8   # Good boost
    elif athlete == 10:
        athleticism_component = 20   # Big boost
    else:
        # Linear interpolation for other values
        if athlete < 6:
            athleticism_component = -6  # Larger penalty for very low athleticism
        elif athlete > 10:
            athleticism_component = 12   # Larger boost for exceptional athleticism
    
    # Apply the athleticism weight
    athleticism_component *= athleticism_weight / 10
    
    # Final score - sum all components
    final_score = height_component + class_component + athleticism_component
    
    # Add a base value to ensure mostly positive scores
    final_score += 50
    
    return final_score

# Calculate standard deviations for height by position
position_height_stds = data.groupby('PositionDetail')['Height'].std().to_dict()

# Calculate average heights by position
position_avg_heights = data.groupby('PositionDetail')['Height'].mean().to_dict()

# Apply the calculation to get the potential scores
data['PotentialRaw'] = data.apply(
    lambda row: calculate_potential_score(
        row['PositionDetail'],
        row['Height'],
        row['PlayerClass'],
        row['Athlete'],
        position_avg_heights,
        position_height_stds,
        height_weight=3,       # Slightly more weight on height
        athleticism_weight=6  # More weight on athleticism
    ),
    axis=1
)

# Normalize final scores to 0-100 scale
min_score = data['PotentialRaw'].min()
max_score = data['PotentialRaw'].max()
data['NBA'] = 100 * (data['PotentialRaw'] - min_score) / (max_score - min_score)


In [147]:
# For offense - heavier emphasis on usage/winning combination
data['OffenseMultiplier'] = 1 + (0.3 * data['UsageRate'] + 0.7 * data['WinPercentage']) * 0.5

# For defense - smaller emphasis on usage but higher on winning %
data['DefenseMultiplier'] = 1 + (data['WinPercentage']) * 0.3

# Calculate the offense and defense scores with multipliers as you already have
data['OffenseScore'] = (((1.5*data['pocNJ2MakeOff'] + data['pocJ2MakeOff'] + 1.5*data['pocJ3MakeOff'] + 
                         .5*data['pocORB'] + 3*data['pFTMake'] + data['pJ3Make'] + 
                         data['pocNJ2AssistOff'] + data['pocJ2AssistOff'] + data['pocJ3AssistOff']) / 11) * \
                         data['OffenseMultiplier']) * (.25*data['NBA'])

data['DefenseScore'] = (((data['pocNJ2MakeDef'] + .5*data['pocJ2MakeDef'] + data['pocJ3MakeDef'] + 
                         data['pocDRB'] + data['pocNJ2BlockDef']) / 4.5) ) * (.25*data['NBA'])


# Normalize to 0-100 scale where 100 is the best
data['OScore'] = 100 * (data['OffenseScore'] - data['OffenseScore'].min()) / (data['OffenseScore'].max() - data['OffenseScore'].min())

data['DScore'] = 100 * (data['DefenseScore'] - data['DefenseScore'].min()) / (data['DefenseScore'].max() - data['DefenseScore'].min())

data['Score'] = (2*data['OScore'] + data['DScore'] ) /3

data['Rank'] = data['Score'].rank(ascending=False, method='min')

data = data.round(1)

In [148]:
data[['Draft','Rank','PlayerName','PositionDetail','Height','PlayerClass','PlayerTeam','NBA','OScore','DScore','Score']].dropna().to_csv('AllProspects.csv')

In [149]:
data.tail(20)

Unnamed: 0,PlayerId,PlayerName,PlayerTeam,Position,Height,PlayerClass,pocNJ2MakeOff,pocJ2MakeOff,pocJ3MakeOff,pocNJ2MakeDef,...,PotentialRaw,NBA,OffenseMultiplier,DefenseMultiplier,OffenseScore,DefenseScore,OScore,DScore,Score,Rank
527,82,Aaron Estrada,ALA,G,76,SR,1113.3,1136.8,1112.8,1116.8,...,74.0,37.3,4.6,1.2,54178.0,10664.2,32.1,37.7,34.0,378.0
528,3831,Boo Buie,NWSTRN,G,74,SR,1091.0,1146.3,1123.9,1155.1,...,74.3,38.5,5.4,1.2,66579.7,11144.2,39.5,39.4,39.5,297.0
529,41686,Zach Edey,PURDUE,C,88,SR,1112.8,1119.3,1132.0,1183.7,...,77.9,50.9,6.3,1.3,95315.8,15889.8,56.6,56.2,56.4,84.0
530,5589,Cam Spencer,UCONN,G,76,SR,1092.1,1120.9,1090.7,1185.1,...,71.4,28.4,4.2,1.3,38811.8,8285.5,23.0,29.3,25.1,477.0
531,37567,Tristen Newton,UCONN,G,77,SR,1105.7,1126.5,1119.1,1238.7,...,75.8,43.7,5.0,1.3,69134.0,13425.5,41.0,47.5,43.2,244.0
532,43480,Donovan Clingan,UCONN,C,86,SO,1114.4,1098.5,1096.9,1267.7,...,78.9,54.5,5.1,1.3,77291.7,17324.2,45.9,61.2,51.0,139.0
533,45960,Stephon Castle,UCONN,G,78,FR,1129.0,1106.6,1102.8,1206.0,...,85.0,75.8,4.7,1.3,107091.0,22240.7,63.5,78.6,68.6,30.0
534,15952,Jaedon Ledee,SDSU,F,81,SR,1096.9,1115.1,1079.2,1088.2,...,76.2,45.1,5.8,1.2,76242.3,12837.0,45.2,45.4,45.3,215.0
536,40054,Ajay Mitchell,UCSB,G,76,JR,1106.0,1109.4,1107.5,1090.0,...,74.0,37.3,5.9,1.2,68872.8,10272.5,40.9,36.3,39.3,299.0
537,2494,Antonio Reeves,UK,G,76,SR,1096.4,1130.6,1135.9,1125.9,...,71.4,28.4,5.1,1.2,45317.1,8358.6,26.9,29.5,27.8,452.0
