In [43]:
import pandas as pd
import scipy
# Load the player ratings workbook.
data = pd.read_excel('playerratingsNCAAB.xlsx')

# Player names to drop; every row whose PlayerName matches one of these is
# removed, regardless of team.
excluded_player_names = [
    'Tyler Bey', 'Devin Carter', 'Isaac Jones', 'Tre Jones', 'Mason Jones',
    'Isaiah Jackson', 'Jalen Johnson', 'Marcus Garrett', 'Andre Jackson',
    'David Jones', 'Jalen Smith', 'Jalen Williams', 'Jaylin Williams',
    'Jalen Harris', 'Cameron Johnson', 'Chris Smith', 'Donovan Williams',
    'Tyler Harris', 'Grant Williams',
]

# Team codes to drop (G Webb and AL A&M).
excluded_teams = ['G WEBB', 'AL A&M']

# Keep only rows that match neither exclusion list.
name_is_excluded = data['PlayerName'].isin(excluded_player_names)
team_is_excluded = data['PlayerTeam'].isin(excluded_teams)
data = data[(~name_is_excluded) & (~team_is_excluded)]
def _athleticism_adjustment(athlete):
    """Return the raw (unweighted) score adjustment for an athleticism rating.

    Ratings of exactly 6, 7, 8, 9 and 10 map to -4, -1, 2, 6 and 14.
    Fractional ratings between 6 and 10 are linearly interpolated between
    the two neighbouring anchors. Ratings below 6 get -6, ratings above 10
    get 12, and a value that fails every comparison (e.g. NaN) gets 0.
    """
    # (rating, adjustment) anchor points, in ascending rating order.
    anchors = ((6, -4), (7, -1), (8, 2), (9, 6), (10, 14))

    if athlete < 6:
        return -6  # Larger penalty for very low athleticism
    if athlete > 10:
        # NOTE(review): 12 is lower than the 14 awarded for exactly 10, so
        # the scale is not monotonic above 10 -- confirm this is intended.
        return 12

    for (low_rating, low_adj), (high_rating, high_adj) in zip(anchors, anchors[1:]):
        if low_rating <= athlete <= high_rating:
            fraction = (athlete - low_rating) / (high_rating - low_rating)
            return low_adj + fraction * (high_adj - low_adj)

    # Only reachable when every comparison above is False (e.g. NaN).
    return 0


def calculate_potential_score(position, height, player_class, athlete, 
                             position_avg_heights, position_height_stds=None,
                             height_weight=5, class_weights=None, athleticism_weight=15):
    """
    Calculate potential score with only height, class, and athleticism as factors.

    The score is the sum of a height component, a class component, a
    weighted athleticism component, and a fixed base of 50.

    Parameters:
    - position: Player's position (string); used as the key into the
      height dictionaries
    - height: Player's height (numeric)
    - player_class: Player's college class (FR, SO, JR, SR)
    - athlete: Player's athleticism rating (typically 1-10); fractional
      ratings between 6 and 10 are linearly interpolated
    - position_avg_heights: Dictionary of average heights by position
    - position_height_stds: Dictionary of height standard deviations by
      position. When it has an entry for the position, the height component
      uses a z-score; otherwise it uses the raw height difference.
    - height_weight: Multiplier applied to the height z-score (or raw
      height difference)
    - class_weights: Dictionary mapping class to points. Defaults to
      FR=40, SO=35, JR=25, SR=10; classes missing from the dictionary get 10.
    - athleticism_weight: The athleticism adjustment is multiplied by
      athleticism_weight / 10

    Returns:
    - The numeric score, or 50 when the position is not present in
      position_avg_heights.
    """
    # Default class weights if not provided
    if class_weights is None:
        class_weights = {
            "FR": 40,    # Freshmen get highest weight
            "SO": 35,    # Sophomores
            "JR": 25,    # Juniors
            "SR": 10     # Seniors
        }

    if position not in position_avg_heights:
        return 50  # Default score

    avg_height = position_avg_heights[position]

    # Height component
    if position_height_stds and position in position_height_stds:
        std_dev = position_height_stds[position]
        # A zero or NaN standard deviation fails the `> 0` test, so the
        # height contributes nothing instead of dividing by zero.
        height_z_score = (height - avg_height) / std_dev if std_dev > 0 else 0
        height_component = height_z_score * height_weight
    else:
        height_component = (height - avg_height) * height_weight

    # Class component (unknown classes fall back to 10)
    class_component = class_weights.get(player_class, 10)

    # Athleticism component, scaled by the athleticism weight
    athleticism_component = _athleticism_adjustment(athlete)
    athleticism_component *= athleticism_weight / 10

    # Final score - sum all components
    final_score = height_component + class_component + athleticism_component

    # Add a base value to ensure mostly positive scores
    final_score += 50

    return final_score

# Per-position height statistics: mean and standard deviation of Height
# within each PositionDetail group, as plain dictionaries.
height_by_position = data.groupby('PositionDetail')['Height']
position_avg_heights = height_by_position.mean().to_dict()
position_height_stds = height_by_position.std().to_dict()


def _potential_for_row(row):
    """Score a single player row with calculate_potential_score.

    height_weight=3 and athleticism_weight=6 override the function's
    defaults of 5 and 15.
    """
    return calculate_potential_score(
        position=row['PositionDetail'],
        height=row['Height'],
        player_class=row['PlayerClass'],
        athlete=row['Athlete'],
        position_avg_heights=position_avg_heights,
        position_height_stds=position_height_stds,
        height_weight=3,
        athleticism_weight=6,
    )


# Raw potential score for every player.
data['PotentialRaw'] = data.apply(_potential_for_row, axis=1)

# Min-max rescale the raw scores so the lowest maps to 0 and the highest to 100.
min_score = data['PotentialRaw'].min()
max_score = data['PotentialRaw'].max()
score_range = max_score - min_score
data['NBA'] = 100 * (data['PotentialRaw'] - min_score) / score_range

# Display the 30 players with the highest normalized potential.
data.sort_values('NBA', ascending=False).head(30)

Unnamed: 0,PlayerId,PlayerName,PlayerTeam,Position,Height,PlayerClass,pocNJ2MakeOff,pocJ2MakeOff,pocJ3MakeOff,pocNJ2MakeDef,...,PlayerId4,UsageRate,Column1,DraftPick,PositionDetail,BigBoardNumber,Athlete,Draft,PotentialRaw,NBA
56,46638,Ace Bailey,RUTGER,G,82,FR,1111.3539,1128.3014,1104.3748,1063.8494,...,46638.0,27.554336,AceBailey,AceBailey,SG/SF,4,9.0,2025.0,101.604508,100.0
399,4277,Brandon Miller,ALA,F,81,FR,1110.6101,1115.2278,1105.6622,1228.455,...,4277.0,26.387889,BrandonMiller,BrandonMiller,SF,3,9.0,2023.0,99.000184,95.122146
295,42779,Cade Cunningham,OKLAST,G,80,FR,1118.0698,1108.9001,1109.4633,1119.1088,...,42779.0,29.304322,CadeCunningham,CadeCunningham,PG/SG,1,8.0,2021.0,98.106242,93.447807
271,42839,Evan Mobley,USC,F,84,FR,1110.7352,1121.3972,1117.5083,1188.5536,...,42839.0,23.16058,EvanMobley,EvanMobley,PF/C,2,9.0,2021.0,98.037577,93.319199
439,43865,Gradey Dick,KANSAS,G,80,FR,1120.9294,1124.1642,1118.1854,1129.2672,...,43865.0,20.638896,GradeyDick,GradeyDick,SG,7,8.0,2023.0,97.693054,92.673913
352,40482,John Butler,FSU,F,85,FR,1095.3286,1090.0381,1103.9458,1115.4608,...,40482.0,17.567438,JohnButler,JohnButler,PF/C,28,8.0,2022.0,97.605181,92.50933
64,46673,Asa Newell,UGA,F,83,FR,1110.9156,1110.2115,1094.618,1150.3243,...,46673.0,23.695779,AsaNewell,AsaNewell,PF,6,9.0,2025.0,97.554829,92.415021
72,46744,Egor Demin,BYU,G,81,FR,1103.5135,1116.987,1109.8137,1124.4523,...,46744.0,24.736897,EgorDemin,EgorDemin,SG/SF,7,8.0,2025.0,96.963245,91.306994
156,24910,Louis King,OREGN,F,81,FR,1090.6572,1093.4734,1097.0509,1143.0347,...,24910.0,25.615334,LouisKing,LouisKing,SF,133,8.0,2020.0,96.600184,90.626986
463,43224,Anthony Black,ARK,G,79,FR,1127.1547,1108.6976,1104.0448,1153.09,...,43224.0,20.997983,AnthonyBlack,AnthonyBlack,PG/SG,5,8.0,2023.0,96.270058,90.008667


In [55]:
def _scale_to_100(series):
    """Min-max rescale a Series so its minimum maps to 0 and its maximum to 100."""
    lowest = series.min()
    return 100 * (series - lowest) / (series.max() - lowest)


# Offense multiplier: 1 plus half of a 30/70 blend of usage rate and
# win percentage.
usage_win_blend = 0.3 * data['UsageRate'] + 0.7 * data['WinPercentage']
data['OffenseMultiplier'] = 1 + usage_win_blend * 0.5

# Defense multiplier: 1 plus 0.3 times win percentage (usage is not used).
data['DefenseMultiplier'] = 1 + (data['WinPercentage']) * 0.3

# Weighted sum of the offensive rating columns. The divisor 11 is a fixed
# scale factor (the weights themselves add up to 11.5); a constant factor
# does not affect the min-max normalized OScore below.
offense_weighted_sum = (
    1.5*data['pocNJ2MakeOff'] + data['pocJ2MakeOff'] + 1.5*data['pocJ3MakeOff']
    + .5*data['pocORB'] + 3*data['pFTMake'] + data['pJ3Make']
    + data['pocNJ2AssistOff'] + data['pocJ2AssistOff'] + data['pocJ3AssistOff']
)
data['OffenseScore'] = ((offense_weighted_sum / 11) * data['OffenseMultiplier']) * data['NBA']

# Weighted sum of the defensive rating columns, divided by the weight total 4.5.
defense_weighted_sum = (
    data['pocNJ2MakeDef'] + .5*data['pocJ2MakeDef'] + data['pocJ3MakeDef']
    + data['pocDRB'] + data['pocNJ2BlockDef']
)
data['DefenseScore'] = (defense_weighted_sum / 4.5) * data['DefenseMultiplier'] * data['NBA']

# Normalize both scores to a 0-100 scale where 100 is the best.
data['OScore'] = _scale_to_100(data['OffenseScore'])
data['DScore'] = _scale_to_100(data['DefenseScore'])

# Overall score weights offense twice as heavily as defense.
data['Score'] = (2*data['OScore'] + data['DScore']) / 3

# Rank 1 is the highest Score; ties share the lowest rank number.
# The rank is assigned before the draft-year filter below, so it is
# relative to every row currently in `data`.
data['Rank'] = data['Score'].rank(ascending=False, method='min')

# Keep only players with a Draft value of 2023 or later.
data = data.loc[data['Draft'] >= 2023]

In [58]:
# Export the summary columns to CSV, dropping any row that has a missing
# value in one of them.
export_columns = [
    'Draft', 'Rank', 'PlayerName', 'PositionDetail', 'Height', 'PlayerClass',
    'PlayerTeam', 'NBA', 'OScore', 'DScore', 'Score',
]
data[export_columns].dropna().to_csv('AllProspects.csv')

In [59]:
# Display the last 20 rows of `data` as the cell output.
data.tail(20)

Unnamed: 0,PlayerId,PlayerName,PlayerTeam,Position,Height,PlayerClass,pocNJ2MakeOff,pocJ2MakeOff,pocJ3MakeOff,pocNJ2MakeDef,...,PotentialRaw,NBA,OffenseMultiplier,DefenseMultiplier,OffenseScore,DefenseScore,OScore,DScore,Score,Rank
527,82,Aaron Estrada,ALA,G,76,SR,1113.2667,1136.7787,1112.8121,1116.7706,...,58.961508,20.130382,4.584357,1.20271,116859.193339,27664.908352,17.595202,20.074246,18.42155,164.0
528,3831,Boo Buie,NWSTRN,G,74,SR,1090.9546,1146.2716,1123.9285,1155.1178,...,59.285387,20.737001,5.443857,1.20001,143609.056789,28845.184036,21.622863,20.930679,21.392135,147.0
529,41686,Zach Edey,PURDUE,C,88,SR,1112.7917,1119.2953,1132.0233,1183.7247,...,64.263267,30.060487,6.328897,1.26154,225223.450208,47366.270825,33.911342,34.369973,34.064219,126.0
530,5589,Cam Spencer,UCONN,G,76,SR,1092.0665,1120.8538,1090.7417,1185.0706,...,56.108173,14.786133,4.232748,1.2775,80916.609411,22067.651309,12.183415,16.012757,13.459863,200.0
531,37567,Tristen Newton,UCONN,G,77,SR,1105.6958,1126.4552,1119.0544,1238.7494,...,60.797692,23.569522,5.045582,1.2775,149118.619107,36993.971467,22.452425,26.843612,23.916154,137.0
532,43480,Donovan Clingan,UCONN,C,86,SO,1114.3983,1098.4662,1096.877,1267.6595,...,87.998164,74.515549,5.08454,1.27429,422327.019005,120625.013466,63.588743,87.528075,71.56852,30.0
533,45960,Stephon Castle,UCONN,G,78,FR,1129.032,1106.596,1102.8204,1206.008,...,94.433875,86.569526,4.68778,1.28236,489290.844236,130308.229096,73.671322,94.554422,80.632355,14.0
534,15952,Jaedon Ledee,SDSU,F,81,SR,1096.8784,1115.0646,1079.2334,1088.2345,...,61.63312,25.134265,5.802583,1.20295,170112.617465,34454.99865,25.613439,25.00128,25.409386,133.0
536,40054,Ajay Mitchell,UCSB,G,76,JR,1106.0352,1109.3911,1107.4733,1090.0377,...,73.961508,48.225129,5.885979,1.15555,355884.974617,61337.587819,53.584727,44.507858,50.559104,100.0
537,2494,Antonio Reeves,UK,G,76,SR,1096.393,1130.5698,1135.9419,1125.9158,...,56.108173,14.786133,5.131639,1.2091,94479.320408,21070.272801,14.225519,15.289038,14.580026,195.0
