# NBA Season

### Data Initialization

We are pulling the NBA season stats throughout the years from the URL of https://www.kaggle.com/datasets/justinas/nba-players-data/data

In [88]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from itertools import combinations
%matplotlib inline

**Get the kaggle dataset**

In [89]:
# read a csv file into a df
playerData = pd.read_csv('nba.csv')

teamNames = pd.read_csv('unique_teams.csv')

playerData.head()

Unnamed: 0.1,Unnamed: 0,player_name,team_abbreviation,age,player_height,player_weight,college,country,draft_year,draft_round,...,pts,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season
0,0,Randy Livingston,HOU,22.0,193.04,94.800728,Louisiana State,USA,1996,2,...,3.9,1.5,2.4,0.3,0.042,0.071,0.169,0.487,0.248,1996-97
1,1,Gaylon Nickerson,WAS,28.0,190.5,86.18248,Northwestern Oklahoma,USA,1994,2,...,3.8,1.3,0.3,8.9,0.03,0.111,0.174,0.497,0.043,1996-97
2,2,George Lynch,VAN,26.0,203.2,103.418976,North Carolina,USA,1993,1,...,8.3,6.4,1.9,-8.2,0.106,0.185,0.175,0.512,0.125,1996-97
3,3,George McCloud,LAL,30.0,203.2,102.0582,Florida State,USA,1989,1,...,10.2,2.8,1.7,-2.7,0.027,0.111,0.206,0.527,0.125,1996-97
4,4,George Zidek,DEN,23.0,213.36,119.748288,UCLA,USA,1995,1,...,2.8,1.7,0.3,-14.1,0.102,0.169,0.195,0.5,0.064,1996-97


**Put abbreviations to Cities**

Some cities have multiple as franchises have changed

Cities had to be added as ESPN win rates used cities

In [90]:
# merge the two dataframes on team_abbreviation with df and dfTeams on abbreviations
playerData = pd.merge(playerData, teamNames, left_on='team_abbreviation', right_on='abbreviations')

# drop the team_abbreviation column and abbreviations column
playerData = playerData.drop(columns=['team_abbreviation', 'abbreviations'])

In [91]:
playerData.head()

Unnamed: 0.1,Unnamed: 0,player_name,age,player_height,player_weight,college,country,draft_year,draft_round,draft_number,...,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,team
0,0,Randy Livingston,22.0,193.04,94.800728,Louisiana State,USA,1996,2,42,...,1.5,2.4,0.3,0.042,0.071,0.169,0.487,0.248,1996-97,Houston
1,18,Hakeem Olajuwon,34.0,213.36,115.66596,Houston,Nigeria,1984,1,1,...,9.2,3.0,6.5,0.075,0.206,0.308,0.558,0.158,1996-97,Houston
2,29,Emanual Davis,28.0,195.58,87.996848,Delaware State,USA,Undrafted,Undrafted,Undrafted,...,1.7,2.0,6.6,0.011,0.098,0.144,0.565,0.191,1996-97,Houston
3,61,Joe Stephens,24.0,200.66,95.25432,Arkansas-Little Rock,USA,Undrafted,Undrafted,Undrafted,...,1.5,0.0,-17.4,0.25,0.111,0.279,0.3,0.0,1996-97,Houston
4,97,Eddie Johnson,38.0,200.66,97.52228,Illinois,USA,1981,2,29,...,2.7,1.0,4.1,0.034,0.126,0.22,0.541,0.102,1996-97,Houston


In [92]:
# drop the unnamed column
playerData.drop('Unnamed: 0', axis=1, inplace=True)

playerData.dtypes

player_name       object
age              float64
player_height    float64
player_weight    float64
college           object
country           object
draft_year        object
draft_round       object
draft_number      object
gp                 int64
pts              float64
reb              float64
ast              float64
net_rating       float64
oreb_pct         float64
dreb_pct         float64
usg_pct          float64
ts_pct           float64
ast_pct          float64
season            object
team              object
dtype: object

In [93]:
# look for null values
playerData.isnull().sum()

player_name         0
age                 0
player_height       0
player_weight       0
college          1852
country             0
draft_year          0
draft_round         0
draft_number        0
gp                  0
pts                 0
reb                 0
ast                 0
net_rating          0
oreb_pct            0
dreb_pct            0
usg_pct             0
ts_pct              0
ast_pct             0
season              0
team                0
dtype: int64

### Data Preprocessing

We need to deal with the columns we want to keep and also all the categorial data cols of:

player_name           object

team_abbreviation     object

college               object

country               object

draft_year            object

draft_round           object

draft_number          object

season                object

#### **Drop Some of Them**

In [94]:
playerData.drop(['player_name', 'college', 'draft_year', 'draft_round', 'draft_number', 'country'], axis=1, inplace=True)

In [95]:
playerData.head()

Unnamed: 0,age,player_height,player_weight,gp,pts,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,team
0,22.0,193.04,94.800728,64,3.9,1.5,2.4,0.3,0.042,0.071,0.169,0.487,0.248,1996-97,Houston
1,34.0,213.36,115.66596,78,23.2,9.2,3.0,6.5,0.075,0.206,0.308,0.558,0.158,1996-97,Houston
2,28.0,195.58,87.996848,13,5.0,1.7,2.0,6.6,0.011,0.098,0.144,0.565,0.191,1996-97,Houston
3,24.0,200.66,95.25432,2,1.5,1.5,0.0,-17.4,0.25,0.111,0.279,0.3,0.0,1996-97,Houston
4,38.0,200.66,97.52228,52,8.2,2.7,1.0,4.1,0.034,0.126,0.22,0.541,0.102,1996-97,Houston


In [96]:
# categorical code season col but i want to keep the original
playerData['season'] = pd.Categorical(playerData['season']).codes + 1997

# drop the rows where seasonEncoded is less than 7. This keeps the season of 03-04 and later
playerData = playerData[playerData['season'] >= 2004]

playerData.reset_index(drop=True, inplace=True)

#### **Add in Win Rates**

In [97]:
def winRateFromYear(year):

    winRateDf = pd.read_html(f'https://www.teamrankings.com/nba/stat/win-pct-all-games?date={year}-06-16')[0]

    winRateDf['Win PCT']= winRateDf[f'{year - 1}'] 

    winRateDf['season'] = year

    winRateDf = winRateDf[['Team', 'Win PCT', 'season']]

    return winRateDf

**Merge DF2 with df on Team Names**

In [98]:
def getWinRates():
    # merge the two dataframes on team with df and teams on team where season is 2004
    winRateDf = pd.DataFrame()

    for year in range(2004, 2024):
        winRateDf = pd.concat([winRateDf, winRateFromYear(year)], ignore_index=True)

    winRateDf.tail()

    return winRateDf

winRateDf = pd.read_csv('winRate.csv')


In [99]:
winRateDf.head()

Unnamed: 0,Team,Win PCT,season
0,Indiana,0.725,2004
1,San Antonio,0.685,2004
2,Minnesota,0.68,2004
3,Detroit,0.667,2004
4,LA Lakers,0.664,2004


**TODO: MERGE THE DFS**

In [100]:
updatedPlayerData = pd.merge(playerData, winRateDf, left_on=['team', 'season'], right_on=['Team', 'season'])

updatedPlayerData.drop(['Team'], axis=1, inplace=True)

updatedPlayerData.head()

Unnamed: 0,age,player_height,player_weight,gp,pts,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,team,Win PCT
0,40.0,205.74,111.13004,7,1.3,0.7,0.3,-7.9,0.0,0.217,0.165,0.521,0.1,2004,Houston,0.529
1,33.0,200.66,122.46984,52,5.0,3.9,0.6,-5.0,0.104,0.175,0.152,0.538,0.063,2004,Houston,0.529
2,28.0,193.04,97.52228,80,15.8,4.5,3.2,1.8,0.015,0.112,0.2,0.535,0.144,2004,Houston,0.529
3,23.0,205.74,100.243832,45,3.1,1.6,0.7,2.0,0.021,0.136,0.159,0.477,0.103,2004,Houston,0.529
4,29.0,195.58,104.32616,19,0.6,1.0,0.5,-8.4,0.01,0.167,0.093,0.278,0.132,2004,Houston,0.529


In [101]:
updatedPlayerData['AVG Minutes Played'] = updatedPlayerData['gp'] * updatedPlayerData['usg_pct']

In [102]:
# Filter the df to only include the max 5 of games played from each seasonEncoded and Team
updatedPlayerData = updatedPlayerData.groupby(['team', 'season']).apply(lambda x: x.nlargest(5, 'AVG Minutes Played')).reset_index(drop=True)

updatedPlayerData.head()

Unnamed: 0,age,player_height,player_weight,gp,pts,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,team,Win PCT,AVG Minutes Played
0,26.0,203.2,98.883056,80,18.1,4.6,3.1,-3.8,0.038,0.102,0.249,0.521,0.155,2004,Atlanta,0.342,19.92
1,26.0,187.96,81.64656,81,16.8,4.1,5.4,-4.0,0.019,0.106,0.231,0.519,0.261,2004,Atlanta,0.342,18.711
2,31.0,195.58,90.7184,80,7.5,4.1,2.9,0.8,0.071,0.158,0.191,0.51,0.24,2004,Atlanta,0.342,15.28
3,29.0,205.74,106.59412,56,10.2,3.1,0.8,-5.2,0.054,0.117,0.214,0.544,0.068,2004,Atlanta,0.342,11.984
4,29.0,185.42,86.18248,71,3.8,1.6,2.7,-9.8,0.011,0.094,0.143,0.44,0.256,2004,Atlanta,0.342,10.153


#### **Convert Player Stats into Starters' Team Stats**

In [103]:
teamData = updatedPlayerData.groupby(['season', 'team']).agg(
    ptsTotal=('pts', 'sum'),
    rebTotal=('reb', 'sum'),
    astTotal=('ast', 'sum'),
    MinutesPlayed=('AVG Minutes Played', 'sum'),
    averageAge=('age', 'mean'),
    averageHeight=('player_height', 'mean'),
    averageWeight=('player_weight', 'mean'),
    winRate=('Win PCT', 'first')
).reset_index()

teamData

Unnamed: 0,season,team,ptsTotal,rebTotal,astTotal,MinutesPlayed,averageAge,averageHeight,averageWeight,winRate
0,2004,Atlanta,56.4,17.5,14.9,76.048,28.2,195.580,92.804923,0.342
1,2004,Boston,58.8,21.7,13.2,87.015,24.0,200.152,99.608803,0.419
2,2004,Brooklyn,71.6,30.0,20.0,80.431,28.8,199.136,100.062395,0.581
3,2004,Chicago,62.5,24.9,16.1,77.090,27.8,199.644,100.788142,0.281
4,2004,Cleveland,67.7,31.5,11.6,82.232,26.0,207.772,110.313574,0.427
...,...,...,...,...,...,...,...,...,...,...
554,2023,Sacramento,87.8,26.9,21.8,85.421,26.4,200.152,95.072883,0.573
555,2023,San Antonio,64.1,19.4,17.0,67.671,23.8,194.056,93.439952,0.268
556,2023,Toronto,85.6,26.6,19.8,77.189,26.6,198.120,96.252222,0.494
557,2023,Utah,83.9,24.2,16.7,73.005,27.0,200.152,99.608803,0.451


In [104]:
winRate = teamData['winRate']

In [105]:
teamData.drop(['season','team','winRate'], axis=1, inplace=True)

teamData.head()

Unnamed: 0,ptsTotal,rebTotal,astTotal,MinutesPlayed,averageAge,averageHeight,averageWeight
0,56.4,17.5,14.9,76.048,28.2,195.58,92.804923
1,58.8,21.7,13.2,87.015,24.0,200.152,99.608803
2,71.6,30.0,20.0,80.431,28.8,199.136,100.062395
3,62.5,24.9,16.1,77.09,27.8,199.644,100.788142
4,67.7,31.5,11.6,82.232,26.0,207.772,110.313574


In [106]:
winRate.head()

0    0.342
1    0.419
2    0.581
3    0.281
4    0.427
Name: winRate, dtype: float64

In [107]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(teamData, winRate, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test_Scaled = scaler.transform(X_test)

In [108]:
model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),  # Input layer
    keras.layers.Dense(64, activation='relu'),       # Hidden layer
    keras.layers.Dense(32, activation='relu'),       # Hidden layer
    keras.layers.Dense(1)                             # Output layer (for regression)
])

In [109]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [110]:
model.fit(X_train, y_train, epochs=100, batch_size=5, validation_split=0.2, verbose=0)

<keras.src.callbacks.history.History at 0x244365e5050>

In [111]:
# Example input for a new set of NBA players

predicted_win_rates = model.predict(X_test_Scaled)
actual_win_rates = y_test.values

# Display predictions alongside actual values
for pred, actual in zip(predicted_win_rates.flatten(), actual_win_rates):
    print(f'Predicted: {pred:.3f}, Actual: {actual:.3f}')

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Predicted: 0.714, Actual: 0.679
Predicted: 0.200, Actual: 0.339
Predicted: 0.337, Actual: 0.317
Predicted: 0.259, Actual: 0.750
Predicted: 0.414, Actual: 0.471
Predicted: 0.643, Actual: 0.511
Predicted: 0.620, Actual: 0.354
Predicted: 0.446, Actual: 0.500
Predicted: 0.504, Actual: 0.640
Predicted: 0.539, Actual: 0.792
Predicted: 0.562, Actual: 0.293
Predicted: 0.460, Actual: 0.628
Predicted: 0.428, Actual: 0.317
Predicted: 0.693, Actual: 0.725
Predicted: 0.630, Actual: 0.638
Predicted: 0.574, Actual: 0.632
Predicted: 0.611, Actual: 0.644
Predicted: 0.523, Actual: 0.725
Predicted: 0.690, Actual: 0.523
Predicted: 0.560, Actual: 0.596
Predicted: 0.582, Actual: 0.470
Predicted: 0.567, Actual: 0.563
Predicted: 0.504, Actual: 0.506
Predicted: 0.484, Actual: 0.512
Predicted: 0.300, Actual: 0.534
Predicted: 0.500, Actual: 0.613
Predicted: 0.695, Actual: 0.656
Predicted: 0.384, Actual: 0.232
Predicted: 0.343, Actual: 0.402


In [112]:
# Calculate regression metrics
mae = mean_absolute_error(actual_win_rates, predicted_win_rates)
mse = mean_squared_error(actual_win_rates, predicted_win_rates)
r2 = r2_score(actual_win_rates, predicted_win_rates)

print(f'Mean Absolute Error: {mae:.2f}')
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

Mean Absolute Error: 0.11
Mean Squared Error: 0.02
R-squared: 0.12


In [113]:
# Define weights for each statistic
weights = {
    'pts': 0.3,  # Weight for points
    'ast': 0.3,  # Weight for assists
    'reb': 0.4   # Weight for rebounds
}

# Calculate combined score
updatedPlayerData['combined_score'] = (updatedPlayerData['pts'] * weights['pts'] +
                        updatedPlayerData['ast'] * weights['ast'] +
                        updatedPlayerData['reb'] * weights['reb'])

In [114]:
top_players = updatedPlayerData.sort_values(by='combined_score', ascending=False).head(10)

In [115]:
top_players

Unnamed: 0,age,player_height,player_weight,gp,pts,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,team,Win PCT,AVG Minutes Played,combined_score
785,27.0,210.82,128.820128,74,27.1,13.8,7.9,8.4,0.09,0.313,0.309,0.661,0.388,2022,Denver,0.563,22.866,16.02
1575,25.0,210.82,109.769264,63,29.5,13.6,5.6,15.4,0.068,0.307,0.363,0.613,0.328,2020,Milwaukee,0.815,22.869,15.97
1590,28.0,213.36,110.222856,63,31.1,11.8,5.7,7.2,0.065,0.268,0.373,0.605,0.314,2023,Milwaukee,0.678,23.499,15.76
1070,29.0,195.58,99.79024,78,36.1,6.6,7.5,6.3,0.023,0.157,0.396,0.616,0.394,2019,Houston,0.634,30.888,15.72
690,24.0,200.66,104.32616,66,32.4,8.6,8.0,2.1,0.024,0.224,0.368,0.609,0.408,2023,Dallas,0.463,24.288,15.56
1585,27.0,210.82,109.769264,67,29.9,11.6,5.8,8.0,0.06,0.267,0.34,0.633,0.304,2022,Milwaukee,0.617,22.78,15.35
1060,27.0,195.58,99.79024,81,29.1,8.1,11.2,6.3,0.035,0.212,0.341,0.613,0.505,2017,Houston,0.656,27.621,15.33
2090,29.0,213.36,127.00576,66,33.1,10.2,4.2,8.8,0.057,0.243,0.37,0.655,0.233,2023,Philadelphia,0.656,24.42,15.27
1075,30.0,195.58,99.79024,68,34.3,6.6,7.5,5.8,0.026,0.139,0.356,0.626,0.366,2020,Houston,0.625,24.208,15.18
2085,28.0,213.36,127.00576,68,30.6,11.7,4.2,7.9,0.068,0.273,0.375,0.616,0.236,2022,Philadelphia,0.606,25.5,15.12


In [116]:
# Get all combinations of 5 players
combinations_of_5 = list(combinations(top_players.index, 5))

In [117]:
print(combinations_of_5)

[(785, 1575, 1590, 1070, 690), (785, 1575, 1590, 1070, 1585), (785, 1575, 1590, 1070, 1060), (785, 1575, 1590, 1070, 2090), (785, 1575, 1590, 1070, 1075), (785, 1575, 1590, 1070, 2085), (785, 1575, 1590, 690, 1585), (785, 1575, 1590, 690, 1060), (785, 1575, 1590, 690, 2090), (785, 1575, 1590, 690, 1075), (785, 1575, 1590, 690, 2085), (785, 1575, 1590, 1585, 1060), (785, 1575, 1590, 1585, 2090), (785, 1575, 1590, 1585, 1075), (785, 1575, 1590, 1585, 2085), (785, 1575, 1590, 1060, 2090), (785, 1575, 1590, 1060, 1075), (785, 1575, 1590, 1060, 2085), (785, 1575, 1590, 2090, 1075), (785, 1575, 1590, 2090, 2085), (785, 1575, 1590, 1075, 2085), (785, 1575, 1070, 690, 1585), (785, 1575, 1070, 690, 1060), (785, 1575, 1070, 690, 2090), (785, 1575, 1070, 690, 1075), (785, 1575, 1070, 690, 2085), (785, 1575, 1070, 1585, 1060), (785, 1575, 1070, 1585, 2090), (785, 1575, 1070, 1585, 1075), (785, 1575, 1070, 1585, 2085), (785, 1575, 1070, 1060, 2090), (785, 1575, 1070, 1060, 1075), (785, 1575, 1070, 

In [121]:
createdTeams = []

for combo in combinations_of_5:
    team = pd.DataFrame()
    
    for player in combo:
        # print(player)
        # print(top_players.loc[player].to_frame().T)
        team = pd.concat([team, top_players.loc[player].to_frame().T])

    team['team'] = 1

    print(team)
    createdTeams.append(team.groupby(['team']).agg(
        ptsTotal=('pts', 'sum'),
        rebTotal=('reb', 'sum'),
        astTotal=('ast', 'sum'),
        MinutesPlayed=('AVG Minutes Played', 'sum'),
        averageAge=('age', 'mean'),
        averageHeight=('player_height', 'mean'),
        averageWeight=('player_weight', 'mean'),
    ).reset_index())

       age player_height player_weight  gp   pts   reb  ast net_rating  \
785   27.0        210.82    128.820128  74  27.1  13.8  7.9        8.4   
1575  25.0        210.82    109.769264  63  29.5  13.6  5.6       15.4   
1590  28.0        213.36    110.222856  63  31.1  11.8  5.7        7.2   
1070  29.0        195.58      99.79024  78  36.1   6.6  7.5        6.3   
690   24.0        200.66     104.32616  66  32.4   8.6  8.0        2.1   

     oreb_pct dreb_pct usg_pct ts_pct ast_pct season  team Win PCT  \
785      0.09    0.313   0.309  0.661   0.388   2022     1   0.563   
1575    0.068    0.307   0.363  0.613   0.328   2020     1   0.815   
1590    0.065    0.268   0.373  0.605   0.314   2023     1   0.678   
1070    0.023    0.157   0.396  0.616   0.394   2019     1   0.634   
690     0.024    0.224   0.368  0.609   0.408   2023     1   0.463   

     AVG Minutes Played combined_score  
785              22.866          16.02  
1575             22.869          15.97  
1590       

In [123]:
for i in range(len(createdTeams)):
    createdTeams[i].drop(['team'], axis=1, inplace=True)

In [134]:
teamWinPCTs = []
for team in createdTeams:
    teamWinPCTs.append(model.predict(scaler.transform(team)))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18

In [140]:
final_teamWinPCTs = [pct_array[0,0] for pct_array in teamWinPCTs]

In [145]:
maxPercentage = max(final_teamWinPCTs)
maxIndex = final_teamWinPCTs.index(maxPercentage)

print(f"The maximum percentage is: {maxPercentage} at index: {maxIndex}")

The maximum percentage is: 1.0931907892227173 at index: 239


This is the best team. They are projected to win 109.3% of their games. How, I have no idea! They put up 162.5 pts/g, 41.6 reb/g and 38.4 ast/g.

In [146]:
createdTeams[maxIndex]

Unnamed: 0,ptsTotal,rebTotal,astTotal,MinutesPlayed,averageAge,averageHeight,averageWeight
0,162.5,41.6,38.4,132.505,27.6,200.152,106.140528


These are the indexes of the players in the **top_players** dataframe. You can find the stats of those players by doing the **top_players.loc[ *index_num* ]** seen below. I believe that player is James Harden, but we didn't save the names.

In [147]:
combinations_of_5[maxIndex]

(1070, 690, 1060, 1075, 2085)

In [148]:
top_players.loc[1070]

age                       29.0
player_height           195.58
player_weight         99.79024
gp                          78
pts                       36.1
reb                        6.6
ast                        7.5
net_rating                 6.3
oreb_pct                 0.023
dreb_pct                 0.157
usg_pct                  0.396
ts_pct                   0.616
ast_pct                  0.394
season                    2019
team                   Houston
Win PCT                  0.634
AVG Minutes Played      30.888
combined_score           15.72
Name: 1070, dtype: object