This project trains a neural network to classify which position (G, F, C) an NBA player most likely plays, based on their height, weight, and recent per-game averages (points, rebounds, assists). Data is retrieved from the balldontlie API.

In [None]:
import numpy as np
import operator
import requests
import json
np.random.seed(3030)

#Get player ids and other identifying data
#Wait 20-30 seconds between consecutive uses of this cell and the cell below (API restrictions)
#Result: players = {player id: [height in inches, weight in pounds, position]}
players = {}
players_api = requests.get('https://www.balldontlie.io/api/v1/players', params={'per_page': 100})
# Fail loudly on a throttled/failed request instead of hitting an obscure JSON or KeyError below
players_api.raise_for_status()
players_data = json.loads(players_api.text)
player_pages = players_data['meta']['total_pages']

# Pages are requested as 1..total_pages. Starting at 0 spent one extra (rate-limited)
# request; NOTE(review): page 0 appears to just repeat the first page -- confirm against the API docs.
for i in range(1, player_pages + 1):
  players_api = requests.get('https://www.balldontlie.io/api/v1/players', params={'page': i, 'per_page': 100})
  players_api.raise_for_status()
  players_data = json.loads(players_api.text)
  for player in players_data['data']:
    # Keep only players that have a position, a height and a weight on record
    if player['position'] and player['height_feet'] and player['weight_pounds']:
      # A missing inches component is counted as 0 rather than raising a TypeError
      height = 12 * player['height_feet'] + (player['height_inches'] or 0)
      players[player['id']] = [height, player['weight_pounds'], player['position']]

print(len(players))

458


In [None]:
#Get stats for all players (going back years for retired players)
#Wait 20-30 seconds between consecutive uses of this cell and the cell above (API restrictions)
#Result: stats = {player id: [points, rebounds, assists]}
stats = {}

# The player-id part of the query string is identical for every season, so build it once
player_ids_query = ''.join('&player_ids[]=' + str(player_id) for player_id in players)

# Walk the seasons from 2020 back to 2000
for year in range(2020, 1999, -1):
  stats_request_string = 'https://www.balldontlie.io/api/v1/season_averages?season=' + str(year) + player_ids_query

  stats_api = requests.get(stats_request_string)
  raw_stats_data = json.loads(stats_api.text)['data']

  for val in raw_stats_data:
    # Newest season is visited first, so only the first (most recent) entry per player is kept
    stats.setdefault(val['player_id'], [val['pts'], val['reb'], val['ast']])
print(len(stats))

457


In [None]:
from sklearn import preprocessing

#Combine players + stats into one dict {id: [height in inches, weight, position, points, rebounds, assists]}
#Then, separate the data into lists of guards, forwards, and centers
raw_data = {player_id: players[player_id] + stats[player_id] for player_id in stats}

#guards, forwards, and centers
g, f, c = [], [], []
for record in raw_data.values():
  # Remove the position string so only the five numeric features remain in the record
  pos = record.pop(2)
  if pos == 'G':
    bucket = g
  elif 'F' in pos and record[0] <= 81:
    # Any position containing 'F' counts as a forward up to 81 inches tall
    bucket = f
  else:
    # Everything else (including 'F' positions taller than 81 inches) is treated as a center
    bucket = c
  bucket.append(record)

#Normalize the data
# The scaler is fitted on all players together, then applied to each group separately
total = g + f + c
scaler = preprocessing.StandardScaler().fit(total)
g, f, c = [np.array(scaler.transform(group)) for group in (g, f, c)]

def clean_data(player):
  """Scale a single raw player with the module-level fitted ``scaler``.

  player: flat sequence of raw features, in the order the scaler was fitted on
          ([height in inches, weight, points, rebounds, assists]).
  Returns whatever ``scaler.transform`` produces for the one-row, 2D version
  of ``player``.
  """
  # The scaler works on 2D input, so present the player as a single row
  row = np.array(player).reshape(1, -1)
  return scaler.transform(row)

# Show the size and the first ten normalized rows of each group: guards, forwards, centers
for group in (g, f, c):
  print(len(group), group[:10])

177 [[-1.53480940e+00 -1.20587317e+00 -1.19740409e+00 -1.23990767e+00
  -9.22325646e-01]
 [-6.27425318e-01 -8.67707006e-01  2.23978644e-01 -1.42069794e-01
   5.94234824e-02]
 [-1.23234804e+00 -7.83165465e-01 -9.18588874e-01 -9.25115447e-01
  -5.53520418e-01]
 [-2.13973212e+00 -1.50176857e+00 -1.69811922e-01 -7.12630697e-01
   6.25617424e-01]
 [-3.24963957e-01 -1.20587317e+00  8.76463743e-01  5.50473095e-01
   1.98656066e+00]
 [-3.24963957e-01 -1.20587317e+00  6.06271676e-01  2.59290289e-01
   6.77561823e-01]
 [-6.27425318e-01 -7.40894694e-01 -2.17239253e-01 -3.18925162e-02
  -1.84715190e-01]
 [-6.27425318e-01 -4.87270068e-01  3.18603104e+00  5.22928776e-01
   1.26972796e+00]
 [-6.27425318e-01 -9.94519319e-01  9.48323335e-01 -4.13293946e-04
  -4.96597541e-02]
 [-1.83727076e+00 -1.41722703e+00 -1.39630893e-01 -6.33717384e-02
   6.46179222e-02]]
157 [[ 0.58242013  0.06224995 -0.89990538 -0.38996867 -0.65221478]
 [ 0.58242013  0.48495766  0.56028154  0.92822376  0.88014498]
 [ 0.27995877  

In [None]:
#Creating the finalized training data + labels
# Each sample is a (features, label) tuple with label 0 = guard, 1 = forward, 2 = center.
# Groups are appended in that order, so enumerate() hands out the matching label.
data = [(features, label) for label, group in enumerate((g, f, c)) for features in group]

# Shuffle in place so the three positions are interleaved
np.random.shuffle(data)
data[:10]

[(array([ 0.88488149,  0.40041612, -0.79642757, -0.61032322, -0.86518681]), 2),
 (array([-0.0225026 ,  0.48495766, -0.20574172,  1.46730544,  3.60722589]), 1),
 (array([ 0.88488149,  0.90766537, -1.16434868, -1.11792568, -0.92232565]), 2),
 (array([ 1.18734285,  0.78085306,  0.6192064 ,  2.61629705, -0.12757635]), 2),
 (array([ 0.27995877,  0.90766537, -1.19021813, -1.21236335, -1.00024224]), 1),
 (array([-0.32496396,  1.11901923, -0.70588448,  0.15698282, -0.41327054]), 1),
 (array([-0.92988668, -0.1491039 ,  1.1653393 , -0.54342988,  0.27239552]), 0),
 (array([ 0.27995877,  1.11901923, -1.03068984, -1.03135782, -0.89635345]), 1),
 (array([ 1.18734285,  0.06224995, -0.4917429 ,  1.18399244, -0.62624258]), 2),
 (array([0.58242013, 0.48495766, 1.79626652, 1.46730544, 1.28011684]), 1)]

In [None]:
import torch
import torch.nn as nn
import tqdm

In [None]:
# Seed torch's own RNG so the initial weights are repeatable between runs
# (np.random.seed only covers NumPy); same seed value as the NumPy seed.
torch.manual_seed(3030)

# Use the GPU when one is available, otherwise fall back to the CPU instead of crashing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Small fully connected classifier: 5 input features -> 4 -> 4 -> 3 outputs.
# The final Softmax turns the 3 outputs into probabilities along the last dimension.
model = nn.Sequential(
  nn.Linear(5, 4),
  nn.ReLU(),
  nn.Linear(4, 4),
  nn.ReLU(),
  nn.Linear(4, 3),
  nn.Softmax(-1)
).to(device)
# Plain stochastic gradient descent over all model parameters
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [None]:
# Train on whichever device the model's parameters already live on,
# so this cell works with or without a GPU.
device = next(model.parameters()).device

# Convert every (features, label) sample to tensors once, rather than on every epoch.
# Features become a (1, n_features) float tensor, the label a 1-element long tensor.
samples = [
  (torch.tensor(player).float().reshape((1, -1)).to(device),
   torch.tensor(label).long().reshape((1,)).to(device))
  for player, label in data
]

for epoch in range(50):
  losses = []
  # One optimizer step per sample, with a progress bar per epoch
  for player, label in tqdm.tqdm(samples):
    # Get our guess
    guess = model(player)

    # Get the loss: negative log-likelihood on the log of the model's output
    # (the model as defined in this notebook ends in Softmax, i.e. outputs probabilities)
    loss = nn.functional.nll_loss(guess.log(), label)

    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
  # Mean per-sample loss for this epoch
  print("LOSS", np.mean(losses))

100%|██████████| 457/457 [00:00<00:00, 919.79it/s]


LOSS 0.4649840674528147


100%|██████████| 457/457 [00:00<00:00, 911.42it/s]


LOSS 0.4597144982567632


100%|██████████| 457/457 [00:00<00:00, 917.73it/s]


LOSS 0.4545910803232285


100%|██████████| 457/457 [00:00<00:00, 919.72it/s]


LOSS 0.44962665707859284


100%|██████████| 457/457 [00:00<00:00, 929.03it/s]


LOSS 0.44478506603047935


100%|██████████| 457/457 [00:00<00:00, 922.36it/s]


LOSS 0.4400844206255035


100%|██████████| 457/457 [00:00<00:00, 921.05it/s]


LOSS 0.43553105274651455


100%|██████████| 457/457 [00:00<00:00, 932.93it/s]


LOSS 0.4310642577273694


100%|██████████| 457/457 [00:00<00:00, 1013.55it/s]


LOSS 0.4266722795657439


100%|██████████| 457/457 [00:00<00:00, 979.94it/s]


LOSS 0.4223804091576169


100%|██████████| 457/457 [00:00<00:00, 1000.06it/s]


LOSS 0.41817741473745434


100%|██████████| 457/457 [00:00<00:00, 992.77it/s]


LOSS 0.41411026069971874


100%|██████████| 457/457 [00:00<00:00, 982.83it/s]


LOSS 0.4101486427111335


100%|██████████| 457/457 [00:00<00:00, 962.37it/s]


LOSS 0.4062665420155402


100%|██████████| 457/457 [00:00<00:00, 991.67it/s]


LOSS 0.40247101196815427


100%|██████████| 457/457 [00:00<00:00, 993.03it/s]


LOSS 0.3987650486805702


100%|██████████| 457/457 [00:00<00:00, 966.45it/s]


LOSS 0.3951844659351685


100%|██████████| 457/457 [00:00<00:00, 943.43it/s]


LOSS 0.39169108929546625


100%|██████████| 457/457 [00:00<00:00, 980.80it/s]


LOSS 0.38832821650277716


100%|██████████| 457/457 [00:00<00:00, 976.93it/s]


LOSS 0.38502751804834245


100%|██████████| 457/457 [00:00<00:00, 985.82it/s] 


LOSS 0.3818232852578901


100%|██████████| 457/457 [00:00<00:00, 993.94it/s]


LOSS 0.3786530535621263


100%|██████████| 457/457 [00:00<00:00, 966.66it/s]


LOSS 0.3755646913594625


100%|██████████| 457/457 [00:00<00:00, 954.94it/s]


LOSS 0.3725148079617772


100%|██████████| 457/457 [00:00<00:00, 970.31it/s]


LOSS 0.3695274271171324


100%|██████████| 457/457 [00:00<00:00, 985.46it/s]


LOSS 0.3666855874682799


100%|██████████| 457/457 [00:00<00:00, 999.91it/s]


LOSS 0.3639161141958145


100%|██████████| 457/457 [00:00<00:00, 964.33it/s]


LOSS 0.36119303443172085


100%|██████████| 457/457 [00:00<00:00, 993.78it/s]


LOSS 0.3585252722115684


100%|██████████| 457/457 [00:00<00:00, 977.19it/s]


LOSS 0.3559563787525654


100%|██████████| 457/457 [00:00<00:00, 981.11it/s]


LOSS 0.3534404964626184


100%|██████████| 457/457 [00:00<00:00, 959.39it/s]


LOSS 0.3509993078402362


100%|██████████| 457/457 [00:00<00:00, 956.52it/s]


LOSS 0.3485842694953049


100%|██████████| 457/457 [00:00<00:00, 984.03it/s]


LOSS 0.3461964181682943


100%|██████████| 457/457 [00:00<00:00, 944.66it/s]


LOSS 0.3438667359598734


100%|██████████| 457/457 [00:00<00:00, 967.26it/s]


LOSS 0.3415662490404099


100%|██████████| 457/457 [00:00<00:00, 972.79it/s]


LOSS 0.3393361417545338


100%|██████████| 457/457 [00:00<00:00, 921.02it/s]


LOSS 0.3371795304532263


100%|██████████| 457/457 [00:00<00:00, 960.79it/s]


LOSS 0.33506764362477526


100%|██████████| 457/457 [00:00<00:00, 977.85it/s]


LOSS 0.3329603581205805


100%|██████████| 457/457 [00:00<00:00, 970.85it/s]


LOSS 0.3308718550988627


100%|██████████| 457/457 [00:00<00:00, 951.20it/s]


LOSS 0.32882354316093143


100%|██████████| 457/457 [00:00<00:00, 967.72it/s]


LOSS 0.32675058029191334


100%|██████████| 457/457 [00:00<00:00, 980.88it/s]


LOSS 0.324738270555548


100%|██████████| 457/457 [00:00<00:00, 948.07it/s]


LOSS 0.3227654695506725


100%|██████████| 457/457 [00:00<00:00, 984.08it/s]


LOSS 0.32087013271410036


100%|██████████| 457/457 [00:00<00:00, 968.14it/s]


LOSS 0.31899106960953316


100%|██████████| 457/457 [00:00<00:00, 991.29it/s]


LOSS 0.31712389203872765


100%|██████████| 457/457 [00:00<00:00, 994.66it/s] 


LOSS 0.3152801246356727


100%|██████████| 457/457 [00:00<00:00, 975.44it/s]

LOSS 0.3134081972497951





In [None]:
def predict_pos(model, player):
  """Print the most likely position (guard, forward or center) for one player.

  model:  callable torch module mapping a (1, n_features) float tensor to
          three scores ordered [guard, forward, center].
  player: raw features [height (inches), weight (pounds), points, rebounds,
          assists]; they are scaled with clean_data before being fed in.

  Prints one message and returns None. When scores tie, the earlier position
  in the order guard, forward, center wins.
  """
  new_player = clean_data(player)

  # Run on the device the model lives on instead of assuming a GPU;
  # a model without parameters is evaluated on the CPU.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')
  new_player = torch.tensor(new_player).float().reshape(1, -1).to(device)

  # Inference only, so no gradient tracking is needed
  with torch.no_grad():
    result = model(new_player).cpu().numpy().tolist()[0]

  messages = (
    "Your player is likely a guard!",
    "Your player is likely a forward!",
    "Your player is likely a center!",
  )
  # index() returns the first maximum; anything past index 1 maps to the center message
  best = min(result.index(max(result)), 2)
  print(messages[best])

In [None]:
# [height (inches), weight (pounds), points, rebounds, assists]
# Hand-made sample players in the feature order above. Values are raw (unscaled):
# predict_pos passes them through clean_data before calling the model.

small_player = [72, 170, 15, 3, 10]
fat_player = [82, 350, 18, 10, 10]
kevin_durant = [82, 225, 30, 5, 5]

# Only kevin_durant is classified here; swap in another sample to try it
predict_pos(model, kevin_durant)

Your player is likely a forward!
