# Background
Every year me and a group of my friends make an NBA fantasy basketball league. We each put in $100 and then winner of the fantasy league gets the money and often times the loser of the league has to do a dare that we pick before the season starts. Safe to say I definetely want to do good in this league every year. I have always used the approach of just who I think will do good in the season when it comes to picking my players but what if there was a way to use machine learning to help me come up with players to pick. In this notebook I will go over my attempt to make a model that will help predict NBA player performance that will hopefully give me an edge over my friends for the next fantasy season.

In order to do this I will be scraping data from a website called basketball reference that has all the players stats for almost every season.

In [1]:
# Import needed dependencies
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
from bs4 import BeautifulSoup, Comment

In [2]:
#Get a list of the last 5 years to collect NBA data from starting with 2023
last_five_years = []
year = 2023
while year > 2018:
    last_five_years.append(year)
    year -= 1

In [3]:
last_five_years

[2023, 2022, 2021, 2020, 2019]

In [4]:
#Empty list I will use to gather data
data_rows = []
for year in last_five_years:
    #Get URL for each year
    URL = f'https://www.basketball-reference.com/leagues/NBA_{year}_totals.html'
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'lxml')

    #Find part in the html that has the table data
    results = soup.find('div', id = 'div_totals_stats')
    #Get all the data in the table and add it to the empty list we created
    table_data = results.find_all('tr')
    for row in table_data[1:]:
        #Find all table data
        data = row.find_all('td')
        individual_data = [i.text for i in data]
        individual_data.append(year)
        #Add data to the empty list
        data_rows.append(individual_data)

In [5]:
data_rows

[['Precious Achiuwa',
  'C',
  '23',
  'TOR',
  '55',
  '12',
  '1140',
  '196',
  '404',
  '.485',
  '29',
  '108',
  '.269',
  '167',
  '296',
  '.564',
  '.521',
  '87',
  '124',
  '.702',
  '100',
  '228',
  '328',
  '50',
  '31',
  '30',
  '59',
  '102',
  '508',
  2023],
 ['Steven Adams',
  'C',
  '29',
  'MEM',
  '42',
  '42',
  '1133',
  '157',
  '263',
  '.597',
  '0',
  '1',
  '.000',
  '157',
  '262',
  '.599',
  '.597',
  '47',
  '129',
  '.364',
  '214',
  '271',
  '485',
  '97',
  '36',
  '46',
  '79',
  '98',
  '361',
  2023],
 ['Bam Adebayo',
  'C',
  '25',
  'MIA',
  '75',
  '75',
  '2598',
  '602',
  '1114',
  '.540',
  '1',
  '12',
  '.083',
  '601',
  '1102',
  '.545',
  '.541',
  '324',
  '402',
  '.806',
  '184',
  '504',
  '688',
  '240',
  '88',
  '61',
  '187',
  '208',
  '1529',
  2023],
 ['Ochai Agbaji',
  'SG',
  '22',
  'UTA',
  '59',
  '22',
  '1209',
  '165',
  '386',
  '.427',
  '81',
  '228',
  '.355',
  '84',
  '158',
  '.532',
  '.532',
  '56',
  '69'

In [6]:
#Get a list of all the headers
data_headers = results.find_all('th', class_ = ['poptip center','poptip sort_default_asc center','ranker poptip sort_default_asc show_partial_when_sorting center','poptip hide_non_quals center'])
for row in data_headers:
    titles = [i.text for i in data_headers]

In [7]:
#Take the first 30 columns in the list because it keeps repeating
titles = titles[1:30]

In [8]:
#Create an extra column for the year
titles.append('Year')
titles

['Player',
 'Pos',
 'Age',
 'Tm',
 'G',
 'GS',
 'MP',
 'FG',
 'FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'eFG%',
 'FT',
 'FTA',
 'FT%',
 'ORB',
 'DRB',
 'TRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS',
 'Year']

In [9]:
#Create the dataframe with all the NBA player data for past 5 years
df = pd.DataFrame(data_rows, columns= titles)

In [10]:
pd.set_option('display.max_columns', None)
df

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,Precious Achiuwa,C,23,TOR,55,12,1140,196,404,.485,29,108,.269,167,296,.564,.521,87,124,.702,100,228,328,50,31,30,59,102,508,2023.0
1,Steven Adams,C,29,MEM,42,42,1133,157,263,.597,0,1,.000,157,262,.599,.597,47,129,.364,214,271,485,97,36,46,79,98,361,2023.0
2,Bam Adebayo,C,25,MIA,75,75,2598,602,1114,.540,1,12,.083,601,1102,.545,.541,324,402,.806,184,504,688,240,88,61,187,208,1529,2023.0
3,Ochai Agbaji,SG,22,UTA,59,22,1209,165,386,.427,81,228,.355,84,158,.532,.532,56,69,.812,43,78,121,67,16,15,41,99,467,2023.0
4,Santi Aldama,PF,22,MEM,77,20,1682,247,525,.470,94,266,.353,153,259,.591,.560,108,144,.750,85,286,371,97,45,48,60,143,696,2023.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3684,Tyler Zeller,C,29,MEM,4,1,82,16,28,.571,0,0,,16,28,.571,.571,14,18,.778,9,9,18,3,1,3,4,16,46,2019.0
3685,Ante Žižić,C,22,CLE,59,25,1082,183,331,.553,0,0,,183,331,.553,.553,93,132,.705,108,212,320,53,13,22,61,113,459,2019.0
3686,Ivica Zubac,C,21,TOT,59,37,1040,212,379,.559,0,0,,212,379,.559,.559,101,126,.802,115,247,362,63,14,51,70,137,525,2019.0
3687,Ivica Zubac,C,21,LAL,33,12,516,112,193,.580,0,0,,112,193,.580,.580,57,66,.864,54,108,162,25,4,27,33,73,281,2019.0


In [11]:
#Check null values
df.isnull().sum()

Player      0
Pos       134
Age       134
Tm        134
G         134
GS        134
MP        134
FG        134
FGA       134
FG%       134
3P        134
3PA       134
3P%       134
2P        134
2PA       134
2P%       134
eFG%      134
FT        134
FTA       134
FT%       134
ORB       134
DRB       134
TRB       134
AST       134
STL       134
BLK       134
TOV       134
PF        134
PTS       134
Year      134
dtype: int64

In [12]:
#Drop NA rows
df = df.dropna()

In [44]:
df.isnull().sum()

Player    0
Pos       0
Age       0
Tm        0
G         0
GS        0
MP        0
FG        0
FGA       0
FG%       0
3P        0
3PA       0
3P%       0
2P        0
2PA       0
2P%       0
eFG%      0
FT        0
FTA       0
FT%       0
ORB       0
DRB       0
TRB       0
AST       0
STL       0
BLK       0
TOV       0
PF        0
PTS       0
Year      0
dtype: int64

In [45]:
df.dtypes

Player     object
Pos        object
Age        object
Tm         object
G          object
GS         object
MP         object
FG         object
FGA        object
FG%        object
3P         object
3PA        object
3P%        object
2P         object
2PA        object
2P%        object
eFG%       object
FT         object
FTA        object
FT%        object
ORB        object
DRB        object
TRB        object
AST        object
STL        object
BLK        object
TOV        object
PF         object
PTS        object
Year      float64
dtype: object

In [13]:
#Some of the columns have values '' so I'm going to change those to be 0
#Convert empty string to null values
df = df.replace('', float('nan'))
#Fill null values with 0
df = df.fillna(0)

In [14]:
#Change the data types of some of the columns
df[['Age','MP','FG','FGA','G','GS','3P','3PA','2P','2PA','FT','FTA','ORB','DRB','TRB','AST','STL','BLK','TOV','PF','PTS','Year']] = df[['Age','MP','FG','FGA','G','GS','3P','3PA','2P','2PA','FT','FTA','ORB','DRB','TRB','AST','STL','BLK','TOV','PF','PTS','Year']].astype(int)
df[['FG%','3P%','2P%','eFG%','FT%']] = df[['FG%','3P%','2P%','eFG%','FT%']].astype(float)

In [70]:
df.dtypes

Player     object
Pos        object
Age         int64
Tm         object
G           int64
GS          int64
MP          int64
FG          int64
FGA         int64
FG%       float64
3P          int64
3PA         int64
3P%       float64
2P          int64
2PA         int64
2P%       float64
eFG%      float64
FT          int64
FTA         int64
FT%       float64
ORB         int64
DRB         int64
TRB         int64
AST         int64
STL         int64
BLK         int64
TOV         int64
PF          int64
PTS         int64
Year        int64
dtype: object

In [71]:
df.describe()

Unnamed: 0,Age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
count,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0,3555.0
mean,25.976653,39.610689,18.165401,893.726301,150.731083,325.57609,0.440541,45.152743,126.156118,0.296099,105.57834,199.419972,0.49985,0.505243,64.264698,83.004782,0.696893,37.682982,126.292827,163.975809,90.800281,27.970183,17.692546,49.404782,74.624191,410.879606,2021.028973
std,4.128121,24.51703,24.096486,757.644408,156.57488,330.293921,0.124397,54.352194,143.825715,0.14989,120.923806,222.881417,0.150099,0.128314,85.633181,106.342648,0.236381,46.847577,128.140282,168.499771,113.361787,27.026243,23.514929,53.16055,61.353226,432.91456,1.404232
min,19.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2019.0
25%,23.0,17.0,0.0,206.0,26.0,60.0,0.398,3.0,13.0,0.25,16.0,33.5,0.456,0.474,9.0,12.0,0.651,7.0,26.5,36.0,13.0,6.0,3.0,9.0,20.0,70.0,2020.0
50%,25.0,41.0,5.0,713.0,97.0,218.0,0.441,24.0,73.0,0.333,61.0,118.0,0.512,0.519,34.0,45.0,0.758,22.0,88.0,114.0,50.0,20.0,10.0,33.0,62.0,263.0,2021.0
75%,29.0,62.0,30.0,1464.5,230.0,498.0,0.495,68.0,199.0,0.3775,154.0,292.0,0.571,0.563,85.0,112.0,0.833,51.0,188.0,242.0,118.5,43.0,23.0,71.0,117.0,625.0,2022.0
max,43.0,83.0,83.0,3028.0,843.0,1909.0,1.0,378.0,1028.0,1.0,724.0,1393.0,1.0,1.5,754.0,858.0,1.0,423.0,813.0,1232.0,784.0,170.0,199.0,387.0,292.0,2818.0,2023.0


In [15]:
#Going to remove players with less than 25 games to make for better model
df = df[df['G'] >= 25]

In [73]:
df.describe()

Unnamed: 0,Age,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
count,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0,2316.0
mean,26.193869,54.665371,26.753022,1282.978411,218.612263,470.764249,0.462343,65.355786,181.917962,0.321549,153.256477,288.846287,0.524886,0.531127,93.345855,120.265112,0.758565,53.769862,181.464594,235.234456,131.599741,40.099741,25.375216,71.21114,106.102332,595.926166,2021.021157
std,4.210444,15.56409,25.842242,656.713667,154.742271,323.817097,0.074132,57.419591,149.552834,0.108031,125.10769,228.865104,0.074365,0.058749,93.351682,114.811114,0.107861,50.628449,126.735428,168.165543,121.300982,26.023963,25.817426,53.980605,52.950381,431.232953,1.427887
min,19.0,26.0,0.0,138.0,11.0,16.0,0.29,0.0,0.0,0.0,4.0,6.0,0.194,0.322,0.0,0.0,0.0,0.0,10.0,12.0,1.0,0.0,0.0,2.0,7.0,28.0,2019.0
25%,23.0,42.0,3.0,737.75,97.0,219.0,0.414,20.0,65.0,0.3,60.0,115.75,0.477,0.493,32.0,43.0,0.7,21.0,88.0,113.75,49.0,20.0,9.0,33.0,64.0,262.75,2020.0
50%,26.0,56.0,17.0,1216.0,182.5,388.0,0.448,51.0,147.0,0.343,115.0,221.0,0.521,0.5275,63.0,83.0,0.775,37.0,152.5,196.0,92.0,35.0,17.0,56.0,101.0,487.0,2021.0
75%,29.0,67.0,50.0,1810.5,293.25,640.0,0.494,98.0,266.0,0.379,208.0,388.0,0.569,0.563,123.0,159.0,0.836,67.0,240.0,307.0,176.0,55.0,33.0,95.0,142.0,817.25,2022.0
max,43.0,83.0,83.0,3028.0,843.0,1909.0,0.819,378.0,1028.0,1.0,724.0,1393.0,0.897,0.819,754.0,858.0,1.0,423.0,813.0,1232.0,784.0,170.0,199.0,387.0,292.0,2818.0,2023.0


In [37]:
#Players who have been traded appear 3 times, one for the original team, one for the team they got traded to and one for total, only want total
#Make a dataset of all players with a TOT team row to use later to confirm it worked
TOT_player_list = df[df['Tm'] == 'TOT']['Player'].unique()
#Going to take the row that has the most G because TOT would have all the games combined for that year and then drop other 2 rows
df = df.sort_values(['Player','G']).drop_duplicates(['Player','Year'], keep = 'last').sort_index()

In [43]:
#Check to see if that worked using the player list we created earlier
df[df['Player'].isin(TOT_player_list)].sort_values('Player')

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
3213,Aaron Gordon,PF,23,ORL,78,78,2633,470,1046,0.449,121,347,0.349,349,699,0.499,0.507,185,253,0.731,129,445,574,289,57,56,162,172,1246,2019
1787,Aaron Gordon,PF,25,TOT,50,50,1384,231,499,0.463,59,176,0.335,172,323,0.533,0.522,97,149,0.651,77,207,284,161,33,34,97,89,618,2021
2518,Aaron Gordon,PF,24,ORL,62,62,2017,335,767,0.437,73,237,0.308,262,530,0.494,0.484,151,224,0.674,107,368,475,228,51,39,100,125,894,2020
974,Aaron Gordon,PF,26,DEN,75,75,2376,434,834,0.520,87,260,0.335,347,574,0.605,0.573,171,230,0.743,125,314,439,188,44,44,133,148,1126,2022
222,Aaron Gordon,PF,27,DEN,68,68,2055,429,761,0.564,60,173,0.347,369,588,0.628,0.603,191,314,0.608,164,282,446,203,54,51,98,129,1109,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1672,Willie Cauley-Stein,C,27,DAL,53,16,906,115,182,0.632,1,11,0.091,114,171,0.667,0.635,49,78,0.628,66,170,236,35,21,43,29,129,280,2021
2397,Willie Cauley-Stein,C,26,TOT,54,39,1097,175,302,0.579,0,1,0.000,175,301,0.581,0.579,40,66,0.606,84,231,315,70,49,61,40,121,390,2020
3081,Willie Cauley-Stein,C,25,SAC,81,81,2213,412,741,0.556,1,2,0.500,411,739,0.556,0.557,140,254,0.551,181,497,678,194,96,51,84,227,965,2019
2401,Wilson Chandler,PF,32,BRK,35,3,734,74,183,0.404,37,121,0.306,37,62,0.597,0.505,20,23,0.870,11,134,145,39,16,12,34,79,205,2020


In [44]:
#Need to create 2 columns for fantasy points which are the points calculated and used to evaluate a players performance
df['FPTS'] = (df['PTS'] + df['FG'] - df['FGA'] + df['FT'] - df['FTA'] + df['TRB'] + df['AST'] + (1.5 * df['STL']) + (1.5 * df['BLK']) - df['TOV'])
df['AVG_FPTS'] = (df['FPTS']/df['G']).round(2)
df

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year,FPTS,AVG_FPTS
0,Precious Achiuwa,C,23,TOR,55,12,1140,196,404,0.485,29,108,0.269,167,296,0.564,0.521,87,124,0.702,100,228,328,50,31,30,59,102,508,2023,673.5,12.25
1,Steven Adams,C,29,MEM,42,42,1133,157,263,0.597,0,1,0.000,157,262,0.599,0.597,47,129,0.364,214,271,485,97,36,46,79,98,361,2023,799.0,19.02
2,Bam Adebayo,C,25,MIA,75,75,2598,602,1114,0.540,1,12,0.083,601,1102,0.545,0.541,324,402,0.806,184,504,688,240,88,61,187,208,1529,2023,1903.5,25.38
3,Ochai Agbaji,SG,22,UTA,59,22,1209,165,386,0.427,81,228,0.355,84,158,0.532,0.532,56,69,0.812,43,78,121,67,16,15,41,99,467,2023,426.5,7.23
4,Santi Aldama,PF,22,MEM,77,20,1682,247,525,0.470,94,266,0.353,153,259,0.591,0.560,108,144,0.750,85,286,371,97,45,48,60,143,696,2023,929.5,12.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3679,Thaddeus Young,PF,30,IND,81,81,2489,443,841,0.527,51,146,0.349,392,695,0.564,0.557,87,135,0.644,192,331,523,204,123,36,123,194,1024,2019,1420.5,17.54
3680,Trae Young,PG,20,ATL,81,81,2503,525,1256,0.418,156,482,0.324,369,774,0.477,0.480,343,414,0.829,64,237,301,653,72,15,308,140,1549,2019,1523.5,18.81
3681,Cody Zeller,C,26,CHO,49,47,1243,190,345,0.551,6,22,0.273,184,323,0.570,0.559,111,141,0.787,110,223,333,102,38,41,62,164,497,2019,803.5,16.40
3685,Ante Žižić,C,22,CLE,59,25,1082,183,331,0.553,0,0,0.000,183,331,0.553,0.553,93,132,0.705,108,212,320,53,13,22,61,113,459,2019,636.5,10.79


In [45]:
#Sort Data
df = df.sort_values(['Year','Player'], ascending=[True, True])

In [47]:
#Calculate percentile rank of fantasy points within each group
grouped = df.groupby('Year')
top_players = pd.DataFrame(columns=df.columns)
for year, group in grouped:
    group['FPTS_Perc_Rank'] = group['FPTS'].rank(pct=True)
    group['AVGFPTS_Perc_Rank'] = group['AVG_FPTS'].rank(pct=True)
    group
    #Add to other dataframe
    top_players = pd.concat([top_players, group], ignore_index = True)

  top_players = pd.concat([top_players, group], ignore_index = True)


In [48]:
#Going to take the average of the two percentile rank columns for each year to have one value
top_players['Rank'] = (top_players['FPTS_Perc_Rank'] + top_players['AVGFPTS_Perc_Rank'])/2

In [49]:
#Let's also make a column that'll help evaluate if this player is getting better or worse every year to see where they're trending
#Create a list of unique players
players_list = top_players['Player'].unique().tolist()

player_trend = []
Years = []
for player in players_list:
    player_df = top_players[top_players['Player'] == player]
    #Find line of best fit for the players rank in all the seasons
    x = np.array(player_df['Year'], dtype = float)
    y = np.array(player_df['Rank'], dtype = float)
    slope, intercept = np.polyfit(x,y,1)
    player_trend.append(slope)
    Years.append(len(top_players[top_players['Player'] == player]))

  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.polyfit(x,y,1)
  slope, intercept = np.p

In [50]:
top_players

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year,FPTS,AVG_FPTS,FPTS_Perc_Rank,AVGFPTS_Perc_Rank,Rank
0,Aaron Gordon,PF,23,ORL,78,78,2633,470,1046,0.449,121,347,0.349,349,699,0.499,0.507,185,253,0.731,129,445,574,289,57,56,162,172,1246,2019,1472.5,18.88,0.884236,0.852217,0.868227
1,Aaron Holiday,PG,22,IND,50,0,646,105,262,0.401,43,127,0.339,62,135,0.459,0.483,41,50,0.820,5,62,67,87,21,13,40,71,294,2019,293.0,5.86,0.187192,0.137931,0.162562
2,Abdel Nader,SF,25,OKC,61,1,694,91,215,0.423,32,100,0.320,59,115,0.513,0.498,27,36,0.750,14,102,116,20,20,12,26,68,241,2019,266.0,4.36,0.155172,0.061576,0.108374
3,Al Horford,C,32,BOS,68,68,1973,387,723,0.535,73,203,0.360,314,520,0.604,0.586,78,95,0.821,120,338,458,283,59,86,102,126,925,2019,1428.5,21.01,0.864532,0.901478,0.883005
4,Al-Farouq Aminu,PF,28,POR,81,81,2292,257,593,0.433,96,280,0.343,161,313,0.514,0.514,150,173,0.867,112,498,610,104,68,33,72,143,760,2019,1194.5,14.75,0.805419,0.704433,0.754926
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2046,Zach Collins,C,25,SAS,63,26,1441,284,548,0.518,55,147,0.374,229,401,0.571,0.568,108,142,0.761,116,286,402,180,37,49,129,199,731,2023,1015.0,16.11,0.695035,0.725768,0.710402
2047,Zach LaVine,SG,27,CHI,77,77,2768,673,1388,0.485,204,544,0.375,469,844,0.556,0.558,363,428,0.848,42,303,345,327,69,18,194,159,1913,2023,1741.5,22.62,0.945626,0.893617,0.919622
2048,Zeke Nnaji,PF,22,DEN,53,5,728,110,196,0.561,17,65,0.262,93,131,0.710,0.605,40,62,0.645,65,73,138,18,17,23,31,105,277,2023,354.0,6.68,0.257683,0.208038,0.232861
2049,Ziaire Williams,SF,21,MEM,37,4,561,84,196,0.429,25,97,0.258,59,99,0.596,0.492,17,22,0.773,16,63,79,35,14,6,37,58,210,2023,200.0,5.41,0.108747,0.163121,0.135934


In [52]:
#Get average rank for each player over all seasons they played
avg_rank = []
for player in players_list:
    avg_rank.append(sum(top_players[top_players['Player'] == player]['Rank'])/ len(top_players[top_players['Player'] == player]))    

In [54]:
player_trend

[-0.00533081413702316,
 -0.017019455874747336,
 0.10516728243017796,
 -0.03270362935548553,
 0.0001869554503155978,
 0.011695499649174287,
 0.053302478767438405,
 -0.15032995649950204,
 -0.100178512220535,
 -0.20834616174048784,
 0.00011497912686619962,
 4.6357632052154745e-05,
 -0.10644105929291348,
 -0.09430174210477205,
 -0.02406147230879843,
 0.00012351869066528075,
 -0.008422799605151198,
 -0.05729808087025978,
 4.0562928045635405e-05,
 -0.00848007057595843,
 -0.05168615979170093,
 -0.06656059028368687,
 0.02888908646990411,
 -0.08780891476462108,
 -0.0019260260407629665,
 -0.1740503751116227,
 -0.10067214660111046,
 0.056022246637364416,
 -0.012008944171973521,
 -0.01057354572042667,
 0.1004967553997892,
 -0.04560297358468791,
 0.017600888232691604,
 -0.02841492200327448,
 0.02601433439084778,
 0.10700364318227322,
 -0.3300749178980823,
 -0.13087844642742316,
 -0.01830912451925569,
 3.842803709586512e-05,
 0.004689533007826208,
 0.05650941742833267,
 0.03431184946940173,
 -0.0602

In [55]:
#Make new dataframe with only relevant columns
final_df = pd.DataFrame(columns = ['Player','Rank','Trend'])
final_df['Player'] = players_list
final_df['Years'] = Years
final_df['Rank'] = avg_rank
final_df['Trend'] = player_trend
final_df['Trend Rank'] = final_df['Rank'] + (final_df['Trend'] * (final_df['Years']/5))

In [56]:
pd.set_option('display.float_format', '{:.5f}'.format)
final_df.sort_values(by='Trend Rank', ascending = False)

Unnamed: 0,Player,Rank,Trend,Years,Trend Rank
294,Nikola Jokić,0.99467,0.00390,5,0.99857
132,Giannis Antetokounmpo,0.99617,-0.00222,5,0.99395
253,Luka Dončić,0.97411,0.01495,5,0.98906
190,Joel Embiid,0.98066,0.00710,5,0.98775
97,Domantas Sabonis,0.96067,0.01849,5,0.97916
...,...,...,...,...,...
613,Justin Robinson,0.00592,0.00000,1,0.00592
525,Juwan Morgan,0.00361,0.00000,1,0.00361
461,Malcolm Miller,0.00260,0.00000,1,0.00260
384,Tyler Lydon,0.00246,0.00000,1,0.00246


In [57]:
final_df = final_df.sort_values(by='Trend Rank', ascending = False)

In [58]:
final_df.head(25)

Unnamed: 0,Player,Rank,Trend,Years,Trend Rank
294,Nikola Jokić,0.99467,0.0039,5,0.99857
132,Giannis Antetokounmpo,0.99617,-0.00222,5,0.99395
253,Luka Dončić,0.97411,0.01495,5,0.98906
190,Joel Embiid,0.98066,0.0071,5,0.98775
97,Domantas Sabonis,0.96067,0.01849,5,0.97916
176,Jayson Tatum,0.93802,0.03466,5,0.97268
249,LeBron James,0.96487,-0.00138,5,0.96349
167,James Harden,0.97511,-0.0131,5,0.96202
296,Nikola Vučević,0.96629,-0.00512,5,0.96117
374,Trae Young,0.94223,0.01843,5,0.96066


In [59]:
draft_df = final_df

In [60]:
#Now make functions to use for draft
def drafted_player(player):
    global draft_df
    draft_df = draft_df[draft_df['Player'] != player]
    return draft_df.head(25)

In [61]:
drafted_player('Nikola Jokić')

Unnamed: 0,Player,Rank,Trend,Years,Trend Rank
132,Giannis Antetokounmpo,0.99617,-0.00222,5,0.99395
253,Luka Dončić,0.97411,0.01495,5,0.98906
190,Joel Embiid,0.98066,0.0071,5,0.98775
97,Domantas Sabonis,0.96067,0.01849,5,0.97916
176,Jayson Tatum,0.93802,0.03466,5,0.97268
249,LeBron James,0.96487,-0.00138,5,0.96349
167,James Harden,0.97511,-0.0131,5,0.96202
296,Nikola Vučević,0.96629,-0.00512,5,0.96117
374,Trae Young,0.94223,0.01843,5,0.96066
350,Stephen Curry,0.96257,-0.00253,4,0.96054


In [62]:
drafted_player('Stephen Curry')
drafted_player('Giannis Antetokounmpo')
drafted_player('Luka Dončić')

Unnamed: 0,Player,Rank,Trend,Years,Trend Rank
190,Joel Embiid,0.98066,0.0071,5,0.98775
97,Domantas Sabonis,0.96067,0.01849,5,0.97916
176,Jayson Tatum,0.93802,0.03466,5,0.97268
249,LeBron James,0.96487,-0.00138,5,0.96349
167,James Harden,0.97511,-0.0131,5,0.96202
296,Nikola Vučević,0.96629,-0.00512,5,0.96117
374,Trae Young,0.94223,0.01843,5,0.96066
79,DeMar DeRozan,0.95004,0.00513,5,0.95517
331,Rudy Gobert,0.96531,-0.01117,5,0.95414
22,Bam Adebayo,0.92449,0.02889,5,0.95338
