# Setup

In [7]:
import pandas as pd
import numpy as np
import math

# Load Data

In [2]:
teams_file = './data_files/2015-20_team_ratings.csv'
teams_df = pd.read_csv(teams_file)

In [3]:
teams_df.head()

Unnamed: 0.1,Unnamed: 0,Rk,Team,Conf,Div,W,L,W/L%,MOV,ORtg,DRtg,NRtg,MOV/A,ORtg/A,DRtg/A,NRtg/A,season
0,0,1,Milwaukee Bucks,E,C,41,7,0.854,12.27,114.62,102.88,11.74,10.95,114.21,103.78,10.44,2019-20
1,1,2,Los Angeles Lakers,W,P,36,11,0.766,6.94,113.38,106.6,6.77,7.22,113.14,106.07,7.07,2019-20
2,2,3,Dallas Mavericks,W,SW,29,19,0.604,5.85,117.87,111.93,5.94,6.18,118.09,111.86,6.23,2019-20
3,3,4,Boston Celtics,E,A,32,15,0.681,6.87,113.53,106.56,6.97,5.96,113.14,107.11,6.03,2019-20
4,4,5,Los Angeles Clippers,W,P,33,15,0.688,5.79,113.07,107.44,5.62,6.06,112.8,106.9,5.9,2019-20


# Question 1: Win Expectation

In [None]:
# win %  = ortg ^ x / (ortg + drtg) ^ x
# figure out x for lowest error
# https://en.wikipedia.org/wiki/Pythagorean_expectation
# Morey says 13.91
# https://www.nbastuffer.com/analytics101/pythagorean-winning-percentage/
# this site says 16.5
# so calculate mae

In [5]:
# Add up total games played
# most should be 82
teams_df['G'] = teams_df.W + teams_df.L

In [75]:
def calc_expectation(in_df, x, out_dict):
    df = in_df.copy()
    df['rounded_wins'] = in_df.G * (in_df.ORtg ** x) / (in_df.ORtg ** x + in_df.DRtg ** x)
    #in_df.rounded_wins = math.floor(expected_wins)

    #print(df.head())

    w_std = df.rounded_wins.std()
    n = len(df)

    df['abs_error'] = abs(df.rounded_wins - df.W)
    df['se'] = float(w_std / np.sqrt(n))

    # print(df.abs_error.mean())
    # print(df.se.mean())

    out_dict[x] = {'se': df.se.mean(), 'mae':df.abs_error.mean()}

In [79]:
calc_df = teams_df[['G', 'W', 'L', 'ORtg', 'DRtg']].copy()
results_dict = dict()

In [80]:
# Go through whole numbers of 1-20
for i in range(1,20):
    #print(i)
    calc_expectation(calc_df, i, results_dict)

8.74661295817776
0.5559254340217776
8.12115314534777
0.568235586313577
7.5008966331650075
0.5882108082289218
6.884776570872502
0.6149074360823298
6.273534341393199
0.6472391680394897
5.668272749621451
0.6841091218077936
5.070022552457779
0.7244972146464049
4.479733297078285
0.7675041526882335
3.9151506374648224
0.8123642794022742
3.396204295439574
0.8584403771761913
2.9560869593399404
0.9052100875600602
2.5829037907360983
0.9522497707146601
2.313016028622644
0.9992187865378362
2.177822684143679
1.0458454587961767
2.1901020934745667
1.0919150719869806
2.298883743961574
1.1372598270060357
2.4909600898027233
1.1817505194999312
2.7771756174694877
1.2252896691234507
3.1205043359570768
1.2678058462897759


In [81]:
results_dict

{1: {'se': 0.5559254340217776, 'mae': 8.74661295817776},
 2: {'se': 0.568235586313577, 'mae': 8.12115314534777},
 3: {'se': 0.5882108082289218, 'mae': 7.5008966331650075},
 4: {'se': 0.6149074360823298, 'mae': 6.884776570872502},
 5: {'se': 0.6472391680394897, 'mae': 6.273534341393199},
 6: {'se': 0.6841091218077936, 'mae': 5.668272749621451},
 7: {'se': 0.7244972146464049, 'mae': 5.070022552457779},
 8: {'se': 0.7675041526882335, 'mae': 4.479733297078285},
 9: {'se': 0.8123642794022742, 'mae': 3.9151506374648224},
 10: {'se': 0.8584403771761913, 'mae': 3.396204295439574},
 11: {'se': 0.9052100875600602, 'mae': 2.9560869593399404},
 12: {'se': 0.9522497707146601, 'mae': 2.5829037907360983},
 13: {'se': 0.9992187865378362, 'mae': 2.313016028622644},
 14: {'se': 1.0458454587961767, 'mae': 2.177822684143679},
 15: {'se': 1.0919150719869806, 'mae': 2.1901020934745667},
 16: {'se': 1.1372598270060357, 'mae': 2.298883743961574},
 17: {'se': 1.1817505194999312, 'mae': 2.4909600898027233},
 18

In [82]:
results_df = pd.DataFrame.from_dict(results_dict)

In [83]:
results_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
se,0.555925,0.568236,0.588211,0.614907,0.647239,0.684109,0.724497,0.767504,0.812364,0.85844,0.90521,0.95225,0.999219,1.045845,1.091915,1.13726,1.181751,1.22529,1.267806
mae,8.746613,8.121153,7.500897,6.884777,6.273534,5.668273,5.070023,4.479733,3.915151,3.396204,2.956087,2.582904,2.313016,2.177823,2.190102,2.298884,2.49096,2.777176,3.120504


In [88]:
# fine tune by decimals
results_dict = dict()
for i in np.arange(10, 11.2, 0.1):
    calc_expectation(calc_df, i, results_dict)

3.396204295439574
0.8584403771761913
3.3477889779879
0.8630938456726082
3.300016061717185
0.8677537049100285
3.252337643801435
0.872419500565937
3.2059694055471026
0.8770907878489602
3.162580201518585
0.8817671313359942
3.1196872045780446
0.8864481048105868
3.077890413680149
0.8911332911027485
3.0370306874144015
0.8958222819302364
2.996254613429181
0.9005146777414562
2.956086959339944
0.9052100875600602
2.916486731246307
0.9099081288312586


In [89]:
results_dict

{10.0: {'se': 0.8584403771761913, 'mae': 3.396204295439574},
 10.1: {'se': 0.8630938456726082, 'mae': 3.3477889779879},
 10.2: {'se': 0.8677537049100285, 'mae': 3.300016061717185},
 10.299999999999999: {'se': 0.872419500565937, 'mae': 3.252337643801435},
 10.399999999999999: {'se': 0.8770907878489602, 'mae': 3.2059694055471026},
 10.499999999999998: {'se': 0.8817671313359942, 'mae': 3.162580201518585},
 10.599999999999998: {'se': 0.8864481048105868, 'mae': 3.1196872045780446},
 10.699999999999998: {'se': 0.8911332911027485, 'mae': 3.077890413680149},
 10.799999999999997: {'se': 0.8958222819302364, 'mae': 3.0370306874144015},
 10.899999999999997: {'se': 0.9005146777414562, 'mae': 2.996254613429181},
 10.999999999999996: {'se': 0.9052100875600602, 'mae': 2.956086959339944},
 11.099999999999996: {'se': 0.9099081288312586, 'mae': 2.916486731246307}}

In [95]:
# use 10.8 as calculation
calc_df = teams_df[['Team', 'G', 'W', 'L', 'ORtg', 'DRtg']].copy()

calc_df['expected_wins'] = calc_df.G * (calc_df.ORtg ** 10.8) / (calc_df.ORtg ** 10.8 + calc_df.DRtg ** 10.8)
calc_df.expected_wins = calc_df.expected_wins.round(0)

In [96]:
calc_df.expected_wins

0      37.0
1      31.0
2      31.0
3      31.0
4      30.0
       ... 
145    32.0
146    27.0
147    26.0
148    22.0
149    21.0
Name: expected_wins, Length: 150, dtype: float64

In [103]:
# Morey's exponent
calc_df['expected_wins2'] = calc_df.G * (calc_df.ORtg ** 13.91) / (calc_df.ORtg ** 13.91 + calc_df.DRtg ** 13.91)
calc_df.expected_wins2 = calc_df.expected_wins2.round(0)

In [105]:
# nbastuffer exponent
calc_df['expected_wins3'] = calc_df.G * (calc_df.ORtg ** 16.5) / (calc_df.ORtg ** 16.5 + calc_df.DRtg ** 16.5)
calc_df.expected_wins3 = calc_df.expected_wins3.round(0)

In [106]:
calc_df[calc_df.Team == 'San Antonio Spurs']

Unnamed: 0,Team,G,W,L,ORtg,DRtg,expected_wins,expected_wins2,expected_wins3
13,San Antonio Spurs,47,21,26,112.44,113.3,23.0,22.0,22.0
41,San Antonio Spurs,82,48,34,113.3,111.55,44.0,45.0,46.0
67,San Antonio Spurs,82,47,35,108.49,105.36,47.0,49.0,51.0
91,San Antonio Spurs,82,61,21,111.81,104.25,56.0,60.0,62.0
120,San Antonio Spurs,82,67,15,111.04,99.55,63.0,67.0,70.0


Spurs are over performing this season.

# Question 2

Data Sources:

* Basketball-Reference
* NBA.com/stats

Method: 

I just eyeballed it and picked young, interesting prospects for the sake of analysis.

In [101]:
players_file = './data_files/players_comparison.csv'
players_df = pd.read_csv(players_file)

In [102]:
players_df

Unnamed: 0.1,Unnamed: 0,Luka Doncic,Bradley Beal,Zach LaVine,Trae Young
0,usg,36.1,32.4,30.8,33.8
1,3p_perc_cs,30.2,35.8,41.8,47.8
2,ast_perc,45.9,28.4,21.0,42.0
3,def_per_g,1.6,1.8,2.1,1.9
4,fg_diff,1.3,2.4,-4.6,3.5
5,pr_ppp,1.06,0.97,0.9,0.96


# Question 3

In [107]:
# Subtract Derozan's ortg an drtg
112.44 - 119

-6.560000000000002

In [108]:
113.30 - 113

0.29999999999999716

In [109]:
# Add Luka's
118 + -6.560000000000002

111.44

In [110]:
0.29999999999999716 + 108

108.3

In [111]:
# diffs
print(111.44 - 112.44)
print(113.30 - 108.3)

-1.0
5.0


In [112]:
47 * (111.44**10.8) / (111.44**10.8 + 108.3**10.8)

27.098424555127075