## Prediction model

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
salary_df = pd.read_csv("dataset_for_prediction.csv")
#salary_df[(salary_df["Year"]==2000) & (salary_df["Tm"]=="SEA")]

In [3]:
teams = salary_df[["Tm", "Year", "Salaries"]]
teams = teams.groupby(["Tm", "Year"]).sum()
teams.rename(columns={'Salaries':'Teamcap'}, inplace=True)
dataset = pd.merge(left=salary_df, right=teams, left_on=["Tm", "Year"], right_on=["Tm", "Year"])
dataset['Wage%'] = round(dataset['Salaries'] / dataset['Teamcap'], 2)

#### Creating dummy variables ####

In [4]:

data_dum = pd.get_dummies(columns=["Pos", "Tm", "Teamcap"], data=dataset)
prediction_data = data_dum.drop(columns = ["Player"])
print(prediction_data)

       Unnamed: 0  Year   Age     G      MP   PER    TS%   3PAr    FTr  ORB%  \
0               0  2000  24.0  82.0  3070.0  20.6  0.570  0.288  0.282   3.2   
1               1  2000  23.0  27.0   361.0   4.3  0.310  0.147  0.042   1.6   
2              18  2000  30.0  81.0  2899.0  21.1  0.551  0.077  0.380   2.8   
3              61  2000  32.0  80.0  2129.0  12.7  0.550  0.004  0.563  12.8   
4             108  2000  27.0  81.0  2909.0  17.8  0.534  0.162  0.194   4.3   
5             124  2000  22.0  44.0   447.0  15.8  0.517  0.033  0.557  13.1   
6             133  2000  31.0  68.0  1488.0  15.6  0.539  0.000  0.303  14.0   
7               2  2000  29.0  82.0  2593.0  17.4  0.524  0.223  0.257   2.3   
8              78  2000  25.0  61.0   879.0   7.2  0.451  0.478  0.235   4.1   
9              98  2000  22.0  73.0  2583.0  19.8  0.550  0.255  0.409   3.5   
10            101  2000  24.0  79.0  1797.0  13.8  0.527  0.002  0.260  11.0   
11            126  2000  23.0  82.0  300

#### Spliting data for testing and using StandardScaler ####

In [5]:
from sklearn.model_selection import train_test_split
X = prediction_data.copy().drop(columns=["Wage%", "Salaries"])
y = prediction_data["Wage%"]

X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.33, random_state=42)

In [6]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
standardized_data = scaler.fit_transform(X_train)
standardized_test = scaler.fit_transform(X_test)

In [7]:
def getPlayerData(name, year=9999):    
    testPlayer = data_dum[data_dum["Player"] == name]
    if year!=9999:
        testPlayer = testPlayer[testPlayer["Year"] == year]
    testPlayer = testPlayer.drop(columns=["Player", "Salaries", "Wage%"])
    return testPlayer

def getPlayerDataWSalary(name, year=9999):    
    testPlayer = data_dum[data_dum["Player"] == name]
    if year!=9999:
        testPlayer = testPlayer[testPlayer["Year"] == year]
    return testPlayer
#rayAllen_pred = rf.predict(rayAllen)
#print(rayAllen_pred)

### Predicting using Random Forest ###

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix

# TODO
## rf = RandomForestClassifier(n_estimators=100, max_depth=4,random_state=0).fit(X_train, y_train)

### Predicting using Linear Regression ###

In [10]:
from sklearn.linear_model import LinearRegression
#"Linear regression does no t respect the bounds of 0. It's linear, always and everywhere.
#It may not be appropriate for values that need to be close to 0 but are strictly positive."
#https://stats.stackexchange.com/questions/145383/getting-negative-predicted-values-after-linear-regression

lr = LinearRegression()
lr.fit(standardized_data, y_train)
print(lr.coef_)

[-2.04736340e-02 -1.19629128e+10  2.90281573e-03 -7.73035292e-03
 -1.36166646e-02  4.57434456e-03 -4.64335132e-04 -1.67876293e-03
  6.86675183e-04 -6.84654713e-03 -8.39117169e-03  1.23969316e-02
  3.78862023e-04  1.14504993e-03  1.16500258e-03  1.26630068e-03
 -2.61507928e-03 -1.46040916e-02 -1.57427192e-02  4.23494577e-02
 -7.61252642e-03  1.04368925e-02  8.02278519e-05 -6.12613559e-03
  8.10545683e-03 -6.21677462e+10  6.42948219e+10 -6.32464886e-04
  1.79023176e+10 -2.17230646e+10 -4.41074371e-05  5.51179070e+10
 -5.16784512e+10  1.38372183e-03 -5.14507294e-04  5.80902057e+08
  2.02808976e-02 -2.13786960e-04 -3.05415841e+09 -7.16570644e+09
  9.87785946e+09 -8.53543729e-03 -6.25228882e-03  1.45828724e-03
  1.36032701e-02 -3.55005264e-03 -2.63615017e+09 -4.84924316e-02
  4.10476860e+10  4.15881783e+10  4.05661538e+10  3.90730488e+10
  4.17358768e+10 -7.61896359e+09  3.26961304e+10 -1.62071358e+10
 -2.66946406e+10 -1.22770520e+10  7.90183396e+09 -5.52467260e+10
 -1.74338364e+10  1.19149

In [11]:
print(lr.predict(getPlayerData("Kobe Bryant", 2005)))


[-7.60408681e+12]
2331    0.21
Name: Wage%, dtype: float64


###  Predicting using RidgeRegression  ###

In [29]:
solvers = ['svd', 'cholesky', 'lsqr', 'sparse_cg']
best_model = clf
abs_error = 100
for solver in solvers: 
    from sklearn.linear_model import Ridge
    from sklearn.metrics import mean_absolute_error
    clf = Ridge(alpha=1.0, solver=solver)
    clf.fit(X, y)
    print(solver)
    error = mean_absolute_error(y_test, clf.predict(standardized_test))
    if(error < abs_error):
        abs_error = error
        best_model = clf



svd
cholesky
lsqr
sparse_cg


In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
clf = Ridge(alpha=1.0)
clf.fit(X, y)
clf.predict(getPlayerData("Kobe Bryant", 2005))

mean_absolute_error(y_test, clf.predict(standardized_test))

### Predicting Key Players Salaries ###
#### Predicting new contract salary by year ####
Chosen players:
* Anthony Davis
* Damian Lillard
* Giannis Antetokounmpo

In [36]:
players = ["Anthony Davis", "Damian Lillard", "Giannis Antetokounmpo"]
choosenModel = best_model
for player in players:
    salaryNow = getPlayerDataWSalary(player, 2019)["Wage%"].iloc[0]
    predictedSalary = choosenModel.predict(getPlayerData(player, 2019))
    print("Player: " + str(player)+"; "+ "Current Wage%: "+ str(salaryNow) +"; "+"Predicted next season Wage%: "+ str(predictedSalary))
    



Player: Anthony Davis; Current Wage%: 0.22; Predicted next season Wage%: [0.20162981]
Player: Damian Lillard; Current Wage%: 0.22; Predicted next season Wage%: [0.19217969]
Player: Giannis Antetokounmpo; Current Wage%: 0.17; Predicted next season Wage%: [0.22330134]
