In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn import tree
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import balanced_accuracy_score

In [2]:
nbadf = pd.read_csv('all_seasons.csv', sep = ",")
nbadf.columns

Index(['Unnamed: 0', 'player_name', 'team_abbreviation', 'age',
       'player_height', 'player_weight', 'college', 'country', 'draft_year',
       'draft_round', 'draft_number', 'gp', 'pts', 'reb', 'ast', 'net_rating',
       'oreb_pct', 'dreb_pct', 'usg_pct', 'ts_pct', 'ast_pct', 'season'],
      dtype='object')

In [3]:
kolom = ['player_name', 'team_abbreviation', 'age', 'player_height', 'player_weight', 'country', 'pts', 'reb']
nbadf = nbadf[kolom]
nbadf.head()

Unnamed: 0,player_name,team_abbreviation,age,player_height,player_weight,country,pts,reb
0,Chris Robinson,VAN,23.0,195.58,90.7184,USA,4.6,1.7
1,Matt Fish,MIA,27.0,210.82,106.59412,USA,0.3,0.8
2,Matt Bullard,HOU,30.0,208.28,106.59412,USA,4.5,1.6
3,Marty Conlon,BOS,29.0,210.82,111.13004,USA,7.8,4.4
4,Martin Muursepp,DAL,22.0,205.74,106.59412,USA,3.7,1.6


In [4]:
nbadf = nbadf[['age', 'player_height', 'player_weight', 'pts', 'reb']]
nbadf = nbadf.rename(columns = {'age' : 'Age', 'player_height' : 'Height', 'player_weight' : 'Weight', 'pts' : 'Points', 'reb' : 'Rebounds'})
nbadf.head()

Unnamed: 0,Age,Height,Weight,Points,Rebounds
0,23.0,195.58,90.7184,4.6,1.7
1,27.0,210.82,106.59412,0.3,0.8
2,30.0,208.28,106.59412,4.5,1.6
3,29.0,210.82,111.13004,7.8,4.4
4,22.0,205.74,106.59412,3.7,1.6


In [5]:
# TESTING
nbadf['Age <= 25'] = nbadf.apply(lambda row: 1 if row['Age'] <= 25 else 0,axis=1)
nbadf['Points >= 6'] = nbadf.apply(lambda row: 1 if row['Points'] >= 6.0 else 0,axis=1)
nbadf['Rebounds >= 3'] = nbadf.apply(lambda row: 1 if row['Rebounds'] >= 3.0 else 0,axis=1)

# ACTUAL TARGET CREATION
nbadf['Accepted'] = nbadf.apply(
    lambda row: 1 if (row['Age'] <= 25) & (row['Points >= 6'] >= 6.0) & (row['Rebounds >= 3']>=3.0) else 0,
    axis=1
)
nbadf.head()

Unnamed: 0,Age,Height,Weight,Points,Rebounds,Age <= 25,Points >= 6,Rebounds >= 3,Accepted
0,23.0,195.58,90.7184,4.6,1.7,1,0,0,0
1,27.0,210.82,106.59412,0.3,0.8,0,0,0,0
2,30.0,208.28,106.59412,4.5,1.6,0,0,0,0
3,29.0,210.82,111.13004,7.8,4.4,0,1,1,0
4,22.0,205.74,106.59412,3.7,1.6,1,0,0,0


### __Standardize__

In [6]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(nbadf[["Age" ,"Height", "Weight", "Points", "Rebounds"]])
z = scaler.fit_transform(nbadf[["Age" ,"Height", "Weight", "Points", "Rebounds"]])
z

array([[-0.99674183, -0.57899275, -0.80260413, -0.58938997, -0.74785002],
       [-0.07727165,  1.06312962,  0.43960663, -1.31961423, -1.10871173],
       [ 0.61233098,  0.78944256,  0.43960663, -0.60637193, -0.78794577],
       ...,
       [-0.53700674,  2.15787786,  1.50435871, -0.69128173,  0.7757883 ],
       [-0.99674183,  1.06312962,  0.61706531,  0.02196056,  1.37722448],
       [ 0.61233098,  0.24206844,  0.08468927,  1.29560752,  1.17674575]])

In [7]:
nbadf["StdAge"] = z[:,0]
nbadf["StdHeight"] = z[:,1]
nbadf["StdWeight"] = z[:,2]
nbadf["StdAvg Points"] = z[:,3]
nbadf["StdAvg Rebounds"] = z[:,4]
nbadf.head()

Unnamed: 0,Age,Height,Weight,Points,Rebounds,Age <= 25,Points >= 6,Rebounds >= 3,Accepted,StdAge,StdHeight,StdWeight,StdAvg Points,StdAvg Rebounds
0,23.0,195.58,90.7184,4.6,1.7,1,0,0,0,-0.996742,-0.578993,-0.802604,-0.58939,-0.74785
1,27.0,210.82,106.59412,0.3,0.8,0,0,0,0,-0.077272,1.06313,0.439607,-1.319614,-1.108712
2,30.0,208.28,106.59412,4.5,1.6,0,0,0,0,0.612331,0.789443,0.439607,-0.606372,-0.787946
3,29.0,210.82,111.13004,7.8,4.4,0,1,1,0,0.382463,1.06313,0.794524,-0.045967,0.334735
4,22.0,205.74,106.59412,3.7,1.6,1,0,0,0,-1.226609,0.515755,0.439607,-0.742228,-0.787946


In [8]:
indo = [
    {"Name" : "Andakara Prastawa Dyaksa", "Club" : "Pelita Jaya Bakrie", "Country" : "Indonesia", "Age" : 24, "Height" : 190, "Weight" : 90, "Avg Points" : 7, "Avg Rebounds" : 6},
    {"Name" : "Reggie Mononimbar", "Club" : "Pelita Jaya Bakrie", "Country" : "Indonesia", "Age" : 21, "Height" : 185, "Weight" : 86, "Avg Points" : 6, "Avg Rebounds" : 3},
    {"Name" : "Hardianus Lakudu", "Club" : "Satria Muda Pertamina Jakarta", "Country" : "Indonesia", "Age" : 23, "Height" : 178, "Weight" : 83, "Avg Points" : 10, "Avg Rebounds" : 3},
    {"Name" : "Kevin Yonas Sitorus", "Club" : "Satria Muda Pertamina Jakarta", "Country" : "Indonesia", "Age" : 26, "Height" : 185, "Weight" : 75, "Avg Points" : 11, "Avg Rebounds" : 4},
    {"Name" : "Arki Dikania Wisnu", "Club" : "Satria Muda Pertamina Jakarta", "Country" : "Indonesia", "Age" : 20, "Height" : 183, "Weight" : 80, "Avg Points" : 5, "Avg Rebounds" : 2},
    {"Name" : "Laurentius Steven Oei", "Club" : "Satria Muda Pertamina Jakarta", "Country" : "Indonesia", "Age" : 21, "Height" : 191, "Weight" : 85, "Avg Points" : 4, "Avg Rebounds" : 10},
    {"Name" : "Mei Joni", "Club" : "Stapac", "Country" : "Indonesia", "Age" : 25, "Height" : 188, "Weight" : 90, "Avg Points" : 7, "Avg Rebounds" : 5},
    {"Name" : "Vincent Rivaldi Kosasih", "Club" : "Stapac", "Country" : "Indonesia", "Age" : 23, "Height" : 179, "Weight" : 87, "Avg Points" : 1, "Avg Rebounds" : 2},
    {"Name" : "Hardian Wicaksono", "Club" : "Pacific Caesar Surabaya", "Country" : "Indonesia", "Age" : 21, "Height" : 177, "Weight" : 80, "Avg Points" : 9, "Avg Rebounds" : 8},
    {"Name" : "Brandon Jawato", "Club" : "Louvre Surabaya", "Country" : "Indonesia", "Age" : 24, "Height" : 182, "Weight" : 85, "Avg Points" : 6, "Avg Rebounds" : 5}
]

player = pd.DataFrame(indo)
player = player[["Name", "Age", "Height", "Weight", "Avg Points", "Avg Rebounds"]]
player

Unnamed: 0,Name,Age,Height,Weight,Avg Points,Avg Rebounds
0,Andakara Prastawa Dyaksa,24,190,90,7,6
1,Reggie Mononimbar,21,185,86,6,3
2,Hardianus Lakudu,23,178,83,10,3
3,Kevin Yonas Sitorus,26,185,75,11,4
4,Arki Dikania Wisnu,20,183,80,5,2
5,Laurentius Steven Oei,21,191,85,4,10
6,Mei Joni,25,188,90,7,5
7,Vincent Rivaldi Kosasih,23,179,87,1,2
8,Hardian Wicaksono,21,177,80,9,8
9,Brandon Jawato,24,182,85,6,5


In [9]:
scl = StandardScaler()
scl.fit(player[["Age" ,"Height", "Weight", "Avg Points", "Avg Rebounds"]])
a = scl.fit_transform(player[["Age" ,"Height", "Weight", "Avg Points", "Avg Rebounds"]])
a

array([[ 0.63599873,  1.32911403,  1.31632171,  0.14285714,  0.48349378],
       [-0.95399809,  0.25724788,  0.42390021, -0.21428571, -0.72524067],
       [ 0.10599979, -1.24336474, -0.24541591,  1.21428571, -0.72524067],
       [ 1.69599661,  0.25724788, -2.0302589 ,  1.57142857, -0.32232919],
       [-1.48399703, -0.17149859, -0.91473203, -0.57142857, -1.12815215],
       [-0.95399809,  1.54348727,  0.20079484, -0.92857143,  2.09513971],
       [ 1.16599767,  0.90036757,  1.31632171,  0.14285714,  0.0805823 ],
       [ 0.10599979, -1.02899151,  0.64700558, -2.        , -1.12815215],
       [-0.95399809, -1.45773797, -0.91473203,  0.85714286,  1.28931674],
       [ 0.63599873, -0.38587182,  0.20079484, -0.21428571,  0.0805823 ]])

In [10]:
nbadf["Rekomendasi"] = nbadf.apply(lambda nbadf : 1 if (nbadf["Age"] <= 25) and 
                                                (nbadf["Height"] >= 180) and  
                                                (nbadf["Weight"] <= 90) and  
                                                (nbadf["Points"] >= 6) and  
                                                (nbadf["Rebounds"] >= 3) else 0, axis = 1)
nbadf.head()

Unnamed: 0,Age,Height,Weight,Points,Rebounds,Age <= 25,Points >= 6,Rebounds >= 3,Accepted,StdAge,StdHeight,StdWeight,StdAvg Points,StdAvg Rebounds,Rekomendasi
0,23.0,195.58,90.7184,4.6,1.7,1,0,0,0,-0.996742,-0.578993,-0.802604,-0.58939,-0.74785,0
1,27.0,210.82,106.59412,0.3,0.8,0,0,0,0,-0.077272,1.06313,0.439607,-1.319614,-1.108712,0
2,30.0,208.28,106.59412,4.5,1.6,0,0,0,0,0.612331,0.789443,0.439607,-0.606372,-0.787946,0
3,29.0,210.82,111.13004,7.8,4.4,0,1,1,0,0.382463,1.06313,0.794524,-0.045967,0.334735,0
4,22.0,205.74,106.59412,3.7,1.6,1,0,0,0,-1.226609,0.515755,0.439607,-0.742228,-0.787946,0


### __1. K-Neighbors Classifier__

In [11]:
from sklearn.model_selection import train_test_split

xtr, xts, ytr, yts = train_test_split(nbadf[['Age', 'Height', 'Weight', 'Points', 'Rebounds']], nbadf['Rekomendasi'], test_size = .18)

In [12]:
def k_value():
    k = round((len(nbadf['Accepted'])) ** .5)
    if k % 2 == 0:
        return k + 1
    else:
        return k
KNeighbors = KNeighborsClassifier(
    n_neighbors=k_value()
)
print(KNeighbors.fit(xtr, ytr))
print(KNeighbors.score(xts, yts))

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=99, p=2,
                     weights='uniform')
0.9837303893085415


### __2. Random Forest Classifier__

In [13]:
from sklearn.model_selection import train_test_split

xtr, xts, ytr, yts = train_test_split(nbadf[['Age', 'Height', 'Weight', 'Points', 'Rebounds']], nbadf['Rekomendasi'], test_size = .18)

In [14]:
RF = RandomForestClassifier(n_estimators=100)
print(RF.fit(xtr,ytr))
print(RF.score(xts, yts))

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
1.0


In [15]:
paracetamol = {'bootstrap': [True, False],
          'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
          'max_features': ['auto', 'sqrt'],
          'min_samples_leaf': [1, 2, 4],
          'min_samples_split': [2, 5, 10],
          'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}
RF.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [16]:
model2 = RandomForestClassifier(n_estimators = 2000, 
                             min_samples_split = 10, 
                             min_samples_leaf = 1, 
                             max_features = "sqrt", 
                             max_depth = 100, 
                             bootstrap = "False")
print(model2.fit(xtr,ytr))
print(model2.score(xts,yts))

RandomForestClassifier(bootstrap='False', ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=100, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=2000,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
1.0


#### __Evaluation Metrics__

In [17]:
ypred = model2.predict(xts)
print(balanced_accuracy_score(yts, ypred))
print(precision_score(yts, ypred))
print(recall_score(yts, ypred))
print(f1_score(yts, ypred))
print(roc_auc_score(yts, ypred))
print(ypred[:10])
print(yts[:10])

1.0
1.0
1.0
1.0
1.0
[0 0 0 0 0 0 0 0 0 0]
9173    0
2374    0
2100    0
9533    0
7252    0
3114    0
7479    0
938     0
8885    0
8757    0
Name: Rekomendasi, dtype: int64


### __3. Logistic Regression__

In [18]:
from sklearn.model_selection import train_test_split

xtr, xts, ytr, yts = train_test_split(nbadf[['Age', 'Height', 'Weight', 'Points', 'Rebounds']], nbadf['Rekomendasi'], test_size = .18)

In [19]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression(
    solver='lbfgs',
    multi_class='auto',
    max_iter=100000
)
print(LR.fit(xtr,ytr))
print(LR.score(xts, yts))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)
0.9843114468332365


In [20]:
penalty = ["l2"]
solver = ["newton-cg", "lbfgs", "liblinear", "sag", "saga"]
max_iter = [1, 10, 100, 1000, 10000]

param = {"penalty": penalty, "solver": solver, "max_iter" : max_iter}
LR.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100000,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [21]:
model3 = LogisticRegression(solver = "newton-cg", penalty = "l2", max_iter = 10000)
print(model3.fit(xtr,ytr))
print(model3.score(xts,yts))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=10000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='newton-cg', tol=0.0001, verbose=0,
                   warm_start=False)
0.9843114468332365


#### __Evaluation Matrics__

In [22]:
ypred = model3.predict(xts)
print(balanced_accuracy_score(yts, ypred))
print(precision_score(yts, ypred))
print(recall_score(yts, ypred))
print(f1_score(yts, ypred))
print(roc_auc_score(yts, ypred))
print(ypred[:10])
print(yts[:10])

0.7038076641444959
0.6666666666666666
0.4117647058823529
0.509090909090909
0.7038076641444959
[0 0 0 0 0 0 0 0 0 0]
8417    0
45      0
7659    0
1754    0
3963    0
494     0
244     0
5310    0
2567    0
1128    0
Name: Rekomendasi, dtype: int64


### _**Berdasarkan Model di Atas, score paling besar didapat melalui model Random Forest yang sudah di Hypertuning**_

In [23]:
prediksi = pd.DataFrame(indo, columns=indo[0].keys())
prediksi

Unnamed: 0,Name,Club,Country,Age,Height,Weight,Avg Points,Avg Rebounds
0,Andakara Prastawa Dyaksa,Pelita Jaya Bakrie,Indonesia,24,190,90,7,6
1,Reggie Mononimbar,Pelita Jaya Bakrie,Indonesia,21,185,86,6,3
2,Hardianus Lakudu,Satria Muda Pertamina Jakarta,Indonesia,23,178,83,10,3
3,Kevin Yonas Sitorus,Satria Muda Pertamina Jakarta,Indonesia,26,185,75,11,4
4,Arki Dikania Wisnu,Satria Muda Pertamina Jakarta,Indonesia,20,183,80,5,2
5,Laurentius Steven Oei,Satria Muda Pertamina Jakarta,Indonesia,21,191,85,4,10
6,Mei Joni,Stapac,Indonesia,25,188,90,7,5
7,Vincent Rivaldi Kosasih,Stapac,Indonesia,23,179,87,1,2
8,Hardian Wicaksono,Pacific Caesar Surabaya,Indonesia,21,177,80,9,8
9,Brandon Jawato,Louvre Surabaya,Indonesia,24,182,85,6,5


In [24]:
prediksi['Rekomendasi'] = prediksi[['Age', 'Height', 'Weight', 'Avg Points', 'Avg Rebounds']].apply(
    lambda row: model2.predict([row.values])[0], axis =1
)
prediksi['Rekomendasi'] = prediksi.apply(
    lambda row: 'Diterima' if row['Rekomendasi'] == 1 else 'Tidak Diterima', axis = 1
)
prediksi

Unnamed: 0,Name,Club,Country,Age,Height,Weight,Avg Points,Avg Rebounds,Rekomendasi
0,Andakara Prastawa Dyaksa,Pelita Jaya Bakrie,Indonesia,24,190,90,7,6,Diterima
1,Reggie Mononimbar,Pelita Jaya Bakrie,Indonesia,21,185,86,6,3,Tidak Diterima
2,Hardianus Lakudu,Satria Muda Pertamina Jakarta,Indonesia,23,178,83,10,3,Tidak Diterima
3,Kevin Yonas Sitorus,Satria Muda Pertamina Jakarta,Indonesia,26,185,75,11,4,Tidak Diterima
4,Arki Dikania Wisnu,Satria Muda Pertamina Jakarta,Indonesia,20,183,80,5,2,Tidak Diterima
5,Laurentius Steven Oei,Satria Muda Pertamina Jakarta,Indonesia,21,191,85,4,10,Tidak Diterima
6,Mei Joni,Stapac,Indonesia,25,188,90,7,5,Diterima
7,Vincent Rivaldi Kosasih,Stapac,Indonesia,23,179,87,1,2,Tidak Diterima
8,Hardian Wicaksono,Pacific Caesar Surabaya,Indonesia,21,177,80,9,8,Tidak Diterima
9,Brandon Jawato,Louvre Surabaya,Indonesia,24,182,85,6,5,Tidak Diterima
