Imports

In [9]:
import pandas as pd, numpy as np, sklearn as sklm
import tensorflow as tf, sqlalchemy 
from sklearn.linear_model import LogisticRegression, LinearRegression 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import train_test_split 
import joblib 
pd.set_option('display.max_columns', None)
import os, sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('functions.py'), '..', '..', 'PY')))
import functions 
from itertools import combinations 
from keras.models import load_model 
from sklearn.metrics import accuracy_score

Load Dataset

In [2]:
# db_path = 'sqlite:///C:\\Users\\bchan\\OneDrive\\PersonalProjects\\ballindata\\ballindata\\DB\\ballbase.db' 
db_path = 'postgresql+psycopg2://brandon:access@localhost:5432/ballbase' 
engine = sqlalchemy.create_engine(db_path) 
seasons = functions.generate_seasons(1979, 2024) 
master = pd.read_sql('master_1995_96', con=engine) 
ssn = pd.DataFrame() 
numeric_df = pd.read_sql('numeric_as', con=engine) 
numeric_df

stats = ['ppg', 'rpg', 'apg', 'spg', 'bpg'] 

X_train, X_test, y_train, y_test = train_test_split(numeric_df[stats], numeric_df['as'], random_state=1, test_size=0.2) 

numeric_select = numeric_df.loc[:, stats] 

Stat Combinations 

In [3]:
stat_combinations = [] 

for i in range(len(stats)+1):
    stat_combinations.extend(combinations(stats, i)) 
stat_combinations = stat_combinations[1:]

# Neural Network 

In [4]:
def tuple_to_string(t):
    s = "_".join(t)
    return s 
tuple_to_string(stat_combinations[6])

'ppg_apg'

In [7]:
for combo in stat_combinations:
    file_name = tuple_to_string(combo) 
    nn_path = f"../../MLModels/allstar/nn/{file_name}.keras"
    numeric_select = numeric_df.loc[:, combo] 
    X_train, X_test, y_train, y_test = train_test_split(numeric_df.loc[:,combo], numeric_df['as'], random_state=1, test_size=0.2) 
    if(not(os.path.exists(nn_path))): 
        numeric_tensor = tf.convert_to_tensor(numeric_select) 
        normalizer = tf.keras.layers.Normalization(axis=-1) 
        normalizer.adapt(numeric_tensor) 

        seq_model = tf.keras.models.Sequential() 
        seq_model.add(normalizer) 
        seq_model.add(tf.keras.layers.Dense(units=10, activation='relu', input_shape=(len(combo), ))) 
        seq_model.add(tf.keras.layers.Dense(units=10, activation='relu')) 
        seq_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid')) 

        seq_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 

        seq_model.fit(x=X_train, y=y_train, epochs=5, batch_size=5) 
        seq_model.save(nn_path) 
    else:
        seq_model = tf.keras.models.load_model(nn_path)
    
    score = seq_model.evaluate(X_test, y_test, verbose=1)
    print('Model: ', file_name)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    

[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 949us/step - accuracy: 0.9621 - loss: 0.1007
Model:  ppg
Test loss: 0.10303475707769394
Test accuracy: 0.9595851898193359
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9494 - loss: 0.1512
Model:  rpg
Test loss: 0.16566160321235657
Test accuracy: 0.9431002140045166
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9526 - loss: 0.1564
Model:  apg
Test loss: 0.16273905336856842
Test accuracy: 0.9481520652770996
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 858us/step - accuracy: 0.9508 - loss: 0.1620
Model:  spg
Test loss: 0.1732139140367508
Test accuracy: 0.945493221282959
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 822us/step - accuracy: 0.9508 - loss: 0.1817
Model:  bpg
Test loss: 0.1953943520784378
Test accuracy: 0.945493221282959
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83

# Logistic Regression

In [None]:
for combo in stat_combinations:
    file_name = tuple_to_string(combo) 
    lr_path = f"../../MLModels/allstar/lr/{file_name}.pkl"
    numeric_select = numeric_df.loc[:, combo] 
    X_train, X_test, y_train, y_test = train_test_split(numeric_df.loc[:,combo], numeric_df['as'], random_state=1, test_size=0.2) 
 
    if(os.path.exists(lr_path)):
        lr_as = joblib.load(lr_path) 
    else:
        lr_as = LogisticRegression() 
        lr_as.fit(X=X_train, y=y_train) 
        joblib.dump(lr_as, lr_path) 
    
    y_pred = lr_as.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print('Model: ', file_name)
    print('Test accuracy:', score) 

Model:  ppg
Test accuracy: 0.9595852166976868
Model:  rpg
Test accuracy: 0.9441637862270673
Model:  apg
Test accuracy: 0.9454932198883276
Model:  spg
Test accuracy: 0.9417708056367987
Model:  bpg
Test accuracy: 0.9452273331560755
Model:  ppg_rpg
Test accuracy: 0.960648763626695
Model:  ppg_apg
Test accuracy: 0.9601169901621909
Model:  ppg_spg
Test accuracy: 0.9587875565009306
Model:  ppg_bpg
Test accuracy: 0.9587875565009306
Model:  rpg_apg
Test accuracy: 0.9513427279978729
Model:  rpg_spg
Test accuracy: 0.9428343525658069
Model:  rpg_bpg
Test accuracy: 0.9441637862270673
Model:  apg_spg
Test accuracy: 0.9441637862270673
Model:  apg_bpg
Test accuracy: 0.9481520872108482
Model:  spg_bpg
Test accuracy: 0.9420366923690507
Model:  ppg_rpg_apg
Test accuracy: 0.9630417442169635
Model:  ppg_rpg_spg
Test accuracy: 0.9625099707524595
Model:  ppg_rpg_bpg
Test accuracy: 0.960382876894443
Model:  ppg_apg_spg
Test accuracy: 0.9611805370911991
Model:  ppg_apg_bpg
Test accuracy: 0.9611805370911991
Mo

# Random Forest 

In [None]:
for combo in stat_combinations:
    file_name = tuple_to_string(combo) 
    rf_path = f"../../MLModels/allstar/rf/{file_name}.pkl"
    numeric_select = numeric_df.loc[:, combo] 
    X_train, X_test, y_train, y_test = train_test_split(numeric_df.loc[:,combo], numeric_df['as'], random_state=1, test_size=0.2) 
 
    if(os.path.exists(rf_path)): 
        rf_as = joblib.load(rf_path) 
    else: 
        rf_as = sklm.ensemble.RandomForestClassifier()  
        rf_as.fit(X=X_train, y=y_train) 
        joblib.dump(rf_as, rf_path)
    
    y_pred = rf_as.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print('Model: ', file_name)
    print('Test accuracy:', score)

Model:  ppg
Test accuracy: 0.9574581228396704
Model:  rpg
Test accuracy: 0.9462908800850838
Model:  apg
Test accuracy: 0.9478862004785961
Model:  spg
Test accuracy: 0.9449614464238234
Model:  bpg
Test accuracy: 0.9449614464238234
Model:  ppg_rpg
Test accuracy: 0.9500132943366126
Model:  ppg_apg
Test accuracy: 0.9484179739431002
Model:  ppg_spg
Test accuracy: 0.9529380483913853
Model:  ppg_bpg
Test accuracy: 0.9510768412656209
Model:  rpg_apg
Test accuracy: 0.938846051582026
Model:  rpg_spg
Test accuracy: 0.9319329965434725
Model:  rpg_bpg
Test accuracy: 0.9441637862270673
Model:  apg_spg
Test accuracy: 0.934060090401489
Model:  apg_bpg
Test accuracy: 0.9372507311885137
Model:  spg_bpg
Test accuracy: 0.9351236373304972
Model:  ppg_rpg_apg
Test accuracy: 0.960648763626695
Model:  ppg_rpg_spg
Test accuracy: 0.9579898963041744
Model:  ppg_rpg_bpg
Test accuracy: 0.9585216697686786
Model:  ppg_apg_spg
Test accuracy: 0.956394575910662
Model:  ppg_apg_bpg
Test accuracy: 0.9579898963041744
Mode

In [27]:
# m = load_model("../../MLMODELS/seq_model.keras")
m = load_model("../../MLMODELS/allstar/nn/ppg.keras")
m.predict(tf.convert_to_tensor([[25]]))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step


array([[0.70785916]], dtype=float32)