In [1]:
import pandas as pd, numpy as np, sklearn as sklm
import tensorflow as tf, sqlalchemy 
from sklearn.linear_model import LogisticRegression, LinearRegression 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import train_test_split 
import joblib 
pd.set_option('display.max_columns', None)
import os, sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('functions.py'), '..', '..', 'PY')))
import functions 
import joblib 
from keras.models import load_model 
# np.suppress 

In [2]:
db_path = 'sqlite:///C:\\Users\\bchan\\OneDrive\\Personal Projects\\BID_Django\\ballindata\\DB\\ballbase.db' 

engine = sqlalchemy.create_engine(db_path) 
seasons = functions.generate_seasons(1979, 2024) 
master = pd.read_sql('master_as', con=engine) 
ssn = pd.DataFrame() 
numeric_df = pd.read_sql('numeric_as', con=engine) 

X_train, X_test, y_train, y_test = train_test_split(numeric_df[['PPG', 'RPG', 'APG', 'SPG', 'BPG']], numeric_df['AS'], random_state=1, test_size=0.2) 

numeric_select = numeric_df.loc[:, ['PPG', 'RPG', 'APG', 'SPG', 'BPG']]


# Neural Network 

In [4]:
nn_path = "../../MLModels/as_nn_tool.keras"

if(not(os.path.exists(nn_path))): 
    numeric_tensor = tf.convert_to_tensor(numeric_df) 
    normalizer = tf.keras.layers.Normalization(axis=-1) 
    normalizer.adapt(numeric_tensor) 

    seq_model = tf.keras.models.Sequential() 
    # seq_model.add(normalizer) 
    seq_model.add(tf.keras.layers.Dense(units=10, activation='relu', input_shape=(5, ))) 
    seq_model.add(tf.keras.layers.Dense(units=10, activation='relu')) 
    seq_model.add(tf.keras.layers.Dense(units=1, activation='sigmoid')) 

    seq_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) 

    seq_model.fit(x=X_train, y=y_train, epochs=10, batch_size=1) 
    seq_model.save(nn_path) 
else:
    seq_model = tf.keras.models.load_model(nn_path)

In [5]:
seq_model.predict(tf.convert_to_tensor([[25,6,1, 1, 0]]))[0][0] 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 268ms/step


0.34940857

# Logistic Regression

In [3]:
lr_path = "../../MLModels/as_lr_tool.pkl"

if(os.path.exists(lr_path)):
    lr_as = joblib.load(lr_path) 
else:
    lr_as = LogisticRegression() 
    lr_as.fit(X=numeric_select, y=numeric_df['AS']) 
    joblib.dump(lr_as, lr_path) 

lr_as = LogisticRegression() 
lr_as.fit(X=numeric_select, y=numeric_df['AS']) 
joblib.dump(lr_as, lr_path) 

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


['../../MLModels/as_lr_tool.pkl']

In [4]:
lr_as.predict([[20, 10, 6, 0, 0.5]])[0] 

1.0

# Random Forest 

In [5]:
rf_path = "../../MLModels/as_rf_tool.pkl"

if(os.path.exists(rf_path)): 
    rf_as = joblib.load(rf_path) 
else: 
    rf_as = sklm.ensemble.RandomForestClassifier()  
    rf_as.fit(X=numeric_select, y=numeric_df['AS'])
    joblib.dump(rf_as, rf_path)

rf_as = sklm.ensemble.RandomForestClassifier()  
rf_as.fit(X=numeric_select, y=numeric_df['AS'])
joblib.dump(rf_as, rf_path)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


['../../MLModels/as_rf_tool.pkl']

In [7]:
rf_as.predict_proba([[20, 10, 5, 1, 0]])[0] 

array([0.65, 0.35])

# Large Models 

In [None]:
models = {
    'logistic_regression': joblib.load("C:\\Users\\bchan\OneDrive\Personal Projects\BID_Django\\ballindata\IPYNB\Analyses\lr_model.pkl"), 
    'random_forest': joblib.load("C:\\Users\\bchan\OneDrive\Personal Projects\BID_Django\\ballindata\IPYNB\Analyses\\rf_model.pkl") 
} 
lr = joblib.load("C:\\Users\\bchan\OneDrive\Personal Projects\BID_Django\\ballindata\IPYNB\Analyses\lr_model.pkl") 
rf = joblib.load("C:\\Users\\bchan\OneDrive\Personal Projects\BID_Django\\ballindata\IPYNB\Analyses\\rf_model.pkl") 
nn = load_model("C:\\Users\\bchan\OneDrive\Personal Projects\BID_Django\\ballindata\IPYNB\Analyses\seq_model.keras") 

example = [numeric_df.iloc[18033].drop('AS').tolist()] 
pred_lr = lr.predict(example)[0] 
pred_rf = rf.predict(example)[0] 
pred_nn= nn.predict(tf.convert_to_tensor(example))[0][0].astype('float64')

print(f"{pred_lr} {pred_rf} {pred_nn}")
pred_lr == pred_rf 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
1.0 1.0 0.936040997505188


True

# Upload to Database 

In [10]:
stat_names = numeric_df.columns.drop('AS').tolist()
stat_names 

['Age',
 'GP',
 'MPG',
 'FG',
 'FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'eFG%',
 'FT',
 'FTA',
 'FT%',
 'ORPG',
 'DRPG',
 'RPG',
 'APG',
 'SPG',
 'BPG',
 'ToPG',
 'PF',
 'PPG',
 'T2P',
 'T2PA',
 'T3P',
 'T3PA',
 'TAST',
 'TBLK',
 'TDRB',
 'TFG',
 'TFGA',
 'TFT',
 'TFTA',
 'TMP',
 'TORB',
 'TPF',
 'TPTS',
 'TSTL',
 'TTOV',
 'TTRB',
 'DRtg',
 'ORtg',
 '3PAr',
 'AST%',
 'BLK%',
 'BPM',
 'DBPM',
 'DRB%',
 'DWS',
 'FTr',
 'OBPM',
 'ORB%',
 'OWS',
 'PER',
 'STL%',
 'TOV%',
 'TRB%',
 'TS%',
 'USG%',
 'VORP',
 'WS',
 'WS/48']

In [17]:
output_html = '<div class="input-row">\n' 
for i in range(len(stat_names)):
    output_html += f'''<label for="{stat_names[i]}" class="input-label"> {stat_names[i]}: </label><br>
    <input type="text" id={stat_names[i]} name={stat_names[i]} class="input-text-box" value=""><br>
    ''' 
    if (i+1)%4==0: 
        output_html += '''\n</div> 
<br> 
<div class="input-row"> ''' 
output_html += '\n</div>'
 
print(output_html) 

<div class="input-row">
<label for="Age" class="input-label"> Age: </label><br>
    <input type="text" id=Age name=Age class="input-text-box" value=""><br>
    <label for="GP" class="input-label"> GP: </label><br>
    <input type="text" id=GP name=GP class="input-text-box" value=""><br>
    <label for="MPG" class="input-label"> MPG: </label><br>
    <input type="text" id=MPG name=MPG class="input-text-box" value=""><br>
    <label for="FG" class="input-label"> FG: </label><br>
    <input type="text" id=FG name=FG class="input-text-box" value=""><br>
    
</div> 
<br> 
<div class="input-row"> <label for="FGA" class="input-label"> FGA: </label><br>
    <input type="text" id=FGA name=FGA class="input-text-box" value=""><br>
    <label for="FG%" class="input-label"> FG%: </label><br>
    <input type="text" id=FG% name=FG% class="input-text-box" value=""><br>
    <label for="3P" class="input-label"> 3P: </label><br>
    <input type="text" id=3P name=3P class="input-text-box" value=""><br>
 