In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('/content/sample_data/KBO increased data_over 30 AB.csv')

In [3]:
R = df["R"].to_numpy()
R = R.reshape(-1, 1)

RBI = df["RBI"].to_numpy()
RBI = RBI.reshape(-1, 1)

WAR = df["WAR"].to_numpy()
WAR = WAR.reshape(-1, 1)

df = np.concatenate((R, RBI, WAR), axis=1)

# Split train set, val set and test set
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
train_set, val_set = train_test_split(train_set, test_size=0.2, random_state=42)

In [4]:
X_train = train_set[:, 0:2]
y_train = train_set[:, 2]
X_val = val_set[:, 0:2]
y_val = val_set[:, 2]
X_test = test_set[:, 0:2]
y_test = test_set[:, 2]

In [5]:
%pip install -q -U keras-tuner

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m122.9/129.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [8]:
import tensorflow as tf
import keras_tuner as kt

def rmse(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

tf.random.set_seed(42)

def build_model(hp):
    n_hidden = hp.Int("n_hidden", min_value=4, max_value=12, default=4)
    n_neurons = hp.Int("n_neurons", min_value=32, max_value=128)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2,
                             sampling="log")
    optimizer = hp.Choice("optimizer", values=["adam", 'NAG', 'RMSprop'])

    if optimizer == "NAG":
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True, weight_decay=1e-4)
    elif optimizer == "RMSprop":
        optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate, rho=0.9, weight_decay=1e-4)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, weight_decay=1e-4)

    model = tf.keras.Sequential()

    for _ in range(n_hidden):
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dense(n_neurons, activation="relu"))

    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dense(1))

    model.compile(loss=rmse, optimizer=optimizer)
    return model

In [10]:
random_search_tuner = kt.RandomSearch(
    build_model, objective='val_loss', max_trials=50, overwrite=True,
    directory='Predict WAR', project_name='my_rnd_search', seed=42
)
random_search_tuner.search(
    X_train, y_train, epochs=100,
    validation_data=(X_val, y_val), batch_size=32,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss', restore_best_weights=True)])

Trial 50 Complete [00h 00m 23s]
val_loss: 0.9285913705825806

Best val_loss So Far: 0.9285913705825806
Total elapsed time: 00h 17m 36s


In [11]:
random_search_tuner.results_summary()

Results summary
Results in Predict WAR/my_rnd_search
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 49 summary
Hyperparameters:
n_hidden: 11
n_neurons: 43
learning_rate: 0.005343634220268847
optimizer: RMSprop
Score: 0.9285913705825806

Trial 31 summary
Hyperparameters:
n_hidden: 5
n_neurons: 54
learning_rate: 0.003098269932407016
optimizer: adam
Score: 1.0014435052871704

Trial 43 summary
Hyperparameters:
n_hidden: 8
n_neurons: 36
learning_rate: 0.005218086982564764
optimizer: RMSprop
Score: 1.0239744186401367

Trial 23 summary
Hyperparameters:
n_hidden: 8
n_neurons: 96
learning_rate: 0.0014172041528226799
optimizer: adam
Score: 1.065461277961731

Trial 21 summary
Hyperparameters:
n_hidden: 9
n_neurons: 36
learning_rate: 0.007586966748878234
optimizer: NAG
Score: 1.0750149488449097

Trial 03 summary
Hyperparameters:
n_hidden: 12
n_neurons: 40
learning_rate: 0.008547485565344062
optimizer: adam
Score: 1.0835633277893066

Trial 18 summary
Hyperparameters:
n_hi

In [12]:
top3_params = random_search_tuner.get_best_hyperparameters(num_trials=3)
top3_params[0].values

{'n_hidden': 11,
 'n_neurons': 43,
 'learning_rate': 0.005343634220268847,
 'optimizer': 'RMSprop'}

In [13]:
best_trial = random_search_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()

Trial 49 summary
Hyperparameters:
n_hidden: 11
n_neurons: 43
learning_rate: 0.005343634220268847
optimizer: RMSprop
Score: 0.9285913705825806


In [14]:
best_model = random_search_tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [15]:
y_pred = best_model.predict(X_test)
rmse = tf.sqrt(tf.reduce_mean(tf.square(y_test - y_pred)))
rmse

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 569ms/step


<tf.Tensor: shape=(), dtype=float64, numpy=2.135623931049975>

In [17]:
# Hwang WAR(-0.30) Prediction
hwang = best_model.predict(np.array([[60, 58]]))
hwang

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 345ms/step


array([[1.4335467]], dtype=float32)

In [18]:
# Rojas WAR(6.50) Prediction
Rojas = best_model.predict(np.array([[108, 112]]))
Rojas

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step


array([[6.7689238]], dtype=float32)

In [19]:
# Kim Do Yeong WAR(8.32) Prediction
DO_YEONG = best_model.predict(np.array([[143, 109]]))
DO_YEONG

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step


array([[11.544644]], dtype=float32)

In [20]:
# Choi Jung WAR(4.55) Prediction
Choi_Jung = best_model.predict(np.array([[93, 107]]))
Choi_Jung

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


array([[6.072708]], dtype=float32)

In [21]:
# Kang Baek Ho WAR(2.12) Prediction
BaekHo = best_model.predict(np.array([[92, 96]]))
BaekHo

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step


array([[5.411833]], dtype=float32)