In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [45]:
# Load the dataset CSV; its first column becomes the DataFrame index.
DATA_PATH = '/content/sample_data/final_df.csv'
df = pd.read_csv(DATA_PATH, index_col=0)

In [47]:
# Partition the data into train / validation / test subsets.
from sklearn.model_selection import train_test_split


def _features_and_target(frame):
    """Return (X, y) NumPy arrays: all columns except `WAR`, and the `WAR` column."""
    features = frame.drop(columns="WAR").to_numpy()
    target = frame["WAR"].copy().to_numpy()
    return features, target


# Hold out 20% as the test set, then 20% of the remainder as the validation set.
# Both splits use a fixed random_state so the partition is repeatable.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
train_set, val_set = train_test_split(train_set, test_size=0.2, random_state=42)

# Separate features from labels for each subset.
X_train, y_train = _features_and_target(train_set)
X_val, y_val = _features_and_target(val_set)
X_test, y_test = _features_and_target(test_set)

In [48]:
# Display the dimensions of the training feature matrix.
np.shape(X_train)

(121, 12)

In [5]:
# Install (or upgrade, -U) the keras-tuner package quietly (-q) into the kernel's environment.
# NOTE(review): no version is pinned, so a re-run may pick up a different release.
%pip install -q -U keras-tuner

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [50]:
import tensorflow as tf
import keras_tuner as kt

def rmse(y_true, y_pred):
    """Root-mean-squared error computed after exponentiating both tensors.

    Both arguments are passed through ``exp`` before differencing, so the
    error is measured on exp(y) rather than on y itself.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predicted values.

    Returns:
        Scalar tensor equal to sqrt(mean((exp(y_true) - exp(y_pred)) ** 2)).
    """
    residual = tf.math.exp(y_true) - tf.math.exp(y_pred)
    mean_squared = tf.reduce_mean(tf.square(residual))
    return tf.sqrt(mean_squared)

# Seed Python's `random`, NumPy and TensorFlow in one call. tf.random.set_seed
# alone only seeds TensorFlow, leaving the other two generators unseeded
# (Keras 3 appears to draw its default initializer seeds from Python's `random`).
tf.keras.utils.set_random_seed(42)

def build_model(hp):
    """Build and compile an MLP regressor from KerasTuner hyperparameters.

    Search space (registered on ``hp`` in this order):
        n_hidden: int in [4, 12], default 4; number of hidden Dense layers.
        n_neurons: int in [32, 128]; units in every hidden layer.
        learning_rate: float in [1e-4, 1e-2], log-sampled.
        optimizer: one of "adam", "NAG", "RMSprop".

    Every Dense layer (hidden and output) is preceded by a BatchNormalization
    layer. The model is compiled with the notebook-level ``rmse`` function as
    its loss.

    Args:
        hp: KerasTuner HyperParameters object used to sample the values above.

    Returns:
        A compiled ``tf.keras.Sequential`` model with a single linear output unit.
    """
    depth = hp.Int("n_hidden", min_value=4, max_value=12, default=4)
    width = hp.Int("n_neurons", min_value=32, max_value=128)
    lr = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    optimizer_name = hp.Choice("optimizer", values=["adam", 'NAG', 'RMSprop'])

    # Factories are evaluated lazily so only the selected optimizer is constructed.
    def _nag():
        return tf.keras.optimizers.SGD(
            learning_rate=lr, momentum=0.9, nesterov=True, weight_decay=1e-4)

    def _rmsprop():
        return tf.keras.optimizers.RMSprop(learning_rate=lr, rho=0.9, weight_decay=1e-4)

    def _adam():
        return tf.keras.optimizers.Adam(learning_rate=lr, weight_decay=1e-4)

    # Any name other than "NAG" / "RMSprop" falls back to Adam.
    factories = {"NAG": _nag, "RMSprop": _rmsprop}
    chosen_optimizer = factories.get(optimizer_name, _adam)()

    # BatchNorm -> Dense(relu) repeated `depth` times, then BatchNorm -> Dense(1).
    layer_stack = []
    for _ in range(depth):
        layer_stack.append(tf.keras.layers.BatchNormalization())
        layer_stack.append(tf.keras.layers.Dense(width, activation="relu"))
    layer_stack.append(tf.keras.layers.BatchNormalization())
    layer_stack.append(tf.keras.layers.Dense(1))

    model = tf.keras.Sequential(layer_stack)
    model.compile(loss=rmse, optimizer=chosen_optimizer)
    return model

In [51]:
# Random search over build_model's hyperparameter space: 50 trials ranked by
# validation loss. overwrite=True starts fresh instead of resuming results
# already stored under the same directory / project name.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

random_search_tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=50,
    seed=42,
    overwrite=True,
    directory='Predict WAR',
    project_name='my_rnd_search',
)

# Each trial trains for at most 100 epochs and stops early once val_loss has
# not improved for 10 consecutive epochs.
search_options = dict(
    epochs=100,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)
random_search_tuner.search(X_train, y_train, **search_options)

Trial 50 Complete [00h 00m 23s]
val_loss: 2.3556551933288574

Best val_loss So Far: 0.8983734846115112
Total elapsed time: 00h 20m 17s


In [58]:
# Print the hyperparameters and scores of the ten best trials.
random_search_tuner.results_summary(num_trials=10)

Results summary
Results in Predict WAR/my_rnd_search
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 43 summary
Hyperparameters:
n_hidden: 8
n_neurons: 36
learning_rate: 0.005218086982564764
optimizer: RMSprop
Score: 0.8983734846115112

Trial 24 summary
Hyperparameters:
n_hidden: 5
n_neurons: 32
learning_rate: 0.0034928656354620575
optimizer: NAG
Score: 1.0388453006744385

Trial 37 summary
Hyperparameters:
n_hidden: 4
n_neurons: 105
learning_rate: 0.0005580490517344697
optimizer: NAG
Score: 1.082019329071045

Trial 06 summary
Hyperparameters:
n_hidden: 4
n_neurons: 39
learning_rate: 0.00304325168554751
optimizer: RMSprop
Score: 1.1811290979385376

Trial 20 summary
Hyperparameters:
n_hidden: 8
n_neurons: 37
learning_rate: 0.008347597753558379
optimizer: RMSprop
Score: 1.1982266902923584

Trial 40 summary
Hyperparameters:
n_hidden: 10
n_neurons: 40
learning_rate: 0.0006220323522895685
optimizer: NAG
Score: 1.2106966972351074

Trial 27 summary
Hyperparameters:
n_

In [59]:
# Hyperparameter sets of the three best trials; display the values of the first one.
top3_params = random_search_tuner.get_best_hyperparameters(3)
best_hyperparameters = top3_params[0]
best_hyperparameters.values

{'n_hidden': 8,
 'n_neurons': 36,
 'learning_rate': 0.005218086982564764,
 'optimizer': 'RMSprop'}

In [60]:
# Fetch the single best trial from the tuner's oracle and print its summary.
leading_trials = random_search_tuner.oracle.get_best_trials(1)
best_trial = leading_trials[0]
best_trial.summary()

Trial 43 summary
Hyperparameters:
n_hidden: 8
n_neurons: 36
learning_rate: 0.005218086982564764
optimizer: RMSprop
Score: 0.8983734846115112


In [88]:
# Retrieve the top-ranked model from the finished search.
best_models = random_search_tuner.get_best_models(1)
best_model = best_models[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [89]:
# Training-set RMSE on the back-transformed scale.
from sklearn.metrics import root_mean_squared_error

y_pred = best_model.predict(X_train)

# The result is stored as `train_rmse`, not `rmse`: rebinding `rmse` would
# replace the loss function that build_model passes to model.compile, breaking
# any later model build (e.g. re-running get_best_models).
# The back-transform exp(y) - 1.351 matches the single-player prediction cells;
# the constant offset cancels inside the RMSE, so the value is unaffected.
train_rmse = root_mean_squared_error(np.exp(y_train) - 1.351, np.exp(y_pred) - 1.351)
train_rmse

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step


0.9084251121791694

In [90]:
# Test-set RMSE on the back-transformed scale.
y_pred = best_model.predict(X_test)

# The result is stored as `test_rmse`, not `rmse`: rebinding `rmse` would
# replace the loss function that build_model passes to model.compile, breaking
# any later model build (e.g. re-running get_best_models).
# The back-transform exp(y) - 1.351 matches the single-player prediction cells;
# the constant offset cancels inside the RMSE, so the value is unaffected.
test_rmse = root_mean_squared_error(np.exp(y_test) - 1.351, np.exp(y_pred) - 1.351)
test_rmse

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


1.1184691675816831

In [91]:
# Chang-Ki Hong (6.67) 2023
# Feature row: one raw value, seven log-transformed values, then four raw values.
# NOTE(review): the recorded output (~14.14) is far from the 6.67 in the header;
# presumably the feature order / transforms should be checked against the
# training columns. TODO confirm.
logged_stats = np.log([109, 1, 216, 65, 88, 3, 6])
feature_row = np.concatenate(([400], logged_stats, [0.332, 0.444, 0.412, 0.856]))
ChangKiHong = tf.constant(feature_row.reshape(1, 12))

# Undo the target transform: prediction -> exp(prediction) - 1.351.
y_pred = np.exp(best_model.predict(ChangKiHong)) - 1.351
y_pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


array([[14.139984]], dtype=float32)

In [92]:
# Austin (4.97) 2023
# Feature row: one raw value, seven log-transformed values, then four raw values.
# The variable keeps its original spelling `Auntin` so existing references still resolve.
# NOTE(review): the recorded output (~38.27) is far from the 4.97 in the header;
# presumably the feature order / transforms should be checked against the
# training columns. TODO confirm.
logged_stats = np.log([87, 23, 269, 95, 53, 2, 7])
feature_row = np.concatenate(([520], logged_stats, [0.313, 0.376, 0.517, 0.893]))
Auntin = tf.constant(feature_row.reshape(1, 12))

# Undo the target transform: prediction -> exp(prediction) - 1.351.
y_pred = np.exp(best_model.predict(Auntin)) - 1.351
y_pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


array([[38.2664]], dtype=float32)

In [93]:
# Jae-Gyun Hwang (3.14) 2023
# Feature row: one raw value, seven log-transformed values, then four raw values.
# NOTE(review): the recorded output (~6.06) differs noticeably from the 3.14 in
# the header; presumably the feature order / transforms should be checked
# against the training columns. TODO confirm.
logged_stats = np.log([62, 6, 168, 49, 45, 1, 1])
feature_row = np.concatenate(([407], logged_stats, [0.295, 0.366, 0.413, 0.779]))
JaeGyunHwang = tf.constant(feature_row.reshape(1, 12))

# Undo the target transform: prediction -> exp(prediction) - 1.351.
y_pred = np.exp(best_model.predict(JaeGyunHwang)) - 1.351
y_pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


array([[6.059135]], dtype=float32)

In [94]:
# Shin-Soo Choo (1.72) 2023
# Feature row: one raw value, seven log-transformed values, then four raw values.
# NOTE(review): the recorded output (~7.39) is far from the 1.72 in the header;
# presumably the feature order / transforms should be checked against the
# training columns. TODO confirm.
logged_stats = np.log([65, 12, 152, 41, 65, 1, 2])
feature_row = np.concatenate(([382], logged_stats, [0.254, 0.379, 0.398, 0.777]))
Choo = tf.constant(feature_row.reshape(1, 12))

# Undo the target transform: prediction -> exp(prediction) - 1.351.
y_pred = np.exp(best_model.predict(Choo)) - 1.351
y_pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


array([[7.387658]], dtype=float32)

In [95]:
# EunWonJung (0.74) 2023
# Feature row: one raw value, seven log-transformed values, then four raw values.
# NOTE(review): the recorded output (~5.79) is far from the 0.74 in the header;
# presumably the feature order / transforms should be checked against the
# training columns. TODO confirm.
logged_stats = np.log([50, 2, 104, 30, 62, 1, 1])
feature_row = np.concatenate(([388], logged_stats, [0.222, 0.333, 0.268, 0.601]))
EunWonJung = tf.constant(feature_row.reshape(1, 12))

# Undo the target transform: prediction -> exp(prediction) - 1.351.
y_pred = np.exp(best_model.predict(EunWonJung)) - 1.351
y_pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


array([[5.786073]], dtype=float32)