In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from time import time

In [3]:
df1 = pd.read_csv('../data/match_data.csv')
X1 = df1.drop(columns=['Date', 'Team1', 'Team2', 'Score'])
y1 = df1['Score']
ds1_name = "match_data"

df2 = pd.read_csv('../data/cleaned_rounds_data.csv')
df2 = df2.drop(columns=[f'player_{i}_{suffix}' for i in range(1, 11) for suffix in ['team_name', 'name']])
X2 = df2.drop(['round_winner'], axis=1)
y2 = df2['round_winner']
ds2_name = "rounds_data"

df3 = pd.read_csv('../data/cleaned_rounds_data_with_stats.csv')
df3 = df3.drop(columns=[f'player_{i}_{suffix}' for i in range(1, 11) for suffix in ['team_name', 'name']])
X3 = df3.drop(['round_winner'], axis=1)
y3 = df3['round_winner']
ds3_name = "rounds_data_with_stats"

datasets = [(X1, y1, ds1_name), (X2, y2, ds2_name), (X3, y3, ds3_name)]

In [16]:
learning_rate = [0.001, 0.0005, 0.0001]
dropout_rate = [0.3, 0.4, 0.5]
l2_reg = [0.01, 0.001, 0.0001]
batch_size = [32, 64, 128]
epochs = [100, 200, 300, 500, 750, 1000]

In [23]:
best_params = []
for X, y, ds_name in datasets:
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, stratify=y)
    X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.5, random_state=42, stratify=y_test)
    best_acc = 0
    best_lr = 0
    best_dropout = 0
    best_l2 = 0
    best_batch_size = 0
    best_epochs = 0
    time_start = time()
    for lr in learning_rate:
        for dr in dropout_rate:
            for l2_r in l2_reg:
                for bs in batch_size:
                    for ep in epochs:
                        model = Sequential()
                        model.add(Input(shape=(X_train.shape[1],)))
                        model.add(Dense(128, activation='relu', kernel_regularizer=l2(l2_r)))
                        model.add(Dropout(dr))
                        model.add(Dense(64, activation='relu', kernel_regularizer=l2(l2_r)))
                        model.add(Dropout(dr))
                        model.add(Dense(32, activation='relu', kernel_regularizer=l2(l2_r)))
                        model.add(Dropout(dr))
                        model.add(Dense(1, activation='sigmoid'))
                        optimizer = Adam(learning_rate=lr)
                        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
                        es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
                        model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=bs, epochs=ep, callbacks=[es], verbose=0)
                        _, acc = model.evaluate(X_test, y_test, verbose=0)
                        if acc > best_acc:
                            print(f"{ds_name}: new best accuracy: {acc}, learning rate: {lr}, dropout rate: {dr}, l2: {l2_r}, batch size: {bs}, epochs: {ep}")
                            best_acc = acc
                            best_lr = lr
                            best_dropout = dr
                            best_l2 = l2_r
                            best_batch_size = bs
                            best_epochs = ep

    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))
    model.add(Dense(128, activation='relu', kernel_regularizer=l2(best_l2)))
    model.add(Dropout(best_dropout))
    model.add(Dense(64, activation='relu', kernel_regularizer=l2(best_l2)))
    model.add(Dropout(best_dropout))
    model.add(Dense(32, activation='relu', kernel_regularizer=l2(best_l2)))
    model.add(Dropout(best_dropout))
    model.add(Dense(1, activation='sigmoid'))
    optimizer = Adam(learning_rate=best_lr)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
    model.fit(X_train, y_train, validation_data=(X_val, y_val), batch_size=best_batch_size, epochs=best_epochs, callbacks=[es], verbose=0)

    _, acc = model.evaluate(X_test, y_test, verbose=0)
    best_params.append((ds_name, best_lr, best_dropout, best_l2, best_batch_size, best_epochs, acc))
    print(f"Dataset: {ds_name}, Accuracy: {acc}, Best_LR: {best_lr}, Best_Dropout: {best_dropout}, Best_L2: {best_l2}, Best_BS: {best_batch_size}, Best_Epochs: {best_epochs}, Duration: {time_end - time_start}")


match_data: new best accuracy: 0.60665363073349, learning rate: 0.001, dropout rate: 0.3, l2: 0.01, batch size: 32, epochs: 100
match_data: new best accuracy: 0.6203522682189941, learning rate: 0.001, dropout rate: 0.3, l2: 0.01, batch size: 32, epochs: 300
match_data: new best accuracy: 0.6223092079162598, learning rate: 0.001, dropout rate: 0.4, l2: 0.0001, batch size: 128, epochs: 1000
match_data: new best accuracy: 0.6301369667053223, learning rate: 0.001, dropout rate: 0.5, l2: 0.0001, batch size: 64, epochs: 500
Dataset: match_data, Accuracy: 0.6203522682189941, Best_LR: 0.001, Best_Dropout: 0.5, Best_L2: 0.0001, Best_BS: 64, Best_Epochs: 500, Duration: 3891.7956733703613
rounds_data: new best accuracy: 0.7697903513908386, learning rate: 0.001, dropout rate: 0.3, l2: 0.01, batch size: 32, epochs: 100
rounds_data: new best accuracy: 0.7732135057449341, learning rate: 0.001, dropout rate: 0.3, l2: 0.01, batch size: 32, epochs: 200
rounds_data: new best accuracy: 0.7890458106994629,

In [24]:
for ds_name, best_lr, best_dropout, best_l2, best_batch_size, best_epochs, acc in best_params:
    print(f"Dataset: {ds_name}, Accuracy: {acc}, Best_LR: {best_lr}, Best_Dropout: {best_dropout}, Best_L2: {best_l2}, Best_BS: {best_batch_size}, Best_Epochs: {best_epochs}")

Dataset: match_data, Accuracy: 0.6203522682189941, Best_LR: 0.001, Best_Dropout: 0.5, Best_L2: 0.0001, Best_BS: 64, Best_Epochs: 500
Dataset: rounds_data, Accuracy: 0.7762088179588318, Best_LR: 0.0005, Best_Dropout: 0.3, Best_L2: 0.001, Best_BS: 64, Best_Epochs: 1000
Dataset: rounds_data_with_stats, Accuracy: 0.7903294563293457, Best_LR: 0.0001, Best_Dropout: 0.3, Best_L2: 0.01, Best_BS: 128, Best_Epochs: 300
