In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import optuna

In [2]:
from preprocessing import *
data_path = "data/game.csv"
X_train, X_test, y_train, y_test = prep_all(data_path)

X_train = X_train.values.reshape(-1, 1, 135)  # Reshape to (32520, 1, 136)
X_test = X_test.values.reshape(-1, 1, 135)    # Reshape to (N, 1, 136), where N is the number of test samples

train_data = TensorDataset(torch.tensor(X_train), torch.tensor(y_train.to_numpy()))
test_data = TensorDataset(torch.tensor(X_test), torch.tensor(y_test.to_numpy()))

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
valid_loader = DataLoader(test_data, batch_size=64, shuffle=False)

  rolling_averages = grouped[columns_to_average].apply(lambda x: x.rolling(window=len(x), min_periods=1).mean().shift(1))
To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  rolling_averages = grouped[columns_to_average].apply(lambda x: x.rolling(window=len(x), min_periods=1).mean().shift(1))
  df.columns = df.columns.str.replace('_y$', '')


In [3]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out


In [4]:
input_size = X_train.shape[2]  # The number of features in your preprocessed data
hidden_size = 50
num_layers = 1
output_size = 2  # Win or loss (binary classification)

model = LSTMModel(input_size, hidden_size, num_layers, output_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [35]:
num_epochs = 20

total_loss = []
acc = []

for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.float(), labels.long()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    val_loss = 0
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        correct = 0
        total = 0
        for inputs, labels in valid_loader:
            inputs, labels = inputs.float(), labels.long()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        total_loss.append(loss.item())
        acc.append(100 * correct / total)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Validation Acc: {100 * correct / total:.2f}%')



Epoch [1/20], Loss: 0.0363, Validation Acc: 60.54%
Epoch [2/20], Loss: 0.0772, Validation Acc: 60.39%
Epoch [3/20], Loss: 0.0650, Validation Acc: 60.38%
Epoch [4/20], Loss: 0.0558, Validation Acc: 60.48%
Epoch [5/20], Loss: 0.0478, Validation Acc: 60.48%
Epoch [6/20], Loss: 0.0846, Validation Acc: 60.08%
Epoch [7/20], Loss: 0.0166, Validation Acc: 60.26%
Epoch [8/20], Loss: 0.0198, Validation Acc: 60.42%
Epoch [9/20], Loss: 0.0534, Validation Acc: 60.18%
Epoch [10/20], Loss: 0.1047, Validation Acc: 60.09%
Epoch [11/20], Loss: 0.0368, Validation Acc: 60.48%
Epoch [12/20], Loss: 0.0368, Validation Acc: 60.57%
Epoch [13/20], Loss: 0.0286, Validation Acc: 60.54%
Epoch [14/20], Loss: 0.0846, Validation Acc: 60.33%
Epoch [15/20], Loss: 0.0326, Validation Acc: 60.31%
Epoch [16/20], Loss: 0.0325, Validation Acc: 60.52%
Epoch [17/20], Loss: 0.0427, Validation Acc: 60.39%
Epoch [18/20], Loss: 0.0319, Validation Acc: 60.29%
Epoch [19/20], Loss: 0.1085, Validation Acc: 60.66%
Epoch [20/20], Loss: 

In [36]:
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in valid_loader:
        inputs, labels = inputs.float(), labels.long()
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')


Test Accuracy: 60.56%


In [37]:
def objective(trial):
    # define hyperparameters to tune
    hidden_size = trial.suggest_categorical('hidden_size', [32, 64, 128, 256])
    num_layers = trial.suggest_int('num_layers', 1, 3)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1)

    # define the model
    model = LSTMModel(input_size, hidden_size, num_layers, output_size)

    # define the optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # train the model
    num_epochs = 20
    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            inputs, labels = inputs.float(), labels.long()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # evaluate the model on the validation set
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for inputs, labels in valid_loader:
            inputs, labels = inputs.float(), labels.long()
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    # calculate validation accuracy
    val_acc = 100.0 * total_correct / total_samples

    return val_acc


In [38]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

# print the best hyperparameters and the best validation accuracy
print("Best hyperparameters: ", study.best_params)
print("Best validation accuracy: ", study.best_value)

[32m[I 2023-05-03 13:03:49,272][0m A new study created in memory with name: no-name-c93f77ec-e098-4fee-99c6-7012f9b83754[0m
[32m[I 2023-05-03 13:13:03,151][0m Trial 0 finished with value: 61.36760668829548 and parameters: {'hidden_size': 256, 'num_layers': 3, 'learning_rate': 0.08681364710277525}. Best is trial 0 with value: 61.36760668829548.[0m
[32m[I 2023-05-03 13:18:55,384][0m Trial 1 finished with value: 64.81157973546294 and parameters: {'hidden_size': 256, 'num_layers': 2, 'learning_rate': 0.06945230449415078}. Best is trial 1 with value: 64.81157973546294.[0m
[32m[I 2023-05-03 13:24:34,055][0m Trial 2 finished with value: 61.91664586972798 and parameters: {'hidden_size': 256, 'num_layers': 2, 'learning_rate': 0.010758727969355912}. Best is trial 1 with value: 64.81157973546294.[0m
[32m[I 2023-05-03 13:26:28,574][0m Trial 3 finished with value: 66.3713501372598 and parameters: {'hidden_size': 32, 'num_layers': 3, 'learning_rate': 0.0520992927851131}. Best is trial 