In [65]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score, classification_report
from torch import optim, nn
import wandb
from tqdm import tqdm

wandb.login()

True

In [66]:
config = {
    "learning_rate": 0.005,
    "architecture": "NN",
    "dataset": "static_1.1",
    "epochs": 50,
    "classes": 2,
    "batch_size": 64,
    "num_layers": 15,
    "hidden_size": 16384,
    "dropout_prob": 0.1,
    "input_size": 329,
    "output_size": 1,
    "optimizer": "Adam",
    "loss": "CrossEntropyLoss",
    "activation": "ReLU",
    "decrease_size": True,
}

In [67]:
def model_pipeline(hyperparameters):
    with wandb.init(project="leaguify", config=hyperparameters):
        # access all HPs through wandb.config, so logging matches execution!
        config = wandb.config
        # make the model, data, and optimization problem
        model, train_loader, val_loader, test_loader, criterion, optimizer = make(config)
        print(model)

        # and use them to train the model
        train(model, train_loader, val_loader, criterion, optimizer, config)

        # and test its final performance
        test(model, val_loader)
        test(model, test_loader)

    return model

In [68]:
def get_activation(name):
    if name == 'ReLU':
        return nn.ReLU()
    elif name == 'LeakyReLU':
        return nn.LeakyReLU()
    elif name == 'ELU':
        return nn.ELU()
    elif name == 'SELU':
        return nn.SELU()
    elif name == 'Tanh':
        return nn.Tanh()
    elif name == 'Sigmoid':
        return nn.Sigmoid()
    else:
        raise ValueError(f'Activation {name} not supported')

In [69]:
class StaticDataset(Dataset):
    def __init__(self, data_dir, transform=None, target_transform=None):
        self.data = torch.tensor(np.load(data_dir)[:, :-1], dtype=torch.float32, device=device)
        self.labels = torch.tensor(np.load(data_dir)[:, -1], dtype=torch.int64, device=device)
        self.transform = transform
        self.target_transform = target_transform
        self.print_statistics()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx, 1:]
        label = self.labels[idx]
        if self.transform:
            sample = self.transform(sample)
        if self.target_transform:
            label = self.target_transform(label)
        return sample, label

    def print_statistics(self):
        print(f'Number of samples: {len(self.data)}')
        print(f'Number of features: {len(self.data[0])}')
        print(f'Number of classes: {len(np.unique(self.labels.cpu().numpy()))}')
        print(f'Number of samples per class: {np.bincount(self.labels.cpu().numpy())}')

In [70]:
def make(config):
    train_data, val_data = get_train_data()
    train_loader = make_loader(train_data, batch_size=config.batch_size)
    val_loader = make_loader(val_data, batch_size=config.batch_size)
    test_loader = make_loader(get_test_data(), batch_size=config.batch_size)
    activation = get_activation(config.activation)

    model = NeuralNetwork(config.input_size, config.hidden_size, config.num_layers, config.dropout_prob,
                          config.classes, activation, config.decrease_size).to(device)
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    else:
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    return model, train_loader, val_loader, test_loader, criterion, optimizer

In [71]:
def get_train_data(val_split=0.8):
    dataset = StaticDataset('../data/processed/train_static.npy')
    train_len = int(len(dataset) * val_split)
    val_len = len(dataset) - train_len
    print(f'train_len: {train_len}, val_len: {val_len}')
    return torch.utils.data.random_split(dataset, [train_len, val_len])

In [72]:
def get_test_data():
    return StaticDataset('../data/processed/test_static.npy')

In [73]:
def make_loader(dataset, batch_size=64):
    return DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [74]:
device = (
    "cuda" if torch.cuda.is_available()
    else "cpu"
)
if torch.cuda.is_available():
    print(f'PyTorch version: {torch.__version__}')
    print('*' * 10)
    print(f'_CUDA version: ')
    !nvcc --version
    print('*' * 10)
    print(f'CUDNN version: {torch.backends.cudnn.version()}')
    print(f'Available GPU devices: {torch.cuda.device_count()}')
    print(f'Device Name: {torch.cuda.get_device_name()}')
print(f"Using {device} device")

PyTorch version: 2.1.0+cu121
**********
_CUDA version: 
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:09:35_Pacific_Daylight_Time_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
**********
CUDNN version: 8801
Available GPU devices: 1
Device Name: NVIDIA GeForce RTX 2080
Using cuda device


In [75]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_prob, output_size=1, activation=nn.ReLU(),
                 decrease_size=False):
        super(NeuralNetwork, self).__init__()
        self.dropout = nn.Dropout(dropout_prob)
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential()
        self.linear_relu_stack.append(nn.Linear(input_size, hidden_size))
        for i in range(num_layers - 1):
            if decrease_size:
                next_hidden_size = int(self.hidden_size // 2)
            else:
                next_hidden_size = self.hidden_size
            self.linear_relu_stack.append(self.dropout)
            self.linear_relu_stack.append(nn.BatchNorm1d(self.hidden_size))
            self.linear_relu_stack.append(nn.Linear(self.hidden_size, next_hidden_size))
            self.linear_relu_stack.append(activation)
            self.hidden_size = next_hidden_size
        self.linear_relu_stack.append(nn.Linear(self.hidden_size, self.output_size))
        self.linear_relu_stack.append(nn.Sigmoid())

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [76]:
def train(model, train_loader, val_loader, criterion, optimizer, config):
    wandb.watch(model, criterion, log="all", log_freq=10)
    example_count = 0
    batch_count = 0
    for epoch in tqdm(range(config.epochs)):
        model.train()
        for _, (matches, labels) in enumerate(train_loader):
            matches, labels = matches.to(device), labels.to(device)
            loss = train_batch(matches, labels, model, optimizer, criterion)
            example_count += len(matches)
            batch_count += 1
            if ((batch_count + 1) % 25) == 0:
                train_log(loss, example_count, epoch)
                test(model, val_loader)

In [77]:
def train_batch(matches, labels, model, optimizer, criterion):
    matches, labels = matches.to(device), labels.to(device)

    # Forward pass ➡
    outputs = model(matches)
    loss = criterion(outputs, labels)

    # Backward pass ⬅
    optimizer.zero_grad()
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss

In [78]:
def train_log(loss, example_count, epoch):
    wandb.log({"epoch": epoch, "loss": loss}, step=example_count)
    print(f"Loss after {str(example_count).zfill(5)} examples: {loss:.3f}")

In [79]:
def test(model, test_loader):
    model.eval()

    # Run the model on some test examples
    with torch.no_grad():
        correct, total = 0, 0
        y_pred = []
        y_true = []
        for matches, labels in test_loader:
            matches, labels = matches.to(device), labels.to(device)
            outputs = model(matches)
            _, predicted = torch.max(outputs.data, 1)
            y_pred.extend(predicted.cpu().numpy())
            y_true.extend(labels.cpu().numpy())
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print(classification_report(y_pred, y_true))

        print(f"Accuracy of the model on the {total} " +
              f"test matches: {correct / total:%}")

        wandb.log({"test_accuracy": correct / total})

In [80]:
model = model_pipeline(config)

Number of samples: 16159
Number of features: 330
Number of classes: 2
Number of samples per class: [7692 8467]
train_len: 12927, val_len: 3232
Number of samples: 1995
Number of features: 330
Number of classes: 2
Number of samples per class: [ 959 1036]
NeuralNetwork(
  (dropout): Dropout(p=0.1, inplace=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=329, out_features=16384, bias=True)
    (1): Dropout(p=0.1, inplace=False)
    (2): BatchNorm1d(16384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=16384, out_features=8192, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.1, inplace=False)
    (6): BatchNorm1d(8192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Linear(in_features=8192, out_features=4096, bias=True)
    (8): ReLU()
    (9): Dropout(p=0.1, inplace=False)
    (10): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_sta

  0%|          | 0/50 [00:00<?, ?it/s]

Loss after 01536 examples: 0.698


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       1.00      0.48      0.64      3232
           1       0.00      0.00      0.00         0

    accuracy                           0.48      3232
   macro avg       0.50      0.24      0.32      3232
weighted avg       1.00      0.48      0.64      3232

Accuracy of the model on the 3232 test matches: 47.586634%
Loss after 03136 examples: 0.693
              precision    recall  f1-score   support

           0       0.55      0.49      0.52      1732
           1       0.48      0.54      0.50      1500

    accuracy                           0.51      3232
   macro avg       0.51      0.51      0.51      3232
weighted avg       0.51      0.51      0.51      3232

Accuracy of the model on the 3232 test matches: 50.990099%
Loss after 04736 examples: 0.693
              precision    recall  f1-score   support

           0       0.98      0.48      0.64      3151
           1       0.03      0.54      0.05        81

  2%|▏         | 1/50 [04:37<3:46:23, 277.21s/it]

Loss after 14335 examples: 0.702
              precision    recall  f1-score   support

           0       0.36      0.46      0.40      1198
           1       0.62      0.52      0.56      2034

    accuracy                           0.50      3232
   macro avg       0.49      0.49      0.48      3232
weighted avg       0.52      0.50      0.50      3232

Accuracy of the model on the 3232 test matches: 49.504950%
Loss after 15935 examples: 0.692
              precision    recall  f1-score   support

           0       0.84      0.48      0.61      2727
           1       0.16      0.52      0.24       505

    accuracy                           0.48      3232
   macro avg       0.50      0.50      0.42      3232
weighted avg       0.74      0.48      0.55      3232

Accuracy of the model on the 3232 test matches: 48.298267%
Loss after 17535 examples: 0.695
              precision    recall  f1-score   support

           0       0.98      0.48      0.64      3148
           1       0

  2%|▏         | 1/50 [06:50<5:35:03, 410.27s/it]
Traceback (most recent call last):
  File "C:\Users\morit\AppData\Local\Temp\ipykernel_20652\2392872062.py", line 10, in model_pipeline
    train(model, train_loader, val_loader, criterion, optimizer, config)
  File "C:\Users\morit\AppData\Local\Temp\ipykernel_20652\1603276510.py", line 9, in train
    loss = train_batch(matches, labels, model, optimizer, criterion)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\morit\AppData\Local\Temp\ipykernel_20652\146055341.py", line 5, in train_batch
    outputs = model(matches)
              ^^^^^^^^^^^^^^
  File "C:\Users\morit\AppData\Local\pypoetry\Cache\virtualenvs\leaguify-VaCbhr8h-py3.11\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\morit\AppData\Local\pypoetry\Cache\virtualenvs\leaguify-VaCbhr8h-py3.11\Lib\site-pac

0,1
epoch,▁▁▁▁▁▁▁▁█████
loss,▅▂▂▂▂▂▂▂█▁▃▂▂
test_accuracy,▁█▂▁▂▂▂▂▅▃▂▁▁

0,1
epoch,1.0
loss,0.69315
test_accuracy,0.47556


KeyboardInterrupt: 