In [88]:
import numpy as np
import torch
import wandb
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score, classification_report
from torch import optim, nn
import torchmetrics
from tqdm import tqdm
import lightning as L
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import TensorBoardLogger, WandbLogger, CSVLogger

if torch.cuda.is_available():
    print(f'PyTorch version: {torch.__version__}')
    print('*' * 10)
    print(f'_CUDA version: ')
    !nvcc --version
    print('*' * 10)
    print(f'CUDNN version: {torch.backends.cudnn.version()}')
    print(f'Available GPU devices: {torch.cuda.device_count()}')
    print(f'Device Name: {torch.cuda.get_device_name()}')
    device = "gpu"
else:
    device = "cpu"
print(f"Using {device} device")
wandb.login()
logger1 = WandbLogger(project='leaguify', log_model='all')
logger2 = TensorBoardLogger('lightning_logs')
csv_logger = CSVLogger('logs', name='leaguify')

PyTorch version: 2.1.0+cu121
**********
_CUDA version: 
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:09:35_Pacific_Daylight_Time_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
**********
CUDNN version: 8801
Available GPU devices: 1
Device Name: NVIDIA GeForce RTX 2080
Using gpu device


In [89]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_prob, output_size=1, activation=nn.ReLU(),
                 decrease_size=False):
        super(NeuralNetwork, self).__init__()
        self.dropout = nn.Dropout(dropout_prob)
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = output_size
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential()
        self.linear_relu_stack.append(nn.Linear(input_size, hidden_size))
        for i in range(num_layers - 1):
            if decrease_size:
                next_hidden_size = int(self.hidden_size // 2)
            else:
                next_hidden_size = self.hidden_size
            self.linear_relu_stack.append(self.dropout)
            self.linear_relu_stack.append(nn.BatchNorm1d(self.hidden_size))
            self.linear_relu_stack.append(nn.Linear(self.hidden_size, next_hidden_size))
            self.linear_relu_stack.append(activation)
            self.hidden_size = next_hidden_size
        self.linear_relu_stack.append(nn.Linear(self.hidden_size, self.output_size))
        self.linear_relu_stack.append(nn.Sigmoid())

    def forward(self, x):
        """
        
        :param x: 
        :return: 
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [90]:
class LNN(L.LightningModule):
    def __init__(self, input_size, hidden_size, num_layers, dropout_prob, output_size=1, activation=nn.ReLU(),
                 decrease_size=False):
        super().__init__()
        self.model = NeuralNetwork(input_size, hidden_size, num_layers, dropout_prob, output_size, activation,
                                   decrease_size)
        self.criterion = nn.BCELoss()
        self.save_hyperparameters()
        self.accuracy = torchmetrics.classification.BinaryAccuracy()
        self.f1 = torchmetrics.classification.BinaryF1Score()
        self.confusion_matrix = torchmetrics.classification.BinaryConfusionMatrix()

    def training_step(self, batch, batch_idx):
        x, y = batch
        x = x.type(torch.float32)
        y = y.type(torch.float32)
        y_hat = self.model(x).squeeze(-1)
        loss = self.criterion(y_hat, y)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc', self.accuracy(y_hat, y), prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        x = x.type(torch.float32)
        y = y.type(torch.float32)
        y_hat = self.model(x).squeeze(-1)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', self.accuracy(y_hat, y), prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        x = x.type(torch.float32)
        y = y.type(torch.float32)
        y_hat = self.model(x).squeeze(-1)
        loss = self.criterion(y_hat, y)
        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', self.accuracy(y_hat, y), prog_bar=True)
        self.log('test_f1', self.f1(y_hat, y), prog_bar=True)
        self.log('test_confusion_matrix', self.confusion_matrix(y_hat, y), prog_bar=True)
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

In [91]:
class StaticDataset(Dataset):
    def __init__(self, data_dir, transform=None, target_transform=None):
        self.data = torch.tensor(np.load(data_dir)[:, :-1], dtype=torch.float32, )
        self.labels = torch.tensor(np.load(data_dir)[:, -1], dtype=torch.int64)
        self.transform = transform
        self.target_transform = target_transform
        self.print_statistics()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx, 1:]
        label = self.labels[idx]
        if self.transform:
            sample = self.transform(sample)
        if self.target_transform:
            label = self.target_transform(label)
        return sample, label

    def print_statistics(self):
        print(f'Number of samples: {len(self.data)}')
        print(f'Number of features: {len(self.data[0])}')
        print(f'Number of labels: {len(self.labels)}')
        print(f'Number of classes: {len(np.unique(self.labels.cpu().numpy()))}')
        print(f'Number of samples per class: {np.bincount(self.labels.cpu().numpy())}')

In [92]:
def get_train_val_data(val_split=0.8):
    """
    :param val_split: 
    :return: 
    """
    dataset = StaticDataset('../data/processed/train_static.npy')
    train_len = int(len(dataset) * val_split)
    val_len = len(dataset) - train_len
    print(f'train_len: {train_len}, val_len: {val_len}')
    return torch.utils.data.random_split(dataset, [train_len, val_len])

In [93]:
def get_test_data():
    """
    
    :return: 
    """
    return StaticDataset('../data/processed/test_static.npy')

In [94]:
def make_loader(dataset, batch_size=64, shuffle=True):
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=0)

In [95]:
train_data, val_data = get_train_val_data()
train_loader = make_loader(train_data)
val_loader = make_loader(val_data, shuffle=False)
test_loader = make_loader(get_test_data(), shuffle=False)

Number of samples: 16159
Number of features: 330
Number of labels: 16159
Number of classes: 2
Number of samples per class: [7692 8467]
train_len: 12927, val_len: 3232
Number of samples: 1995
Number of features: 330
Number of labels: 1995
Number of classes: 2
Number of samples per class: [ 959 1036]


In [96]:
model = LNN(input_size=329, hidden_size=256, num_layers=5, dropout_prob=0.5, output_size=1, activation=nn.ReLU(),
            decrease_size=True)

In [97]:
trainer = L.Trainer(max_epochs=100, accelerator=device, callbacks=[EarlyStopping(monitor='val_loss', patience=10,
                                                                                 mode='min')], profiler='simple',
                    logger=[logger1, logger2, csv_logger])
trainer.fit(model, train_loader, val_loader)
trainer.test(model, test_loader)

wandb.finish()

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type                  | Params
-----------------------------------------------------------
0 | model            | NeuralNetwork         | 129 K 
1 | criterion        | BCELoss               | 0     
2 | accuracy         | BinaryAccuracy        | 0     
3 | f1               | BinaryF1Score         | 0     
4 | confusion_matrix | BinaryConfusionMatrix | 0     
-----------------------------------------------------------
129 K     Trainable params
0         Non-trainable params
129 K     Total params
0.517     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

FIT Profiler Report

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                         	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                                          	|  -              	|  242965    

Testing: |          | 0/? [00:00<?, ?it/s]

TEST Profiler Report

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Action                                                                                                                                                         	|  Mean duration (s)	|  Num calls      	|  Total time (s) 	|  Percentage %   	|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|  Total                                                                                                                                                          	|  -              	|  243513   

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc            0.7157894968986511
        test_loss           0.5450858473777771
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
test_acc,▁
test_loss,▁
train_acc,▁▂▃▄▅▃▅▆▃▅▆▅▃▆▇▆▇▆▆▄▅▆▆▅▅▆▄▆▆▅▇▆█▅▆▇▅▄█▇
train_loss,█▇▇▆▄▇▄▄▅▄▄▅▇▄▄▃▂▃▄▄▄▇▃▅▃▂▅▂▆▄▂▄▂▅▄▄▄█▁▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
val_acc,▁▅▆▆▇█▆▆▆▇██▆▇▆█▇▄▇▅▅▅▆▃▄▆
val_loss,█▃▃▂▂▂▁▂▂▂▂▁▂▂▂▁▂▂▂▂▂▂▂▃▂▂

0,1
epoch,26.0
test_acc,0.71579
test_loss,0.54509
train_acc,0.8125
train_loss,0.44383
trainer/global_step,5252.0
val_acc,0.70606
val_loss,0.56249
