## Plans for training

### 1. Train city_block to predict cities correctly

### 2. Train country_block to predict countries given the predicted cities

##### I believe passing the city to country_block, instead of the country to city_block, will be beneficial to the model, as it is easier to predict the country given the city than vice versa. Our model could predict countries with 12% accuracy, so let's be optimistic and say it can also predict cities with 12% accuracy given the correct country. Our final accuracy will then be 12% * 12% = 1.44%, as there's a 12% chance that the model receives the correct country and a 12% chance that it then predicts the correct city. Now let's say our model can also predict cities with 12% accuracy, but it doesn't need the country, and instead we have a lookup table of which country has which city. Then our accuracy will be 12%, as we don't have to predict the country. Sadly, we don't have the lookup table, so we have to create a model that acts as the lookup table. Let's say the model can predict the country with 50% accuracy given the city. Then our accuracy is 12% * 50% = 6%, which is a lot better and easier than the previous idea.

In [1]:
import sys

sys.path.append('../')

import os
import shutil

# Remove the TensorBoard log directory left by an earlier run, if there is one,
# so the SummaryWriter created later starts from an empty './runs/booking'.
if os.path.exists('./runs/booking'):
    shutil.rmtree('./runs/booking')


# Debugging switches exported to the process environment.
# NOTE(review): CUDA_LAUNCH_BLOCKING=1 is a debugging aid and is expected to
# slow GPU work; TORCH_USE_CUDA_DSA is presumably only honoured by builds
# compiled with device-side assertions - confirm both are still wanted.
os.environ.update({"CUDA_LAUNCH_BLOCKING": "1", 'TORCH_USE_CUDA_DSA': '1'})

In [2]:
from Model import Model_2 as Model

import torch
import torch.nn as nn
import torch.functional as F

from tqdm import tqdm

from torch.utils.tensorboard import SummaryWriter

from sklearn.model_selection import KFold

# TensorBoard writer shared by the training/evaluation functions below; it
# logs into './runs/booking'.
writer = SummaryWriter('./runs/booking')

# Turns on autograd anomaly detection for the whole session.
# NOTE(review): this is a debugging facility and is expected to slow training
# down noticeably - confirm it should stay enabled for full runs.
torch.autograd.set_detect_anomaly(True)

2024-01-29 21:03:17.817487: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-29 21:03:17.817532: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-29 21:03:17.818384: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-29 21:03:17.824062: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7f854c7a9990>

In [3]:
# Number of samples per DataLoader batch.
BATCH_SIZE = 1024
# Number of passes over the training loader performed by train().
EPOCHS = 10
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Number of splits for k_fold_cv().
FOLDS = 10
# A prediction counts as correct when the true class is among the top
# K_ACCURACY scores (passed as ``k`` to accuracy_at_k()).
K_ACCURACY = 4

In [4]:
from torch.utils.data import SubsetRandomSampler, DataLoader
from Dataset import PartDataset

# Load the pre-serialized inputs and city targets from the working directory.
# NOTE(review): torch.load unpickles the files - only load trusted data.
X = torch.load('X.pt')
y_city = torch.load('y_city.pt')

# Pair inputs with their city targets; presumably yields (input, label)
# tuples, which is how the loops below unpack each batch - confirm in Dataset.py.
dataset = PartDataset(X, y_city)

In [5]:
def reset_weights(m):
    """Re-initialise the direct children of ``m`` that support it.

    Every immediate child module exposing a ``reset_parameters`` attribute
    has that method called; children without it are left untouched. ``m``
    itself is not reset. Returns ``None``.
    """
    resettable = (
        child for child in m.children() if hasattr(child, 'reset_parameters')
    )
    for child in resettable:
        child.reset_parameters()

In [6]:
def accuracy_at_k(outputs, labels, k = 1):
    """Compute top-``k`` accuracy for one batch.

    Args:
        outputs: per-class scores; the ``k`` largest entries along dim 1 are
            taken as the predictions for each row.
        labels: per-class targets; the true class of each row is the arg-max
            along dim 1 (e.g. one-hot rows).
        k: number of top-scoring classes that count as a hit.

    Returns:
        A ``(accuracy_percent, n_correct)`` tuple, where ``n_correct`` is the
        number of rows whose true class is among the top ``k`` predictions and
        ``accuracy_percent`` is that count relative to ``labels.size(0)``.
    """
    n_rows = labels.size(0)

    top_classes = outputs.topk(k, dim=1, largest=True, sorted=True).indices
    true_classes = labels.argmax(dim=1).reshape(-1, 1)

    # Broadcasting compares each row's true class against its k candidates.
    hits = (true_classes == top_classes).sum().item()

    return hits / n_rows * 100, hits

In [7]:
def test(test_loader: torch.utils.data.DataLoader, model: nn.Module, criterion: nn.Module):
    """Evaluate ``model`` on ``test_loader`` without tracking gradients.

    The model is switched to eval mode and is not switched back afterwards.
    Labels are one-hot encoded over 11987 classes before being handed to
    ``criterion`` and to ``accuracy_at_k``.

    Args:
        test_loader: loader yielding ``(inputs, labels)`` batches.
        model: network to evaluate.
        criterion: loss called as ``criterion(outputs, one_hot_labels)``.

    Returns:
        ``(mean_loss, accuracy)``: the per-batch loss averaged over the number
        of batches, and the top-``K_ACCURACY`` accuracy in percent over all
        evaluated samples.
    """
    progress = tqdm(test_loader, desc='Testing')

    loss_sum = 0
    hits = 0
    seen = 0

    with torch.no_grad():
        model.eval()
        for features, targets in progress:
            # Insert a size-1 axis at position 2, as the model input expects.
            features = torch.unsqueeze(features, 2).to(torch.float32).to(device)

            targets = nn.functional.one_hot(targets.to(torch.int64), num_classes=11987)
            targets = targets.to(torch.float32).to(device)

            scores = model(features)
            loss_sum += criterion(scores, targets).item()

            hits += accuracy_at_k(scores, targets, K_ACCURACY)[1]
            seen += targets.size(0)

    mean_loss = loss_sum / len(progress)
    return mean_loss, hits / seen * 100

In [8]:
def train(epochs: int, train_loader: torch.utils.data.DataLoader, model: nn.Module, optimizer: torch.optim.Optimizer, criterion: nn.Module, save_model: bool = False) -> None:
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    For every batch the labels are one-hot encoded over 11987 classes, the
    loss is back-propagated and the optimizer stepped. Running loss and
    top-``K_ACCURACY`` accuracy are written to the module-level TensorBoard
    ``writer`` every 100 batches and once per epoch. After each epoch the
    whole model object is saved to ``./model_epoch_{epoch}``.

    Args:
        epochs: number of passes over ``train_loader``.
        train_loader: loader yielding ``(inputs, labels)`` batches.
        model: network to optimise (switched to train mode each epoch).
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss called as ``criterion(outputs, one_hot_labels)``.
        save_model: currently not consulted - the per-epoch checkpoint is
            always written. NOTE(review): decide whether the save should be
            gated on this flag; existing callers rely on the default.
    """
    for epoch in range(epochs):
        model.train()

        # Use a separate name for the progress bar. Rebinding ``train_loader``
        # here would wrap the previous epoch's bar in yet another bar on every
        # epoch instead of wrapping the loader itself.
        progress = tqdm(train_loader, desc='Training')

        running_loss = 0.0
        total_correct = 0
        batch = 0  # TensorBoard step for the per-100-batches scalars
        total_samples = 0

        for i, (inputs, labels) in enumerate(progress, 1):
            inputs = torch.unsqueeze(inputs, 2)

            inputs = inputs.to(torch.float32).to(device)
            labels = labels.to(torch.int64)

            labels = nn.functional.one_hot(labels, num_classes=11987).to(torch.float32).to(device)

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # Metrics only need the values, so score a detached tensor; this
            # keeps the top-k/comparison ops out of the autograd graph.
            _, correct = accuracy_at_k(outputs.detach(), labels, K_ACCURACY)
            total_correct += correct
            total_samples += labels.size(0)

            running_loss += loss.item()

            if i % 100 == 0:
                # Running averages over the batches seen so far this epoch.
                epoch_loss = running_loss / i
                batch_accuracy = total_correct / total_samples * 100

                writer.add_scalar(f'Loss/train/batch/{epoch}', epoch_loss, batch)
                writer.add_scalar(f'Accuracy/train/batch/{epoch}', batch_accuracy, batch)

                batch += 1

                writer.flush()

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = total_correct / total_samples * 100

        print(f"Epoch {epoch} loss: {epoch_loss:.4f}, accuracy: {epoch_accuracy:.2f}%")

        writer.add_scalar('Loss/train/epoch', epoch_loss, epoch)
        writer.add_scalar('Accuracy/train/epoch', epoch_accuracy, epoch)

        writer.flush()

        # Pickles the entire model object (not just the state dict).
        torch.save(model, f'./model_epoch_{epoch}')

In [9]:
def k_fold_cv(k: int, dataset: torch.utils.data.Dataset, model: nn.Module, optimizer: torch.optim.Optimizer, criterion: nn.Module):
    """Run shuffled ``k``-fold cross-validation of ``model`` on ``dataset``.

    For each fold the model weights and the optimizer state are reset, the
    model is trained for ``EPOCHS`` epochs on the training indices and
    evaluated on the held-out indices. Fold loss and accuracy are printed and
    logged to the module-level TensorBoard ``writer``, and the fold's state
    dict is saved to ``./models/booking/fold_{fold}.pt``.

    Args:
        k: number of folds.
        dataset: dataset to split; it is indexed through samplers.
        model: network to train; re-initialised in place for every fold.
        optimizer: optimizer bound to ``model``'s parameters; its accumulated
            state is cleared for every fold.
        criterion: loss forwarded to ``train`` and ``test``.
    """
    import os  # local import keeps this block self-contained

    folds = KFold(n_splits=k, shuffle=True)

    # Create the checkpoint directory up front so a missing folder cannot
    # make the save fail after a whole fold has been trained.
    os.makedirs('./models/booking', exist_ok=True)

    for fold, (train_ids, test_ids) in enumerate(folds.split(dataset), 1):
        print(f"Fold {fold}")

        model.apply(reset_weights)
        # The weights start fresh, so the optimizer must too: drop per-parameter
        # state (e.g. Adam moment estimates and step counts) left over from the
        # previous fold so it cannot influence this one.
        optimizer.state.clear()

        train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=SubsetRandomSampler(train_ids))
        test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=SubsetRandomSampler(test_ids))

        train(EPOCHS, train_loader, model, optimizer, criterion)

        loss, accuracy = test(test_loader, model, criterion)

        print(f"Fold {fold} loss: {loss:.4f}, accuracy: {accuracy:.2f}%")

        writer.add_scalar('Loss/test/fold', loss, fold)
        writer.add_scalar('Accuracy/test/fold', accuracy, fold)

        writer.flush()

        torch.save(model.state_dict(), f'./models/booking/fold_{fold}.pt')

In [12]:
# Instantiate the network (Model_2, imported as Model) on the selected device.
model = Model().to(device)

# Adam over all model parameters with the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Cross-entropy loss; train()/test() pass it one-hot float targets.
criterion = nn.CrossEntropyLoss()

In [13]:
# Cross-validation is disabled for now; train on the full dataset instead.
# k_fold_cv(FOLDS, dataset, model, optimizer, criterion)
# Single shuffled loader over the whole dataset.
train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
train(EPOCHS, train_loader, model, optimizer, criterion)

Training: 100%|██████████| 1082/1082 [12:10<00:00,  1.48it/s]


Epoch 0 loss: 9.3916, accuracy: 8.00%


Training:   0%|          | 1/1082 [00:01<25:28,  1.41s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 376.00 MiB. GPU 0 has a total capacty of 7.78 GiB of which 316.88 MiB is free. Including non-PyTorch memory, this process has 7.04 GiB memory in use. Of the allocated memory 5.26 GiB is allocated by PyTorch, and 1.63 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF