In [1]:
import torchvision
import torch
import torch.nn as nn

from torchvision import transforms

from tqdm import tqdm

from botnet.model.botnet import BoTNet

In [2]:
model = BoTNet(image_size=224, in_channels=3, num_classes=10, layers=50)
print(model)

BoTNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (max_pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (stages): ModuleList(
    (0): BoTNetStage(
      (blocks): ModuleList(
        (0): BottleneckBlock(
          (conv1): ConvBNReLULayer(
            (conv): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
            (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act): ReLU()
          )
          (conv2): ConvBNReLULayer(
            (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (bn): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (act): ReLU()
          )
          (conv3): ConvBNReLULayer(
            (conv): Conv2d(16, 256, kernel_size=(1, 1), stride=(1, 1))
         

In [3]:
test_in = torch.rand(1, 3, 224, 224)
print(model(test_in).shape)

torch.Size([1, 10])


In [4]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
dataset1 = torchvision.datasets.CIFAR10('./data', train=True, download=True,
                                        transform=transform)
dataset2 = torchvision.datasets.CIFAR10('./data', train=False, download=True,
                                        transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
train_params = {
    'epochs': 16,
    'lr': 0.1,
    'eval_portion': 0.2,
    'batch_size': 16
}

device = torch.device('cuda')

In [6]:
from torch.utils.data import DataLoader, random_split

EVAL_LENGTH = int(len(dataset1) * train_params['eval_portion'])

train_set, eval_set = random_split(dataset1, [len(dataset1) - EVAL_LENGTH, EVAL_LENGTH])
train_loader = DataLoader(train_set, batch_size=train_params['batch_size'],
                          shuffle=True)

eval_loader = DataLoader(eval_set, batch_size=train_params['batch_size'])

test_loader = DataLoader(dataset2, batch_size=train_params['batch_size'],
                         shuffle=True)


In [7]:
model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=train_params['lr'], momentum=0.9, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.9)

cross_entropy = nn.CrossEntropyLoss()

best_accuracy = 0.0

for e in range(train_params['epochs']):
    train_loss = 0.0
    model.train()
    for images, labels in tqdm(iter(train_loader), desc='Training...'):
        optimizer.zero_grad()

        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = cross_entropy(outputs, labels)
        train_loss += loss.cpu().detach().numpy()

        loss.backward()
        optimizer.step()

    print(f"Training average loss: {train_loss / len(train_loader)}")

    test_acc_count = 0.0
    eval_loss = 0.0

    model.eval()
    with torch.no_grad():
        for images, labels in tqdm(iter(eval_loader), desc='Eval...'):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            loss = cross_entropy(outputs, labels)
            eval_loss += loss.cpu().detach().numpy()

            pred = torch.argmax(outputs, 1)
            test_acc_count += float(torch.sum(pred == labels))

    test_accuracy = float(test_acc_count) / float(len(eval_set))
    eval_loss /= len(eval_loader)

    print(f'Epoch: {e + 1}, eval accuracy {test_accuracy}, eval loss {eval_loss}')
    if test_accuracy > best_accuracy:
        torch.save(model.state_dict(), 'checkpoints/model.pth')
        best_accuracy = test_accuracy

Training...: 100%|██████████| 2500/2500 [05:19<00:00,  7.83it/s]


Training average loss: 2.7398944984912874


Eval...: 100%|██████████| 625/625 [00:33<00:00, 18.56it/s]


Epoch: 1, eval accuracy 0.2634, eval loss 1.9531444427490234


FileNotFoundError: [Errno 2] No such file or directory: 'checkpoints/model.pth'