In [1]:
import os

import torch
import wandb
from pytorch_multilabel_balanced_sampler.sampler import MultilabelBalancedRandomSampler
from torch.utils.data import DataLoader

import Dataset
import Augmentation
import Optimizer
import Model
import Loss
import Validation

In [2]:
# Hyper-parameters and run configuration.
TOTAL_EPOCH = 50        # number of passes over the training set; also handed to the scheduler
LEARNING_RATE = 1e-4    # learning rate handed to Optimizer.get_optimizer
BATCH_SIZE = 16         # batch size used by both the training and validation DataLoader
CLASSES_SIZE = 28       # passed to the dataset and model builders

# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# (slowly) on a machine without CUDA instead of failing at `model.to(device)`.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
# Build the train/validation datasets and wrap each one in a DataLoader.
transform = Augmentation.get_transform()
dataset_path = '/mnt/train-data1/howard/cvfinal/human-protein-atlas-image-classification/'

# Both locations live directly under `dataset_path` (which ends with a slash).
csv_path = dataset_path + 'train.csv'
image_dir = dataset_path + 'train/'

# NOTE(review): the same `transform` is handed over for both splits; how it is
# applied to the validation split is decided inside Dataset -- confirm there.
train_dataset, validation_dataset = Dataset.get_train_val_dataset(
    csv_path,
    CLASSES_SIZE,
    img_folder=image_dir,
    transform=transform,
)

# Options shared by both loaders; only `shuffle` differs between them.
loader_options = dict(batch_size=BATCH_SIZE, num_workers=36)
train_data_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
validation_data_loader = DataLoader(validation_dataset, shuffle=False, **loader_options)

In [4]:
# Build the network and move it to the configured device.
# Model.get_model is project code; the repr printed below this cell shows it
# returns a Sequential whose first layer is a 1x1 Conv2d mapping 4 input
# channels to 3, followed by a ResNet.
# NOTE(review): CLASSES_SIZE is presumably the number of output classes --
# confirm in Model.get_model.
model = Model.get_model(CLASSES_SIZE)
# As the last expression of the cell, the value returned by `.to(device)` is
# what the notebook displays (the full module repr).
model.to(device)

Sequential(
  (0): Conv2d(4, 3, kernel_size=(1, 1), stride=(1, 1))
  (1): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act2): ReLU(inplace=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0

In [5]:
# Create the optimizer and learning-rate scheduler through the project's
# Optimizer module. The concrete optimizer/scheduler types are chosen inside
# that module and are not visible from this notebook.
optimizer = Optimizer.get_optimizer(model, LEARNING_RATE)
# The scheduler is given TOTAL_EPOCH; the training loop calls
# `scheduler.step()` once per epoch.
scheduler = Optimizer.get_scheduler(optimizer, TOTAL_EPOCH)

In [6]:
# Loss function, taken from the project's Loss module.
# The training loop calls it with three arguments:
# loss_func(y_pred, targets, device).
loss_func = Loss.BCELoss()

In [7]:
# Start a Weights & Biases run and record the run configuration.
# (`wandb` is imported in the import cell at the top of the notebook so that
# every dependency is visible in one place.)
# "loss" and "model" are free-text labels for the dashboard; they are not read
# back by any code in this notebook.
wandb.init(project="cvfinal", entity="suyihao1999",
           config={
               "learning_rate": LEARNING_RATE,
               "epochs": TOTAL_EPOCH,
               "batch_size": BATCH_SIZE,
               "loss": 'BCE',
               "model": 'resnet50'
           })


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msuyihao1999[0m (use `wandb login --relogin` to force relogin)


In [None]:
# Training loop: for every epoch, run one pass over the training set, step the
# LR scheduler, validate, and write a checkpoint.

# Create the checkpoint directory up front so a missing folder fails (or is
# fixed) before any training time is spent, not at the first torch.save.
CHECKPOINT_DIR = '/mnt/train-data1/howard/cvfinal/model/v7'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

cnt = 0  # running count of optimisation steps, logged as "step"
for epoch in range(TOTAL_EPOCH):
    # train
    # Re-enter training mode every epoch: the validation pass below leaves the
    # model in eval mode, and BatchNorm layers behave differently there.
    model.train()
    for images, targets in train_data_loader:
        y_pred = model(images.to(device))
        # `targets` is passed as loaded; the loss function receives `device`
        # and is expected to handle placement itself.
        loss = loss_func(y_pred, targets, device)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        wandb.log({"loss": loss.item(),
                   "step": cnt,
                   "epoch": epoch})
        cnt += 1

    # The scheduler is stepped once per epoch, after all optimizer steps.
    scheduler.step()

    # validation
    # Validation.validation is project code; whether it switches modes itself
    # is not visible here, so eval mode is set explicitly before calling it.
    model.eval()
    val_loss, metrics = Validation.validation(model, validation_data_loader, loss_func, device)
    val_log = {"val_loss": val_loss,
               "step": cnt,
               "epoch": epoch}
    wandb.log(val_log)
    print('epoch:', epoch)
    print('==================================================================')
    print(metrics)
    print('==================================================================')

    # save model: one state_dict file per epoch, named epoch<N>.pth
    torch.save(model.state_dict(), os.path.join(CHECKPOINT_DIR, 'epoch' + str(epoch) + '.pth'))

In [None]:
# Release unoccupied GPU memory held by PyTorch's caching allocator; placed
# after the training cell. Memory still referenced by live tensors (e.g. the
# model) is not freed by this call.
torch.cuda.empty_cache()