In [1]:
import os
import random
import numpy as np

import torch
import torch.nn as nn
import torchvision

from tqdm import tqdm

from src import arch
from src import utils
from config import cfg
from src import pytorch_utils as ptu

import warnings
warnings.filterwarnings("ignore")

# assert torch.cuda.is_available(), "no CUDA"

In [2]:
cfg.seed = 42
random.seed(cfg.seed)
torch.random.manual_seed(cfg.seed)
torch.manual_seed(cfg.seed)
torch.backends.cudnn.deterministic = True
cfg.num_workers = 0

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
if device.type == 'cuda':
    torch.backends.cudnn.benchmark = True
    print(torch.cuda.get_device_properties(device))

cuda
_CudaDeviceProperties(name='TITAN Xp', major=6, minor=1, total_memory=12196MB, multi_processor_count=30)


In [4]:
print('clf.version\n', cfg.clf.version)

clf.version
 resnext50_32x4d_bs32_queue16384_wd0.0001_t0.2_cos_epochbest_clf_wd0.0_cos


In [5]:
cfg.clf.load = 'best'
cfg.preload_data = False
# cfg.prints = 'display'
# cfg.tqdm_bar = True

In [6]:
if cfg.clf.load is not None and os.path.exists(os.path.join(cfg.models_dir, cfg.clf.version, ptu.naming_scheme(cfg.clf.version, epoch=cfg.clf.load)) + '.pth'):
    checkpoint = ptu.load_model(version=cfg.clf.version, models_dir=cfg.models_dir, epoch=cfg.clf.load)

In [7]:
train_dataset = utils.Dataset(os.path.join(cfg.data_path, 'train'), cfg.clf.train_transforms, preload_data=cfg.preload_data, tqdm_bar=cfg.tqdm_bar)
val_dataset = utils.Dataset(os.path.join(cfg.data_path, 'val'), cfg.clf.val_transforms, preload_data=cfg.preload_data, tqdm_bar=cfg.tqdm_bar)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=checkpoint.model.batch_size,
                                           num_workers=cfg.num_workers,
                                           drop_last=True, shuffle=True, pin_memory=True)

val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=checkpoint.model.batch_size,
                                         num_workers=cfg.num_workers,
                                         drop_last=True, shuffle=False, pin_memory=True)

In [8]:
train_loss, train_score, train_results = checkpoint.evaluate(train_loader,
                                                             device=device,
                                                             tqdm_bar=cfg.tqdm_bar)

100%|██████████| 295/295 [02:16<00:00,  2.16it/s, score=0.906, avg_score=0.919, loss=0.259, avg_loss=0.25]  


In [9]:
val_loss, val_score, val_results = checkpoint.evaluate(val_loader,
                                                       device=device,
                                                       tqdm_bar=cfg.tqdm_bar)

100%|██████████| 122/122 [01:03<00:00,  1.93it/s, score=0.938, avg_score=0.928, loss=0.169, avg_loss=0.246]


In [10]:
print(f'train | loss: {train_loss:.4f} | top-1 acc: {train_score:.6f}')
print(f'val   | loss: {val_loss:.4f} | top-1 acc: {val_score:.6f}')

train | loss: 0.2497 | top-1 acc: 0.918962
val   | loss: 0.2453 | top-1 acc: 0.927766
