In [1]:
# Reload imported modules automatically before each cell executes, so edits
# to the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Load the TensorBoard notebook extension; it provides the %tensorboard
# magic used further down in this notebook.
%load_ext tensorboard

In [2]:
import torch

# Pick the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

# Model

In [None]:
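# Alternative model (currently disabled): uncomment to use ResNet18 instead of the MLP defined in the next cell.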
# from src.models.resnets import ResNet18

# NUM_CLASSES = 10
# model = ResNet18(num_classes=NUM_CLASSES).to(device)

In [5]:
from src.models.models import MLP

# Layer widths for the MLP: flattened 3-channel 32x32 input, one hidden
# layer of 512 units, and an output of size NUM_CLASSES.
NUM_CLASSES = 10
NUM_FEATURES = 3 * 32 * 32
DIMS = [NUM_FEATURES, 512, NUM_CLASSES]

# 'relu' is presumably the activation name understood by MLP - TODO confirm.
model = MLP(DIMS, 'relu')
model = model.to(device)

# Criterion

In [6]:
from src.common.common import LOSS_NAME_MAP
from src.models.losses import ClassificationLoss, FERLOSS

# Plain classification-loss alternative, kept commented for quick switching:
# criterion = ClassificationLoss(LOSS_NAME_MAP['ce']())

# FERLOSS is given the model itself in addition to the base-loss key and the
# number of classes, then moved to the same device as the model.
criterion = FERLOSS(model, 'ce', NUM_CLASSES)
criterion = criterion.to(device)

# Dataset

In [7]:
from torch.utils.data import DataLoader
from src.data.datasets import get_cifar10

# get_cifar10 returns three datasets; the middle one is not used here.
train_dataset, _, test_dataset = get_cifar10('data/')

BATCH_SIZE = 64

# Settings shared by both loaders; only shuffling differs between splits.
_common_loader_kwargs = dict(batch_size=BATCH_SIZE, pin_memory=True, num_workers=4)

train_loader = DataLoader(train_dataset, shuffle=True, **_common_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_common_loader_kwargs)

# Split name -> loader mapping handed to the trainer.
loaders = dict(train=train_loader, test=test_loader)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


# Optimizer

In [8]:
from src.common.common import OPTIMIZER_NAME_MAP, SCHEDULER_NAME_MAP
from src.common.utils import prepare_optim_and_scheduler

EPOCHS = 100
GRAD_ACCUM_STEPS = 1

# Scheduler horizon: batches per epoch divided by the accumulation factor,
# multiplied by the number of epochs.
steps_per_epoch = len(train_loader) // GRAD_ACCUM_STEPS
T_max = steps_per_epoch * EPOCHS

optim_params = dict(lr=0.01, weight_decay=0.001)
scheduler_params = dict(T_max=T_max, eta_min=1e-6)

# optim, lr_scheduler = prepare_optim_and_scheduler(model, 'adamw', 'cosine', optim_params, scheduler_params, whether_exclude=False)

# Only parameters that require gradients are handed to the optimizer.
trainable_params = (p for p in model.parameters() if p.requires_grad)
optim = OPTIMIZER_NAME_MAP['sgd'](trainable_params, **optim_params)

# No LR scheduler for this run; the cosine variant is kept for reference.
lr_scheduler = None  # SCHEDULER_NAME_MAP['cosine'](optim, **scheduler_params)

In [2]:
# Open TensorBoard inline on the given log directory.
# NOTE(review): this path is not built from the base_path='reports' /
# exp_name='cifar_10_mlp_ferloss' values configured for the run in this
# notebook - confirm it points at the runs you actually want to inspect.
%tensorboard --logdir=reports/optuna_lr_wd__mlp_sgd_

# Trainer

In [10]:
from src.trainer.trainer_classification import TrainerClassification

# Collect the objects built in the cells above and hand them to the trainer
# as keyword arguments.
params_trainer = dict(
    model=model,
    criterion=criterion,
    loaders=loaders,
    optim=optim,
    lr_scheduler=lr_scheduler,
)
trainer = TrainerClassification(**params_trainer)

In [11]:
from src.common.utils import AttrDict

import os

EXP_NAME = 'cifar_10_mlp_ferloss'

# Run configuration passed to trainer.run_exp. Keys are consumed by the
# project's trainer; the notes below state only what this cell establishes.
config = AttrDict({
    'epoch_start_at': 0,
    'epoch_end_at': EPOCHS,
    'grad_accum_steps': GRAD_ACCUM_STEPS,
    'save_multi': T_max // 10,   # one tenth of the schedule length T_max
    'log_multi': 100,
    'whether_clip': False,
    'clip_value': 2.0,           # presumably ignored while whether_clip is False - TODO confirm
    'base_path': 'reports',
    'exp_name': EXP_NAME,
    'logger_name': 'tensorboard',
    'logger_config': {
        # Credentials must not live in the notebook: read the token from the
        # environment instead (None when LOGGER_API_TOKEN is unset).
        # The token that used to be hard-coded here was committed in plain
        # text and should be treated as compromised and revoked.
        'api_token': os.environ.get('LOGGER_API_TOKEN'),
        'project': 'early_exit',
        'hyperparameters': {},
    },
    'random_seed': 42,
    'device': device,
})

trainer.run_exp(config)

run_epoch: train: 100%|█| 782/782 [00:08<00:00, 88      | 0/100 [00:00<?, ?it/s]
run_epoch: test: 100%|█| 157/157 [00:00<00:00, 174
run_epoch: train: 100%|█| 782/782 [00:08<00:00, 921/100 [00:09<15:59,  9.69s/it]
run_epoch: test: 100%|█| 157/157 [00:00<00:00, 164
run_epoch: train: 100%|█| 782/782 [00:09<00:00, 862/100 [00:19<15:35,  9.54s/it]
run_epoch: test: 100%|█| 157/157 [00:00<00:00, 170
run_epoch: train: 100%|█| 782/782 [00:08<00:00, 883/100 [00:29<15:45,  9.75s/it]
run_epoch: test: 100%|█| 157/157 [00:00<00:00, 169
run_epoch: train: 100%|█| 782/782 [00:08<00:00, 904/100 [00:38<15:34,  9.74s/it]
run_epoch: test: 100%|█| 157/157 [00:00<00:00, 172
run_epoch: train: 100%|█| 782/782 [00:08<00:00, 925/100 [00:48<15:20,  9.69s/it]
run_epoch: test: 100%|█| 157/157 [00:00<00:00, 177
run_epoch: train: 100%|█| 782/782 [00:09<00:00, 856/100 [00:57<15:00,  9.58s/it]
run_epoch: test: 100%|█| 157/157 [00:00<00:00, 174
run_epoch: train: 100%|█| 782/782 [00:08<00:00, 937/100 [01:07<15:06,  9.74s

In [None]:
# Tiny random batch for the backward-pass experiment below:
# 5 integer labels in [0, 10) and 5 random 3x32x32 inputs.
labels_column = torch.randint(0, 10, (5, 1))
y_true = labels_column.to(device).squeeze()

dummy_inputs = torch.randn(5, 3, 32, 32)
y_pred = model(dummy_inputs.to(device))

In [None]:
# Two separately constructed instances of the same loss wrapper, one for
# each backward pass in the cells below.
criterion1, criterion2 = (ClassificationLoss(LOSS_NAME_MAP['ce']()) for _ in range(2))

In [None]:
# First of two backward passes through the graph that produced y_pred.
# y_pred = model(torch.randn(5, 3, 32, 32).to(device))
loss, _ = criterion1(y_pred, y_true)  # second returned value is not used here
# retain_graph=True keeps the autograd graph alive so that y_pred can be
# back-propagated through again in the next cell.
loss.backward(retain_graph=True)
# Presumably discards the gradients from this pass so they do not accumulate
# into the next one - confirm `optim` behaves like a torch optimizer.
optim.zero_grad()


In [None]:
# Second backward pass over the same y_pred; it relies on the preceding cell
# having called backward(retain_graph=True), otherwise the graph behind
# y_pred would already have been freed.
loss, _ = criterion2(y_pred, y_true)  # second returned value is not used here
loss.backward()