# Importing the libraries

In [1]:
import numpy as np
from collections import OrderedDict
import os
import torch
from torch.utils.data import random_split
from torch import nn
from torchvision import transforms, models, datasets

# Importing the dataset

In [2]:
DATASET_DIRECTORY = '../Datasets/Intel/'
TRAINING_PATH = DATASET_DIRECTORY + 'seg_train/seg_train/'
TEST_PATH = DATASET_DIRECTORY + 'seg_test/seg_test/'

train_cat_counts = {}
for cat in os.listdir(TRAINING_PATH):
    counts = len(os.listdir(os.path.join(TRAINING_PATH, cat)))
    train_cat_counts[cat] = counts

test_cat_counts = {}
for cat in os.listdir(TEST_PATH):
    counts = len(os.listdir(os.path.join(TEST_PATH, cat)))    
    test_cat_counts[cat] = counts

print("Size of the training set:", sum(train_cat_counts.values()))    
print("Label frequencies of the training set:", train_cat_counts)
print(10*'-')
print("Size of the test set:", sum(test_cat_counts.values()))
print("Label frequencies of the test set:", test_cat_counts)

Size of the training set: 14034
Label frequencies of the training set: {'buildings': 2191, 'forest': 2271, 'glacier': 2404, 'mountain': 2512, 'sea': 2274, 'street': 2382}
----------
Size of the test set: 3000
Label frequencies of the test set: {'buildings': 437, 'forest': 474, 'glacier': 553, 'mountain': 525, 'sea': 510, 'street': 501}


# Splitting & Preprocessing & Loading

In [3]:
random_seed = 42
batch_size = 64
# mean and std which for ResNet50
mean = [0.485, 0.456, 0.406] 
std = [0.229, 0.224, 0.225]

train_transforms = transforms.Compose([transforms.Resize((150, 150)), # Resize all images 
                                       transforms.RandomResizedCrop(150),# Crop
                                       transforms.RandomRotation(30), # Rotate 
                                       transforms.RandomHorizontalFlip(), # Flip
                                       transforms.ToTensor(), # Convert
                                       transforms.Normalize(torch.Tensor(mean), torch.Tensor(std)) # Normalize
                                       ])



test_transforms = transforms.Compose([transforms.Resize((150, 150)),
                                     transforms.CenterCrop(150),
                                     transforms.ToTensor(),
                                     transforms.Normalize(torch.Tensor(mean),torch.Tensor(std))
                                     ])

# Tmp torchvision datasets.Image folder to split into train and validation sets
tmp_data = datasets.ImageFolder(TRAINING_PATH, transform=train_transforms)
# len(tmp_data): 14034

# Randomsplit tmp data based on length of dataset and set seed for reproducable split
train_data, val_data = random_split(tmp_data, [10000, 4034], generator=torch.Generator().manual_seed(random_seed))
# Test set with with test transforms 
test_data = datasets.ImageFolder(TEST_PATH, transform=test_transforms)


# Set Pytorch dataloaders, batch_size, training set shuffle
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)
test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

# Resnet50 pretrained on ImageNet

In [4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
def init(random_seed): 
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)

def create_model():
    random_seed = 42
    init(random_seed)
    resnet = models.resnet50(pretrained=True)
    # Freeze model params 
    for param in resnet.parameters():
        param.required_grad = False
    # Pull final fc layer feature dimensions
    features = resnet.fc.in_features


    # Build custom classifier which reduces Resnets 1000 out_features to 6
    classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(features, 512)),
                                            ('relu', nn.ReLU()),
                                            ('drop', nn.Dropout(0.05)),
                                            ('fc2', nn.Linear(512, 6)),
                                            ]))
    resnet.fc = classifier
    # Pushing the model to cuda
    resnet.to(device)
    return resnet    

## The train() & evalaute() functions: 
The evalute() function returns the avg model loss on the validation set.   

In [5]:
import gc
from torch.nn import Softmax
from torch.optim import Adam, AdamW

def train(model, loss_fn, optimizer):
    model.train()
    total_loss = 0
    total_len = len(train_dataloader)
    softmax_func = Softmax(dim=1)

    for i, batch in enumerate(train_dataloader):
        step = i+1
        percent = "{0:.5f}".format(100 * (step / float(total_len)))
        lossp = "{0:.5f}".format(total_loss/step)
        filledLength = int(100 * step // total_len)
        bar = '█' * filledLength + '>'  *(filledLength < 100) + '.' * (99 - filledLength)
        print(f'\rBatch {step}/{total_len} |{bar}| {percent}% complete, loss={lossp}', end='')
        imgs, labels = batch[0].to(device), batch[1].to(device)
        del batch
        gc.collect()
        torch.cuda.empty_cache()
        model.zero_grad()
        preds = model(imgs)
        preds = softmax_func(preds) 
        loss = loss_fn(preds.double(), labels.double())
        total_loss += float(loss.item())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

    gc.collect()
    torch.cuda.empty_cache()

    avg_loss = total_loss / total_len
    
    return avg_loss

def evaluate(model, loss_fn):
    val_loss = 0
    softmax_func = Softmax(dim=1)
    model.eval()
    with torch.no_grad():
        for batch in val_dataloader:
            imgs, labels = batch[0].to(device), batch[1].to(device)
            gc.collect()
            torch.cuda.empty_cache()
            preds = model(imgs)
            preds = softmax_func(preds)
            loss = loss_fn(preds.double(), labels.double())
            val_loss += loss.item()
        return  val_loss / len(val_dataloader)

# Loss functions

$$ \ell_{CE}(\mathrm{\hat{y}}, \mathrm{y}) = -\sum_{c=1}^{C} y_c\log(\hat{y}_c),\,\, \ell_{RJM}(\mathrm{\hat{y}}, \mathrm{y}) = \sum_{c=1}^{C} y_c(1-\sqrt{\hat{y}_c}). $$

In [6]:
def CE(y_hat, y):
    return torch.sum(-1*torch.log(y_hat[range(y.size()[0]), y.long()]) / y.size()[0])

def RJM(y_hat, y):
    return torch.sum(1 - torch.sqrt(y_hat[range(y.size()[0]), y.long()])) / y.size()[0]

# Optimzier: Adam

## CE loss

In [11]:
resnet_adam_ce = create_model()
history_resnet_adam_ce = []
optimizer = Adam(resnet_adam_ce.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.2)
loss_fn = CE
epochs = 20
current = 1
# for each epoch
while current <= epochs:

    print(f'\nEpoch {current} / {epochs}:')

    # train model
    train_loss = train(model=resnet_adam_ce, loss_fn = loss_fn, optimizer=optimizer)

    # evaluate model
    val_loss = evaluate(model=resnet_adam_ce, loss_fn=loss_fn)
    
    scheduler.step()
    
    print(f'\n\nTraining Loss: {train_loss:.5f}')
    print(f'Val Loss: {val_loss:.5f}')

    history_resnet_adam_ce.append((train_loss, val_loss))

    current = current + 1


Epoch 1 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.51958

Training Loss: 0.52112
Val Loss: 0.36628

Epoch 2 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.37112

Training Loss: 0.37383
Val Loss: 0.34848

Epoch 3 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.35193

Training Loss: 0.35617
Val Loss: 0.40140

Epoch 4 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.32811

Training Loss: 0.33007
Val Loss: 0.34848

Epoch 5 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.30757

T

### Save the model

In [18]:
torch.save(resnet_adam_ce.state_dict(), 'checkpoints/resnet_adam_ce.pth')

## RJM loss

In [13]:
resnet_adam_rjm = create_model()
history_resnet_adam_rjm = []
optimizer = Adam(resnet_adam_rjm.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.2)
loss_fn = RJM
epochs = 20
current = 1
# for each epoch
while current <= epochs:

    print(f'\nEpoch {current} / {epochs}:')

    # train model
    train_loss = train(model=resnet_adam_rjm, loss_fn = loss_fn, optimizer=optimizer)

    # evaluate model
    val_loss = evaluate(model=resnet_adam_rjm, loss_fn=loss_fn)
    
    scheduler.step()
    
    print(f'\n\nTraining Loss: {train_loss:.5f}')
    print(f'Val Loss: {val_loss:.5f}')

    history_resnet_adam_rjm.append((train_loss, val_loss))

    current = current + 1


Epoch 1 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.17035

Training Loss: 0.17109
Val Loss: 0.12418

Epoch 2 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.12047

Training Loss: 0.12119
Val Loss: 0.11796

Epoch 3 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.11635

Training Loss: 0.11737
Val Loss: 0.11145

Epoch 4 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.10658

Training Loss: 0.10695
Val Loss: 0.10335

Epoch 5 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.10089

T

### Save the model

In [17]:
torch.save(resnet_adam_rjm.state_dict(), 'checkpoints/resnet_adam_rjm.pth')

## Evaluation

In [12]:
from sklearn.metrics import accuracy_score, f1_score
y_true_adam_ce = np.array([])
y_pred_adam_ce = np.array([])
resnet_adam_ce.eval()
with torch.no_grad():
    for batch in test_dataloader:
        imgs, labels = batch[0].to(device), batch[1].to(device)
        preds = resnet_adam_ce(imgs).detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()
        preds_indices = preds.argmax(axis=1)
        y_true_adam_ce = np.concatenate((y_true_adam_ce, labels))
        y_pred_adam_ce = np.concatenate((y_pred_adam_ce, preds_indices))
    
print('ResNet50: Optimzier = Adam, Loss = CE')
print(10*'-')    
print('Accuracy: {0:0.4f}'.format(accuracy_score(y_true_adam_ce, y_pred_adam_ce)))
print('Macro F1: {0:0.4f}'.format(f1_score(y_true_adam_ce, y_pred_adam_ce, average='macro')))

ResNet50: Optimzier = Adam, Loss = CE
----------
Accuracy: 0.9303
Macro F1: 0.9316


In [14]:
y_true_adam_rjm = np.array([])
y_pred_adam_rjm = np.array([])
resnet_adam_rjm.eval()
with torch.no_grad():
    for batch in test_dataloader:
        imgs, labels = batch[0].to(device), batch[1].to(device)
        preds = resnet_adam_rjm(imgs).detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()
        preds_indices = preds.argmax(axis=1)
        y_true_adam_rjm = np.concatenate((y_true_adam_rjm, labels))
        y_pred_adam_rjm = np.concatenate((y_pred_adam_rjm, preds_indices))
    
print('ResNet50: Optimzier = Adam, Loss = RJM')
print(10*'-')    
print('Accuracy: {0:0.4f}'.format(accuracy_score(y_true_adam_rjm, y_pred_adam_rjm)))
print('Macro F1: {0:0.4f}'.format(f1_score(y_true_adam_rjm, y_pred_adam_rjm, average='macro')))

ResNet50: Optimzier = Adam, Loss = RJM
----------
Accuracy: 0.9333
Macro F1: 0.9344


In [20]:
history_resnet_adam_ce

[(0.5211236263324392, 0.3662788840660633),
 (0.3738337056340272, 0.34847557853925804),
 (0.3561661091429295, 0.4014042705021646),
 (0.3300709551630286, 0.34847921219710604),
 (0.3110317377231622, 0.37393639802587),
 (0.30426419212868333, 0.3615676990753051),
 (0.2840873839409673, 0.37849040857422317),
 (0.2749438934988312, 0.3821292815661561),
 (0.2535405663117322, 0.3832281326770191),
 (0.2566287950289117, 0.35674530480115507),
 (0.2076587384739474, 0.30871358199585364),
 (0.18693415855161793, 0.3028861485514029),
 (0.17251129265662526, 0.31791534975557095),
 (0.17094331536051044, 0.3258771827141669),
 (0.15913319602446072, 0.3285852458541301),
 (0.15723968090885623, 0.3287524144916472),
 (0.15358784511969228, 0.31542618946321743),
 (0.14899157869579233, 0.3136909447593698),
 (0.13528603577670514, 0.3138545838512712),
 (0.14016297370950762, 0.3294708106245238)]

In [19]:
history_resnet_adam_rjm

[(0.1710909175229467, 0.1241820521825257),
 (0.12119477126037126, 0.1179565539324267),
 (0.11737331433576148, 0.11144754334875963),
 (0.10695210480865146, 0.10335126070817457),
 (0.10222649812319123, 0.12181143858622763),
 (0.10283162556468133, 0.10403545881925864),
 (0.0961085461179765, 0.11408896312014379),
 (0.09511420006330132, 0.11512799966423011),
 (0.09009385966730193, 0.10393370109421442),
 (0.09180569817967296, 0.10939756850627085),
 (0.0776474704193143, 0.08951829057188145),
 (0.07149813548880722, 0.08997332833752728),
 (0.06746424187664365, 0.09047830009891578),
 (0.06551926781118288, 0.08745963771106847),
 (0.06268502617540031, 0.08764346090863134),
 (0.06361823495819843, 0.08992950103637859),
 (0.06006716849491907, 0.08695714598403825),
 (0.06140084702460135, 0.0859764061338285),
 (0.05668552570250547, 0.08971488299421546),
 (0.05862545204495123, 0.08373929366282151)]

# Optimizer: AdamW

## CE loss

In [7]:
resnet_adamw_ce = create_model()
history_resnet_adamw_ce = []
optimizer = AdamW(resnet_adamw_ce.parameters(), lr=1e-4, weight_decay=0.1)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.2)
loss_fn = CE
epochs = 20
current = 1
# for each epoch
while current <= epochs:

    print(f'\nEpoch {current} / {epochs}:')

    # train model
    train_loss = train(model=resnet_adamw_ce, loss_fn = loss_fn, optimizer=optimizer)

    # evaluate model
    val_loss = evaluate(model=resnet_adamw_ce, loss_fn=loss_fn)
    
    scheduler.step()
    
    print(f'\n\nTraining Loss: {train_loss:.5f}')
    print(f'Val Loss: {val_loss:.5f}')

    history_resnet_adamw_ce.append((train_loss, val_loss))

    current = current + 1


Epoch 1 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.51902

Training Loss: 0.52119
Val Loss: 0.36622

Epoch 2 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.37341

Training Loss: 0.37638
Val Loss: 0.35029

Epoch 3 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.34797

Training Loss: 0.35178
Val Loss: 0.40574

Epoch 4 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.33230

Training Loss: 0.33477
Val Loss: 0.34854

Epoch 5 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.31152

T

### Save the model

In [13]:
torch.save(resnet_adamw_ce.state_dict(), './checkpoints/resnet_adamw_ce.pth')

## RJM loss

In [9]:
resnet_adamw_rjm = create_model()
history_resnet_adamw_rjm = []
optimizer = AdamW(resnet_adamw_rjm.parameters(), lr=1e-4, weight_decay=0.1)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.2)
loss_fn = RJM
epochs = 20
current = 1
# for each epoch
while current <= epochs:

    print(f'\nEpoch {current} / {epochs}:')

    # train model
    train_loss = train(model=resnet_adamw_rjm, loss_fn = loss_fn, optimizer=optimizer)

    # evaluate model
    val_loss = evaluate(model=resnet_adamw_rjm, loss_fn=loss_fn)
    
    scheduler.step()
    
    print(f'\n\nTraining Loss: {train_loss:.5f}')
    print(f'Val Loss: {val_loss:.5f}')

    history_resnet_adamw_rjm.append((train_loss, val_loss))

    current = current + 1


Epoch 1 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.17024

Training Loss: 0.17100
Val Loss: 0.12623

Epoch 2 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.12112

Training Loss: 0.12219
Val Loss: 0.10990

Epoch 3 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.11454

Training Loss: 0.11572
Val Loss: 0.11382

Epoch 4 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.10851

Training Loss: 0.10944
Val Loss: 0.11014

Epoch 5 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.10553

T

### Save the model

In [14]:
torch.save(resnet_adamw_rjm.state_dict(), './checkpoints/resnet_adamw_rjm.pth')

## Evaluation

In [8]:
from sklearn.metrics import accuracy_score, f1_score
y_true_adamw_ce = np.array([])
y_pred_adamw_ce = np.array([])
resnet_adamw_ce.eval()
with torch.no_grad():
    for batch in test_dataloader:
        imgs, labels = batch[0].to(device), batch[1].to(device)
        preds = resnet_adamw_ce(imgs).detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()
        preds_indices = preds.argmax(axis=1)
        y_true_adamw_ce = np.concatenate((y_true_adamw_ce, labels))
        y_pred_adamw_ce = np.concatenate((y_pred_adamw_ce, preds_indices))
    
print('ResNet50: Optimzier = AdamW, Loss = CE')
print(10*'-')    
print('Accuracy: {0:0.4f}'.format(accuracy_score(y_true_adamw_ce, y_pred_adamw_ce)))
print('Macro F1: {0:0.4f}'.format(f1_score(y_true_adamw_ce, y_pred_adamw_ce, average='macro')))

ResNet50: Optimzier = AdamW, Loss = CE
----------
Accuracy: 0.9240
Macro F1: 0.9250


In [10]:
y_true_adamw_rjm = np.array([])
y_pred_adamw_rjm = np.array([])
resnet_adamw_rjm.eval()
with torch.no_grad():
    for batch in test_dataloader:
        imgs, labels = batch[0].to(device), batch[1].to(device)
        preds = resnet_adamw_rjm(imgs).detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()
        preds_indices = preds.argmax(axis=1)
        y_true_adamw_rjm = np.concatenate((y_true_adamw_rjm, labels))
        y_pred_adamw_rjm = np.concatenate((y_pred_adamw_rjm, preds_indices))
    
print('ResNet50: Optimzier = AdamW, Loss = RJM')
print(10*'-')    
print('Accuracy: {0:0.4f}'.format(accuracy_score(y_true_adamw_rjm, y_pred_adamw_rjm)))
print('Macro F1: {0:0.4f}'.format(f1_score(y_true_adamw_rjm, y_pred_adamw_rjm, average='macro')))

ResNet50: Optimzier = AdamW, Loss = RJM
----------
Accuracy: 0.9327
Macro F1: 0.9338


In [11]:
history_resnet_adamw_ce

[(0.5211869197761237, 0.3662214832224571),
 (0.37638492305927757, 0.35029315083552093),
 (0.35177524966134227, 0.4057437941621591),
 (0.3347677654928292, 0.3485434510306495),
 (0.3151344466616101, 0.3511142694684835),
 (0.30980044429886844, 0.3494145638771621),
 (0.292052720914867, 0.3548440974087757),
 (0.27839101560643914, 0.3734225654620497),
 (0.271529129905734, 0.36657615121041026),
 (0.2664250858989775, 0.36371731570640004),
 (0.21335830179964949, 0.2962864963447788),
 (0.19545648789146963, 0.30376670435865705),
 (0.1802044813985988, 0.3120615026225489),
 (0.174840092438375, 0.30989352864124614),
 (0.16222698908425598, 0.31467837200729054),
 (0.16137420706240307, 0.32031461923311616),
 (0.155363040968585, 0.2995059490467846),
 (0.15633707315679796, 0.30768371706274195),
 (0.14034787741642693, 0.30949955248220246),
 (0.14277555606997505, 0.3149612655513181)]

In [12]:
history_resnet_adamw_rjm

[(0.1709992818184276, 0.1262312866976228),
 (0.12218630932306652, 0.10989512855088995),
 (0.11572456102301705, 0.11382152341737252),
 (0.10944051819728895, 0.11013725477811928),
 (0.10652974558401317, 0.12236079714127424),
 (0.10294787696165203, 0.10673688507216358),
 (0.09724366576050189, 0.11481460440476159),
 (0.09688954996342249, 0.11789443674625173),
 (0.09473631664699365, 0.10734031896739087),
 (0.09429680993091291, 0.12005224361164359),
 (0.08104422484290903, 0.0923537567588797),
 (0.07335685441677582, 0.08817151918668342),
 (0.06841700600245851, 0.09601770695038743),
 (0.06672805331099702, 0.0876779710650151),
 (0.06250444439937522, 0.08689891580334023),
 (0.06465917335596445, 0.08994873000682596),
 (0.059789863130357536, 0.08609395633328665),
 (0.06230710119849958, 0.0885274862162387),
 (0.05516688605892541, 0.0895703604390005),
 (0.05809657829438773, 0.08351975861865313)]

# Optimizer: NovoGrad

## CE loss

In [8]:
from torch_optimizer import NovoGrad
resnet_novograd_ce = create_model()
history_resnet_novograd_ce = []
optimizer = NovoGrad(resnet_novograd_ce.parameters(), lr=2e-5, weight_decay=0.1)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.2)
loss_fn = CE
epochs = 20
current = 1
# for each epoch
while current <= epochs:

    print(f'\nEpoch {current} / {epochs}:')

    # train model
    train_loss = train(model=resnet_novograd_ce, loss_fn = loss_fn, optimizer=optimizer)

    # evaluate model
    val_loss = evaluate(model=resnet_novograd_ce, loss_fn=loss_fn)
    
    scheduler.step()
    
    print(f'\n\nTraining Loss: {train_loss:.5f}')
    print(f'Val Loss: {val_loss:.5f}')

    history_resnet_novograd_ce.append((train_loss, val_loss))

    current = current + 1


Epoch 1 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=1.75857

Training Loss: 1.76961
Val Loss: 1.72659

Epoch 2 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=1.67372

Training Loss: 1.68438
Val Loss: 1.63673

Epoch 3 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=1.58073

Training Loss: 1.59077
Val Loss: 1.53326

Epoch 4 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=1.46942

Training Loss: 1.47815
Val Loss: 1.41173

Epoch 5 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=1.35215

T

### Save the model

In [None]:
torch.save(resnet_novograd_ce.state_dict(), './checkpoints/resnet_novograd_ce.pth')

## RJM loss

In [12]:
resnet_novograd_rjm = create_model()
history_resnet_novograd_rjm = []
optimizer = NovoGrad(resnet_novograd_rjm.parameters(), lr=2e-5, weight_decay=0.1)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10], gamma=0.2)
loss_fn = RJM
epochs = 20
current = 1
# for each epoch
while current <= epochs:

    print(f'\nEpoch {current} / {epochs}:')

    # train model
    train_loss = train(model=resnet_novograd_rjm, loss_fn = loss_fn, optimizer=optimizer)

    # evaluate model
    val_loss = evaluate(model=resnet_novograd_rjm, loss_fn=loss_fn)
    
    scheduler.step()
    
    print(f'\n\nTraining Loss: {train_loss:.5f}')
    print(f'Val Loss: {val_loss:.5f}')

    history_resnet_novograd_rjm.append((train_loss, val_loss))

    current = current + 1


Epoch 1 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.58230

Training Loss: 0.58599
Val Loss: 0.57761

Epoch 2 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.56410

Training Loss: 0.56770
Val Loss: 0.55679

Epoch 3 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.54057

Training Loss: 0.54404
Val Loss: 0.52746

Epoch 4 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.50732

Training Loss: 0.51036
Val Loss: 0.49058

Epoch 5 / 20:
Batch 157/157 |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.00000% complete, loss=0.47341

T

### Save the model

In [55]:
torch.save(resnet_novograd_rjm.state_dict(), './checkpoints/resnet_novograd_rjm.pth')

## Evaluation

In [16]:
from sklearn.metrics import accuracy_score, f1_score
y_true_novograd_ce = np.array([])
y_pred_novograd_ce = np.array([])
resnet_novograd_ce.eval()
with torch.no_grad():
    for batch in test_dataloader:
        imgs, labels = batch[0].to(device), batch[1].to(device)
        preds = resnet_novograd_ce(imgs).detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()
        preds_indices = preds.argmax(axis=1)
        y_true_novograd_ce = np.concatenate((y_true_novograd_ce, labels))
        y_pred_novograd_ce = np.concatenate((y_pred_novograd_ce, preds_indices))
    
print('ResNet50: Optimzier = NovoGrad, Loss = CE')
print(10*'-')    
print('Accuracy: {0:0.4f}'.format(accuracy_score(y_true_novograd_ce, y_pred_novograd_ce)))
print('Macro F1: {0:0.4f}'.format(f1_score(y_true_novograd_ce, y_pred_novograd_ce, average='macro')))

ResNet50: Optimzier = NovoGrad, Loss = CE
----------
Accuracy: 0.9000
Macro F1: 0.9017


In [17]:
y_true_novograd_rjm = np.array([])
y_pred_novograd_rjm = np.array([])
resnet_novograd_rjm.eval()
with torch.no_grad():
    for batch in test_dataloader:
        imgs, labels = batch[0].to(device), batch[1].to(device)
        preds = resnet_novograd_rjm(imgs).detach().cpu().numpy()
        labels = labels.detach().cpu().numpy()
        preds_indices = preds.argmax(axis=1)
        y_true_novograd_rjm = np.concatenate((y_true_novograd_rjm, labels))
        y_pred_novograd_rjm = np.concatenate((y_pred_novograd_rjm, preds_indices))
    
print('ResNet50: Optimzier = NovoGrad, Loss = RJM')
print(10*'-')    
print('Accuracy: {0:0.4f}'.format(accuracy_score(y_true_novograd_rjm, y_pred_novograd_rjm)))
print('Macro F1: {0:0.4f}'.format(f1_score(y_true_novograd_rjm, y_pred_novograd_rjm, average='macro')))

ResNet50: Optimzier = NovoGrad, Loss = RJM
----------
Accuracy: 0.8613
Macro F1: 0.8628


In [18]:
history_resnet_novograd_ce

[(1.7696074359300304, 1.726591405714355),
 (1.684379395772915, 1.6367316107849061),
 (1.5907694074404792, 1.533257928219406),
 (1.4781478356833422, 1.4117321305508121),
 (1.360569136828987, 1.2968722753024338),
 (1.2501271722670664, 1.18726661701816),
 (1.1525533763915345, 1.0984912266225924),
 (1.068300479561597, 1.020982341542207),
 (1.0046463363896139, 0.9622732391073091),
 (0.9563823065469463, 0.9103249969695046),
 (0.9255152381911919, 0.9037718805141852),
 (0.9164737442737289, 0.897812675911224),
 (0.9107325062079915, 0.8876433375518588),
 (0.9062128241900171, 0.8799535433505083),
 (0.8972670967855446, 0.8731199655559423),
 (0.8946993114058152, 0.8561358873947457),
 (0.885156897096847, 0.8484871495052847),
 (0.879242255836833, 0.8568448588709593),
 (0.872013505382355, 0.8536890657165954),
 (0.8697220123423104, 0.844154556461869)]

In [19]:
history_resnet_novograd_rjm

[(0.5859933427719729, 0.5776148333454614),
 (0.5677035854923087, 0.5567878678087081),
 (0.5440380473387673, 0.527462957492915),
 (0.510355845195563, 0.4905752614488739),
 (0.4762738775749818, 0.45964539859941295),
 (0.44631414922084367, 0.4290971843377364),
 (0.41921969057332586, 0.4024792419603608),
 (0.3943487968522131, 0.3800920659241325),
 (0.3756393402137429, 0.36096390840167236),
 (0.361460909539164, 0.34744530580228616),
 (0.35211009271668914, 0.345150455258615),
 (0.348885664183551, 0.3437631526000867),
 (0.3473474513903913, 0.3416255913895775),
 (0.34571239384645736, 0.3386168456172915),
 (0.3430538903946337, 0.33747130707216205),
 (0.34189168894582084, 0.329347959510822),
 (0.3395442778039845, 0.3264184131821964),
 (0.33747545617756347, 0.33259250069513163),
 (0.33564703500869947, 0.32836778322758325),
 (0.33422996045346276, 0.328383730624638)]