In [1]:
import warnings
warnings.filterwarnings('ignore')

import gc
import timm
import wandb
import random
import numpy as np

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader 
from torch.cuda.amp import autocast, GradScaler
from torchvision.datasets import ImageFolder

from multiprocessing import cpu_count
from sklearn.model_selection import train_test_split

from utils.utils import ImageLoader, TransformsCE


def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
    random.seed(seed)

#### Set Params

In [2]:
SEED = 333
seed_everything(SEED)

IMG_SIZE = (224, 224)
BATCH_SIZE = 8
LEARNING_RATE = 1e-4
EPOCHS = 10

RUN_NAME = 'mobilenetv3_small_050'
WANDB_PRJ = 'public'

WANDB_CONFIG = {
    'seed': SEED,
    'model': RUN_NAME,
    'batch_size': BATCH_SIZE,
    'learning_rate': LEARNING_RATE,  
}

#### Load Dataset and Make Loader

In [3]:
dataset = ImageFolder("./sample_datasets/cat-and-dog/training_set/training_set/")
trn_data, val_data, trn_label, val_label = train_test_split(dataset.imgs, dataset.targets, test_size=0.2, random_state=SEED)

trn_ds, val_ds = list(map(lambda x, y: ImageLoader(dataset=x, phase=y, transform=TransformsCE(IMG_SIZE)), 
                          [trn_data, val_data], ['train', 'valid']))
trn_dl, val_dl = list(map(lambda x, y: DataLoader(x, batch_size=BATCH_SIZE, num_workers=cpu_count(), shuffle=y, drop_last=True), 
                          [trn_ds, val_ds], [True, False]))

img_datasets = {'train' : trn_ds, 'valid': val_ds}
dataloaders = {'train': trn_dl, 'valid': val_dl}

dataset_sizes = {x: len(img_datasets[x]) for x in ['train', 'valid']}

#### Prepare Model

In [4]:
model = timm.create_model(RUN_NAME, pretrained=True) # pretrained mobilenetv3
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=0, last_epoch=-1, verbose=False)
criterion = nn.CrossEntropyLoss()
scaler = GradScaler()

#### Training

In [5]:
wandb.init(name=RUN_NAME, project=WANDB_PRJ, config=WANDB_CONFIG, reinit=True) # connect

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'model running on {device}')

model = model.to(device)

for e in range(EPOCHS):
    
    gc.collect()
    torch.cuda.empty_cache()
    running_log = {'epoch': e+1} # logging
    
    for phase in ['train', 'valid']: 
        running_loss = 0.0
        running_corrects = 0
        
        model.train() if phase == 'train' else model.eval()

        for idx, (features, labels) in enumerate(dataloaders[phase]):
            features = features.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
        
            with torch.set_grad_enabled(phase=='train'):
                # forward
                with autocast():
                    logits = model(features)
                    _, preds = torch.max(logits, 1)
                    loss = criterion(logits, labels)
                    
                # backward    
                if phase == 'train' and 'cuda' in device:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()
                elif phase == 'train':
                    loss.backward()
                    optimizer.step()
                        
                running_loss += loss.item() * features.size(0) # loss
                running_corrects += torch.sum(preds == labels.data).item() # acc
                        
        if phase == 'train' and e >= 10:
            scheduler.step()
    
        epoch_loss = running_loss / dataset_sizes[phase] # loss
        epoch_acc = running_corrects / dataset_sizes[phase] # acc
        
        running_log.update({f'{phase}_loss': epoch_loss, f'{phase}_acc': epoch_acc}) # logging
        print(f'epoch {e+1} {phase} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}') 
        
    wandb.log(running_log) # logging

wandb.finish()

[34m[1mwandb[0m: Currently logged in as: [33m33h002[0m. Use [1m`wandb login --relogin`[0m to force relogin


model running on cuda
epoch 1 train | Loss: 0.8787 | Acc: 0.7342
epoch 1 valid | Loss: 0.2672 | Acc: 0.8988
epoch 2 train | Loss: 0.3068 | Acc: 0.8626
epoch 2 valid | Loss: 0.1888 | Acc: 0.9325
epoch 3 train | Loss: 0.2628 | Acc: 0.8887
epoch 3 valid | Loss: 0.1667 | Acc: 0.9419
epoch 4 train | Loss: 0.2322 | Acc: 0.9033
epoch 4 valid | Loss: 0.1568 | Acc: 0.9469
epoch 5 train | Loss: 0.2315 | Acc: 0.9040
epoch 5 valid | Loss: 0.1510 | Acc: 0.9388
epoch 6 train | Loss: 0.2099 | Acc: 0.9111
epoch 6 valid | Loss: 0.1388 | Acc: 0.9513
epoch 7 train | Loss: 0.1894 | Acc: 0.9199
epoch 7 valid | Loss: 0.1413 | Acc: 0.9500
epoch 8 train | Loss: 0.1806 | Acc: 0.9235
epoch 8 valid | Loss: 0.1253 | Acc: 0.9469
epoch 9 train | Loss: 0.1774 | Acc: 0.9252
epoch 9 valid | Loss: 0.1336 | Acc: 0.9488
epoch 10 train | Loss: 0.1619 | Acc: 0.9318
epoch 10 valid | Loss: 0.1261 | Acc: 0.9507


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_acc,▁▆▆▇▇▇████
train_loss,█▂▂▂▂▁▁▁▁▁
valid_acc,▁▅▇▇▆██▇██
valid_loss,█▄▃▃▂▂▂▁▁▁

0,1
epoch,10.0
train_acc,0.93176
train_loss,0.16193
valid_acc,0.95066
valid_loss,0.12609
