## Import Libraries

In [4]:
from __future__ import print_function

import os
import time
import random
import zipfile
from itertools import chain

import timm
import numpy as np
from PIL import Image
from tqdm.notebook import tqdm
from collections import OrderedDict

import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
from torchvision import models
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

from LATransformer.model import ClassBlock, LATransformer
from LATransformer.utils import save_network, update_summary

os.environ['CUDA_VISIBLE_DEVICES']='0'
#os.environ["PYTORCH_NO_CUDA_MEMORY_CACHING"]="1"
#device = "cpu"
device = "cuda"


ModuleNotFoundError: No module named 'LATransformer'

### Set Config Parameters

In [2]:
batch_size = 32
num_epochs = 30
lr = 3e-4
gamma = 0.7
unfreeze_after=2
lr_decay=.8
lmbd = 8

## Load Data

In [3]:
transform_train_list = [
    transforms.Resize((224,224), interpolation=3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
transform_val_list = [
    transforms.Resize(size=(224,224),interpolation=3), #Image.BICUBIC
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]
data_transforms = {
'train': transforms.Compose( transform_train_list ),
'val': transforms.Compose(transform_val_list),
}



In [4]:
image_datasets = {}
data_dir = "data/Market-Pytorch/Market/"

image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train'),
                                          data_transforms['train'])
image_datasets['val'] = datasets.ImageFolder(os.path.join(data_dir, 'val'),
                                          data_transforms['val'])
train_loader = DataLoader(dataset = image_datasets['train'], batch_size=batch_size, shuffle=True )
valid_loader = DataLoader(dataset = image_datasets['val'], batch_size=batch_size, shuffle=True)
# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=opt.batchsize,
#                                              shuffle=True, num_workers=8, pin_memory=True) # 8 workers may work faster
#               for x in ['train', 'val']}
# dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
print(len(class_names))

751


## Load Model

In [5]:
# Load pre-trained ViT
vit_base = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=751)
vit_base= vit_base.to(device)
vit_base.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=3072, out_features=768, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn



###  Train

In [6]:
class AverageMeter:
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [7]:
def validate(model, loader, loss_fn):
    batch_time_m = AverageMeter()
    losses_m = AverageMeter()
    top1_m = AverageMeter()
    top5_m = AverageMeter()

    model.eval()
    epoch_accuracy = 0
    epoch_loss = 0
    end = time.time()
    last_idx = len(loader) - 1
    
    running_loss = 0.0
    running_corrects = 0.0

    with torch.no_grad():
        for input, target in tqdm(loader):

            input, target = input.to(device), target.to(device)
            
            output = model(input)
            
            score = 0.0
            sm = nn.Softmax(dim=1)
            for k, v in output.items():
                score += sm(output[k])
            _, preds = torch.max(score.data, 1)

            loss = 0.0
            for k,v in output.items():
                loss += loss_fn(output[k], target)


            batch_time_m.update(time.time() - end)
            acc = (preds == target.data).float().mean()
            epoch_loss += loss/len(loader)
            epoch_accuracy += acc / len(loader)
            
            print(f"Epoch : {epoch+1} - val_loss : {epoch_loss:.4f} - val_acc: {epoch_accuracy:.4f}", end="\r")
    print()    
    metrics = OrderedDict([('val_loss', epoch_loss.data.item()), ("val_accuracy", epoch_accuracy.data.item())])


    return metrics

In [8]:
def train_one_epoch(
        epoch, model, loader, optimizer, loss_fn,
        lr_scheduler=None, saver=None, output_dir='', 
        loss_scaler=None, model_ema=None, mixup_fn=None):

 

    
    batch_time_m = AverageMeter()
    data_time_m = AverageMeter()
    losses_m = AverageMeter()

    model.train()
    epoch_accuracy = 0
    epoch_loss = 0
    end = time.time()
    last_idx = len(loader) - 1
    num_updates = epoch * len(loader)
    running_loss = 0.0
    running_corrects = 0.0

    for data, target in tqdm(loader):
        data, target = data.to(device), target.to(device)

            
        data_time_m.update(time.time() - end)

        optimizer.zero_grad()
        output = model(data)
        score = 0.0
        sm = nn.Softmax(dim=1)
        for k, v in output.items():
            score += sm(output[k])
        _, preds = torch.max(score.data, 1)
        
        loss = 0.0
        for k,v in output.items():
            loss += loss_fn(output[k], target)
        loss.backward()

        optimizer.step()

        batch_time_m.update(time.time() - end)
        
#         print(preds, target.data)
        acc = (preds == target.data).float().mean()
        
#         print(acc)
        epoch_loss += loss/len(loader)
        epoch_accuracy += acc / len(loader)
#         if acc:
#             print(acc, epreds, target.data)
        print(
    f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f}"
, end="\r")

    print()

    return OrderedDict([('train_loss', epoch_loss.data.item()), ("train_accuracy", epoch_accuracy.data.item())])


In [9]:
def freeze_all_blocks(model):
    frozen_blocks = 12
    for block in model.model.blocks[:frozen_blocks]:
        for param in block.parameters():
            param.requires_grad=False
    

In [10]:
def unfreeze_blocks(model, amount= 1):
    
    for block in model.model.blocks[11-amount:]:
        for param in block.parameters():
            param.requires_grad=True
    return model

## Training Loop

In [11]:
# Create LA Transformer
model = LATransformer(vit_base, lmbd).to(device)
print(model.eval())

# loss function
criterion = nn.CrossEntropyLoss()

# optimizer
optimizer = optim.Adam(model.parameters(),weight_decay=5e-4, lr=lr)

# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
freeze_all_blocks(model)

LATransformer(
  (model): VisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      (norm): Identity()
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): Sequential(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU()
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
      

In [12]:
best_acc = 0.0
y_loss = {} # loss history
y_loss['train'] = []
y_loss['val'] = []
y_err = {}
y_err['train'] = []
y_err['val'] = []
print("training...")
output_dir = ""
best_acc = 0
name = "la_with_lmbd_{}".format(lmbd)

try:
    os.mkdir("model/" + name)

except:
    pass
output_dir = "model/" + name
unfrozen_blocks = 0

for epoch in range(num_epochs):

    if epoch%unfreeze_after==0:
        unfrozen_blocks += 1
        model = unfreeze_blocks(model, unfrozen_blocks)
        optimizer.param_groups[0]['lr'] *= lr_decay 
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print("Unfrozen Blocks: {}, Current lr: {}, Trainable Params: {}".format(unfrozen_blocks, 
                                                                             optimizer.param_groups[0]['lr'], 
                                                                             trainable_params))

    train_metrics = train_one_epoch(
        epoch, model, train_loader, optimizer, criterion,
        lr_scheduler=None, saver=None)

    eval_metrics = validate(model, valid_loader, criterion)


    # update summary
    update_summary(epoch, train_metrics, eval_metrics, os.path.join(output_dir, 'summary.csv'),
                   write_header=True)

    # deep copy the model
    last_model_wts = model.state_dict()
    if eval_metrics['val_accuracy'] > best_acc:
        best_acc = eval_metrics['val_accuracy']
        save_network(model, epoch,name)
        print("SAVED!")


print ("training finished.")

training...
Unfrozen Blocks: 1, Current lr: 0.00023999999999999998, Trainable Params: 20962817


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 1 - loss : 83.1845 - acc: 0.0561


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 1 - val_loss : 78.0688 - val_acc: 0.0383
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 2 - loss : 62.8372 - acc: 0.1727


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 2 - val_loss : 63.7556 - val_acc: 0.1291
SAVED!
Unfrozen Blocks: 2, Current lr: 0.000192, Trainable Params: 28050689


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 3 - loss : 47.5584 - acc: 0.3567


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 3 - val_loss : 52.2792 - val_acc: 0.2585
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 4 - loss : 35.6833 - acc: 0.5379


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 4 - val_loss : 41.8731 - val_acc: 0.4122
SAVED!
Unfrozen Blocks: 3, Current lr: 0.00015360000000000002, Trainable Params: 35138561


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 5 - loss : 26.2950 - acc: 0.6881


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 5 - val_loss : 33.0956 - val_acc: 0.5062
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 6 - loss : 18.6836 - acc: 0.8078


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 6 - val_loss : 25.6590 - val_acc: 0.6299
SAVED!
Unfrozen Blocks: 4, Current lr: 0.00012288000000000002, Trainable Params: 42226433


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 7 - loss : 13.3700 - acc: 0.8816


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 7 - val_loss : 20.2436 - val_acc: 0.7272
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 8 - loss : 9.2728 - acc: 0.9376


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 8 - val_loss : 15.9082 - val_acc: 0.7911
SAVED!
Unfrozen Blocks: 5, Current lr: 9.830400000000001e-05, Trainable Params: 49314305


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 9 - loss : 6.6554 - acc: 0.9654


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 9 - val_loss : 12.7681 - val_acc: 0.8325
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 10 - loss : 4.7406 - acc: 0.9820


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 10 - val_loss : 9.5009 - val_acc: 0.8773
SAVED!
Unfrozen Blocks: 6, Current lr: 7.864320000000001e-05, Trainable Params: 56402177


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 11 - loss : 3.4629 - acc: 0.9910


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 11 - val_loss : 8.3553 - val_acc: 0.8944
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 12 - loss : 2.7161 - acc: 0.9939


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 12 - val_loss : 7.1852 - val_acc: 0.9062
SAVED!
Unfrozen Blocks: 7, Current lr: 6.291456000000001e-05, Trainable Params: 63490049


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 13 - loss : 2.0961 - acc: 0.9965


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 13 - val_loss : 6.2077 - val_acc: 0.9230
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 14 - loss : 2.0524 - acc: 0.9966


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 14 - val_loss : 6.2184 - val_acc: 0.9178
Unfrozen Blocks: 8, Current lr: 5.0331648000000016e-05, Trainable Params: 70577921


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 15 - loss : 1.5178 - acc: 0.9983


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 15 - val_loss : 5.9309 - val_acc: 0.9204


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 16 - loss : 1.1886 - acc: 0.9994


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 16 - val_loss : 5.1447 - val_acc: 0.9306
SAVED!
Unfrozen Blocks: 9, Current lr: 4.026531840000002e-05, Trainable Params: 77665793


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 17 - loss : 0.9901 - acc: 0.9995


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 17 - val_loss : 4.9131 - val_acc: 0.9280


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 18 - loss : 0.8655 - acc: 0.9995


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 18 - val_loss : 4.7907 - val_acc: 0.9388
SAVED!
Unfrozen Blocks: 10, Current lr: 3.221225472000002e-05, Trainable Params: 84753665


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 19 - loss : 0.7778 - acc: 0.9997


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 19 - val_loss : 4.6099 - val_acc: 0.9386


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 20 - loss : 0.7184 - acc: 0.9997


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 20 - val_loss : 4.4937 - val_acc: 0.9362
Unfrozen Blocks: 11, Current lr: 2.5769803776000016e-05, Trainable Params: 91841537


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 21 - loss : 0.6598 - acc: 0.9998


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 21 - val_loss : 4.4499 - val_acc: 0.9464
SAVED!


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 22 - loss : 0.6169 - acc: 0.9997


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 22 - val_loss : 3.9936 - val_acc: 0.9479
SAVED!
Unfrozen Blocks: 12, Current lr: 2.0615843020800013e-05, Trainable Params: 91841537


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 23 - loss : 0.5782 - acc: 0.9997


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 23 - val_loss : 4.5400 - val_acc: 0.9362


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 24 - loss : 0.5359 - acc: 0.9998


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 24 - val_loss : 4.4945 - val_acc: 0.9362
Unfrozen Blocks: 13, Current lr: 1.649267441664001e-05, Trainable Params: 91841537


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 25 - loss : 0.5096 - acc: 0.9998


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 25 - val_loss : 4.3318 - val_acc: 0.9412


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 26 - loss : 0.4890 - acc: 0.9999


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 26 - val_loss : 4.4599 - val_acc: 0.9412
Unfrozen Blocks: 14, Current lr: 1.319413953331201e-05, Trainable Params: 91841537


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 27 - loss : 0.4726 - acc: 0.9998


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 27 - val_loss : 4.1944 - val_acc: 0.9398


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 28 - loss : 0.4616 - acc: 0.9998


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 28 - val_loss : 4.4745 - val_acc: 0.9321
Unfrozen Blocks: 15, Current lr: 1.0555311626649608e-05, Trainable Params: 91841537


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 29 - loss : 0.4496 - acc: 0.9998


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 29 - val_loss : 4.4600 - val_acc: 0.9344


  0%|          | 0/381 [00:00<?, ?it/s]

Epoch : 30 - loss : 0.4341 - acc: 0.9999


  0%|          | 0/24 [00:00<?, ?it/s]

Epoch : 30 - val_loss : 4.2006 - val_acc: 0.9412
