In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import glob
import os
import numpy as np
from PIL import Image
import math
import sys
import random
from byol_pytorch import BYOL
from torchvision import models
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
config = {
    'device':'cuda' if torch.cuda.is_available() else 'cpu',
    'train_pth':'/data/dlcv/hw4/office/train/',
    'val_pth':'/data/dlcv/hw4/office/val/',
    'train_csv_pth':'/data/dlcv/hw4/office/train.csv',
    'val_csv_pth':'/data/dlcv/hw4/office/val.csv',
    'best_save_pth':'/data/allen/hw4model/setting_e.pth',
    'last_save_pth':'/data/allen/hw4model/setting_e_last.pth',
    'backbone_pth':'/data/allen/hw4model/longep/backbone2_last.pth',
    'bsz':16,
    'epochs':80,
    'imgsz':128,
    'numofclass':65
}
train_transform = transforms.Compose([
    transforms.Resize((config['imgsz'], config['imgsz'])),
    transforms.RandomRotation(15),
    transforms.ColorJitter(0.2, 0.2, 0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225]))
])
val_transform = transforms.Compose([
    transforms.Resize((config['imgsz'], config['imgsz'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225]))
])
if config["device"] == "cuda":
    torch.cuda.set_device(7)
print('Device used :', config['device'])
label2class = {'Alarm_Clock': 0, 'Backpack': 1, 'Batteries': 2, 'Bed': 3, 'Bike': 4, 'Bottle': 5, 'Bucket': 6, 'Calculator': 7, 'Calendar': 8, 'Candles': 9, 'Chair': 10, 'Clipboards': 11, 'Computer': 12, 
    'Couch': 13, 'Curtains': 14, 'Desk_Lamp': 15, 'Drill': 16, 'Eraser': 17, 'Exit_Sign': 18, 'Fan': 19, 'File_Cabinet': 20, 'Flipflops': 21, 'Flowers': 22, 'Folder': 23, 'Fork': 24, 'Glasses': 25,
    'Hammer': 26, 'Helmet': 27, 'Kettle': 28, 'Keyboard': 29, 'Knives': 30, 'Lamp_Shade': 31, 'Laptop': 32, 'Marker': 33, 'Monitor': 34, 'Mop': 35, 'Mouse': 36, 'Mug': 37, 'Notebook': 38,
    'Oven': 39, 'Pan': 40, 'Paper_Clip': 41, 'Pen': 42, 'Pencil': 43, 'Postit_Notes': 44, 'Printer': 45, 'Push_Pin': 46, 'Radio': 47, 'Refrigerator': 48, 'Ruler': 49, 'Scissors': 50, 'Screwdriver': 51,
    'Shelf': 52, 'Sink': 53, 'Sneakers': 54, 'Soda': 55, 'Speaker': 56, 'Spoon': 57, 'TV': 58, 'Table': 59, 'Telephone': 60, 'ToothBrush': 61, 'Toys': 62, 'Trash_Can': 63, 'Webcam': 64}
class2label = {0: 'Alarm_Clock', 1: 'Backpack', 2: 'Batteries', 3: 'Bed', 4: 'Bike', 5: 'Bottle', 6: 'Bucket', 7: 'Calculator', 8: 'Calendar', 9: 'Candles', 10: 'Chair', 11: 'Clipboards', 12: 'Computer', 
    13: 'Couch', 14: 'Curtains', 15: 'Desk_Lamp', 16: 'Drill', 17: 'Eraser', 18: 'Exit_Sign', 19: 'Fan', 20: 'File_Cabinet', 21: 'Flipflops', 22: 'Flowers', 23: 'Folder', 24: 'Fork', 25: 'Glasses', 
    26: 'Hammer', 27: 'Helmet', 28: 'Kettle', 29: 'Keyboard', 30: 'Knives', 31: 'Lamp_Shade', 32: 'Laptop', 33: 'Marker', 34: 'Monitor', 35: 'Mop', 36: 'Mouse', 37: 'Mug', 38: 'Notebook', 
    39: 'Oven', 40: 'Pan', 41: 'Paper_Clip', 42: 'Pen', 43: 'Pencil', 44: 'Postit_Notes', 45: 'Printer', 46: 'Push_Pin', 47: 'Radio', 48: 'Refrigerator', 49: 'Ruler', 50: 'Scissors', 51: 'Screwdriver', 
    52: 'Shelf', 53: 'Sink', 54: 'Sneakers', 55: 'Soda', 56: 'Speaker', 57: 'Spoon', 58: 'TV', 59: 'Table', 60: 'Telephone', 61: 'ToothBrush', 62: 'Toys', 63: 'Trash_Can', 64: 'Webcam'}

Device used : cuda


In [3]:
def save_checkpoint(checkpoint_path, model, optimizer, scheduler, ep, best_loss):
    state = {'model_state_dict': model.state_dict(),
             'optimizer_state_dict' : optimizer.state_dict(),
             'scheduler_state_dict':scheduler.state_dict(),
             'last_ep':ep,
             'best_loss':best_loss
             }
    torch.save(state, checkpoint_path)
    print('checkpoint saved to {}'.format(checkpoint_path))

def save_model_only(checkpoint_path, model):
    state = {'model_state_dict': model.state_dict(),}
    torch.save(state, checkpoint_path)
    print('model saved to {}'.format(checkpoint_path))

def load_checkpoint(checkpoint_path, device='cpu'):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    return checkpoint["model_state_dict"], checkpoint["optimizer_state_dict"], checkpoint["scheduler_state_dict"], checkpoint['last_ep'], checkpoint['best_loss']

def load_model_only(checkpoint_path, device='cpu'):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    return checkpoint["model_state_dict"]

In [4]:
class DS(Dataset):
    def __init__(self, datapath, csvpath, transform=None) -> None:
        self.transform = transform
        self.data = [] #(imgpath, imgname, #label)
        if csvpath is not None:
            if os.path.exists(csvpath):
                df = pd.read_csv(csvpath)
                self.data = [(os.path.join(datapath, name), name, label2class[label]) for name, label in zip(df['filename'], df['label'])]
            else:
                print(f"Can't find {csvpath}")
                exit(-1)
        else:
            if os.path.exists(datapath):
                paths = glob.glob(os.path.join(datapath, "*"))
                for path in paths:
                    imgname = os.path.split(path)[-1]
                    self.data.append((path, imgname, None))
            else:
                print(f"Can't open {datapath}")
                exit(-1)
        self.len = len(self.data)
        print(self.len)

    def __getitem__(self, index):
        imgpath, imgname, label = self.data[index]
        img = Image.open(imgpath)
        if self.transform:
            img = self.transform(img)
        return img, label if label is not None else img

    def __len__(self):
        return self.len

In [5]:
train_loader = DataLoader(DS(config['train_pth'], config['train_csv_pth'], transform=train_transform), batch_size=config['bsz'], shuffle=True, pin_memory=True, num_workers=4)
val_loader = DataLoader(DS(config['val_pth'], config['val_csv_pth'], transform=val_transform), batch_size=config['bsz'], pin_memory=True, num_workers=4)

3951
406


In [6]:
def BuildLabelDict(train_csv):
    label2class, class2label = {}, {}
    train_df = pd.read_csv(train_csv)
    labellist = sorted(list(dict.fromkeys([name for name in train_df['label']])))
    label2class = {label : idx for (idx, label) in enumerate(labellist)}
    class2label = {idx : label for (idx, label) in enumerate(labellist)}
    print(label2class, class2label)

In [7]:
class ClassifierC(nn.Module):
    def __init__(self, backbonepth=None) -> None:
        super().__init__()
        self.backbone = models.resnet50(weights=None)
        if backbonepth is not None:
            self.backbone.load_state_dict(load_model_only(backbonepth, device=config['device']))
            print(f'load backbone from {backbonepth}')
        self.backbone = nn.Sequential(*list(self.backbone.children())[:-1])
        for param in (self.backbone).parameters():
            param.requires_grad = False
        self.classifier = nn.Linear(2048, config['numofclass'])
    
    def forward(self, x):
        y = self.backbone(x).flatten(1)
        return self.classifier(y)

In [8]:
model = ClassifierC(config['backbone_pth']).to(config['device'])
opt = torch.optim.RAdam(model.parameters(), lr=1.e-3, weight_decay=1.5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=config['epochs'])
#print(model)

load backbone from /data/allen/hw4model/longep/backbone2_last.pth


In [9]:
best_acc = 0.
criterion = nn.CrossEntropyLoss()
for ep in range(config['epochs']):
    model.train()
    train_loss, train_acc, val_loss, val_acc = 0., 0., 0., 0.
    for idx, (img, label) in enumerate(train_loader):
        img = img.to(config['device'])
        logit = model(img).cpu()
        loss = criterion(logit, label)
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss += loss.item()
        train_acc += (logit.argmax(-1) == label).float().sum()
    train_loss /= idx
    train_acc /= len(train_loader.dataset)

    model.eval()
    with torch.no_grad():
        for idx, (img, label) in enumerate(val_loader):
            img = img.to(config['device'])
            logit = model(img).cpu()
            loss = criterion(logit, label)
            val_loss += loss.item()
            val_acc += (logit.argmax(-1) == label).float().sum()
        val_loss /= idx
        val_acc /= len(val_loader.dataset)
    
    if val_acc > best_acc:
        save_model_only(config['best_save_pth'], model)
        best_acc = val_acc
    save_checkpoint(config['last_save_pth'], model, opt, scheduler, ep, best_acc)
    print(f"Epoch [{ep+1}/{config['epochs']}] train_loss : {train_loss} train_acc : {train_acc:.4%} val_loss : {val_loss} val_acc : {val_acc:.4%}")


model saved to /data/allen/hw4model/setting_e.pth
checkpoint saved to /data/allen/hw4model/setting_e_last.pth
Epoch [1/80] train_loss : 4.139132396961616 train_acc : 6.0997% val_loss : 4.186125993728638 val_acc : 11.5764%
model saved to /data/allen/hw4model/setting_e.pth
checkpoint saved to /data/allen/hw4model/setting_e_last.pth
Epoch [2/80] train_loss : 3.9853234824126327 train_acc : 11.2883% val_loss : 3.995730094909668 val_acc : 17.2414%
model saved to /data/allen/hw4model/setting_e.pth
checkpoint saved to /data/allen/hw4model/setting_e_last.pth
Epoch [3/80] train_loss : 3.8276389788805956 train_acc : 14.9329% val_loss : 3.8134273719787597 val_acc : 22.1675%
checkpoint saved to /data/allen/hw4model/setting_e_last.pth
Epoch [4/80] train_loss : 3.670520966615134 train_acc : 18.9572% val_loss : 3.6421076393127443 val_acc : 21.4286%
model saved to /data/allen/hw4model/setting_e.pth
checkpoint saved to /data/allen/hw4model/setting_e_last.pth
Epoch [5/80] train_loss : 3.5418625100841368 

## Finetune

In [None]:
model_state = load_model_only(config['best_save_pth'], config['device'])
maxep = 100
model = ClassifierC().to(config['device'])
model.load_state_dict(model_state)
opt = torch.optim.RAdam(model.parameters(), lr=4.e-5, weight_decay=5.e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=maxep)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
best_acc = 0.408867
for ep in range(maxep):
    model.train()
    train_loss, train_acc, val_loss, val_acc = 0., 0., 0., 0.
    for idx, (img, label) in enumerate(train_loader):
        img = img.to(config['device'])
        logit = model(img).cpu()
        loss = criterion(logit, label)
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss += loss.item()
        train_acc += (logit.argmax(-1) == label).float().sum()
    scheduler.step()
    train_loss /= idx
    train_acc /= len(train_loader.dataset)

    model.eval()
    with torch.no_grad():
        for idx, (img, label) in enumerate(val_loader):
            img = img.to(config['device'])
            logit = model(img).cpu()
            loss = criterion(logit, label)
            val_loss += loss.item()
            val_acc += (logit.argmax(-1) == label).float().sum()
        val_loss /= idx
        val_acc /= len(val_loader.dataset)
    
    if val_acc > best_acc:
        save_model_only(config['best_save_pth'], model)
        best_acc = val_acc
    save_checkpoint(config['last_save_pth'], model, opt, scheduler, ep, best_acc)
    print(f"Epoch [{ep+1}/{maxep}] train_loss : {train_loss:.6f} train_acc : {train_acc:.6%} val_loss : {val_loss:.6f} val_acc : {val_acc:.6%}")