In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import glob
import os
import numpy as np
from PIL import Image
import math
import sys
import random
from byol_pytorch import BYOL
from torchvision import models
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
config = {
    'device':'cuda' if torch.cuda.is_available() else 'cpu',
    'train_pth':'/data/dlcv/hw4/office/train/',
    'val_pth':'/data/dlcv/hw4/office/val/',
    'train_csv_pth':'/data/dlcv/hw4/office/train.csv',
    'val_csv_pth':'/data/dlcv/hw4/office/val.csv',
    'save_pth':'/data/allen/hw4model/setting_a.pth',
    'bsz':32,
    'lr':1.e-3,
    'epochs':100,
    'imgsz':128,
    'numofclass':65
}
backbone_transform = transforms.Compose([
    transforms.Resize((config['imgsz'], config['imgsz'])),
    transforms.CenterCrop(config['imgsz']),
    transforms.ToTensor(),
    transforms.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]), std=torch.tensor([0.229, 0.224, 0.225]))
])
if config["device"] == "cuda":
    torch.cuda.set_device(5)
print('Device used :', config['device'])
label2class = {'Alarm_Clock': 0, 'Backpack': 1, 'Batteries': 2, 'Bed': 3, 'Bike': 4, 'Bottle': 5, 'Bucket': 6, 'Calculator': 7, 'Calendar': 8, 'Candles': 9, 'Chair': 10, 'Clipboards': 11, 'Computer': 12, 
    'Couch': 13, 'Curtains': 14, 'Desk_Lamp': 15, 'Drill': 16, 'Eraser': 17, 'Exit_Sign': 18, 'Fan': 19, 'File_Cabinet': 20, 'Flipflops': 21, 'Flowers': 22, 'Folder': 23, 'Fork': 24, 'Glasses': 25,
    'Hammer': 26, 'Helmet': 27, 'Kettle': 28, 'Keyboard': 29, 'Knives': 30, 'Lamp_Shade': 31, 'Laptop': 32, 'Marker': 33, 'Monitor': 34, 'Mop': 35, 'Mouse': 36, 'Mug': 37, 'Notebook': 38,
    'Oven': 39, 'Pan': 40, 'Paper_Clip': 41, 'Pen': 42, 'Pencil': 43, 'Postit_Notes': 44, 'Printer': 45, 'Push_Pin': 46, 'Radio': 47, 'Refrigerator': 48, 'Ruler': 49, 'Scissors': 50, 'Screwdriver': 51,
    'Shelf': 52, 'Sink': 53, 'Sneakers': 54, 'Soda': 55, 'Speaker': 56, 'Spoon': 57, 'TV': 58, 'Table': 59, 'Telephone': 60, 'ToothBrush': 61, 'Toys': 62, 'Trash_Can': 63, 'Webcam': 64}
class2label = {0: 'Alarm_Clock', 1: 'Backpack', 2: 'Batteries', 3: 'Bed', 4: 'Bike', 5: 'Bottle', 6: 'Bucket', 7: 'Calculator', 8: 'Calendar', 9: 'Candles', 10: 'Chair', 11: 'Clipboards', 12: 'Computer', 
    13: 'Couch', 14: 'Curtains', 15: 'Desk_Lamp', 16: 'Drill', 17: 'Eraser', 18: 'Exit_Sign', 19: 'Fan', 20: 'File_Cabinet', 21: 'Flipflops', 22: 'Flowers', 23: 'Folder', 24: 'Fork', 25: 'Glasses', 
    26: 'Hammer', 27: 'Helmet', 28: 'Kettle', 29: 'Keyboard', 30: 'Knives', 31: 'Lamp_Shade', 32: 'Laptop', 33: 'Marker', 34: 'Monitor', 35: 'Mop', 36: 'Mouse', 37: 'Mug', 38: 'Notebook', 
    39: 'Oven', 40: 'Pan', 41: 'Paper_Clip', 42: 'Pen', 43: 'Pencil', 44: 'Postit_Notes', 45: 'Printer', 46: 'Push_Pin', 47: 'Radio', 48: 'Refrigerator', 49: 'Ruler', 50: 'Scissors', 51: 'Screwdriver', 
    52: 'Shelf', 53: 'Sink', 54: 'Sneakers', 55: 'Soda', 56: 'Speaker', 57: 'Spoon', 58: 'TV', 59: 'Table', 60: 'Telephone', 61: 'ToothBrush', 62: 'Toys', 63: 'Trash_Can', 64: 'Webcam'}

Device used : cuda


In [3]:
def save_checkpoint(checkpoint_path, model, optimizer):
    state = {'model_state_dict': model.state_dict(),
             'optimizer_state_dict' : optimizer.state_dict()}
    torch.save(state, checkpoint_path)
    print('model saved to {}'.format(checkpoint_path))
    
def load_checkpoint(checkpoint_path, device='cpu'):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    return checkpoint["model_state_dict"], checkpoint["optimizer_state_dict"]

In [4]:
class DS(Dataset):
    def __init__(self, datapath, csvpath, transform=None) -> None:
        self.transform = transform
        self.data = [] #(imgpath, imgname, #label)
        if csvpath is not None:
            if os.path.exists(csvpath):
                df = pd.read_csv(csvpath)
                self.data = [(os.path.join(datapath, name), name, label2class[label]) for name, label in zip(df['filename'], df['label'])]
            else:
                print(f"Can't find {csvpath}")
                exit(-1)
        else:
            if os.path.exists(datapath):
                paths = glob.glob(os.path.join(datapath, "*"))
                for path in paths:
                    imgname = os.path.split(path)[-1]
                    self.data.append((path, imgname, None))
            else:
                print(f"Can't open {datapath}")
                exit(-1)
        self.len = len(self.data)
        print(self.len)

    def __getitem__(self, index):
        imgpath, imgname, label = self.data[index]
        img = Image.open(imgpath)
        if self.transform:
            img = self.transform(img)
        return img, label if label is not None else img

    def __len__(self):
        return self.len

In [5]:
train_loader = DataLoader(DS(config['train_pth'], config['train_csv_pth'], transform=backbone_transform), batch_size=config['bsz'], shuffle=True, pin_memory=True)
val_loader = DataLoader(DS(config['val_pth'], config['val_csv_pth'], transform=backbone_transform), batch_size=config['bsz'], pin_memory=True)

3951
406


In [6]:
def BuildLabelDict(train_csv):
    label2class, class2label = {}, {}
    train_df = pd.read_csv(train_csv)
    labellist = sorted(list(dict.fromkeys([name for name in train_df['label']])))
    label2class = {label : idx for (idx, label) in enumerate(labellist)}
    class2label = {idx : label for (idx, label) in enumerate(labellist)}
    print(label2class, class2label)

In [7]:
class ClassifierA(nn.Module):
    def __init__(self, backbone_pth=None) -> None:
        super().__init__()
        self.backbone = models.resnet50(weights=None)
        self.backbone.fc = nn.Linear(2048, config['numofclass'])
    
    def forward(self, x):
        return self.backbone(x)

In [8]:
model = ClassifierA().to(config['device'])
opt = torch.optim.Adam(model.parameters(), lr=config['lr'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(opt, T_max=config['epochs'])
print(model)

ClassifierA(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
     

In [9]:
best_acc = 0.
criterion = nn.CrossEntropyLoss()
for ep in range(config['epochs']):
    model.train()
    train_loss, train_acc, val_loss, val_acc = 0., 0., 0., 0.
    for idx, (img, label) in enumerate(train_loader):
        img = img.to(config['device'])
        logit = model(img).cpu()
        loss = criterion(logit, label)
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss += loss.item()
        train_acc += (logit.argmax(-1) == label).float().sum()
    scheduler.step()
    train_loss /= idx
    train_acc /= len(train_loader.dataset)

    model.eval()
    with torch.no_grad():
        for idx, (img, label) in enumerate(val_loader):
            img = img.to(config['device'])
            logit = model(img).cpu()
            loss = criterion(logit, label)
            val_loss += loss.item()
            val_acc += (logit.argmax(-1) == label).float().sum()
        val_loss /= idx
        val_acc /= len(val_loader.dataset)

    if val_acc > best_acc:
        save_checkpoint(config['save_pth'], model, opt)
        best_acc = val_acc
    print(f"Epoch [{ep+1}/{config['epochs']}] train_loss : {train_loss} train_acc : {train_acc:.2%} val_loss : {val_loss} val_acc : {val_acc:.2%}")


model saved to /data/allen/hw4model/setting_a.pth
Epoch [1/100] train_loss : 4.520236540616043 train_acc : 4.25% val_loss : 6.559268613656362 val_acc : 6.16%
model saved to /data/allen/hw4model/setting_a.pth
Epoch [2/100] train_loss : 4.014544576164184 train_acc : 6.40% val_loss : 4.225553035736084 val_acc : 7.14%
Epoch [3/100] train_loss : 3.8990919977668823 train_acc : 8.02% val_loss : 4.23883597056071 val_acc : 7.14%
model saved to /data/allen/hw4model/setting_a.pth
Epoch [4/100] train_loss : 3.8321592865920646 train_acc : 9.36% val_loss : 4.488479673862457 val_acc : 11.08%
Epoch [5/100] train_loss : 3.7840810868798234 train_acc : 10.17% val_loss : 4.353418409824371 val_acc : 10.10%
Epoch [6/100] train_loss : 3.685094282879093 train_acc : 12.15% val_loss : 4.887636800607045 val_acc : 10.10%
model saved to /data/allen/hw4model/setting_a.pth
Epoch [7/100] train_loss : 3.5832484892713343 train_acc : 13.06% val_loss : 3.7691844701766968 val_acc : 14.53%
model saved to /data/allen/hw4mod

KeyboardInterrupt: 