In [1]:
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
import glob
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Training hyper-parameters -------------------------------------------
train_batchsz, val_batchsz = 64, 64  # DataLoader batch sizes
epoch = 70                           # number of passes over the training set
lr = 1.2e-3                          # initial SGD learning rate
momentum = 0.92                      # SGD momentum
numofclass = 50                      # output size of the classifier head

# --- Data locations and output artefacts ---------------------------------
hw1train_path = "/data/dlcv/hw1/hw1_data/p1_data/train_50/"
hw1val_path = "/data/dlcv/hw1/hw1_data/p1_data/val_50/"
checkpoint_path = "resnext50.pth"
log_path = "resnext50_log.txt"

# --- Device selection ----------------------------------------------------
gpu_index = 2  # preferred CUDA device on the training host
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    # Selecting an ordinal that does not exist raises a CUDA error, so only
    # switch when the preferred GPU is really present on this machine.
    if gpu_index < torch.cuda.device_count():
        torch.cuda.set_device(gpu_index)
    else:
        print('GPU {} not found, staying on CUDA device {}'.format(
            gpu_index, torch.cuda.current_device()))
print('Device used:', device)

Device used: cuda


In [3]:
class Mydataset(Dataset):
    """Image-classification dataset read from a flat directory of PNG files.

    Every ``*.png`` file directly inside ``dirpath`` is one sample.  Its
    integer class label is the part of the file name before the first
    underscore (e.g. ``12_345.png`` -> label ``12``).

    Attributes:
        data: list of ``(file_path, label)`` tuples, sorted by file path.
        transform: optional callable applied to the PIL image on access.
        len: number of samples (same value ``len(dataset)`` returns).
    """

    def __init__(self, dirpath, transform=None):
        """Index the PNG files in ``dirpath``.

        Args:
            dirpath: directory that contains the ``<label>_<anything>.png`` files.
            transform: optional callable applied to each image in ``__getitem__``.

        Raises:
            ValueError: if a file name does not start with an integer label.
        """
        self.transform = transform
        # glob returns files in filesystem order, which is not stable across
        # machines; sorting makes sample indices reproducible.
        filenames = sorted(glob.glob(os.path.join(dirpath, "*.png")))
        self.data = [
            (filename, int(os.path.basename(filename).split("_")[0]))
            for filename in filenames
        ]
        self.len = len(self.data)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The image is always converted to 3-channel RGB before the optional
        transform runs, so grayscale, palette or RGBA PNGs cannot produce
        tensors with an unexpected channel count.
        """
        image_fn, label = self.data[index]
        # Image.open is lazy and keeps the file handle open; convert() forces
        # the pixel data to load and the context manager closes the file.
        with Image.open(image_fn) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label

    def __len__(self):
        """Return the total number of samples in the dataset."""
        return self.len

In [4]:
# Training pipeline: bicubic resize to 224x224, then light augmentation
# (random horizontal flip, small rotation, random sharpening) before the
# image is turned into a tensor.
train_tfm = transforms.Compose(
    [
        transforms.Resize(
            (224, 224),
            interpolation=transforms.InterpolationMode.BICUBIC,
        ),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=5),
        transforms.RandomAdjustSharpness(sharpness_factor=1.5, p=0.5),
        transforms.ToTensor(),
    ]
)

# Validation pipeline: same resize, no augmentation.
val_tfm = transforms.Compose(
    [
        transforms.Resize(
            (224, 224),
            interpolation=transforms.InterpolationMode.BICUBIC,
        ),
        transforms.ToTensor(),
    ]
)

trainset = Mydataset(hw1train_path, transform=train_tfm)
valset = Mydataset(hw1val_path, transform=val_tfm)

# Only the training loader shuffles; validation keeps dataset order.
trainset_loader = DataLoader(
    trainset,
    batch_size=train_batchsz,
    shuffle=True,
    num_workers=1,
)
valset_loader = DataLoader(
    valset,
    batch_size=val_batchsz,
    shuffle=False,
    num_workers=1,
)

In [5]:
def save_checkpoint(checkpoint_path, model, optimizer):
    """Write the model and optimizer state dicts to ``checkpoint_path``.

    The file holds a dict with the keys ``'model_state_dict'`` and
    ``'optimizer_state_dict'``.  A confirmation line is printed afterwards.
    """
    payload = {}
    payload['model_state_dict'] = model.state_dict()
    payload['optimizer_state_dict'] = optimizer.state_dict()
    torch.save(payload, checkpoint_path)
    message = 'model saved to {}'.format(checkpoint_path)
    print(message)

In [6]:
def loadbestacc(log_file=None, checkpoint_name=None):
    """Read the best validation accuracy recorded for a checkpoint.

    The log is expected to contain lines of the form
    ``<checkpoint name> : <accuracy>%`` (the format ``train`` writes).  The
    first line whose leading field equals ``checkpoint_name`` is used.

    Args:
        log_file: path of the log to read; defaults to the module-level
            ``log_path``.
        checkpoint_name: checkpoint identifier to look for; defaults to the
            module-level ``checkpoint_path``.

    Returns:
        The recorded accuracy as a fraction in [0, 1] (e.g. ``85.1%`` ->
        ``0.851``), or ``0.0`` when the log file does not exist or has no
        entry for the checkpoint.

    Raises:
        ValueError: if the matching line does not end in a number.
    """
    if log_file is None:
        log_file = log_path
    if checkpoint_name is None:
        checkpoint_name = checkpoint_path

    if not os.path.exists(log_file):
        return 0.

    with open(log_file, "r") as f:
        for line in f:
            # Splitting on any whitespace also drops a trailing newline, which
            # would otherwise stay glued to the "%" and break float().
            fields = line.split()
            if fields and fields[0] == checkpoint_name:
                return float(fields[-1].strip("%")) / 100.
    return 0.

In [7]:
def _run_epoch(model, loader, criterion, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise it is put in eval mode and evaluated without
    gradient tracking.  Both results are plain Python floats averaged over
    the samples seen.
    """
    is_training = optimizer is not None
    model.train(is_training)
    loss_sum, num_correct, num_seen = 0., 0, 0
    with torch.set_grad_enabled(is_training):
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            batch_size = target.size(0)
            # train() passes a mean-reduced criterion, so the batch loss must
            # be weighted by the batch size before it is averaged per sample.
            loss_sum += loss.item() * batch_size
            num_correct += (output.argmax(-1) == target).sum().item()
            num_seen += batch_size
    num_seen = max(num_seen, 1)  # avoid dividing by zero on an empty loader
    return loss_sum / num_seen, num_correct / num_seen


def train(model):
    """Train ``model`` with SGD and checkpoint it whenever validation improves.

    Runs ``epoch`` epochs over ``trainset_loader`` with a MultiStepLR schedule
    (learning rate multiplied by 0.6 at epochs 10, 20 and 25), evaluating on
    ``valset_loader`` after each one.  The starting best accuracy is read with
    ``loadbestacc``; whenever the validation accuracy beats it, the model and
    optimizer are saved to ``checkpoint_path`` and ``log_path`` is rewritten
    with the new best value.

    The printed losses are per-sample means; the accuracies are fractions
    formatted as percentages.
    """
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    lrscheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,20,25], gamma=0.6)
    criterion = nn.CrossEntropyLoss()
    best_acc = loadbestacc()
    print("best_acc = {:.3%}".format(best_acc))
    for ep in range(epoch):
        train_loss, train_acc = _run_epoch(model, trainset_loader, criterion, optimizer)
        lrscheduler.step()
        val_loss, val_acc = _run_epoch(model, valset_loader, criterion)
        print("Epoch {} train loss = {:.6f}, train acc = {:.3%}, valid loss = {:.6f}, valid acc = {:.3%}".format(ep + 1, train_loss, train_acc, val_loss, val_acc))
        if val_acc > best_acc:
            save_checkpoint(checkpoint_path, model, optimizer)
            # The log only ever holds the single best entry, hence mode "w".
            with open(log_path, "w") as f:
                f.write("{} : {:.3%}".format(checkpoint_path, val_acc))
            best_acc = val_acc

## Pretrained

In [8]:
# Load ResNeXt-50 (32x4d) with the default pretrained weights, then swap the
# final fully-connected layer for one that emits `numofclass` logits.
resnext50 = torchvision.models.resnext50_32x4d(weights='DEFAULT')
resnext50.fc = nn.Linear(
    in_features=resnext50.fc.in_features,
    out_features=numofclass,
)
# nn.Module.to moves the parameters in place and returns the module itself.
resnext50.to(device)
print(resnext50)
# Training is left switched off here; uncomment the call to run it.
# train(resnext50)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1

## Finetune

In [None]:
# Rebuild the same architecture with random weights; the trained values are
# restored from the checkpoint below.
finetune_model = torchvision.models.resnext50_32x4d()
finetune_model.fc = nn.Linear(finetune_model.fc.in_features, numofclass)

# map_location lets a checkpoint that was saved from one GPU be loaded on
# whatever device this session uses (including CPU-only machines).
checkpoint = torch.load(checkpoint_path, map_location=device)
finetune_model.load_state_dict(checkpoint["model_state_dict"])
finetune_model.to(device)

# SGD cannot be constructed without the parameters it optimises.  It is built
# after the model is on its final device so the restored optimizer state ends
# up there too.  The lr/momentum passed here are placeholders:
# load_state_dict replaces the param-group settings with the saved ones.
optimizer = optim.SGD(finetune_model.parameters(), lr=lr, momentum=momentum)
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
print(finetune_model.state_dict(), optimizer.state_dict())