这里我们使用VGGNet测试Transfer Learning.

In [1]:
from hdd.device.utils import get_device
from hdd.dataset.imagenette_in_memory import ImagenetteInMemory

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms

# Root directory of the training data (passed as `root=` to the dataset loaders).
DATA_ROOT = "~/workspace/hands-dirty-on-dl/dataset"
# Root directory for TensorBoard logs.
# NOTE(review): this is the same path as DATA_ROOT — looks like a copy-paste;
# confirm whether a dedicated TensorBoard directory was intended.
TENSORBOARD_ROOT = "~/workspace/hands-dirty-on-dl/dataset"
# Directory where torch.hub stores downloaded pretrained model weights.
TORCH_HUB_PATH = "~/workspace/hands-dirty-on-dl/pretrained_models"
torch.hub.set_dir(TORCH_HUB_PATH)
# Pick the most suitable training device from the listed candidates
# (presumably the first one that is available — confirm in hdd.device.utils).
DEVICE = get_device(["cuda", "cpu"])
print("Use device: ", DEVICE)

Use device:  cuda


加载VGG模型

In [2]:
from torchvision.models import vgg19_bn, VGG19_BN_Weights
from torchsummary import summary

# Build VGG-19 (batch-norm variant) with the IMAGENET1K_V1 pretrained weights
# and place it on the selected device in a single expression.
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1).to(DEVICE)
# Dump the module tree so the layer layout can be inspected.
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

创建与VGG预训练模型匹配的Data Transform

In [3]:
from torchvision.transforms._presets import ImageClassification
from hdd.data_util.transforms import RandomMetaTransform
from hdd.dataset.imagenette_in_memory import get_imagenette_label_to_imagenet_label

def _make_loader(split, transform, shuffle):
    """Build a DataLoader over one in-memory Imagenette split.

    Args:
        split: Dataset split name forwarded to ImagenetteInMemory ("train" or "val").
        transform: Per-sample transform applied by the dataset.
        shuffle: Whether the loader reshuffles the data every epoch.

    Returns:
        A torch DataLoader using BATCH_SIZE, 8 workers and pinned memory.
    """
    dataset = ImagenetteInMemory(
        root=DATA_ROOT,
        split=split,
        size="full",
        download=True,
        transform=transform,
    )
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=8,
        pin_memory=True,
    )


# Training uses three ImageNet classification presets that differ only in the
# resize size applied before the 224 crop; they are handed to RandomMetaTransform
# (presumably one is chosen at random per sample — confirm in hdd.data_util.transforms).
train_transform = RandomMetaTransform(
    *(
        ImageClassification(crop_size=224, resize_size=resize)
        for resize in (224, 238, 296)
    )
)
# Validation uses exactly the preprocessing shipped with the pretrained weights.
val_transform = VGG19_BN_Weights.IMAGENET1K_V1.transforms()
BATCH_SIZE = 32
train_dataloader = _make_loader("train", train_transform, shuffle=True)
val_dataloader = _make_loader("val", val_transform, shuffle=False)

#### 在没有任何Fine Tuning的情况下,准确率为**85.78%**

In [4]:
from hdd.train.classification_utils import eval_image_classifier

# The pretrained model predicts ImageNet class ids, while the dataset carries
# Imagenette labels, so ground-truth labels are translated before comparing.
imagenette_to_imagenet = get_imagenette_label_to_imagenet_label()
eval_result = eval_image_classifier(model, val_dataloader.dataset, DEVICE)
# One boolean per evaluated sample: True when the prediction matches the
# translated ground-truth label.
ss = []
for result in eval_result:
    expected_label = imagenette_to_imagenet[result.gt_label]
    ss.append(expected_label == result.predicted_label)
print(f"Accuracy without any fine tuning: {sum(ss) / len(ss)}")

Accuracy without any fine tuning: 0.8578343949044586


#### 微调Classifier,准确率为**98.55%**

In [5]:
from hdd.train.early_stopping import EarlyStoppingInMem
from hdd.train.classification_utils import naive_train_classification_model

# Start again from the ImageNet-pretrained VGG-19 (batch-norm variant).
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
# Freeze the convolutional feature extractor: none of its parameters receive gradients.
# NOTE(review): freezing gradients does not by itself stop the BatchNorm layers in
# `features` from updating their running statistics if the training helper switches
# the model to train mode — confirm whether that is intended.
model.features.requires_grad_(False)
dropout = 0.5
num_classes = 10
# Only the classifier head is replaced; its last layer now outputs `num_classes` logits.
model.classifier = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(True),
    nn.Dropout(p=dropout),
    nn.Linear(4096, 4096),
    nn.ReLU(True),
    nn.Dropout(p=dropout),
    nn.Linear(4096, num_classes),
)
model = model.to(DEVICE)

criteria = nn.CrossEntropyLoss()
# Give the optimizer only the parameters that are actually trainable, so the
# optimizer's parameter list matches what this experiment fine-tunes (the new
# classifier head) instead of also holding every frozen feature-extractor tensor.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.005, momentum=0.9)
max_epochs = 10
_ = naive_train_classification_model(
    model,
    criteria,
    max_epochs,
    train_dataloader,
    val_dataloader,
    DEVICE,
    optimizer,
    verbose=True,
)

Epoch: 1/10 Train Loss: 0.4495 Accuracy: 0.8852 Time: 14.13914  | Val Loss: 0.0627 Accuracy: 0.9801
Epoch: 2/10 Train Loss: 0.0550 Accuracy: 0.9845 Time: 14.06363  | Val Loss: 0.0551 Accuracy: 0.9837
Epoch: 3/10 Train Loss: 0.0304 Accuracy: 0.9908 Time: 14.07111  | Val Loss: 0.0491 Accuracy: 0.9834
Epoch: 4/10 Train Loss: 0.0180 Accuracy: 0.9958 Time: 14.10516  | Val Loss: 0.0491 Accuracy: 0.9862
Epoch: 5/10 Train Loss: 0.0120 Accuracy: 0.9968 Time: 13.99336  | Val Loss: 0.0477 Accuracy: 0.9852
Epoch: 6/10 Train Loss: 0.0095 Accuracy: 0.9975 Time: 14.03133  | Val Loss: 0.0543 Accuracy: 0.9850
Epoch: 7/10 Train Loss: 0.0110 Accuracy: 0.9969 Time: 13.93512  | Val Loss: 0.0650 Accuracy: 0.9822
Epoch: 8/10 Train Loss: 0.0069 Accuracy: 0.9983 Time: 13.95940  | Val Loss: 0.0548 Accuracy: 0.9847
Epoch: 9/10 Train Loss: 0.0063 Accuracy: 0.9981 Time: 13.81623  | Val Loss: 0.0556 Accuracy: 0.9860
Epoch: 10/10 Train Loss: 0.0037 Accuracy: 0.9992 Time: 13.85679  | Val Loss: 0.0519 Accuracy: 0.9855

#### 微调全部参数,准确率为**97.86%**

In [9]:
# Fine-tune every parameter: start from the pretrained network and leave all
# layers trainable.
model = vgg19_bn(weights=VGG19_BN_Weights.IMAGENET1K_V1)
dropout = 0.5
num_classes = 10
# Replacement classifier head whose last layer outputs `num_classes` logits.
head_layers = [
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=dropout),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=dropout),
    nn.Linear(4096, num_classes),
]
model.classifier = nn.Sequential(*head_layers)
model = model.to(DEVICE)

criteria = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
# StepLR scales the learning rate by `gamma` every `step_size` scheduler steps
# (presumably stepped once per epoch by the training helper — confirm).
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=10, gamma=0.1, last_epoch=-1
)
max_epochs = 20
# Positional arguments in the order the training helper is called with here.
train_args = (
    model,
    criteria,
    max_epochs,
    train_dataloader,
    val_dataloader,
    DEVICE,
    optimizer,
    scheduler,
)
_ = naive_train_classification_model(*train_args, verbose=True)

Epoch: 1/20 Train Loss: 0.4179 Accuracy: 0.8835 Time: 36.22130  | Val Loss: 0.1110 Accuracy: 0.9651
Epoch: 2/20 Train Loss: 0.0698 Accuracy: 0.9789 Time: 35.92898  | Val Loss: 0.0908 Accuracy: 0.9715
Epoch: 3/20 Train Loss: 0.0466 Accuracy: 0.9833 Time: 35.94310  | Val Loss: 0.1304 Accuracy: 0.9638
Epoch: 4/20 Train Loss: 0.0339 Accuracy: 0.9893 Time: 36.14090  | Val Loss: 0.1331 Accuracy: 0.9679
Epoch: 5/20 Train Loss: 0.0328 Accuracy: 0.9905 Time: 36.03434  | Val Loss: 0.1596 Accuracy: 0.9557
Epoch: 6/20 Train Loss: 0.0340 Accuracy: 0.9894 Time: 35.97279  | Val Loss: 0.1985 Accuracy: 0.9483
Epoch: 7/20 Train Loss: 0.0260 Accuracy: 0.9922 Time: 36.00828  | Val Loss: 0.1182 Accuracy: 0.9699
Epoch: 8/20 Train Loss: 0.0290 Accuracy: 0.9923 Time: 36.04504  | Val Loss: 0.1182 Accuracy: 0.9674
Epoch: 9/20 Train Loss: 0.0201 Accuracy: 0.9942 Time: 36.17785  | Val Loss: 0.1121 Accuracy: 0.9694
Epoch: 10/20 Train Loss: 0.0143 Accuracy: 0.9959 Time: 36.29167  | Val Loss: 0.1257 Accuracy: 0.9707

#### 从头训练,准确率为**85%**

注意,我们并没有仔细调整相关的超参数,所以结果看起来有些糟糕。根据[VGGNet.ipynb](./VGGNet.ipynb),从头训练应该可以达到**91%**。

In [11]:
# Train from scratch: no pretrained weights are requested, and the network is
# built directly with a 10-class output.
net = vgg19_bn(num_classes=10, dropout=0.5).to(DEVICE)
criteria = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.005, momentum=0.9)
# StepLR halves the learning rate every 20 scheduler steps
# (presumably stepped once per epoch by the training helper — confirm).
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=20, gamma=0.5, last_epoch=-1
)
# Early stopping configured with a patience of 25
# (presumably counted in epochs without improvement — confirm in hdd.train.early_stopping).
early_stopper = EarlyStoppingInMem(patience=25, verbose=False)
max_epochs = 150
# Positional arguments in the order the training helper is called with here.
train_args = (
    net,
    criteria,
    max_epochs,
    train_dataloader,
    val_dataloader,
    DEVICE,
    optimizer,
    scheduler,
    early_stopper,
)
_ = naive_train_classification_model(*train_args, verbose=True)

Epoch: 1/150 Train Loss: 2.5135 Accuracy: 0.1973 Time: 36.43307  | Val Loss: 2.0769 Accuracy: 0.2456
Epoch: 2/150 Train Loss: 1.9793 Accuracy: 0.3075 Time: 35.91563  | Val Loss: 1.8639 Accuracy: 0.3811
Epoch: 3/150 Train Loss: 1.7788 Accuracy: 0.3878 Time: 36.59997  | Val Loss: 1.6883 Accuracy: 0.4382
Epoch: 4/150 Train Loss: 1.5756 Accuracy: 0.4730 Time: 36.77549  | Val Loss: 1.4284 Accuracy: 0.5231
Epoch: 5/150 Train Loss: 1.3437 Accuracy: 0.5653 Time: 36.72153  | Val Loss: 1.2293 Accuracy: 0.6005
Epoch: 6/150 Train Loss: 1.1960 Accuracy: 0.6163 Time: 37.17435  | Val Loss: 1.3629 Accuracy: 0.5837
Epoch: 7/150 Train Loss: 1.0645 Accuracy: 0.6608 Time: 36.62234  | Val Loss: 1.0919 Accuracy: 0.6428
Epoch: 8/150 Train Loss: 0.9639 Accuracy: 0.6947 Time: 36.77001  | Val Loss: 1.0148 Accuracy: 0.6884
Epoch: 9/150 Train Loss: 0.9033 Accuracy: 0.7162 Time: 36.14575  | Val Loss: 0.9161 Accuracy: 0.7213
Epoch: 10/150 Train Loss: 0.8419 Accuracy: 0.7340 Time: 35.76139  | Val Loss: 0.8367 Accura