## 自监督学习
- 框架：MoCo
- 数据集：tiny_imagenet、caltech256（MoCo 预训练）；CIFAR-100（下游分类任务）

In [1]:
import os
import subprocess
import sys

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Project-local modules
from data_preparation import download_and_extract_tiny_imagenet, download_and_extract_caltech256, download_and_preprocess_cifar100
from model import get_resnet18_model
from training_finetuning import train, tune_hyperparameters

c:\Users\heyh0\.conda\envs\deeplearning39\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Users\heyh0\.conda\envs\deeplearning39\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll


# 设置参数

In [2]:
# Compute device: CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Root directory handed to every dataset download helper in this notebook.
DATA_DIR = './data'

# Seed PyTorch's global random number generator so that repeated runs of the
# notebook start from the same random state (GPU kernels may still introduce
# some nondeterminism). The trailing semicolon hides the returned Generator repr.
SEED = 42
torch.manual_seed(SEED);

# 下载数据集

tiny-imagenet 数据集

In [3]:
# Download and extract Tiny ImageNet under DATA_DIR; the helper's return value
# is kept as `train_dir` for the pre-training cell.
train_dir = download_and_extract_tiny_imagenet(
    data_dir=DATA_DIR,
)

Tiny ImageNet zip file already exists.
Tiny ImageNet directory already exists.
Tiny ImageNet is ready to use.


Caltech256 数据集

In [None]:
# Download and extract Caltech256 under DATA_DIR.
# NOTE(review): this rebinds `train_dir`, the same name the Tiny ImageNet cell
# assigns, so whichever download cell ran last decides what later cells receive.
train_dir = download_and_extract_caltech256(
    data_dir=DATA_DIR,
)

# MoCo 预训练（分布式）

In [None]:
# Run MoCo pre-training in a child process and capture everything it prints.
# Note from the original author: main_moco.py only supports distributed training.
# NOTE(review): no distributed launch options are passed below — confirm that
# main_moco.py's defaults are sufficient for it to start.
result = subprocess.run([
    # Use the interpreter that runs this kernel rather than whatever `python`
    # resolves to on PATH, so the child process sees the same environment.
    sys.executable, 'main_moco.py',
    train_dir,  # dataset directory returned by the download cell that ran last
    '-a', 'resnet18',
    '--epochs', '200',
    '--batch-size', '256',
    '--lr', '0.03',
    '--mlp',
    '--moco-t', '0.2',
    '--aug-plus',
    '--cos'
], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

# Output is only available once the child process has exited.
print("stdout:", result.stdout)
print("stderr:", result.stderr)
print("returncode:", result.returncode)

# 下游任务（分类）

In [7]:
# Obtain the two CIFAR-100 datasets returned by the helper (bound here as the
# train and test splits), stored under DATA_DIR.
train_cifar100, test_cifar100 = download_and_preprocess_cifar100(data_dir=DATA_DIR)

# Wrap both datasets in DataLoaders that share one batch size; only the
# training loader shuffles.
batch_size = 64
train_loader_cifar100 = DataLoader(
    train_cifar100,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
test_loader_cifar100 = DataLoader(
    test_cifar100,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data\cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:31<00:00, 5294278.65it/s]


Extracting ./data\cifar-100-python.tar.gz to ./data
Files already downloaded and verified


超参数

In [8]:
# Number of epochs handed to both the tuning and the final training calls.
epochs = 100

# Candidate values for the hyperparameter search.
lr_values = [0.001, 0.01, 0.1]
weight_decay_values = [0.0001, 0.001, 0.01]

## 实验1：使用MoCo预训练模型

导入预训练权重

In [None]:
# Experiment 1: start the 100-class ResNet-18 from the MoCo pre-training checkpoint.
moco_model = get_resnet18_model(pretrained=False, num_classes=100)

# map_location='cpu' lets the checkpoint load on machines that lack the GPU it
# was saved from; the model is moved to `device` when training starts.
checkpoint = torch.load('checkpoint_0199.pth.tar', map_location='cpu')
pretrained_state = checkpoint['state_dict']

# The reference MoCo training script saves the DistributedDataParallel-wrapped
# MoCo model, so the query-encoder weights sit under 'module.encoder_q.' next to
# the key encoder and the queue. Keep only the query-encoder backbone and drop
# its projection head ('fc'), which does not match the 100-class classifier.
# NOTE(review): assumes main_moco.py follows the reference checkpoint layout — confirm.
encoder_prefix = 'module.encoder_q.'
backbone_state = {
    key[len(encoder_prefix):]: tensor
    for key, tensor in pretrained_state.items()
    if key.startswith(encoder_prefix) and not key.startswith(encoder_prefix + 'fc.')
}

if backbone_state:
    load_result = moco_model.load_state_dict(backbone_state, strict=False)
    # Only the freshly initialised classifier head may be absent from the checkpoint.
    non_head_missing = [key for key in load_result.missing_keys if not key.startswith('fc.')]
    if non_head_missing or load_result.unexpected_keys:
        raise RuntimeError(
            "MoCo checkpoint does not match the ResNet-18 backbone: "
            f"missing={non_head_missing}, unexpected={list(load_result.unexpected_keys)}"
        )
else:
    # No MoCo-style keys found: treat the checkpoint as a plain model state dict.
    moco_model.load_state_dict(pretrained_state)

调参

In [None]:
# Call tune_hyperparameters for the MoCo-initialised model with the candidate
# learning rates and weight decays, in fine-tuning mode.
print("Tuning hyperparameters for MoCo pre-trained model")
best_hyperparams_moco = tune_hyperparameters(
    moco_model,
    train_loader_cifar100,
    test_loader_cifar100,
    epochs,
    lr_values,
    weight_decay_values,
    device,
    finetune=True,
)
print(f"Best hyperparameters for MoCo pre-trained model: {best_hyperparams_moco}")

In [None]:
# Manual override: set (learning rate, weight decay) directly here.
# Running this cell replaces any value produced by the tuning cell.
best_hyperparams_moco = (0.001, 0)

In [None]:
# Final training run for the MoCo-initialised model with the chosen hyperparameters.
# Move the model to the target device before building the optimizer, as the
# torch.optim documentation recommends.
moco_model = moco_model.to(device)

# Two parameter groups: the classifier head ('fc') uses the chosen learning rate,
# every other parameter uses one tenth of it. Weight decay applies to both groups.
optimizer_moco = optim.Adam([
            {'params': moco_model.fc.parameters(), 'lr': best_hyperparams_moco[0]},
            {'params': [param for name, param in moco_model.named_parameters() if 'fc' not in name], 'lr': best_hyperparams_moco[0] / 10}
        ], weight_decay=best_hyperparams_moco[1])
criterion = nn.CrossEntropyLoss()
# The comment suffix is appended to the default run directory so this run can be
# told apart from the other experiments in TensorBoard.
writer_moco = SummaryWriter(comment='_moco')

print("Final training with best hyperparameters for MoCo pre-trained model")
try:
    train(moco_model, train_loader_cifar100, test_loader_cifar100, optimizer_moco, criterion, epochs, device, writer=writer_moco)
finally:
    # Flush and close the TensorBoard writer even when training is interrupted.
    writer_moco.close()

## 实验2：使用ImageNet预训练模型

In [4]:
# Experiment 2: ResNet-18 requested with pretrained=True and num_classes=100.
imagenet_model = get_resnet18_model(
    pretrained=True,
    num_classes=100,
)



调参

In [None]:
# Call tune_hyperparameters for the ImageNet-initialised model with the candidate
# learning rates and weight decays, in fine-tuning mode.
print("Tuning hyperparameters for ImageNet pre-trained model")
best_hyperparams_imagenet = tune_hyperparameters(
    imagenet_model,
    train_loader_cifar100,
    test_loader_cifar100,
    epochs,
    lr_values,
    weight_decay_values,
    device,
    finetune=True,
)
print(f"Best hyperparameters for ImageNet pre-trained model: {best_hyperparams_imagenet}")

In [5]:
# Manual override: set (learning rate, weight decay) directly here.
# Running this cell replaces any value produced by the tuning cell.
best_hyperparams_imagenet = (0.001, 0)

In [10]:
# Final training run for the ImageNet-initialised model with the chosen hyperparameters.
# Move the model to the target device before building the optimizer, as the
# torch.optim documentation recommends.
imagenet_model = imagenet_model.to(device)

# Two parameter groups: the classifier head ('fc') uses the chosen learning rate,
# every other parameter uses one tenth of it. Weight decay applies to both groups.
optimizer_imagenet = optim.Adam([
            {'params': imagenet_model.fc.parameters(), 'lr': best_hyperparams_imagenet[0]},
            {'params': [param for name, param in imagenet_model.named_parameters() if 'fc' not in name], 'lr': best_hyperparams_imagenet[0] / 10}
        ], weight_decay=best_hyperparams_imagenet[1])
criterion = nn.CrossEntropyLoss()
# The comment suffix is appended to the default run directory so this run can be
# told apart from the other experiments in TensorBoard.
writer_imagenet = SummaryWriter(comment='_imagenet')

print("Final training with best hyperparameters for ImageNet pre-trained model")
try:
    train(imagenet_model, train_loader_cifar100, test_loader_cifar100, optimizer_imagenet, criterion, epochs, device, writer=writer_imagenet)
finally:
    # Flush and close the TensorBoard writer even when training is interrupted.
    writer_imagenet.close()

Final training with best hyperparameters for ImageNet pre-trained model
Epoch [1/100] Train Loss: 3.0121, Accuracy: 26.36%, Validation Loss: 2.2897, Accuracy: 39.31%
Epoch [2/100] Train Loss: 2.2430, Accuracy: 40.60%, Validation Loss: 1.9929, Accuracy: 46.47%
Epoch [3/100] Train Loss: 1.9871, Accuracy: 46.26%, Validation Loss: 1.9043, Accuracy: 48.68%
Epoch [4/100] Train Loss: 1.8169, Accuracy: 50.13%, Validation Loss: 1.8244, Accuracy: 51.19%
Epoch [5/100] Train Loss: 1.6944, Accuracy: 52.84%, Validation Loss: 1.7578, Accuracy: 52.43%
Epoch [6/100] Train Loss: 1.5771, Accuracy: 55.77%, Validation Loss: 1.7254, Accuracy: 53.97%
Epoch [7/100] Train Loss: 1.4797, Accuracy: 57.73%, Validation Loss: 1.7577, Accuracy: 54.07%
Epoch [8/100] Train Loss: 1.4139, Accuracy: 59.75%, Validation Loss: 1.7158, Accuracy: 55.37%


KeyboardInterrupt: 

## 实验3：使用随机初始化模型

In [None]:
# Experiment 3: ResNet-18 requested with pretrained=False and num_classes=100;
# no checkpoint is loaded into it.
random_model = get_resnet18_model(
    pretrained=False,
    num_classes=100,
)

调参

In [None]:
# Call tune_hyperparameters for the randomly initialised model with the candidate
# learning rates and weight decays; finetune=False for this experiment.
print("Tuning hyperparameters for randomly initialized model")
best_hyperparams_random = tune_hyperparameters(
    random_model,
    train_loader_cifar100,
    test_loader_cifar100,
    epochs,
    lr_values,
    weight_decay_values,
    device,
    finetune=False,
)
print(f"Best hyperparameters for randomly initialized model: {best_hyperparams_random}")

In [None]:
# Manual override: set (learning rate, weight decay) directly here.
# Running this cell replaces any value produced by the tuning cell.
best_hyperparams_random = (0.001, 0)

In [None]:
# Final training run for the randomly initialised model with the chosen hyperparameters.
# Move the model to the target device before building the optimizer, as the
# torch.optim documentation recommends.
random_model = random_model.to(device)

# A single learning rate and weight decay for every parameter of the model.
optimizer_random = optim.Adam(random_model.parameters(), lr=best_hyperparams_random[0], weight_decay=best_hyperparams_random[1])
criterion = nn.CrossEntropyLoss()
# The comment suffix is appended to the default run directory so this run can be
# told apart from the other experiments in TensorBoard.
writer_random = SummaryWriter(comment='_random')

print("Final training with best hyperparameters for randomly initialized model")
try:
    train(random_model, train_loader_cifar100, test_loader_cifar100, optimizer_random, criterion, epochs, device, writer=writer_random)
finally:
    # Flush and close the TensorBoard writer even when training is interrupted.
    writer_random.close()