In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import time
import os
import psutil
import json

# ==== 参数 ====
with open("config/exp1_resnet50_bs32_lr1e-3.json") as f:
    cfg = json.load(f)

BATCH_SIZE = cfg["batch_size"]
LR = cfg["learning_rate"]
EPOCHS = cfg["epochs"]
IMG_SIZE = cfg["img_size"]
NUM_CLASSES = cfg["num_classes"]
EXPERIMENT_NAME = f"PyTorch_{cfg['model_name']}_bs{BATCH_SIZE}_lr{LR}_e{EPOCHS}"
MODEL_DIR = os.path.join("experiments", EXPERIMENT_NAME)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==== 记录资源 ====
process = psutil.Process()
memory_before = process.memory_info().rss / (1024 * 1024)  # MB
start_time = time.time()

# ==== 数据加载（Flowers102）====
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

train_dataset = torchvision.datasets.Flowers102(
    root="./data", split="train", download=True, transform=transform)
val_dataset = torchvision.datasets.Flowers102(
    root="./data", split="val", download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


# ==== 构建模型（冻结 base）====
base_model = models.resnet50(pretrained=True)
for param in base_model.parameters():
    param.requires_grad = False  

# 替换分类头（102类）
base_model.fc = nn.Sequential(
    nn.Linear(base_model.fc.in_features, 128),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, 102)
)
base_model = base_model.to(DEVICE)

# ==== 损失函数与优化器 ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(base_model.fc.parameters(), lr=LR)

# ==== 模型训练 ====
from tqdm import tqdm

for epoch in range(EPOCHS):
    base_model.train()
    total_loss = 0.0
    total_correct = 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for images, labels in loop:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = base_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        total_correct += (preds == labels).sum().item()

        acc = total_correct / len(train_dataset)
        loop.set_postfix(loss=loss.item(), acc=acc)

    print(f"Epoch {epoch+1} Done - Avg Loss: {total_loss:.4f} - Acc: {acc:.4f}")


training_duration = time.time() - start_time
if DEVICE.type == "cuda":
    memory_usage_mb = torch.cuda.max_memory_allocated() / (1024 * 1024)
else:
    memory_after = process.memory_info().rss / (1024 * 1024)
    memory_usage_mb = memory_after - memory_before

# ==== 保存模型 ====
os.makedirs(MODEL_DIR, exist_ok=True)
torch.save(base_model.state_dict(), os.path.join(MODEL_DIR, "resnet50_flower.pt"))

# ==== 模型评估 ====
base_model.eval()
correct = 0
total_loss = 0.0
inference_start = time.time()
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = base_model(images)
        loss = criterion(outputs, labels)
        total_loss += loss.item() * images.size(0)  
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
inference_end = time.time()

val_loss = total_loss / len(val_dataset)
val_acc = correct / len(val_dataset)


# ==== 记录性能指标 ====
metrics = {
    "training_time_seconds": training_duration,
    "memory_usage_mb": memory_after - memory_before,
    "inference_latency_seconds": inference_end - inference_start,
    "val_loss": val_loss,
    "val_accuracy": val_acc,
}
with open(os.path.join(MODEL_DIR, "metrics.json"), "w") as f:
    json.dump(metrics, f, indent=2)
with open(os.path.join(MODEL_DIR, "config.json"), "w") as f:
    json.dump(cfg, f, indent=2)

print("Done. Model and metrics saved in:", MODEL_DIR)


Epoch 1/10: 100%|██████████| 32/32 [04:07<00:00,  7.74s/it, acc=0.0127, loss=4.58] 


Epoch 1 Done - Avg Loss: 149.4919 - Acc: 0.0127


Epoch 2/10: 100%|██████████| 32/32 [04:13<00:00,  7.92s/it, acc=0.0275, loss=4.56] 


Epoch 2 Done - Avg Loss: 146.2408 - Acc: 0.0275


Epoch 3/10: 100%|██████████| 32/32 [04:03<00:00,  7.62s/it, acc=0.0275, loss=4.46] 


Epoch 3 Done - Avg Loss: 143.3066 - Acc: 0.0275


Epoch 4/10: 100%|██████████| 32/32 [04:12<00:00,  7.88s/it, acc=0.0461, loss=4.42]


Epoch 4 Done - Avg Loss: 138.7973 - Acc: 0.0461


Epoch 5/10: 100%|██████████| 32/32 [04:11<00:00,  7.86s/it, acc=0.0529, loss=4.27]


Epoch 5 Done - Avg Loss: 136.3688 - Acc: 0.0529


Epoch 6/10: 100%|██████████| 32/32 [04:05<00:00,  7.68s/it, acc=0.0725, loss=4.26]


Epoch 6 Done - Avg Loss: 132.0986 - Acc: 0.0725


Epoch 7/10: 100%|██████████| 32/32 [04:14<00:00,  7.94s/it, acc=0.0647, loss=4.02]


Epoch 7 Done - Avg Loss: 130.2710 - Acc: 0.0647


Epoch 8/10: 100%|██████████| 32/32 [04:05<00:00,  7.67s/it, acc=0.0706, loss=3.91]


Epoch 8 Done - Avg Loss: 128.6890 - Acc: 0.0706


Epoch 9/10: 100%|██████████| 32/32 [04:14<00:00,  7.96s/it, acc=0.0706, loss=3.99]


Epoch 9 Done - Avg Loss: 125.7033 - Acc: 0.0706


Epoch 10/10: 100%|██████████| 32/32 [04:12<00:00,  7.89s/it, acc=0.0892, loss=3.9] 


Epoch 10 Done - Avg Loss: 123.6617 - Acc: 0.0892
Done. Model and metrics saved in: experiments/PyTorch_resnet50_bs32_lr0.001_e10


In [2]:
pip install numpy==1.26.4


Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m74.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.1.3
    Uninstalling numpy-2.1.3:
      Successfully uninstalled numpy-2.1.3
  You can safely remove it manually.[0m[33m
[0mSuccessfully installed numpy-1.26.4
Note: you may need to restart the kernel to use updated packages.


In [2]:
torch.__version__

'1.13.1+cu117'