## 任务1：构建一个简单CNN模型，训练有标签数据

In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

# Pick the compute device once for the whole notebook. It is kept as a plain
# string ("cuda" or "cpu") because later code compares it with == "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
# Helper that seeds the random number generators for repeatable runs.
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Ask CUDA directly rather than relying on a module-level `device` string,
    # so the helper also works before (or without) that variable being defined.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(42)
    

In [5]:
# Image preprocessing

def _resize_tensor_normalize():
    """Build the resize -> tensor -> normalize pipeline shared by both splits."""
    steps = [
        transforms.Resize((128, 128)),           # fixed 128x128 input size
        transforms.ToTensor(),                   # PIL image -> float tensor
        transforms.Normalize((0.5, ), (0.5, )),  # (x - 0.5) / 0.5
    ]
    return transforms.Compose(steps)

# Training and evaluation currently apply the same steps; each split still
# gets its own Compose instance.
train_transform = _resize_tensor_normalize()

test_transform = _resize_tensor_normalize()


In [6]:
# Data locations

# Dataset root. Set the FOOD11_ROOT environment variable to run the notebook on
# another machine; when it is unset the original location is used unchanged.
data_root = os.environ.get("FOOD11_ROOT", "F:/Ai/Data/food-11").rstrip("/\\")
train_dir = f"{data_root}/training/labeled"
val_dir = f"{data_root}/validation"

In [7]:
# Build one ImageFolder dataset per split from the configured directories.
train_set = datasets.ImageFolder(train_dir, transform=train_transform)
val_set = datasets.ImageFolder(val_dir, transform=test_transform)

# Shuffle only the training split.
train_loader = DataLoader(train_set, batch_size=256, shuffle=True)
# Backward-compatible alias: other cells in this notebook still use the
# original, misspelled name.
trian_loader = train_loader
val_loader = DataLoader(val_set, batch_size=256, shuffle=False)

In [6]:
# Show the dataset summary (sample count, root folder, transform) as the cell output.
train_set

Dataset ImageFolder
    Number of datapoints: 3080
    Root location: F:/Ai/Data/food-11/training/labeled
    StandardTransform
Transform: Compose(
               Resize(size=(128, 128), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )

In [7]:
# Simple CNN network

class simpleCNN(nn.Module):
    """Baseline classifier: three conv/ReLU/max-pool stages and a two-layer head.

    The head's input size (128 * 16 * 16) matches 128x128 inputs: each of the
    three 2x2 poolings halves the spatial size (128 -> 64 -> 32 -> 16).
    The last layer produces 11 logits.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) of each convolutional stage, in order.
        stage_channels = [(3, 32), (32, 64), (64, 128)]
        conv_layers = []
        for c_in, c_out in stage_channels:
            conv_layers += [
                nn.Conv2d(c_in, c_out, 3, padding=1),  # 3x3 conv; padding keeps H and W
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
        self.cnn = nn.Sequential(*conv_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(128 * 16 * 16, 256),
            nn.ReLU(),
            nn.Linear(256, 11),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        feature_maps = self.cnn(x)
        return self.fc(feature_maps)


In [8]:
# Instantiate the baseline network, then move it to the selected device.
model = simpleCNN()
model = model.to(device)
# Cross-entropy loss for the classification objective.
loss_fn = nn.CrossEntropyLoss()
# Adam over all of the model's parameters (created after the device move).
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [9]:
# Validation function
def validate(net=None, loader=None, dev=None):
    """Measure classification accuracy, print it, and return it.

    Args:
        net: Network to evaluate. Defaults to the notebook-level `model`.
        loader: Iterable of (images, labels) batches. Defaults to `val_loader`.
        dev: Device the batches are moved to. Defaults to `device`.

    Returns:
        Fraction of correctly classified samples as a float
        (0.0 when the loader yields no samples).
    """
    # Resolve defaults at call time so that re-assigning the notebook globals
    # (e.g. creating a new `model`) is picked up by a plain `validate()` call.
    net = model if net is None else net
    loader = val_loader if loader is None else loader
    dev = device if dev is None else dev

    net.eval()  # inference behaviour for layers such as BatchNorm / Dropout
    correct, seen = 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(dev), labels.to(dev)
            preds = net(imgs)
            correct += (preds.argmax(1) == labels).sum().item()
            seen += labels.size(0)
    # Divide by the samples actually evaluated so the result is right for any
    # loader, and avoid a ZeroDivisionError on an empty one.
    acc = correct / seen if seen else 0.0
    print(f"[EASY][Validation] Acc: {acc:.4f}")
    return acc

In [10]:
# Training function

def train(epochs = 10):
    """Train the notebook-level `model` on `trian_loader` for `epochs` passes.

    Uses the notebook-level `loss_fn`, `optimizer` and `device`. After every
    pass it prints the summed per-batch loss and the training accuracy
    (correct predictions / len(train_set)), then calls `validate()`.

    Args:
        epochs: Number of passes over the training loader.
    """
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        n_correct = 0

        for batch_imgs, batch_labels in trian_loader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)

            # Standard step: clear old gradients, forward, backward, update.
            optimizer.zero_grad()
            logits = model(batch_imgs)
            batch_loss = loss_fn(logits, batch_labels)
            batch_loss.backward()
            optimizer.step()

            total_loss += batch_loss.item()
            hits = logits.argmax(1) == batch_labels
            n_correct += hits.sum().item()

        acc = n_correct / len(train_set)

        print(f"[EASY][Epoch {epoch+1}] Loss: {total_loss:.3f}, Train Acc: {acc:.4f}")
        validate()



In [None]:
# Train the current `model` for 10 epochs and report CPU / wall-clock time.
# NOTE(review): the recorded output starts at ~99.7% train accuracy in epoch 1,
# which suggests the model had already been trained earlier in the same kernel;
# re-create `model` and `optimizer` first to reproduce a from-scratch run — TODO confirm.
%time train(epochs=10)

[EASY][Epoch 1] Loss: 0.559, Train Acc: 0.9971
[EASY][Validation] Acc: 0.3727
[EASY][Epoch 2] Loss: 0.824, Train Acc: 0.9854
[EASY][Validation] Acc: 0.3636
[EASY][Epoch 3] Loss: 0.604, Train Acc: 0.9955
[EASY][Validation] Acc: 0.3182
[EASY][Epoch 4] Loss: 3.268, Train Acc: 0.9331
[EASY][Validation] Acc: 0.2894
[EASY][Epoch 5] Loss: 2.038, Train Acc: 0.9503
[EASY][Validation] Acc: 0.3424
[EASY][Epoch 6] Loss: 0.943, Train Acc: 0.9825
[EASY][Validation] Acc: 0.3364
[EASY][Epoch 7] Loss: 0.319, Train Acc: 0.9987
[EASY][Validation] Acc: 0.3561
[EASY][Epoch 8] Loss: 0.147, Train Acc: 0.9994
[EASY][Validation] Acc: 0.3409
[EASY][Epoch 9] Loss: 0.077, Train Acc: 1.0000
[EASY][Validation] Acc: 0.3636
[EASY][Epoch 10] Loss: 0.045, Train Acc: 1.0000
[EASY][Validation] Acc: 0.3621
CPU times: total: 2min 38s
Wall time: 2min 29s


## 任务2：使用不同的模型架构或数据增强，在有标签图像上提升性能

In [17]:
# Improved network structure
class BetterCNN(nn.Module):
    """Three conv stages with batch normalisation, followed by a two-layer head.

    Each stage is 3x3 conv -> BatchNorm -> ReLU -> 2x2 max-pool. The head's
    input size (128 * 16 * 16) matches 128x128 inputs after three poolings.
    The last layer produces 11 logits.
    """

    @staticmethod
    def _stage(c_in, c_out):
        """Return the layers of one convolutional stage as a list."""
        return [
            nn.Conv2d(c_in, c_out, 3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]

    def __init__(self):
        super().__init__()
        # Stages are unpacked into one flat Sequential, so sub-module indices
        # run 0..11 across the three stages.
        self.cnn = nn.Sequential(
            *self._stage(3, 32),
            *self._stage(32, 64),
            *self._stage(64, 128),
        )

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=128 * 16 * 16, out_features=256),
            nn.ReLU(),
            nn.Linear(in_features=256, out_features=11),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.fc(self.cnn(x))

In [18]:
# Rebind the notebook-level training objects to the batch-norm network.
model = BetterCNN()
model = model.to(device)
# Same objective and optimizer settings as before; the optimizer must be
# re-created so that it tracks the new model's parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Re-run the same training function; `train` reads the notebook-level `model`
# and `optimizer`, so it trains whichever network was assigned most recently.
%time train(epochs=10)

[EASY][Epoch 1] Loss: 156.184, Train Acc: 0.1055
[EASY][Validation] Acc: 0.1348
[EASY][Epoch 2] Loss: 50.802, Train Acc: 0.1302
[EASY][Validation] Acc: 0.1424
[EASY][Epoch 3] Loss: 30.659, Train Acc: 0.1484
[EASY][Validation] Acc: 0.1273
[EASY][Epoch 4] Loss: 30.040, Train Acc: 0.1506
[EASY][Validation] Acc: 0.1288
[EASY][Epoch 5] Loss: 30.242, Train Acc: 0.1627
[EASY][Validation] Acc: 0.1788
[EASY][Epoch 6] Loss: 29.349, Train Acc: 0.1860
[EASY][Validation] Acc: 0.1773
[EASY][Epoch 7] Loss: 28.797, Train Acc: 0.2195
[EASY][Validation] Acc: 0.1909
[EASY][Epoch 8] Loss: 27.708, Train Acc: 0.2542
[EASY][Validation] Acc: 0.2015
[EASY][Epoch 9] Loss: 27.618, Train Acc: 0.2542
[EASY][Validation] Acc: 0.2394
[EASY][Epoch 10] Loss: 27.061, Train Acc: 0.2896
[EASY][Validation] Acc: 0.2439
CPU times: total: 2min 41s
Wall time: 2min 32s


In [8]:
class Food11VGG(nn.Module):
    """VGG-style classifier: four double-conv stages and a three-layer head.

    Every convolution is 3x3 with padding 1, followed by BatchNorm and an
    in-place ReLU; each stage ends with a 2x2 max-pool. The head's input size
    (512 * 8 * 8) matches 128x128 inputs after four poolings.

    Args:
        num_classes: Number of output logits.
    """

    # Output channels of each convolution in order; "M" marks a 2x2 max-pool.
    _LAYOUT = (64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M")

    def __init__(self, num_classes = 11):
        super().__init__()

        feature_layers = []
        in_ch = 3
        for spec in self._LAYOUT:
            if spec == "M":
                feature_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            feature_layers.extend([
                nn.Conv2d(in_ch, spec, kernel_size=3, padding=1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ])
            in_ch = spec
        self.features = nn.Sequential(*feature_layers)

        # Fully connected head with dropout after each hidden layer.
        head_layers = [
            nn.Flatten(),
            nn.Linear(512 * 8 * 8, 1024),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.classifier(self.features(x))

In [9]:
# Rebind the notebook-level training objects to the VGG-style network.
model = Food11VGG(num_classes=11)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
# The optimizer is created after the device move, over the new model's parameters.
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Number of full passes over the training loader.
num_epochs = 20

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    total_loss, correct, total = 0, 0, 0

    for images, labels in trian_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Clear old gradients, run forward/backward, then update the weights.
        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()  # summed over batches, not averaged
        _, preds = outputs.max(dim=1)
        correct += preds.eq(labels).sum().item()
        total += labels.shape[0]

    train_acc = correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}, Train Acc: {train_acc:.4f}")

    # ---- Validation pass ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, preds = outputs.max(dim=1)
            correct += preds.eq(labels).sum().item()
            total += labels.shape[0]

    val_acc = correct / total
    print(f"Validation Acc: {val_acc:.4f}")
