分三个模型训练：

1. 模型 1（简单卷积神经网络 CNN）：

- 仅包含两个卷积层 + 池化层 + 全连接层

- 适合作为Baseline Model，观察基本的分类能力

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import os
from PIL import Image
from tqdm.auto import tqdm
from pathlib import Path

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
TRAIN_PATH = "./train"
VAL_PATH = "./val"
TEST_PATH = "./test1"
BATCH_SIZE = 32

In [4]:
class CatDogDataset(Dataset):
    def __init__(self,train_dir,transform=None):
        self.train_dir = train_dir
        self.transform = transform
        self.images = os.listdir(train_dir)

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self,index):
        image_path = os.path.join(self.train_dir,self.images[index])
        label = self.images[index].split(".")[0]
        label = 0 if label=='cat' else 1
        image = Image.open(image_path).convert("RGB")
        if self.transform  is not None:
            image = self.transform(image)
        return image,label

In [5]:
# 数据增强
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [6]:
# 数据加载
def load_data():
    train_dataset = CatDogDataset(TRAIN_PATH, transform=transform)
    val_dataset = CatDogDataset(VAL_PATH, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    return train_loader, val_loader

train_loader, val_loader = load_data()

In [7]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(32 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, 2)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def train_and_evaluate(model, train_loader, val_loader, epochs=10, lr=0.001):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in tqdm(range(epochs)):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        accuracy = correct / total
        accuracy *= 100
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

In [8]:
# 训练模型 1
model1 = SimpleCNN()
train_and_evaluate(model1, train_loader, val_loader)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1, Loss: 0.5819, Accuracy: 74.80%
Epoch 2, Loss: 0.4703, Accuracy: 77.38%
Epoch 3, Loss: 0.4130, Accuracy: 78.54%
Epoch 4, Loss: 0.3580, Accuracy: 79.54%
Epoch 5, Loss: 0.2937, Accuracy: 79.94%
Epoch 6, Loss: 0.2356, Accuracy: 78.80%
Epoch 7, Loss: 0.1848, Accuracy: 79.64%
Epoch 8, Loss: 0.1348, Accuracy: 80.10%
Epoch 9, Loss: 0.0967, Accuracy: 80.20%
Epoch 10, Loss: 0.0745, Accuracy: 80.48%


In [9]:
from pathlib import Path
MODLE_PATH = Path("basecnnmodel")
MODLE_PATH.mkdir(parents=True,exist_ok=True)
MODLE_NAME = "NAIVECNNCLASSIFIER.pth"
MODLE_SAVE_PATH = MODLE_PATH / MODLE_NAME
print("saving to path: ",MODLE_SAVE_PATH)
torch.save(obj=model1.state_dict(),f=MODLE_SAVE_PATH)

saving to path:  basecnnmodel\NAIVECNNCLASSIFIER.pth


2. 模型 2（改进版 CNN）：

- 增加卷积层、批量归一化（BatchNorm）、Dropout 以提高泛化能力

- 适当增加通道数，提高表达能力

In [10]:
class ImprovedCNN(nn.Module):
    def __init__(self):
        super(ImprovedCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.fc1 = nn.Linear(128 * 16 * 16, 256)
        self.fc2 = nn.Linear(256, 2)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.pool(self.relu(self.bn1(self.conv1(x))))
        x = self.pool(self.relu(self.bn2(self.conv2(x))))
        x = self.pool(self.relu(self.bn3(self.conv3(x))))
        x = x.view(x.size(0), -1)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x


In [11]:
model2 = ImprovedCNN()
train_and_evaluate(model2, train_loader, val_loader)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1, Loss: 0.8812, Accuracy: 69.58%
Epoch 2, Loss: 0.5908, Accuracy: 71.08%
Epoch 3, Loss: 0.5575, Accuracy: 75.56%
Epoch 4, Loss: 0.5145, Accuracy: 78.44%
Epoch 5, Loss: 0.4865, Accuracy: 81.14%
Epoch 6, Loss: 0.4633, Accuracy: 81.24%
Epoch 7, Loss: 0.4272, Accuracy: 83.14%
Epoch 8, Loss: 0.4054, Accuracy: 83.06%
Epoch 9, Loss: 0.3808, Accuracy: 85.90%
Epoch 10, Loss: 0.3668, Accuracy: 83.46%


In [12]:
from pathlib import Path
MODLE_PATH = Path("improvedcnnmodel")
MODLE_PATH.mkdir(parents=True,exist_ok=True)
MODLE_NAME = "IMPROVEDCNNCLASSIFIER.pth"
MODLE_SAVE_PATH = MODLE_PATH / MODLE_NAME
print("saving to path: ",MODLE_SAVE_PATH)
torch.save(obj=model2.state_dict(),f=MODLE_SAVE_PATH)

saving to path:  improvedcnnmodel\IMPROVEDCNNCLASSIFIER.pth


3. 模型 3（ResNet 变种）：

- 采用 ResNet-like 结构，引入残差连接（Residual Connection）

- 进一步提升分类能力，避免梯度消失

In [16]:

# 数据增强
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])


In [2]:
# ResNet 变种
class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1) if in_channels != out_channels else nn.Identity()
    
    def forward(self, x):
        identity = self.shortcut(x)
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.bn2(self.conv2(x))
        x += identity
        return self.relu(x)

class ResNet(nn.Module):
    def __init__(self):
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.res1 = ResBlock(64, 128)
        self.res2 = ResBlock(128, 256)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(256, 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.res1(x)
        x = self.res2(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        return self.fc(x)


In [20]:
def train_and_evaluate(model, train_loader, val_loader, epochs=20, lr=0.0005):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # 每5轮学习率减半
    
    for epoch in tqdm(range(epochs)):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        scheduler.step()
        
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        
        accuracy = correct / total * 100
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}%")

In [21]:
model3 = ResNet()
train_and_evaluate(model3, train_loader, val_loader)

  0%|          | 0/20 [00:00<?, ?it/s]

Epoch 1, Loss: 0.6469, Accuracy: 69.30%
Epoch 2, Loss: 0.5868, Accuracy: 67.80%
Epoch 3, Loss: 0.5603, Accuracy: 70.08%
Epoch 4, Loss: 0.5451, Accuracy: 74.46%
Epoch 5, Loss: 0.5235, Accuracy: 76.30%
Epoch 6, Loss: 0.4902, Accuracy: 79.26%
Epoch 7, Loss: 0.4686, Accuracy: 79.24%
Epoch 8, Loss: 0.4491, Accuracy: 80.44%
Epoch 9, Loss: 0.4328, Accuracy: 71.42%
Epoch 10, Loss: 0.4135, Accuracy: 82.24%
Epoch 11, Loss: 0.3761, Accuracy: 84.40%
Epoch 12, Loss: 0.3654, Accuracy: 83.66%
Epoch 13, Loss: 0.3501, Accuracy: 86.50%
Epoch 14, Loss: 0.3416, Accuracy: 86.60%
Epoch 15, Loss: 0.3313, Accuracy: 86.98%
Epoch 16, Loss: 0.3114, Accuracy: 88.36%
Epoch 17, Loss: 0.3035, Accuracy: 86.84%
Epoch 18, Loss: 0.2965, Accuracy: 88.74%
Epoch 19, Loss: 0.2987, Accuracy: 88.36%
Epoch 20, Loss: 0.2901, Accuracy: 87.02%


In [22]:
from pathlib import Path
MODLE_PATH = Path("resnetmodel")
MODLE_PATH.mkdir(parents=True,exist_ok=True)
MODLE_NAME = "RESNETPLUS.pth"
MODLE_SAVE_PATH = MODLE_PATH / MODLE_NAME
print("saving to path: ",MODLE_SAVE_PATH)
torch.save(obj=model3.state_dict(),f=MODLE_SAVE_PATH)

saving to path:  resnetmodel\RESNETPLUS.pth


In [23]:
import pandas as pd

In [24]:
def predict_and_save_results(model, test_path, output_file):
    model.eval()
    test_images = os.listdir(test_path)
    results = []
    
    transform_test = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    
    for img_name in tqdm(test_images):
        img_path = os.path.join(test_path, img_name)
        image = Image.open(img_path).convert("RGB")
        image = transform_test(image)
        image = image.unsqueeze(0).to(device)  # 增加 batch 维度
        
        with torch.no_grad():
            output = model(image)
            _, predicted = torch.max(output, 1)
            label = "cat" if predicted.item() == 0 else "dog"
        
        results.append([img_name, label])
    
    df = pd.DataFrame(results, columns=["Image Name", "Label"])
    print(df.head())
    df.to_csv(output_file, index=False)

predict_and_save_results(model3, TEST_PATH, "submission.csv")

  0%|          | 0/12500 [00:00<?, ?it/s]

  Image Name Label
0      1.jpg   dog
1     10.jpg   cat
2    100.jpg   cat
3   1000.jpg   dog
4  10000.jpg   dog


In [28]:
import pandas as pd

# 读取 CSV 文件
df = pd.read_csv("submission.csv", header=None, names=["filename", "label"], sep="|")

# 去除可能的空格
df["filename"] = df["filename"].astype(str).str.strip()
df["label"] = df["label"].astype(str).str.strip()

# 提取图片编号
df["num"] = df["filename"].str.extract(r"(\d+)")[0]

# 删除 NaN 值（即无法提取数字的行）
df = df.dropna(subset=["num"])

# 转换为整数类型
df["num"] = df["num"].astype(int)

# 按编号排序
df = df.sort_values(by="num")

# 删除辅助列
df = df.drop(columns=["num"])

# 重新保存排序后的 CSV，确保没有额外的 `NaN`
df.to_csv("submission_sorted.csv", index=False, header=False, sep="|", na_rep="")

# 打印前几行检查
print(df.head(15))


         filename label
1       1.jpg,dog   nan
3613    2.jpg,dog   nan
4724    3.jpg,dog   nan
5835    4.jpg,dog   nan
6946    5.jpg,cat   nan
8057    6.jpg,dog   nan
9168    7.jpg,cat   nan
10279   8.jpg,cat   nan
11390   9.jpg,cat   nan
2      10.jpg,cat   nan
1113   11.jpg,cat   nan
2224   12.jpg,dog   nan
2836   13.jpg,dog   nan
2947   14.jpg,cat   nan
3058   15.jpg,cat   nan


In [4]:
model = ResNet()
MODLE_PATH = Path("resnetmodel")
MODLE_PATH.mkdir(parents=True,exist_ok=True)
MODLE_NAME = "RESNETPLUS.pth"
MODLE_SAVE_PATH = MODLE_PATH / MODLE_NAME
model.load_state_dict(torch.load(MODLE_SAVE_PATH))
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (res1): ResBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
    (shortcut): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
  )
  (res2): ResBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (r