In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.metrics import confusion_matrix


In [15]:
# Training-time augmentation: a random rotation and a random affine
# shift/rescale are applied before the image is converted to a tensor.
transform_train = transforms.Compose(
    [
        transforms.RandomRotation(degrees=10),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ToTensor(),
    ]
)
# The test split is only converted to a tensor (no augmentation).
transform_test = transforms.Compose([transforms.ToTensor()])

# Download (if needed) and load the MNIST train and test splits from ./data.
train_dataset = datasets.MNIST(
    root='./data', train=True, download=True, transform=transform_train
)
test_dataset = datasets.MNIST(
    root='./data', train=False, download=True, transform=transform_test
)

# Training batches are shuffled; the test loader keeps dataset order and uses
# a batch size of 10000.
train_loader = DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True, num_workers=0
)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=10000, shuffle=False, num_workers=0
)


In [16]:
# Take the first batch from the test loader and convert both the images and
# the labels to NumPy arrays.
# Original author's shape note: x_test_original is [10000, 1, 28, 28] and
# y_test_original is [10000].
x_test_original, y_test_original = (
    batch_part.numpy() for batch_part in next(iter(test_loader))
)

# Open question (author's note): the two images are centre-aligned and then
# superimposed. Could the second image instead be shifted right by 10 pixels
# before being superimposed on the first, keeping only the 28*28 pixels of
# the second image?

# Author's note: the two images are the same image, then superimposed.

In [17]:
def create_device_mixed_data(x_data, y_data, LH_param, RH_param, label_type='label1'):
    """Blend every sample with its successor and renormalise each blend.

    Sample ``i`` is mixed with sample ``i + 1``; the last sample wraps around
    and is mixed with the first one. Each side is weighted by a fixed 0.5
    intensity on top of the caller-supplied ``LH_param`` / ``RH_param``, and
    every blended sample is then divided by its own maximum value.

    Args:
        x_data: Array of samples indexed along axis 0.
        y_data: Labels aligned with ``x_data`` along axis 0.
        LH_param: Weight applied to sample ``i``.
        RH_param: Weight applied to sample ``i + 1``.
        label_type: ``'label1'`` keeps the label of sample ``i``; any other
            value uses the label of sample ``i + 1``.

    Returns:
        Tuple ``(mixed_images, mixed_labels)`` of NumPy arrays with the same
        leading length as ``x_data``. For empty input, empty copies of the
        inputs are returned.
    """
    x_data = np.asarray(x_data)
    y_data = np.asarray(y_data)
    n = x_data.shape[0]
    if n == 0:
        # Nothing to blend; indexing x_data[-1] would raise on empty input.
        return x_data.copy(), y_data.copy()

    intensity_LH = 0.5
    intensity_RH = 1 - intensity_LH

    # Rolling by -1 along axis 0 places sample i + 1 at position i and wraps
    # the first sample to the end, so no separate last/first case is needed.
    x_next = np.roll(x_data, -1, axis=0)
    mixed_images = x_data * LH_param * intensity_LH + x_next * RH_param * intensity_RH

    # Per-sample maximum, kept broadcastable against the non-batch axes.
    sample_axes = tuple(range(1, mixed_images.ndim))
    peak = mixed_images.max(axis=sample_axes, keepdims=True)
    # A blend whose maximum is 0 cannot be rescaled; dividing by 1 leaves it
    # unchanged instead of producing NaN from 0 / 0.
    peak = np.where(peak == 0, np.ones_like(peak), peak)
    mixed_images = mixed_images / peak

    if label_type == 'label1':
        mixed_labels = y_data.copy()
    else:
        mixed_labels = np.roll(y_data, -1, axis=0)
    return mixed_images, mixed_labels


In [24]:
# Left/right mixing weights for the "R-form" and "S-form" variants.
R_form_LH, R_form_RH = 0.7, 0.3
S_form_LH, S_form_RH = 0.4, 0.6

# Equal-weight blend, labelled with the first image of each pair.
x_test_mixed, y_test_mixed = create_device_mixed_data(
    x_test_original,
    y_test_original,
    LH_param=1.0,
    RH_param=1.0,
    label_type='label1',
)
# R-form blend, labelled with the first image of each pair.
x_test_R_form, y_test_R_form = create_device_mixed_data(
    x_test_original,
    y_test_original,
    LH_param=R_form_LH,
    RH_param=R_form_RH,
    label_type='label1',
)
# S-form blend, labelled with the second image of each pair.
x_test_S_form, y_test_S_form = create_device_mixed_data(
    x_test_original,
    y_test_original,
    LH_param=S_form_LH,
    RH_param=S_form_RH,
    label_type='label2',
)


In [25]:
# Pick one random test index and show that position in all four datasets.
idx = np.random.randint(0, x_test_original.shape[0])

fig, axs = plt.subplots(1, 4, figsize=(12, 4))
plt.suptitle(f'Index {idx}: Original/Mixed/R-form/S-form')

# (panel title, images, labels) for each subplot, left to right. Driving the
# panels from one table replaces four copy-pasted plotting stanzas.
panels = [
    ('Original', x_test_original, y_test_original),
    ('Mixed', x_test_mixed, y_test_mixed),
    ('R-form', x_test_R_form, y_test_R_form),
    ('S-form', x_test_S_form, y_test_S_form),
]
for ax, (panel_title, panel_images, panel_labels) in zip(axs, panels):
    # [idx][0] selects the sample and then its first entry along axis 1.
    ax.imshow(panel_images[idx][0], cmap='gray')
    ax.set_title(f'{panel_title}\nlabel={panel_labels[idx]}')
    ax.axis('off')

plt.tight_layout()
plt.show()


In [26]:
class SimpleCNN(nn.Module):
    """Convolutional classifier producing 10 output logits.

    ``self.conv`` holds three stages, each ending in a 2x2 max-pool followed
    by batch normalisation (64, 128 and 256 channels). ``self.fc`` flattens
    the result and maps it through a 512-unit hidden layer to 10 outputs.
    The first linear layer expects 256*3*3 features, which matches 28x28
    single-channel inputs after three 2x2 poolings (28 -> 14 -> 7 -> 3).
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            # Stage 1: two 3x3 convolutions with 64 channels.
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.BatchNorm2d(64),
            # Stage 2: two 3x3 convolutions with 128 channels.
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.BatchNorm2d(128),
            # Stage 3: a single 3x3 convolution with 256 channels.
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.BatchNorm2d(256),
        ]
        self.conv = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Flatten(),
            nn.Linear(256 * 3 * 3, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Linear(512, 10),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the class logits for the input batch ``x``."""
        return self.fc(self.conv(x))

# Use CUDA when it is available, otherwise fall back to the CPU, and move a
# freshly constructed model onto that device.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = SimpleCNN().to(device)


In [27]:
# Cross-entropy loss on the model outputs, optimised with Adam over all
# model parameters.
# NOTE(review): lr=2e-6 is far below Adam's default of 1e-3 — confirm this
# very small step size is intentional.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=2e-6)

In [28]:
def get_mixed_loader(x, y, batch_size=64):
    """Wrap image and label arrays in an unshuffled ``DataLoader``.

    Args:
        x: Image data, converted to a float tensor.
        y: Label data, converted to a long (int64) tensor.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over ``(x, y)`` pairs that preserves input order.
    """
    return DataLoader(
        TensorDataset(torch.FloatTensor(x), torch.LongTensor(y)),
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
    )

In [29]:
os.makedirs('results', exist_ok=True)
epochs = 20
accuracy_data = []


def eval_on(loader, true_labels=None):
    """Collect the global ``model``'s argmax predictions over ``loader``.

    The caller is responsible for switching the model to eval mode first.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        true_labels: When not ``None``, the batch labels are gathered too.

    Returns:
        Tuple ``(preds, trues)`` of NumPy arrays; ``trues`` is ``None`` when
        ``true_labels`` is ``None``.
    """
    preds = []
    trues = []
    # Gradients are not needed while evaluating.
    with torch.no_grad():
        for images, labels in loader:
            out = model(images.to(device))
            preds.append(torch.argmax(out, 1).cpu().numpy())
            if true_labels is not None:
                trues.append(labels.numpy())
    return np.concatenate(preds), (np.concatenate(trues) if true_labels is not None else None)


# The mixed test sets do not change between epochs, so their loaders (and the
# tensor copies they hold) are built once instead of on every epoch.
loader_mixed = get_mixed_loader(x_test_mixed, y_test_mixed)
loader_R = get_mixed_loader(x_test_R_form, y_test_R_form)
loader_S = get_mixed_loader(x_test_S_form, y_test_S_form)

for epoch in range(epochs):
    # One optimisation pass over the training loader.
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

    # Evaluation. Only the predictions are needed because the reference label
    # arrays already exist; indexing [0] also avoids assigning to `_`, which
    # IPython uses for the last cell output.
    model.eval()
    # Original test set
    preds_ori = eval_on(test_loader)[0]
    acc_ori = np.mean(preds_ori == y_test_original)
    # Plain (equal-weight) mix
    preds_mixed = eval_on(loader_mixed)[0]
    acc_mixed = np.mean(preds_mixed == y_test_mixed)
    # R-form
    preds_R = eval_on(loader_R)[0]
    acc_R = np.mean(preds_R == y_test_R_form)
    # S-form
    preds_S = eval_on(loader_S)[0]
    acc_S = np.mean(preds_S == y_test_S_form)

    accuracy_data.append([epoch+1, acc_ori, acc_mixed, acc_R, acc_S])
    print(f"Epoch {epoch+1}: 原始 {acc_ori:.4f}, 普通混合 {acc_mixed:.4f}, R-form {acc_R:.4f}, S-form {acc_S:.4f}")

# One row per epoch: epoch number followed by the four accuracies.
accuracy_data = np.array(accuracy_data)
np.savetxt('results/MNIST_accuracy_data_pytorch.csv', accuracy_data, fmt='%1.4f', delimiter=',')


In [30]:
def save_confusion_matrix_pytorch(y_true, y_pred, filename_image, filename_data):
    """Compute a confusion matrix and save it as a heatmap image and as CSV.

    Args:
        y_true: Reference labels.
        y_pred: Predicted labels.
        filename_image: Output path for the heatmap figure.
        filename_data: Output path for the comma-separated integer matrix.
    """
    matrix = confusion_matrix(y_true, y_pred)

    # Draw the un-annotated heatmap on an explicit 10x10 inch figure.
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(
        matrix,
        square=True,
        annot=False,
        fmt='d',
        cbar=False,
        cmap=plt.cm.Blues,
        ax=ax,
    )
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    fig.savefig(filename_image)
    # Close the figure once it has been written to disk.
    plt.close(fig)

    np.savetxt(filename_data, matrix, fmt='%d', delimiter=',')

# Save confusion matrices for the predictions from the final epoch.
save_confusion_matrix_pytorch(
    y_true=y_test_original,
    y_pred=preds_ori,
    filename_image='results/MNIST_confusion_matrix_original_pytorch.png',
    filename_data='results/MNIST_confusion_matrix_original_pytorch.csv',
)
save_confusion_matrix_pytorch(
    y_true=y_test_mixed,
    y_pred=preds_mixed,
    filename_image='results/MNIST_confusion_matrix_mixed_pytorch.png',
    filename_data='results/MNIST_confusion_matrix_mixed_pytorch.csv',
)
save_confusion_matrix_pytorch(
    y_true=y_test_R_form,
    y_pred=preds_R,
    filename_image='results/MNIST_confusion_matrix_R_form_pytorch.png',
    filename_data='results/MNIST_confusion_matrix_R_form_pytorch.csv',
)
save_confusion_matrix_pytorch(
    y_true=y_test_S_form,
    y_pred=preds_S,
    filename_image='results/MNIST_confusion_matrix_S_form_pytorch.png',
    filename_data='results/MNIST_confusion_matrix_S_form_pytorch.csv',
)
