In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
from skimage.transform import resize
import matplotlib.image as mpig

from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
%matplotlib inline

### 数据加载

In [2]:
# 数据加载类
class Mydata():
    """
    数据加载和预处理
    1. 读取图像文件
    2. 调整图像大小为28x28
    3. 处理标签
    4. 生成one-hot编码
    5. 划分训练集和测试集
    """
    def __init__(self, files):
        ims = []  # 存放以numpy格式的图片数据
        labels = []  
        for i in os.listdir(files):
            if i.endswith('.jpg'):
                im = mpig.imread(os.path.join(files, i), 0)
                im = resize(im, (28, 28))                
                l = i.replace('.jpg', '')
                l = l.split('_')[-1]  # label最后一位9表示数字8
                l = int(l) - 1
                labels.append(l)
                ims.append(im)
        self.data = np.array(ims)[:, :, :, np.newaxis]
        self.targets = np.array(labels)
        self.targets_hot = self.onehot(self.targets, 15)
    
    # One-hot编码函数
    def onehot(self, targets, num):
        return np.eye(num)[targets]
    
    def train_test_split(self, test_size=0.3, random_state=None):
        """
        划分训练集和测试集
        :param test_size: 测试集比例
        :param random_state: 随机种子
        :return: 训练集和测试集的数据及标签
        """
        X_train, X_test, y_train, y_test, y_train_hot, y_test_hot = train_test_split(
            self.data, self.targets, self.targets_hot, test_size=test_size, random_state=random_state
        )
        return X_train, y_train, y_train_hot, X_test, y_test, y_test_hot

In [3]:
# 读取数据
data = Mydata(r"C:\Users\TheAssshOne\Documents\文件\人工神经网络与深度学习\AI\data")
X_train, y_train, y_train_hot, X_test, y_test, y_test_hot = data.train_test_split(test_size=0.3, random_state=42)
'''
# 数据可视化
plt.figure(figsize=(10, 10))
for i in range(25):
    plt.subplot(5, 5, i+1)
    plt.imshow(X_train[i].reshape(28, 28), cmap='gray')
    plt.title(str(y_train[i]))
    plt.axis('off')
plt.show()

'''

"\n# 数据可视化\nplt.figure(figsize=(10, 10))\nfor i in range(25):\n    plt.subplot(5, 5, i+1)\n    plt.imshow(X_train[i].reshape(28, 28), cmap='gray')\n    plt.title(str(y_train[i]))\n    plt.axis('off')\nplt.show()\n\n"

DataLoaders方法

In [5]:
# 训练集图像数据
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)  
X_train_tensor = X_train_tensor.permute(0, 3, 1, 2)  # 将 (10500, 28, 28, 1) 转换为 (10500, 1, 28, 28)

y_train_tensor = torch.tensor(y_train, dtype=torch.long)     # 标签
trainset = TensorDataset(X_train_tensor, y_train_tensor)

# 测试集图像数据
X_test_tensor = torch.tensor(X_test, dtype=torch.float32) 
X_test_tensor = X_test_tensor.permute(0, 3, 1, 2)  # 将 (4500, 28, 28, 1) 转换为 (4500, 1, 28, 28)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)     # 标签
testset = TensorDataset(X_test_tensor, y_test_tensor)

# Transform
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# dataloaders
batch_size = 4

trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2, 
)

### 构建CNN

In [6]:
# 分类结果
classes = [
    "零", "一", "二", "三", "四",
    "五", "六", "七", "八", "九",
    "十", "百", "千", "万", "亿"
]

# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [7]:
# 输入图像为1通道， 28x28尺寸
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__() 
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.max_pool2d = nn.MaxPool2d(2, 2)
        self.flatten = nn.Flatten()

        self.linear_relu_stack = nn.Sequential(
            nn.Linear(16 * 4 * 4, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 15)
        )

    def forward(self, input):
        # Convolution layer C1: 1 input image channel, 6 output channels,
        # 5x5 square convolution, it uses RELU activation function, and
        # outputs a Tensor with size (N, 6, 24, 24), where N is the size of the batch
        c1 = self.max_pool2d(F.relu(self.conv1(input))) # Pool to (N, 6, 12, 12) Tensor output

        # Convolution layer C2: 6 input channels, 16 output channels,
        # 5x5 square convolution, it uses RELU activation function, and
        # outputs a (N, 16, 8, 8) Tensor
        c2 = self.max_pool2d(F.relu(self.conv2(c1))) # (N, 16, 4, 4) Tensor output

        # Flatten operation: purely functional, outputs a (N, 256 = 16 * 4 * 4) Tensor
        s3 = self.flatten(c2)

        # Fully connected layer F5: (N, 256) Tensor input,
        output = self.linear_relu_stack(s3) # (N, 15) Tensor output

        return output

net = Net()
net.eval()

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (max_pool2d): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=15, bias=True)
  )
)

optimizer and criterion

In [8]:
criterion = nn.CrossEntropyLoss() # 交叉熵损失函数
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9) # 随机梯度下降

TensorBorad Setup

In [9]:
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/fashion_mnist_experiment_1')

记录模型结构

In [10]:
dataiter = iter(trainloader)
images, labels = next(dataiter)
writer.add_graph(net, images)
writer.close()

### 训练

In [11]:
# helper function to show an image
def matplotlib_imshow(img, one_channel=False):
    if one_channel:
        img = img.mean(dim=0)
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        plt.imshow(np.transpose(npimg, (1, 2, 0)))


def images_to_probs(net, images):
    '''
    生成预测结果和相应概率
    '''
    output = net(images)
    # convert output probabilities to predicted class
    _, preds_tensor = torch.max(output, 1)
    preds = np.squeeze(preds_tensor.numpy())
    return preds, [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, output)]


def plot_classes_preds(net, images, labels):
    '''
    显示网络的最高预测值及其概率，以及实际标签。
    根据预测的正确与否为这些信息着色。
    '''
    preds, probs = images_to_probs(net, images)
    fig = plt.figure(figsize=(12, 48))
    for idx in np.arange(4):
        ax = fig.add_subplot(1, 4, idx+1, xticks=[], yticks=[])
        matplotlib_imshow(images[idx], one_channel=True)
        ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
            classes[preds[idx]],
            probs[idx] * 100.0,
            classes[labels[idx]]),
                    color=("green" if preds[idx]==labels[idx].item() else "red"))
    return fig

In [12]:
epochs = 7 # 训练次数

running_loss = 0.0
for epoch in range(epochs):
    for batch, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad() 

        # forward
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        
        # backpropagation
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

        if batch % 1000 == 999: # every 1000 mini-batches

            # log the running loss
            writer.add_scalar('training loss',
                            running_loss / 1000,
                            epoch * len(trainloader) + batch)
            
            # log a Matplotlib Figure showing the model's predictions on a
            # random mini-batch
            writer.add_figure('predictions vs. actuals',
                            plot_classes_preds(net, inputs, labels),
                            global_step=epoch * len(trainloader) + batch)
            running_loss = 0.0
            
print('Finished Training')
writer.close()
            

  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()
  canvas.draw()


Finished Training


### 评估模型

In [13]:
class_probs = []
class_preds = []
with torch.no_grad():
    for (images, labels) in testloader:
        images = images.to(device)
        output = net(images)
        class_probs_batch = [F.softmax(el, dim=0) for el in output]

        class_probs.append(class_probs_batch)
        class_preds.append(labels)
test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
test_preds = torch.cat(class_preds)

def add_pr_curve_tensorboard(class_index, test_probs, test_preds, global_step=0):
    '''
    绘制相应的精确度-召回曲线
    '''
    tensorboard_truth = test_preds == class_index
    tensorboard_probs = test_probs[:, class_index]

    writer.add_pr_curve(classes[class_index],
                        tensorboard_truth,
                        tensorboard_probs,
                        global_step=global_step)
    writer.close()

# 绘制所有类别的精确度-召回曲线
for i in range(len(classes)):
    add_pr_curve_tensorboard(i, test_probs, test_preds)


In [14]:
# 输出打印分类报告
from sklearn.metrics import classification_report
y_true = y_test

y_pred = []
with torch.no_grad():
    for (images, labels) in testloader:
        images = images.to(device)
        output = net(images)
        _, predicted = torch.max(output, 1)
        y_pred.extend(predicted.cpu().numpy())
print(classification_report(y_true, y_pred, target_names=classes))


              precision    recall  f1-score   support

           零       0.73      0.97      0.83       301
           一       0.88      0.96      0.92       282
           二       0.87      0.67      0.76       309
           三       0.79      0.68      0.73       314
           四       0.77      0.93      0.84       298
           五       0.80      0.71      0.76       298
           六       0.88      0.54      0.67       287
           七       0.83      0.63      0.72       304
           八       0.88      0.91      0.90       303
           九       0.64      0.70      0.67       270
           十       0.60      0.91      0.72       310
           百       0.61      0.66      0.63       304
           千       0.78      0.35      0.49       320
           万       0.63      0.73      0.67       303
           亿       0.75      0.89      0.81       297

    accuracy                           0.75      4500
   macro avg       0.76      0.75      0.74      4500
weighted avg       0.76   