In [1]:
# !pip install scipy

In [2]:
# 导入必要的深度学习库，用于神经网络操作
# torch 提供张量计算和自动微分功能
import torch

# 导入 torch.nn 模块，用于构建神经网络
import torch.nn as nn

# 导入 torchvision.transforms 模块，用于图像预处理
import torchvision.transforms as transforms

# 导入 torch.nn.functional 模块，提供常用的神经网络功能
import torch.nn.functional as F

# 从 scipy.misc 导入 imread 和 imresize 函数，用于读取和调整图像大小
# from scipy.misc import imread, imresize

#### Hypothesis
The error message ImportError: cannot import name 'imread' from 'scipy.misc' indicates that the functions imread and imresize are no longer available in the scipy.misc module. These functions were deprecated in SciPy 1.0.0 and removed in SciPy 1.3.0.

#### Suggested Changes
Instead of using imread and imresize from scipy.misc, you should use imageio.imread and PIL.Image.resize for reading and resizing images. Here's the modified code:

```python
!pip install imageio pillow
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import imageio
from PIL import Image

def process_image(image):
    # Use imageio.imread to read the image
    img = imageio.imread(image)  
    
    # Use PIL.Image.resize to resize the image
    img = Image.fromarray(img).resize((256, 256))  
    img = np.array(img)  # Convert back to numpy array
    
    img = img.transpose(2, 0, 1)
    img = img / 255.
    img = torch.FloatTensor(img).cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([normalize])
    img = transform(img)
    return img
```
#### Explanation of Changes:

1. Install necessary packages: We install imageio and pillow using !pip install imageio pillow.
2. Import necessary modules: We import imageio and Image from PIL.
3. Replace imread: We use imageio.imread(image) to read the image.
4. Replace imresize: We use Image.fromarray(img).resize((256, 256)) to resize the image, converting it back to a NumPy array using np.array(img) afterwards.

##### way2
Okay, if you prefer to solely utilize SciPy for this task, you can leverage the scipy.ndimage.zoom function for resizing images. However, please note that while scipy.ndimage.zoom offers resizing capabilities, it is generally recommended to use dedicated image processing libraries like Pillow or OpenCV for more robust and efficient image resizing operations.

Here's the modified code using scipy.ndimage.zoom:
```python
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
from scipy import ndimage
import scipy

def process_image(image):
    # Use scipy.ndimage.imread to read the image
    img = scipy.ndimage.imread(image)
    
    # Use scipy.ndimage.zoom to resize the image
    img = scipy.ndimage.zoom(img, (256 / img.shape[0], 256 / img.shape[1], 1))
    
    img = img.transpose(2, 0, 1)
    img = img / 255.
    img = torch.FloatTensor(img).cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([normalize])
    img = transform(img)
    return img
```

##### Explanation of Changes:

1. Import scipy.ndimage: We import ndimage from scipy.
2. Replace imread: We use scipy.ndimage.imread(image) to read the image. Since you want to only use SciPy, we will use the imread function from scipy.ndimage for consistency, although it's also deprecated.
3. Replace imresize: We use scipy.ndimage.zoom to resize the image.
   - We calculate the zoom factors by dividing the target size (256) by the original image dimensions.
   - We set the zoom factor for the third dimension (color channels) to 1 to avoid resizing the color channels.

##### Important Considerations:

- Deprecated imread: Please note that scipy.ndimage.imread is also deprecated. While this solution addresses your preference for using SciPy only, it's worth considering migrating to other image processing libraries for future compatibility.
- Interpolation: The scipy.ndimage.zoom function uses spline interpolation by default. If you require a specific interpolation method, you can adjust the order parameter. Please refer to the SciPy documentation for more details.

By using scipy.ndimage.zoom, you can resize images within your code while solely relying on SciPy. However, remember the considerations mentioned above and explore alternative libraries for more robust and efficient image processing tasks in the long run.

### Saving the model weights

In [3]:
# # 定义图像处理函数
# def process_image(image):
#     # 读取图像文件
#     img = imread(image)

#     # 调整图像大小为 256x256
#     img = imresize(img, (256, 256))   # 256x256x3

#     # 转换图像通道顺序为 channels first 格式
#     img = img.transpose(2, 0, 1)   # channels first

#     # 将图像像素值归一化到 [0, 1] 范围
#     img = img / 255.

#     # 将图像转换为 PyTorch 张量并移动到 GPU
#     img = torch.FloatTensor(img).cuda()

#     # 定义图像归一化操作，使用 ImageNet 数据集的均值和标准差
#     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

#     # 将归一化操作组合成一个变换
#     transform = transforms.Compose([normalize])

#     # 对图像应用归一化变换
#     img = transform(img)  # (3, 256, 256)

#     # 返回处理后的图像张量
#     return img

In [9]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import imageio
from PIL import Image
import numpy as np

def process_image(image):# 定义图像处理函数
    # Use imageio.imread to read the image
    img = imageio.imread(image)

    # Use PIL.Image.resize to resize the image
    img = Image.fromarray(img).resize((256, 256))
    img = np.array(img)  # Convert back to numpy array

    img = img.transpose(2, 0, 1)   # 转换图像通道顺序为 channels first 格式
    img = img / 255.
    img = torch.FloatTensor(img).cuda()  # 将图像转换为 PyTorch 张量并移动到 GPU
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # 定义图像归一化操作，使用 ImageNet 数据集的均值和标准差
    transform = transforms.Compose([normalize])
    img = transform(img)
    return img

这段视频的目的其实就是对模型进行保存和加载。所以我们不会真正关注建立一个非常好的卷积网络，这只是一个非常基础的例子，向你展示概念保存该模型然后重新加载它。

In [5]:
# 定义卷积神经网络模型
class CNN(nn.Module):
    def __init__(self):
        # 调用父类构造函数，初始化神经网络模块
        super(CNN, self).__init__()

        # 定义第一个卷积层，输入通道数为 3，输出通道数为 6，卷积核大小为 5x5，无填充，步幅为 1
        self.conv1 = nn.Conv2d(3, 6, 5)

        # 定义最大池化层，池化窗口大小为 2x2，步幅为 2
        self.pool = nn.MaxPool2d(2, 2)

        # 定义第二个卷积层，输入通道数为 6，输出通道数为 12，卷积核大小为 5x5
        self.conv2 = nn.Conv2d(6, 12, 5)

        # 定义第一个全连接层，输入特征数为 12 * 61 * 61，输出特征数为 120
        self.fc1 = nn.Linear(12 * 61 * 61, 120)

        # 定义第二个全连接层，输入特征数为 120，输出特征数为 10
        self.fc2 = nn.Linear(120, 10)

    def forward(self, x):
        # 对输入图像应用第一个卷积层和 ReLU 激活函数
        x = F.relu(self.conv1(x))  # 输出大小 = [ (256 - 5 + 2(0) ) / 1 ] + 1 --> 252x252

        # 对卷积层输出应用最大池化层
        x = self.pool(x)  # 输出大小 = 252 / 2 --> 126x126

        # 对池化层输出应用第二个卷积层和 ReLU 激活函数
        x = F.relu(self.conv2(x))  # 输出大小 = [ (126 - 5 + 2(0) ) / 1 ] + 1 --> 122x122

        # 对卷积层输出应用最大池化层
        x = self.pool(x)  # 输出大小 = 122 / 2 --> 61x61

        # 将卷积层输出展平为一维张量
        x = x.view(-1, 12 * 61 * 61)  # (1, 44652)

        # 对展平后的张量应用第一个全连接层和 ReLU 激活函数
        x = F.relu(self.fc1(x))  # (1, 120)

        # 对全连接层输出应用第二个全连接层
        x = self.fc2(x)  # (1, 10)

        # 返回模型输出
        return x

In [6]:
# Initialize model # 初始化卷积神经网络模型
model = CNN()
model = model.cuda() # 将模型移动到 GPU
# Initialize optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) # 初始化优化器，使用随机梯度下降法，学习率为 0.001，动量为 0.9

In [10]:
image = process_image('test.jpg')
image = image.unsqueeze(0)      #batch dimension # 为图像增加一个批次维度

  img = imageio.imread(image)


In [11]:
output = model(image) # 将图像输入模型，获取输出
print(output.shape) # 打印模型输出的形状

torch.Size([1, 10])


In [12]:
# Print model's state_dict # 打印模型的状态字典
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

Model's state_dict:
conv1.weight 	 torch.Size([6, 3, 5, 5])
conv1.bias 	 torch.Size([6])
conv2.weight 	 torch.Size([12, 6, 5, 5])
conv2.bias 	 torch.Size([12])
fc1.weight 	 torch.Size([120, 44652])
fc1.bias 	 torch.Size([120])
fc2.weight 	 torch.Size([10, 120])
fc2.bias 	 torch.Size([10])


In [13]:
torch.save(model.state_dict(), 'model.pth.tar') # 保存模型的状态字典到文件

In [14]:
#model = CNN()
model.load_state_dict(torch.load('model.pth.tar')) # 加载模型的状态字典
model.eval()     #set dropout and batch normalization layers to evaluation mode before inference (testing) # 设置模型为评估模式，关闭 dropout 和 batch normalization 层

  model.load_state_dict(torch.load('model.pth.tar')) # 加载模型的状态字典


CNN(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=44652, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=10, bias=True)
)

你提供的输出是一个卷积神经网络（CNN）的结构信息。让我们逐步解析这个网络的各个部分：

1. **`conv1`:** 这是第一个卷积层。
   - 输入通道数：3（通常表示RGB图像的三个颜色通道）
   - 输出通道数：6（卷积层的输出通道数）
   - 卷积核大小：5x5
   - 步幅（stride）：1x1

2. **`pool`:** 这是一个最大池化层（Max Pooling Layer）。
   - 池化窗口大小：2x2
   - 步幅：2x2
   - 填充（padding）：0
   - 膨胀（dilation）：1
   - ceil_mode：False（表示是否使用天花板模式）

3. **`conv2`:** 这是第二个卷积层。
   - 输入通道数：6（来自第一个卷积层的输出）
   - 输出通道数：12
   - 卷积核大小：5x5
   - 步幅：1x1

4. **`fc1`:** 这是第一个全连接层（Fully Connected Layer）。
   - 输入特征数：44652（这是卷积层和池化层输出的展平后的特征数）
   - 输出特征数：120

5. **`fc2`:** 这是第二个全连接层。
   - 输入特征数：120（来自第一个全连接层的输出）
   - 输出特征数：10（通常表示分类任务中的类别数）

总结来说，这个网络首先通过两个卷积层和一个池化层提取图像的特征，然后通过两个全连接层进行分类。`model.eval()` 表示将模型设置为评估模式，这在进行推理时是必要的，因为某些层（如Dropout和BatchNorm）在训练和评估时的行为不同。

这个网络结构通常用于图像分类任务，其中输入是RGB图像，输出是10个类别中的一个。

### 代码优化建议
1. **图像处理优化**：
   - 使用 `PIL` 库替代 `scipy.misc` 进行图像读取和调整大小操作，因为 `scipy.misc` 已被弃用。
   - 使用 `torchvision.transforms` 提供的变换函数进行图像预处理。

2. **模型定义优化**：
   - 在模型定义中添加批量归一化层（Batch Normalization）以加速训练和提高模型性能。
   - 使用更深层次的卷积神经网络结构，如 ResNet 或 VGG，以提高模型的表达能力。

3. **优化器选择**：
   - 考虑使用 Adam 优化器，它通常比 SGD 收敛更快，且对学习率不太敏感。

4. **代码结构优化**：
   - 将图像处理和模型定义分离到不同的模块中，以提高代码的可读性和可维护性。

### 优化后的代码示例
```python
from PIL import Image
import torchvision.transforms as transforms

def process_image(image_path):
    img = Image.open(image_path)
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    img = transform(img)
    return img.cuda()

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 12, 5)
        self.fc1 = nn.Linear(12 * 61 * 61, 120)
        self.fc2 = nn.Linear(120, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 12 * 61 * 61)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CNN().cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

image = process_image('test.jpg')
image = image.unsqueeze(0)
output = model(image)
print(output.shape)

torch.save(model.state_dict(), 'model.pth.tar')
model.load_state_dict(torch.load('model.pth.tar'))
model.eval()
```

通过这些优化，代码的可读性、可维护性和性能都得到了提升。

### Saving & Loading a General Checkpoint for Inference and/or Resuming Training

In [15]:
model = CNN().cuda() # 初始化卷积神经网络模型并移动到 GPU

In [16]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) # 初始化优化器，使用随机梯度下降法，学习率为 0.001，动量为 0.9

In [17]:
# 创建一个字典来保存训练检查点，包括当前的 epoch、模型状态字典、优化器状态字典和损失值
checkpoint = {'epoch': 1, # 当前训练的 epoch 数
            'model_state_dict': model.state_dict(),# 模型的状态字典
            'optimizer_state_dict': optimizer.state_dict(),# 优化器的状态字典
            'loss': 0.2}  # 当前的损失值
torch.save(checkpoint, 'model.pth.tar')# 将检查点保存到文件 'model.pth.tar'

In [18]:
checkpoint = torch.load('model.pth.tar')# 从文件 'model.pth.tar' 加载检查点

  checkpoint = torch.load('model.pth.tar')


In [19]:
model.load_state_dict(checkpoint['model_state_dict']) # 加载模型的状态字典
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])# 加载优化器的状态字典
epoch = checkpoint['epoch']# 恢复训练的 epoch 数
loss = checkpoint['loss']# 恢复训练的损失值

In [20]:
# If testing
model.eval() # 如果进行测试，设置模型为评估模式，关闭 dropout 和 batch normalization 层
# If resume training
model.train() # 如果继续训练，设置模型为训练模式，启用 dropout 和 batch normalization 层

CNN(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=44652, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=10, bias=True)
)

优化后的代码示例

In [None]:
import os

# 初始化卷积神经网络模型并移动到 GPU
model = CNN().cuda()

# 初始化优化器，使用随机梯度下降法，学习率为 0.001，动量为 0.9
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# 创建一个字典来保存训练检查点，包括当前的 epoch、模型状态字典、优化器状态字典和损失值
checkpoint = {
    'epoch': 1,  # 当前训练的 epoch 数
    'model_state_dict': model.state_dict(),  # 模型的状态字典
    'optimizer_state_dict': optimizer.state_dict(),  # 优化器的状态字典
    'loss': 0.2  # 当前的损失值
}

# 将检查点保存到文件 'model.pth.tar'
torch.save(checkpoint, 'model.pth.tar')

# 从文件 'model.pth.tar' 加载检查点
if os.path.isfile('model.pth.tar'):
    checkpoint = torch.load('model.pth.tar')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    print(f"Checkpoint loaded: epoch {epoch}, loss {loss}")
else:
    print("No checkpoint found, starting from scratch")

# 如果进行测试，设置模型为评估模式，关闭 dropout 和 batch normalization 层
if testing:
    model.eval()
else:
    # 如果继续训练，设置模型为训练模式，启用 dropout 和 batch normalization 层
    model.train()
