# 模型开发
- 数据集的定义和加载
- 数据预处理
- 模型组网
- 训练与预测验证
- 单机多卡训练
- 自定义指标
- 模型保存与载入
- 模型导出ONNX协议
- [API文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/index_cn.html)

## 数据集的定义和加载
- 框架自带数据集
- 自定义数据集

In [4]:
import paddle
# Show which PaddlePaddle version is installed in this environment
print(paddle.__version__)

2.2.0


In [5]:
# Built-in datasets: print the names exported by the vision and text dataset modules
print('视觉相关数据集：', paddle.vision.datasets.__all__)
print('自然语言相关数据集：', paddle.text.__all__)

视觉相关数据集： ['DatasetFolder', 'ImageFolder', 'MNIST', 'FashionMNIST', 'Flowers', 'Cifar10', 'Cifar100', 'VOC2012']
自然语言相关数据集： ['Conll05st', 'Imdb', 'Imikolov', 'Movielens', 'UCIHousing', 'WMT14', 'WMT16', 'ViterbiDecoder', 'viterbi_decode']


In [17]:
# ToTensor converts each sample into a Tensor
from paddle.vision.transforms import ToTensor

# Training split
train_dataset = paddle.vision.datasets.MNIST(
    mode='train',
    transform=ToTensor(),
)
# Validation split (loaded with MNIST's 'test' mode)
val_dataset = paddle.vision.datasets.MNIST(
    mode='test',
    transform=ToTensor(),
)


In [13]:
# Custom dataset
import paddle
from paddle.io import Dataset

# Batch size used by the DataLoader, and the number of batches the dataset
# is sized to hold (the dataset length is BATCH_SIZE * BATCH_NUM)
BATCH_SIZE = 64
BATCH_NUM = 20

# Shape of each generated sample and the number of label classes
IMAGE_SIZE = (28, 28)
CLASS_NUM = 10

class MyDataset(Dataset):
    """Synthetic dataset that produces random image/label pairs.

    Samples are generated on every lookup, so requesting the same index
    twice returns different data; only the reported length is fixed.

    Args:
        num_samples: Number of samples the dataset reports via ``len()``.
    """

    def __init__(self, num_samples):
        super().__init__()
        self.num_samples = num_samples

    def __getitem__(self, index):
        """Return a freshly generated ``(data, label)`` pair.

        Args:
            index: Position of the requested sample. It is only used for the
                bounds check; the sample content does not depend on it.

        Returns:
            A tuple ``(data, label)`` where ``data`` is a float32 tensor of
            shape ``IMAGE_SIZE`` and ``label`` is an int64 tensor of shape
            ``[1]`` holding a class id in ``[0, CLASS_NUM)``.

        Raises:
            IndexError: If ``index`` lies outside the dataset, which also
                lets plain ``for`` iteration over the dataset terminate.
        """
        if not -self.num_samples <= index < self.num_samples:
            raise IndexError("dataset index out of range")
        data = paddle.uniform(IMAGE_SIZE, dtype="float32")
        # randint samples from the half-open range [low, high), so the upper
        # bound must be CLASS_NUM for the last class to be reachable.
        label = paddle.randint(0, CLASS_NUM, shape=[1], dtype="int64")
        return data, label

    def __len__(self):
        """Return the number of samples given at construction time."""
        return self.num_samples
    
# Smoke-test the dataset defined above by pulling only its first sample
custom_dataset = MyDataset(BATCH_SIZE * BATCH_NUM)
print('=============custom dataset=============')
data, label = next(iter(custom_dataset))
print(data.shape, label.shape)

[28, 28] [1]


In [14]:
# Data loading: batch the custom dataset and inspect the first batch
train_loader = paddle.io.DataLoader(custom_dataset, batch_size=BATCH_SIZE, shuffle=True)
for batch_id, data in enumerate(train_loader()):
    x_data, y_data = data[0], data[1]

    # One shape per line: inputs first, then labels
    print(x_data.shape, y_data.shape, sep='\n')

    # A single batch is enough to check the shapes
    break


[64, 28, 28]
[64, 1]


In [2]:
# 模型导出ONNX协议
# ONNX (Open Neural Network Exchange) 是针对机器学习所设计的开源文件格式，用于存储训练好的模型。
# 它使得不同的人工智能框架可以采用相同格式存储模型并交互。
# 通过ONNX格式，Paddle模型可以使用OpenVINO、ONNX Runtime等框架进行推理。

In [3]:
import paddle
from paddle import nn
from paddle.static import InputSpec

import paddle
from paddle import nn
from paddle.static import InputSpec

class LinearNet(nn.Layer):
    """Minimal network: one linear layer from 784 input features to 10 outputs."""

    def __init__(self):
        super().__init__()
        self._linear = nn.Linear(in_features=784, out_features=10)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        out = self._linear(x)
        return out

# Export the network to ONNX; the input is declared as float32 with a
# variable batch dimension and 784 features, named 'x'
layer = LinearNet()
save_path = 'onnx.save/linear_net'
x_spec = InputSpec(shape=[None, 784], dtype='float32', name='x')
paddle.onnx.export(layer, path=save_path, input_spec=[x_spec])


2022-03-07 16:39:26 [INFO]	ONNX model saved in onnx.save/linear_net.onnx


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  int(TensorProto.STRING): np.dtype(np.object)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  np.bool: core.VarDesc.VarType.BOOL,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  core.VarDesc.VarType.FP32: np.float,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  core.VarDesc.VarType.BOOL: np.bool
  return (isinstance(seq, collections.Sequence) and
  is_iterable = isinstance(value, collections.Iterable)
