In [2]:
import numpy as np
import torch
import torch.utils.data as Data
from sklearn.datasets import load_boston, load_iris

##读取波士顿回归数据
boston_X, boston_y = load_boston(return_X_y=True)
print("boston_X.dtype:", boston_X.dtype)
print("boston_y.dtype:", boston_y.dtype)

boston_X.dtype: float64
boston_y.dtype: float64



    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

In [3]:
##训练集x转化为张量，训练集y转化为张量
train_xt = torch.from_numpy(boston_X.astype(np.float32))
train_yt = torch.from_numpy(boston_y.astype(np.float32))
print("train_xt.dtype:", train_xt.dtype)
print("train_yt.dtype:", train_yt.dtype)

train_xt.dtype: torch.float32
train_yt.dtype: torch.float32


In [4]:
##将训练集转化为张量后，使用TensorDataset将x和y整理到一起
train_data = Data.TensorDataset(train_xt, train_yt)
##定义一个数据加载器，将训练数据进行批量处理
train_loader = Data.DataLoader(
    dataset=train_data,  ##使用数据集
    batch_size=64,  ##处理样本大小
    shuffle=True,  ##每次迭代前打乱数据
    num_workers=1,  ##使用两个进程
)
##检查训练数据集的一个batch的样本的维度是否正确
for step, (b_x, b_y) in enumerate(train_loader):
    if step > 0:
        break
## 输出训练图像的尺寸和标签的尺寸及数据类型
print("b_x.shape:", b_x.shape)
print("b_y.shape:", b_y.shape)
print("b_x.dtype:", b_x.dtype)
print("b_y.dtype:", b_y.dtype)

b_x.shape: torch.Size([64, 13])
b_y.shape: torch.Size([64])
b_x.dtype: torch.float32
b_y.dtype: torch.float32


In [5]:
##处理分类数据
iris_x, irisy = load_iris(return_X_y=True)
print("iris_x.dtype:", iris_x.dtype)
print("irisy.dtype:", irisy.dtype)

iris_x.dtype: float64
irisy.dtype: int32


In [6]:
##训练集x转化为张量，训练集y转化为张量
train_xt = torch.from_numpy(iris_x.astype(np.int64))
train_yt = torch.from_numpy(irisy.astype(np.int64))
print("train_xt.dtype:", train_xt.dtype)
print("train_yt.dtype:", train_yt.dtype)

train_xt.dtype: torch.int64
train_yt.dtype: torch.int64


In [7]:
##将训练集转化为张量后，使用data.tensorDataset将x和y整理到一起
train_data = Data.TensorDataset(train_xt, train_yt)
##定义一个数据加载器，将训练数据进行批量处理
train_loader = Data.DataLoader(
    dataset=train_data,  ##使用数据集
    batch_size=10,  ##处理样本大小,
    shuffle=True,  ##每次迭代前打乱数据
    num_workers=1,  ##使用两个进程
)
##检查训练数据集的一个batch的样本的维度是否正确
for step, (b_x, b_y) in enumerate(train_loader):
    if step > 0:
        break
## 输出训练图像的尺寸和标签的尺寸及数据类型
print("b_x.shape:", b_x.shape)
print("b_y.shape:", b_y.shape)
print("b_x.dtype:", b_x.dtype)
print("b_y.dtype:", b_y.dtype)

b_x.shape: torch.Size([10, 4])
b_y.shape: torch.Size([10])
b_x.dtype: torch.int64
b_y.dtype: torch.int64


In [8]:
import torch
import torch.utils.data as Data
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transfroms
from torchvision.datasets import ImageFolder

In [10]:
##使用FashionMNIST数据。准备训练数据集
train_data = FashionMNIST(
    root="./data/FashionMNIST",  ##数据的路径
    train=True,  ##只使用训练数据集
    transform=transfroms.ToTensor(),
    download=True  ##是否下载数据
)
##定义一个数据加载器
train_loader = Data.DataLoader(
    dataset=train_data,  ##使用的数据集
    batch_size=64,  ##批处理样本大小
    shuffle=True,  ##每次迭代前打乱数据
    num_workers=2,  ##使用两个进程
)
##就算train_loader有多少个batch
print("train_data的batch数量为：", len(train_loader))

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./data/FashionMNIST\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST\FashionMNIST\raw

train_data的batch数量为： 938


In [11]:
##对测试集进行处理
test_data = FashionMNIST(
    root="./data/FashionMNIST",  ##数据的路径
    train=False,  ##不使用训练数据集
    download=False
)
##为数据添加一个通道维度，并且取值范围缩放到0~1间
test_data_x = test_data.data.type(torch.FloatTensor) / 255.0
test_data_x = torch.unsqueeze(test_data_x, dim=1)
test_data_y = test_data.targets  ##测试集的标签
print("test_data_x.shape:", test_data_x.shape)
print("test_data_y.shape:", test_data_y.shape)

test_data_x.shape: torch.Size([10000, 1, 28, 28])
test_data_y.shape: torch.Size([10000])


In [14]:
##对训练集的预处理
train_data_transforms = transfroms.Compose([
    transfroms.RandomResizedCrop(224),  ##随机成宽比裁剪为224
    transfroms.RandomHorizontalFlip(),  ##依概率p=0.5水平反转
    transfroms.ToTensor(),  ##转化为张量并归一化至[0-1]
    ##图像标准化处理
    transfroms.Normalize([
        0.485, 0.456, 0.406
    ], [
        0.229,0.224,0.225
    ])
])

In [23]:
##读取图像
train_data_dir="D:/Rookie/pytorch/data/"
test_data=ImageFolder(train_data_dir,transform=train_data_transforms)
train_data_loader=Data.DataLoader(train_data,batch_size=4,shuffle=True,num_workers=1)
print("数据的lable：",train_data.targets)
##获得一个batch数据
for step,(b_x,b_y) in enumerate(train_data_loader):
    if step>0:
        break
##输出训练图像的尺寸的标签和chicun
print(b_x.shape)
print(b_y.shape)
print("图像取值范围为：",b_x.min(),"~",b_x.max())

FileNotFoundError: Couldn't find any class folder in D:/Rookie/pytorch/data/.