In [12]:
import torch 
import torchvision
import torch.nn as nn
import numpy as np
import torchvision.transforms as transforms

# 数据加载

从官方数据集、网络、本地硬盘、内存加载数据的方法

## numpy 数组与 tensor 的相互转换

In [13]:
# Start from a small 2x2 integer array on the numpy side.
x = np.array(
    [[1, 2],
     [3, 4]]
)

# numpy -> torch: wrap the ndarray as a tensor.
y = torch.from_numpy(x)

# torch -> numpy: get an ndarray view of the tensor back.
z = y.numpy()

## 从官方数据集加载数据

In [14]:
# Directory holding the CIFAR-10 files -- adjust this path for your machine.
root = '~/Code/dataset/CIFAR10'

# Build the training split (downloading it when requested via download=True);
# ToTensor() is applied to every image that is fetched.
train_dataset = torchvision.datasets.CIFAR10(
    root=root,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Indexing the dataset yields one (sample, label) pair.
image, label = train_dataset[0]
print(image.shape)
print(label)

Files already downloaded and verified
torch.Size([3, 32, 32])
6


In [15]:
# Data loader: serves the dataset in shuffled mini-batches of 64 samples.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64,
                                           shuffle=True)

# Creating an iterator is what starts loading data from the dataset.
data_iter = iter(train_loader)

# Fetch a single mini-batch of images and labels.
# Use the builtin next(): the iterator's `.next()` method is a Python 2 era
# spelling that current PyTorch DataLoader iterators do not provide.
images, labels = next(data_iter)

# Actual usage of the data loader is as below.
for images, labels in train_loader:
    # Training code should be written here.
    pass


## 用自定义 Dataset 加载本地硬盘上的数据

In [17]:
import pandas as pd

# You should build your custom dataset as below.
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset backed by a CSV file loaded into a pandas DataFrame.

    Each row of the CSV is treated as one sample.

    Args:
        csv_file: Path of the CSV file (anything ``pandas.read_csv`` accepts).
    """

    def __init__(self, csv_file):
        # Read the whole CSV once; samples are then served from memory.
        # (The parameter was previously misspelled, so this line raised
        # NameError as soon as the dataset was constructed.)
        self.df = pd.read_csv(csv_file)

    def __getitem__(self, index):
        """Return the row at positional ``index`` as a pandas Series.

        A real dataset would typically:
          1. Read one data item (e.g. using numpy.fromfile, PIL.Image.open).
          2. Preprocess it (e.g. with torchvision transforms).
          3. Return a data pair (e.g. image and label).

        NOTE(review): DataLoader's default collate function is not expected
        to accept pandas Series objects -- convert the row to tensors, numpy
        arrays or a tuple here before batching with the default settings.
        TODO confirm against the column types of the actual CSV.
        """
        return self.df.iloc[index]

    def __len__(self):
        """Return the total number of samples, i.e. rows in the CSV."""
        return len(self.df)

# Path of the CSV file that backs the custom dataset -- adjust this for your
# machine. (CustomDataset requires it; calling CustomDataset() with no
# argument raises TypeError.)
csv_file = 'data.csv'

# You can then use the prebuilt data loader.
custom_dataset = CustomDataset(csv_file)
train_loader = torch.utils.data.DataLoader(dataset=custom_dataset,
                                           batch_size=64,
                                           shuffle=True)