In [None]:
!wget https://github.com/DeepStudio-TW/torch-dataloader-tutorial/raw/main/data.csv

In [82]:
import torch
import time
import pandas as pd
import torch.utils.data as tud
import numpy as np
import matplotlib.pyplot as plt

In [83]:
class NumDataset(tud.Dataset):
    """Map-style dataset backed by the leading rows of a CSV file.

    The CSV must contain a ``data`` column and a ``label`` column.

    Args:
        fname: Path (or any source accepted by ``pandas.read_csv``) of the CSV.
        number_length: Maximum number of leading rows to keep, passed to
            ``DataFrame.head``. If the file has fewer rows, only the rows that
            actually exist are kept.

    Attributes set by ``__init__``:
        df: The truncated DataFrame.
        number_length: Number of rows actually loaded (what ``len()`` reports).
        data: Array of the ``data`` column values.
        label: Array of the ``label`` column values.
    """

    def __init__(self, fname, number_length):
        super().__init__()
        self.df = pd.read_csv(fname).head(number_length)
        # head() silently returns fewer rows when the file is shorter than the
        # request, so record the real row count: samplers use len() to build
        # their index range and would otherwise produce out-of-range indices.
        self.number_length = len(self.df)

        self.data = self.df["data"].to_numpy()
        self.label = self.df["label"].to_numpy()

    def __len__(self):
        """Return the number of samples available in the dataset."""
        return self.number_length

    def __getitem__(self, idx):
        """Return the ``(data, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.label[idx]

多種sampler方法

1. SequentialSampler：按照順序的取資料
2. RandomSampler：按照指定的隨機方法進行取資料
3. WeightedRandomSampler：可以設定資料出現的比率進行取資料

RandomSampler可以搭配torch.Generator()固定隨機種子，讓每次隨機取樣的結果都一樣。

sampler需要搭配dataloader進行使用

In [84]:
# Load the first 5 rows of the CSV. The path is relative to the working
# directory, which is where the download cell at the top of the notebook saves
# the file, instead of a machine-specific absolute path.
dataset = NumDataset("data.csv", 5)
# SequentialSampler walks the dataset indices in their natural order.
seq_sampler = tud.SequentialSampler(dataset)

In [87]:
# Drive the DataLoader with the sequential sampler. No batch size is given, so
# each iteration yields one-element tensors for the data and the label.
dataloader = tud.DataLoader(dataset, sampler=seq_sampler)
for d, l in dataloader:
    print(
        "d type = ", type(d), "d = ", d,
        " || l type = ", type(l), "l = ", l,
    )

d type =  <class 'torch.Tensor'> d =  tensor([0.3315], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([0])
d type =  <class 'torch.Tensor'> d =  tensor([0.2033], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([1])
d type =  <class 'torch.Tensor'> d =  tensor([-1.5153], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([2])
d type =  <class 'torch.Tensor'> d =  tensor([-0.9327], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([3])
d type =  <class 'torch.Tensor'> d =  tensor([-0.7553], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([4])


In [114]:
# Dedicated, seeded generator: the random draw below is identical on every run.
rnd_gen = torch.Generator()
rnd_gen.manual_seed(45)

# Draw 5 indices with replacement, so the same sample may show up more than once.
rad_sampler = tud.RandomSampler(
    dataset,
    replacement=True,
    num_samples=5,
    generator=rnd_gen,
)
dataloader_rad = tud.DataLoader(dataset, sampler=rad_sampler)
for d, l in dataloader_rad:
    print("d = ", d, " i = ", l)

d =  tensor([-1.5153], dtype=torch.float64)  i =  tensor([2])
d =  tensor([0.2033], dtype=torch.float64)  i =  tensor([1])
d =  tensor([0.2033], dtype=torch.float64)  i =  tensor([1])
d =  tensor([-0.9327], dtype=torch.float64)  i =  tensor([3])
d =  tensor([-0.7553], dtype=torch.float64)  i =  tensor([4])


In [123]:
# One relative weight per dataset index (they do not have to sum to 1):
# index 3 is the most likely to be drawn, indices 0 and 2 the least likely.
# The seeded generator makes the weighted draw reproducible across runs, just
# like the generator passed to the RandomSampler example.
wet_gen = tud.WeightedRandomSampler(
    weights=[1, 2, 1, 5, 2],
    num_samples=10,
    generator=torch.Generator().manual_seed(45),
)
# NOTE: this rebinds `dataloader_rad`; from here on it is the weighted loader,
# not the RandomSampler loader created earlier.
dataloader_rad = tud.DataLoader(dataset, sampler=wet_gen)


In [124]:

# Walk the weighted loader and show the type and value of every drawn sample;
# indices with larger weights are expected to appear more often.
for d, l in dataloader_rad:
    print(
        "d type = ", type(d), "d = ", d,
        " || l type = ", type(l), "l = ", l,
    )

d type =  <class 'torch.Tensor'> d =  tensor([0.2033], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([1])
d type =  <class 'torch.Tensor'> d =  tensor([-0.9327], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([3])
d type =  <class 'torch.Tensor'> d =  tensor([-0.7553], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([4])
d type =  <class 'torch.Tensor'> d =  tensor([0.3315], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([0])
d type =  <class 'torch.Tensor'> d =  tensor([0.2033], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([1])
d type =  <class 'torch.Tensor'> d =  tensor([-0.9327], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([3])
d type =  <class 'torch.Tensor'> d =  tensor([0.2033], dtype=torch.float64)  || l type =  <class 'torch.Tensor'> l =  tensor([1])
d type =  <class 'torch.Tensor'> d =  tensor([-0.7553], dtype=torch.float64)  || l type