In [1]:
import torch
import torchvision
from torch.utils.data import Dataset
import numpy as np

In [2]:
#MNIST is a dataset of handwritten digits from 0-9
#dataset = torchvision.datasets.MNIST(
#   root="/data", #location where the dataset will be stored
#   transform = torchvision.transforms.ToTensor() #converts images to tensors
#)

In [3]:
#Custom dataset:

class WineDataset(Dataset):
  """Map-style dataset backed by a comma-separated wine file.

  The file is read once, in full, at construction time. Column 0 of every
  row is treated as the target and all remaining columns as the features.
  Samples are returned as NumPy arrays unless a ``transform`` converts them.
  """

  def __init__(self, transform=None, path='/wine.csv'):
    """Load the CSV into memory and split it into features and targets.

    Args:
      transform: optional callable applied to each ``(features, target)``
        tuple in ``__getitem__``; ``None`` (default) returns raw samples.
      path: location of the CSV file. Defaults to the original hard-coded
        ``'/wine.csv'`` so existing callers keep working.
    """
    # skiprows=1 drops the header line; ndmin=2 keeps the array 2-D even when
    # the file holds a single data row, so the column slicing below stays valid.
    xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)

    # x & y stay NumPy arrays here; conversion to tensors is left to transform.
    self.x = xy[:, 1:]    # every row, columns 1..end -> shape (n_samples, n_features)
    self.y = xy[:, [0]]   # list index keeps the column axis -> shape (n_samples, 1)
    self.n_samples = xy.shape[0]  # number of data rows

    self.transform = transform

  def __getitem__(self, index):
    """Return ``(features, target)`` for ``index``, transformed if requested.

    Enables ``dataset[i]``.
    """
    sample = self.x[index], self.y[index]

    if self.transform:
      sample = self.transform(sample)

    return sample

  def __len__(self):
    """Return the number of samples, enabling ``len(dataset)``."""
    return self.n_samples

In [4]:
#Custom transform class: converts a (features, targets) pair of numpy arrays to tensors

class ToTensor:
  """Callable transform turning a pair of NumPy arrays into a pair of tensors."""

  def __call__(self, sample):
    """Convert ``sample`` (a two-element ``(inputs, targets)`` pair) to tensors.

    Defining ``__call__`` lets an instance be used like a plain function.
    Returns a tuple ``(inputs_tensor, targets_tensor)``.
    """
    features, labels = sample
    feature_tensor = torch.from_numpy(features)
    label_tensor = torch.from_numpy(labels)
    return feature_tensor, label_tensor

In [5]:
#Custom transform class: multiplies the features of a sample by a constant factor

class MulTransform():
  """Callable transform that scales a sample's features by a constant factor.

  The targets are passed through unchanged.
  """

  def __init__(self, factor):
    """Store the multiplier.

    Args:
      factor: value every feature is multiplied by.
    """
    self.factor = factor

  def __call__(self, sample):
    """Return ``(inputs * factor, targets)`` for a two-element ``sample``.

    The caller's ``inputs`` object is never modified.
    """
    inputs, targets = sample
    # Multiply out of place. An in-place ``inputs *= factor`` writes through to
    # whatever buffer ``inputs`` aliases: a row view of the dataset's array, or
    # a tensor made by torch.from_numpy, which shares memory with that array.
    # That would rescale the stored data again on every access.
    inputs = inputs * self.factor
    return inputs, targets

In [6]:
# Build the dataset with a ToTensor instance as its transform, so every sample
# fetched through __getitem__ is converted from numpy arrays to tensors.
dataset = WineDataset(transform = ToTensor()) #passing ToTensor to WineDataset

# Fetch sample 0 and unpack the (features, labels) pair it returns.
first_data = dataset[0]
features, labels = first_data
print(features)
# Both types should be torch.Tensor, confirming the transform was applied.
print(type(features), type(labels))

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>


In [7]:
#for multiple transforms
# Compose chains the transforms in list order: ToTensor() converts the sample
# to tensors first, then MulTransform(4) multiplies the features by 4.
composed = torchvision.transforms.Compose([ToTensor(), MulTransform(4)])

In [8]:
# Rebind `dataset` to a fresh WineDataset that applies the composed pipeline
# (ToTensor followed by MulTransform(4)) to each sample.
dataset = WineDataset(transform=composed)
# Fetch sample 0 and unpack the (features, labels) pair it returns.
first_data = dataset[0]
features, labels = first_data
print(features)
# Both types should still be torch.Tensor after the composed transform.
print(type(features), type(labels))

tensor([5.6920e+01, 6.8400e+00, 9.7200e+00, 6.2400e+01, 5.0800e+02, 1.1200e+01,
        1.2240e+01, 1.1200e+00, 9.1600e+00, 2.2560e+01, 4.1600e+00, 1.5680e+01,
        4.2600e+03])
<class 'torch.Tensor'> <class 'torch.Tensor'>
