In [32]:
"""import necessary modules"""
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

## Basic Information
  - DataLoader can do the batch computation
  - epoch: one forward and backward pass of all training samples
  - batch_size: number of training samples in one forward/backward pass
  - number of iterations: number of passes, each pass (forward+backward) using [batch_size] number of samples
  - For example: 100 samples, batch_size=20, hence, 100/20=5 iterations for 1 epoch

In [33]:
class WineDataset(Dataset):
    """Map-style dataset backed by a comma-separated file of wine measurements.

    The file is read once with ``np.loadtxt``: the first row is skipped as a
    header, column 0 is used as the class label and every remaining column is
    used as a feature.

    Args:
        path: Location of the CSV file. Defaults to ``'wine.csv'``, resolved
            relative to the current working directory.

    Attributes:
        n_samples: Number of data rows read from the file.
        x_data: float32 tensor of size ``[n_samples, n_features]``.
        y_data: float32 tensor of size ``[n_samples, 1]``.
    """

    def __init__(self, path='wine.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; without it loadtxt returns a 1-D array, the sample
        # count is wrong and the column slicing below fails.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.n_samples = xy.shape[0]

        self.x_data = torch.from_numpy(xy[:, 1:])   # all columns after the first
        self.y_data = torch.from_numpy(xy[:, [0]])  # list index keeps the column axis

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [34]:
# Build the dataset; called without arguments, WineDataset loads 'wine.csv'.
dataset = WineDataset()

In [35]:
# Index the dataset to fetch the first training sample and split it into its
# feature vector and its label in one step.
features, labels = first_data = dataset[0]
print(
    "First features are:\n{}\nFirst label is:{}".format(features, labels)
)

First features are:
tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03])
First label is:tensor([1.])


### Tasks
  - Load whole dataset with DataLoader
    - shuffle: shuffle data, good for training
    - num_workers: faster loading with multiple subprocesses
**Note:** if you get an error while loading, set num_workers = 0

In [36]:
# Wrap the dataset in a DataLoader that yields shuffled mini-batches of four
# samples, loaded by two worker subprocesses.
train_loader = DataLoader(
    dataset=dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [37]:
# Turn the loader into an iterator and pull one random batch, to check that
# the batch size and the tensor shapes are what we expect.
dataiter = iter(train_loader)
# next(dataiter) is used here in place of dataiter.next().
features, labels = data = next(dataiter)
print(
    "First features are:\n{}\nFirst label is:\n{}".format(features, labels)
)

First features are:
tensor([[1.3940e+01, 1.7300e+00, 2.2700e+00, 1.7400e+01, 1.0800e+02, 2.8800e+00,
         3.5400e+00, 3.2000e-01, 2.0800e+00, 8.9000e+00, 1.1200e+00, 3.1000e+00,
         1.2600e+03],
        [1.1610e+01, 1.3500e+00, 2.7000e+00, 2.0000e+01, 9.4000e+01, 2.7400e+00,
         2.9200e+00, 2.9000e-01, 2.4900e+00, 2.6500e+00, 9.6000e-01, 3.2600e+00,
         6.8000e+02],
        [1.2370e+01, 1.1300e+00, 2.1600e+00, 1.9000e+01, 8.7000e+01, 3.5000e+00,
         3.1000e+00, 1.9000e-01, 1.8700e+00, 4.4500e+00, 1.2200e+00, 2.8700e+00,
         4.2000e+02],
        [1.2040e+01, 4.3000e+00, 2.3800e+00, 2.2000e+01, 8.0000e+01, 2.1000e+00,
         1.7500e+00, 4.2000e-01, 1.3500e+00, 2.6000e+00, 7.9000e-01, 2.5700e+00,
         5.8000e+02]])
First label is:
tensor([[1.],
        [2.],
        [2.],
        [2.]])


**Note:** each batch holds 4 samples (4 feature rows and 4 labels). With 178 samples in total and batch_size = 4, 178/4 = 44.5, which means 45 iterations in each epoch; the last batch contains only the remaining 2 samples.

In [38]:
# Dummy training loop: walk through the loader for a few epochs and report
# the batch shapes instead of actually training a model.
num_epochs = 2
total_samples = len(dataset)
# len() of a DataLoader is its number of batches per epoch, so the step count
# follows the loader's own batch_size instead of repeating it as a literal.
n_iterations = len(train_loader)
print("Toral samples are: {}\nTotal iterations in each epoch: {}\n".format(total_samples, n_iterations))

for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        # Run your training process
        # Report every fifth step only, to keep the output short.
        if (i+1) % 5 == 0:
            print(f'Epoch: {epoch+1}/{num_epochs}, Step {i+1}/{n_iterations}| Inputs {inputs.shape} | Labels {labels.shape}')

Toral samples are: 178
Total iterations in each epoch: 45

Epoch: 1/2, Step 5/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 10/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 15/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 20/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 25/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 30/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 35/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 40/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 1/2, Step 45/45| Inputs torch.Size([2, 13]) | Labels torch.Size([2, 1])
Epoch: 2/2, Step 5/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 10/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4, 1])
Epoch: 2/2, Step 15/45| Inputs torch.Size([4, 13]) | Labels torch.Size([4,

## Some Famous Datasets: available in `torchvision.datasets`
  - for example, MNIST, Fashion-MNIST, CIFAR10, COCO

In [39]:
# Get the MNIST training split through torchvision (download=True lets it
# fetch the files into ./data) and convert each image to a tensor.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# NOTE: this rebinds train_loader, dataiter and data, which the earlier cells
# bound to the wine dataset.
train_loader = DataLoader(dataset=train_dataset, batch_size=3, shuffle=True)

# Draw one random batch and show the shapes of its inputs and targets.
dataiter = iter(train_loader)
inputs, targets = data = next(dataiter)
print(inputs.shape, targets.shape)

torch.Size([3, 1, 28, 28]) torch.Size([3])


## Try Some New Data