- epoch = 1 forward & backward pass of all training examples
- batch size = no. of training samples in one forward & backward pass
- no. of iterations = no. of passes needed to complete one epoch, where each pass uses [batch_size] samples (the last pass may use fewer)
- Example: 100 samples, batch size = 20, 100/20 = 5 iterations for 1 epoch

In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [4]:
#Custom dataset:

class WineDataset(Dataset):
  """Map-style dataset backed by a comma-separated file loaded fully into memory.

  The file is read with one header row skipped. Column 0 becomes the target
  tensor ``y`` (kept 2-D, shape ``(n_samples, 1)``) and all remaining columns
  become the feature tensor ``x``. Both tensors are float32.
  """

  def __init__(self, path='/wine.csv'):
    """Load the CSV at ``path`` and split it into features and targets.

    Args:
      path: File to read (anything ``np.loadtxt`` accepts). Defaults to
        '/wine.csv', the location that used to be hard-coded, so existing
        ``WineDataset()`` calls behave as before.
    """
    # delimiter=',' splits each line on commas; skiprows=1 drops the header row.
    # ndmin=2 keeps the array 2-D even when the file holds a single data row;
    # without it loadtxt returns a 1-D array and the column slicing below fails.
    xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)

    self.x = torch.from_numpy(xy[:, 1:])   # all rows, every column from column 1 on
    self.y = torch.from_numpy(xy[:, [0]])  # all rows, column 0 only; [0] keeps shape (n_samples, 1)
    self.n_samples = xy.shape[0]           # number of rows

  def __getitem__(self, index):
    """Return the ``(x, y)`` pair at ``index``, enabling ``dataset[index]``."""
    return self.x[index], self.y[index]

  def __len__(self):
    """Return the number of samples, enabling ``len(dataset)``."""
    return self.n_samples

In [5]:
# Build the dataset and inspect its first sample as a quick check that loading worked.
dataset = WineDataset()
first_row = dataset[0]  # (features, label) tuple produced by __getitem__
features = first_row[0]
labels = first_row[1]
print(features, labels)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


In [6]:
# Wrap the dataset so it is served in shuffled mini-batches of 4,
# using 2 worker processes to load the data.
dataloader = DataLoader(
  dataset=dataset,
  batch_size=4,
  shuffle=True,
  num_workers=2,
)

In [8]:
# Pull a single mini-batch out of the loader by hand to look at its contents.
dataiter = iter(dataloader)
data = next(dataiter)
features = data[0]
labels = data[1]
print(features, labels)  # batch_size = 4, so 4 records are printed

tensor([[1.2290e+01, 2.8300e+00, 2.2200e+00, 1.8000e+01, 8.8000e+01, 2.4500e+00,
         2.2500e+00, 2.5000e-01, 1.9900e+00, 2.1500e+00, 1.1500e+00, 3.3000e+00,
         2.9000e+02],
        [1.3510e+01, 1.8000e+00, 2.6500e+00, 1.9000e+01, 1.1000e+02, 2.3500e+00,
         2.5300e+00, 2.9000e-01, 1.5400e+00, 4.2000e+00, 1.1000e+00, 2.8700e+00,
         1.0950e+03],
        [1.4100e+01, 2.0200e+00, 2.4000e+00, 1.8800e+01, 1.0300e+02, 2.7500e+00,
         2.9200e+00, 3.2000e-01, 2.3800e+00, 6.2000e+00, 1.0700e+00, 2.7500e+00,
         1.0600e+03],
        [1.2600e+01, 1.3400e+00, 1.9000e+00, 1.8500e+01, 8.8000e+01, 1.4500e+00,
         1.3600e+00, 2.9000e-01, 1.3500e+00, 2.4500e+00, 1.0400e+00, 2.7700e+00,
         5.6200e+02]]) tensor([[2.],
        [1.],
        [1.],
        [2.]])


In [9]:
num_epochs = 2
total_samples = len(dataset)
# Iterations per epoch = ceil(samples / batch size). The batch size is read from
# the DataLoader instead of repeating the literal 4, so the step count cannot
# drift from the loader's real configuration.
# With 178 samples and a batch size of 4: ceil(44.5) = 45.
n_iterations = math.ceil(total_samples / dataloader.batch_size)
print(total_samples, n_iterations)

178 45


In [14]:
#Training Loop:
for epoch in range(num_epochs):
  for i, (inputs, labels) in enumerate(dataloader):
    # the forward pass, backward pass and weight update would go here
    # progress is reported on every 5th step only; other steps print nothing
    if (i+1) % 5 != 0:
      continue
    print(f'epoch: {epoch+1}/{num_epochs}, step: {i+1}/{n_iterations}, inputs: {inputs.shape}')

epoch: 1/2, step: 5/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 10/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 15/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 20/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 25/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 30/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 35/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 40/45, inputs: torch.Size([4, 13])
epoch: 1/2, step: 45/45, inputs: torch.Size([2, 13])
epoch: 2/2, step: 5/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 10/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 15/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 20/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 25/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 30/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 35/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 40/45, inputs: torch.Size([4, 13])
epoch: 2/2, step: 45/45, inputs: torch.Size([2, 13])
