# divide the training set into smaller batches
- 1 epoch means 1 forward and backward pass over ALL training samples
- batch size means number of training samples in 1 forward and backward pass
- 1 iteration means training on one batch (1 forward and backward pass using batch-size samples)
# So 100 samples with a batch size of 20 need 5 iterations to complete 1 epoch


In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

In [3]:
# -O for output file
!wget https://www.kaggle.com/datasets/sgus1318/winedata/download?datasetVersionNumber=1 -O wine.csv

--2024-03-07 13:42:18--  https://www.kaggle.com/datasets/sgus1318/winedata/download?datasetVersionNumber=1
Resolving www.kaggle.com (www.kaggle.com)... 35.244.233.98
Connecting to www.kaggle.com (www.kaggle.com)|35.244.233.98|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /account/login?titleType=dataset-downloads&showDatasetDownloadSkip=False&messageId=datasetsWelcome&returnUrl=%2Fdatasets%2Fsgus1318%2Fwinedata%2Fversions%2F1%3Fresource%3Ddownload [following]
--2024-03-07 13:42:19--  https://www.kaggle.com/account/login?titleType=dataset-downloads&showDatasetDownloadSkip=False&messageId=datasetsWelcome&returnUrl=%2Fdatasets%2Fsgus1318%2Fwinedata%2Fversions%2F1%3Fresource%3Ddownload
Reusing existing connection to www.kaggle.com:443.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘wine.csv’

wine.csv                [ <=>                ]   5.30K  --.-KB/s    in 0s      

2024-03-07 13:42:19 (39.7 MB/s) - ‘wine.cs

In [6]:
class WineDataset(Dataset):
    """In-memory dataset backed by a comma-separated file.

    The first line of the file is skipped as a header. In every remaining
    row, column 0 is used as the label and all other columns as features.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, path="./wine.csv"):
        """Load the whole CSV file into float32 tensors.

        Args:
            path: Location of the CSV file. Defaults to ``./wine.csv`` so
                existing ``WineDataset()`` calls keep working.
        """
        super().__init__()
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single data row; otherwise the column slicing below would fail.
        xy = np.loadtxt(
            path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2
        )
        # features: every column except the first
        self.x = torch.from_numpy(xy[:, 1:])
        # labels: indexing with [0] keeps a (n_samples, 1) shape, so no
        # reshaping is needed later on
        self.y = torch.from_numpy(xy[:, [0]])
        # number of data rows read from the file
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self) -> int:
        """Return the number of samples in the dataset."""
        return self.n_samples

In [8]:
# build the dataset; the constructor reads ./wine.csv into memory
dataset = WineDataset()

In [9]:
# indexing returns one (features, label) pair
dataset[0]

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]),
 tensor([1.]))

In [23]:
# pull the full feature and label tensors off the dataset and show their shapes
features = dataset.x
labels = dataset.y
(features.shape, labels.shape)

(torch.Size([178, 13]), torch.Size([178, 1]))

In [13]:
# wrap the dataset for batch processing:
# batches of 4 samples, shuffled, loaded by 2 worker subprocesses
dataloader = DataLoader(
    dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

In [15]:
# inspect a single batch to confirm the batch size
dataiter = iter(dataloader)  # iterator over batches
# fetch the next batch and split it into features and labels in one step
features, labels = data = next(dataiter)
(features, labels)  # each holds 4 rows (batch size = 4)

(tensor([[1.2700e+01, 3.8700e+00, 2.4000e+00, 2.3000e+01, 1.0100e+02, 2.8300e+00,
          2.5500e+00, 4.3000e-01, 1.9500e+00, 2.5700e+00, 1.1900e+00, 3.1300e+00,
          4.6300e+02],
         [1.2290e+01, 3.1700e+00, 2.2100e+00, 1.8000e+01, 8.8000e+01, 2.8500e+00,
          2.9900e+00, 4.5000e-01, 2.8100e+00, 2.3000e+00, 1.4200e+00, 2.8300e+00,
          4.0600e+02],
         [1.2070e+01, 2.1600e+00, 2.1700e+00, 2.1000e+01, 8.5000e+01, 2.6000e+00,
          2.6500e+00, 3.7000e-01, 1.3500e+00, 2.7600e+00, 8.6000e-01, 3.2800e+00,
          3.7800e+02],
         [1.3560e+01, 1.7100e+00, 2.3100e+00, 1.6200e+01, 1.1700e+02, 3.1500e+00,
          3.2900e+00, 3.4000e-01, 2.3400e+00, 6.1300e+00, 9.5000e-01, 3.3800e+00,
          7.9500e+02]]),
 tensor([[2.],
         [2.],
         [2.],
         [1.]]))

In [16]:
# training loop setup
epochs = 2
total_samples = len(dataset)
# Iterations per epoch: round up to the nearest integer so the final, smaller
# batch is counted. The batch size is read from the loader rather than
# repeating the literal, so the two cannot drift apart.
n_iterations = math.ceil(total_samples / dataloader.batch_size)
total_samples, n_iterations

(178, 45)

In [21]:
for epoch in range(epochs):
    for i, batch in enumerate(dataloader):
        features, labels = batch
        # placeholder for the real training step:
        #   forward pass -> backward pass -> parameter update
        print(epoch, i, features.shape, labels.shape)

0 0 torch.Size([4, 13]) torch.Size([4, 1])
0 1 torch.Size([4, 13]) torch.Size([4, 1])
0 2 torch.Size([4, 13]) torch.Size([4, 1])
0 3 torch.Size([4, 13]) torch.Size([4, 1])
0 4 torch.Size([4, 13]) torch.Size([4, 1])
0 5 torch.Size([4, 13]) torch.Size([4, 1])
0 6 torch.Size([4, 13]) torch.Size([4, 1])
0 7 torch.Size([4, 13]) torch.Size([4, 1])
0 8 torch.Size([4, 13]) torch.Size([4, 1])
0 9 torch.Size([4, 13]) torch.Size([4, 1])
0 10 torch.Size([4, 13]) torch.Size([4, 1])
0 11 torch.Size([4, 13]) torch.Size([4, 1])
0 12 torch.Size([4, 13]) torch.Size([4, 1])
0 13 torch.Size([4, 13]) torch.Size([4, 1])
0 14 torch.Size([4, 13]) torch.Size([4, 1])
0 15 torch.Size([4, 13]) torch.Size([4, 1])
0 16 torch.Size([4, 13]) torch.Size([4, 1])
0 17 torch.Size([4, 13]) torch.Size([4, 1])
0 18 torch.Size([4, 13]) torch.Size([4, 1])
0 19 torch.Size([4, 13]) torch.Size([4, 1])
0 20 torch.Size([4, 13]) torch.Size([4, 1])
0 21 torch.Size([4, 13]) torch.Size([4, 1])
0 22 torch.Size([4, 13]) torch.Size([4, 1]