In [3]:
"""
Looking at PyTorch dataset and dataloader classes
It's a better way of handling large datasets: the samples are divided into smaller "batches"
We amend our training loop to have an inner loop which iterates over the batches:
    > Then we get x_batch samples and y_batch samples and do the optimisation based on those
PyTorch can do the batch calculations and iterations for us
"""

"""
Terminology:
    > Epoch = 1 forward and backward pass of ALL training samples
    > batch_size = number of training samples in one forward and backward pass
    > number of iterations = number of passes, each pass using [batch_size] number of samples
    > e.g. 100 samples, batch_size = 20 --> 100/20 = 5 iterations for 1 epoch
"""

Collecting torchvision
  Downloading torchvision-0.14.1-cp38-cp38-win_amd64.whl (1.1 MB)
Installing collected packages: torchvision
Successfully installed torchvision-0.14.1


In [5]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader # The important classes for this tutorial
import numpy as np
import math

In [27]:
# We can start implementing our own, custom dataset

"""
Dataset layout (data/wine.csv):
    > First line is the header
    > The first column is the class label
    > The remaining columns are the features
We want to predict wine class based off features
"""

class WineDataset(Dataset):
    """Map-style dataset backed by a wine CSV file.

    The file is expected to have one header row, the class label in the first
    column and the features in all remaining columns. The whole file is read
    into memory as float32 tensors when the dataset is constructed.

    Args:
        path: Location of the CSV file. Defaults to './data/wine.csv', the
            location this notebook originally used.

    Attributes:
        x: Feature tensor of shape (n_samples, n_features).
        y: Label tensor of shape (n_samples, 1).
        n_samples: Number of data rows that were loaded.
    """

    def __init__(self, path='./data/wine.csv'):
        # skiprows=1 skips the header row (column titles).
        # ndmin=2 keeps the array 2-D even when the file holds a single data
        # row; without it the column slicing below raises an IndexError.
        xy = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)

        # All samples, every column except the first -> features
        self.x = torch.from_numpy(xy[:, 1:])
        # All samples, first column only. Indexing with the list [0] keeps the
        # column dimension, so the shape is (n_samples, 1) rather than (n_samples,)
        self.y = torch.from_numpy(xy[:, [0]])
        # First dimension is the number of samples
        self.n_samples = xy.shape[0]

    def __getitem__(self, index):
        """Return the (features, label) tuple at ``index``, enabling ``dataset[0]``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples, enabling ``len(dataset)``."""
        return self.n_samples
    

# Build the dataset
dataset = WineDataset()

# Quick look at a single sample (kept disabled):
#     first_data = dataset[0]
#     features, labels = first_data
#     print(features, labels)

batchsize = 4 # Number of samples the DataLoader puts in each batch

# Wrap the dataset in a DataLoader, which does the batching for us.
#   shuffle=True -> shuffles the data, good for training
#   num_workers  -> uses more sub-processes, so it might make loading faster;
#                   not set here because it caused an error for me
#                   (I think it might be to do with Jupyter Notebook)
dataloader = DataLoader(
    dataset,
    batch_size=batchsize,
    shuffle=True,
)

# Pulling one batch out by hand (kept disabled):
#     dataiter = iter(dataloader)    # Convert to iterator
#     data = next(dataiter)
#     features, labels = data        # Unpack data
#     print(features, labels)


# Training loop (dummy version: it only reports information about the batches)

num_epochs = 2
total_samples = len(dataset) # 178
n_iterations = math.ceil(total_samples / batchsize) # 45

for epoch in range(num_epochs):
    # Inner loop: go over the batches produced by the DataLoader
    for i, (inputs, labels) in enumerate(dataloader):
        # enumerate hands back the batch index together with (inputs, labels).
        # A real loop would do the forward pass, backward pass and update here.
        is_report_step = (i + 1) % 5 == 0
        if not is_report_step:
            continue
        print(f'epoch, {epoch+1} / {num_epochs}, step {i+1}/{n_iterations}, inputs {inputs.shape}')
            



epoch, 1 / 2, step 5/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 10/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 15/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 20/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 25/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 30/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 35/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 40/45, inputs torch.Size([4, 13])
epoch, 1 / 2, step 45/45, inputs torch.Size([2, 13])
epoch, 2 / 2, step 5/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 10/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 15/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 20/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 25/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 30/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 35/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 40/45, inputs torch.Size([4, 13])
epoch, 2 / 2, step 45/45, inputs torch.Size([2, 13])


[1.000e+00 1.423e+01 1.710e+00 2.430e+00 1.560e+01 1.270e+02 2.800e+00
 3.060e+00 2.800e-01 2.290e+00 5.640e+00 1.040e+00 3.920e+00 1.065e+03]
