In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from  torch.utils.data import DataLoader, Dataset 


In [2]:
import numpy as np
import matplotlib.pyplot as plt

import math 

## DataLoader

Template for a custom dataset:

```python
class CustomDataset(Dataset):
    def __init__(self):
        # Load the data
        pass

    def __len__(self):
        # Return the number of samples in the dataset
        pass

    def __getitem__(self, idx):
        # Return the sample at index idx
        pass
```

In [3]:
class WineDataset(Dataset):
    """Map-style dataset over the wine CSV, held fully in memory as tensors.

    Column 0 of every row is used as the label; the remaining columns are
    the features.
    """

    def __init__(self):
        # Read the whole file once; the first row is skipped as a header.
        table = np.loadtxt('./data/wine.csv', delimiter=',', dtype=np.float32, skiprows=1)
        features = table[:, 1:]
        labels = table[:, [0]]  # list index keeps the column axis -> (n_samples, 1), not (n_samples,)
        self.X = torch.from_numpy(features)
        self.y = torch.from_numpy(labels)
        self.n_samples = table.shape[0]

    def __len__(self):
        """Return the number of rows loaded from the CSV."""
        return self.n_samples

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        features, label = self.X[index], self.y[index]
        return features, label

# Build the dataset once and display how many samples it holds.
dataset = WineDataset()
len(dataset)

178

In [4]:
# Draw a random valid index, then look at the (features, label) pair stored there.
idx = np.random.randint(low=0, high=len(dataset))
idx
Xi, yi = dataset[idx]
Xi, yi

5

(tensor([1.4200e+01, 1.7600e+00, 2.4500e+00, 1.5200e+01, 1.1200e+02, 3.2700e+00,
         3.3900e+00, 3.4000e-01, 1.9700e+00, 6.7500e+00, 1.0500e+00, 2.8500e+00,
         1.4500e+03]),
 tensor([1.]))

Create a `DataLoader` from the dataset object. It draws samples from the dataset and yields them in batches of size `batch_size`; with `shuffle=True` the sample order is reshuffled at every epoch.

```python 
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
```

In [5]:
# Wrap the dataset in a loader that yields shuffled mini-batches of four samples.
# num_workers is left at its default; pass num_workers=N to load batches
# with N worker subprocesses instead of the main process.
data_loader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)

In [6]:
# Pull a single batch out of the loader to inspect what it yields.
data_iter = iter(data_loader)
data = next(data_iter)

# The batch is indexed as (features, labels); show the shape of each part.
print(f'{data[0].shape} {data[1].shape}')

torch.Size([4, 13]) torch.Size([4, 1])


In [7]:
n_epochs = 2
# Read the bookkeeping values back from the loader instead of repeating the
# literal batch size, so they cannot drift from what the loader actually does.
batch_size = data_loader.batch_size
total_samples = len(data_loader.dataset)
# len(DataLoader) is the number of batches per epoch; the final, smaller batch
# is counted because the loader was built without drop_last.
n_iterations = len(data_loader)
print(f'{total_samples} = number of examples \n {n_iterations} = number of iterations per epoch')

178 = number of examples 
 45 = number of iterations per epoch


In [8]:
# Dummy training loop: walk the loader n_epochs times and log every fifth step.
for epoch in range(n_epochs):
    for i, (inputs, labels) in enumerate(data_loader):
        # forward, backward, update would go here
        if (i + 1) % 5 != 0:
            continue
        print(f'epoch {epoch+1}/{n_epochs}, step {i+1}/{n_iterations}, inputs {inputs.shape}')
            
            

epoch 1/2, step 5/45, inputs torch.Size([4, 13])
epoch 1/2, step 10/45, inputs torch.Size([4, 13])
epoch 1/2, step 15/45, inputs torch.Size([4, 13])
epoch 1/2, step 20/45, inputs torch.Size([4, 13])
epoch 1/2, step 25/45, inputs torch.Size([4, 13])
epoch 1/2, step 30/45, inputs torch.Size([4, 13])
epoch 1/2, step 35/45, inputs torch.Size([4, 13])
epoch 1/2, step 40/45, inputs torch.Size([4, 13])
epoch 1/2, step 45/45, inputs torch.Size([2, 13])
epoch 2/2, step 5/45, inputs torch.Size([4, 13])
epoch 2/2, step 10/45, inputs torch.Size([4, 13])
epoch 2/2, step 15/45, inputs torch.Size([4, 13])
epoch 2/2, step 20/45, inputs torch.Size([4, 13])
epoch 2/2, step 25/45, inputs torch.Size([4, 13])
epoch 2/2, step 30/45, inputs torch.Size([4, 13])
epoch 2/2, step 35/45, inputs torch.Size([4, 13])
epoch 2/2, step 40/45, inputs torch.Size([4, 13])
epoch 2/2, step 45/45, inputs torch.Size([2, 13])


## Transforms


In [9]:
class WineDataset(Dataset):
    """Map-style dataset over the wine CSV with an optional per-sample transform.

    The data is kept as NumPy arrays so that conversion to tensors (or any
    other preprocessing) can be delegated to ``transform``.

    Args:
        transform: Optional callable applied to each ``(features, label)``
            tuple before it is returned from ``__getitem__``. With ``None``
            the raw NumPy pair is returned.
        csv_path: Location of the comma-separated file. The first row is
            skipped as a header; column 0 is the label and the remaining
            columns are the features.
    """

    def __init__(self, transform=None, csv_path='./data/wine.csv'):
        # ndmin=2 keeps the result two-dimensional even for a one-row file,
        # so the column slicing below always works.
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
        self.X = xy[:, 1:]
        self.y = xy[:, [0]]  # list index keeps the column axis -> (n_samples, 1), not (n_samples,)
        self.n_samples = xy.shape[0]

        self.transform = transform

    def __len__(self):
        """Return the number of rows loaded from the CSV."""
        return self.n_samples

    def __getitem__(self, index):
        """Return the (optionally transformed) ``(features, label)`` pair at ``index``."""
        sample = self.X[index], self.y[index]
        # Compare against None instead of relying on truthiness: a callable
        # that happens to evaluate falsy must still be applied.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

# Without a transform the dataset hands back raw NumPy pairs; show its size.
dataset = WineDataset()
len(dataset)

178

In [10]:
class toTensor:
    """Callable transform turning a NumPy ``(inputs, targets)`` pair into tensors."""

    def __call__(self, sample):
        """Convert both arrays of ``sample`` with ``torch.from_numpy`` and return them as a tuple."""
        features, labels = sample
        return tuple(map(torch.from_numpy, (features, labels)))

In [11]:
# Same dataset, now converting every sample to tensors on access.
dataset = WineDataset(transform=toTensor())
dataset[0]

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]),
 tensor([1.]))

In [13]:
import torchvision

class standardize:
    """Transform that z-scores the inputs of an ``(inputs, targets)`` sample.

    Args:
        mean: Optional precomputed mean (tensor or float), e.g. per-feature
            statistics of the whole training set. When ``None`` the mean is
            taken over dim 0 of the ``inputs`` passed to ``__call__``.
        std: Optional precomputed standard deviation (tensor or float). When
            ``None`` it is taken over dim 0 of the ``inputs`` passed to
            ``__call__``.
        eps: Lower bound applied to the standard deviation so that a zero
            spread does not produce a division by zero.

    Note:
        With the defaults and a 1-D sample, dim 0 is the feature axis, so the
        statistics mix features of different scales. Pass dataset-level
        ``mean``/``std`` to standardize each feature independently.
    """

    def __init__(self, mean=None, std=None, eps=1e-8):
        self.mean = mean
        self.std = std
        self.eps = eps

    def __call__(self, sample):
        """Return ``((inputs - mean) / std, targets)``; ``targets`` is passed through unchanged."""
        inputs, targets = sample
        mean = torch.mean(inputs, dim=0) if self.mean is None else self.mean
        std = torch.std(inputs, dim=0) if self.std is None else self.std
        # clamp_min returns a new tensor, so a caller-supplied std is never
        # modified; values above eps are left untouched.
        std = torch.as_tensor(std, dtype=inputs.dtype).clamp_min(self.eps)
        inputs = (inputs - mean) / std
        return inputs, targets

# Chain the transforms so each sample is first converted to tensors and then standardized.
ts = torchvision.transforms.Compose(transforms=[toTensor(), standardize()])
dataset = WineDataset(transform=ts)
dataset[0]

(tensor([-0.2781, -0.3208, -0.3183, -0.2734,  0.1065, -0.3171, -0.3162, -0.3257,
         -0.3188, -0.3074, -0.3231, -0.3133,  3.3056]),
 tensor([1.]))