# 09 Dataset and DataLoader

In [166]:
import torch 
import torchvision as tv 
from torch.utils.data import Dataset, DataLoader 
import numpy as np
import math 

from sklearn.preprocessing import StandardScaler

In [167]:
class WineDataset(Dataset):
    def __init__(self):
        xy = np.loadtxt('wine.csv', delimiter=",", dtype=np.float32, skiprows=1)
        sc = StandardScaler()
        self.x = torch.from_numpy(sc.fit_transform(xy[:, 1:]))
        self.y = torch.from_numpy(xy[:, 0]).clone().detach()
        self.y = torch.tensor(self.y - 1, dtype=torch.long)
        self.n_samples = xy.shape[0]
        
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.n_samples
    

In [168]:
dataset = WineDataset()
dataset[0]

  self.y = torch.tensor(self.y - 1, dtype=torch.long)


(tensor([ 1.5186, -0.5622,  0.2321, -1.1696,  1.9139,  0.8090,  1.0348, -0.6596,
          1.2249,  0.2517,  0.3622,  1.8479,  1.0130]),
 tensor(0))

In [169]:
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)

In [170]:
dataiter = iter(dataloader)
data = dataiter.next()
data

[tensor([[-0.6802,  0.6227,  0.9997,  2.2537, -0.1925, -0.6331, -1.4550,  2.1607,
          -0.7900,  1.0563, -1.2611, -1.2453,  0.4239],
         [ 1.1604, -0.5443, -0.3528, -0.6291,  0.5799,  0.9372,  1.5167, -0.3373,
           0.8569,  1.6619,  0.7132,  0.6897,  1.6340],
         [-0.0131, -0.5982,  0.8535,  3.1545,  2.7565,  1.6102,  0.8641, -1.2236,
           0.6467, -0.7389,  1.5468,  1.2547,  0.7582],
         [-0.9890,  0.6227, -0.1700, -0.1486, -0.2627, -1.6746, -1.5454,  0.3074,
          -1.5084,  0.1912, -1.3050, -1.1041, -0.7544]]),
 tensor([2, 0, 1, 2])]

In [183]:
nepochs = 50
samples = len(dataset)
n_it = math.ceil(samples/4)
n_it

45

In [184]:
import torch.nn as nn 

class WineModel(nn.Module):
    def __init__(self, input_size):
        super(WineModel, self).__init__()
        self.d1 = nn.Linear(input_size, 64)
        self.d2 = nn.Linear(64, 64)
        self.d3 = nn.Linear(64, 3)
        
    def forward(self, x):
        x = nn.functional.relu(self.d1(x))
        x = nn.functional.relu(self.d2(x))
        return nn.functional.softmax(self.d3(x), 1)

In [185]:
model = WineModel(13)

In [186]:
criteria = nn.CrossEntropyLoss()
lr = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [187]:
for epoch in range(nepochs):
    for i, (inputs, labels) in enumerate(dataloader):
        y_pred = model(inputs)
        loss = criteria(y_pred, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    print(f'epoch: {epoch+1}/{nepochs}, step {i+1:2d}/{n_it:2d}, loss = {loss.item():.10f}')

epoch: 1/50, step 45/45, loss = 1.1349562407
epoch: 2/50, step 45/45, loss = 1.1025137901
epoch: 3/50, step 45/45, loss = 1.1131125689
epoch: 4/50, step 45/45, loss = 1.0876976252
epoch: 5/50, step 45/45, loss = 1.1359601021
epoch: 6/50, step 45/45, loss = 1.1408059597
epoch: 7/50, step 45/45, loss = 1.1298906803
epoch: 8/50, step 45/45, loss = 1.1127537489
epoch: 9/50, step 45/45, loss = 1.0858095884
epoch: 10/50, step 45/45, loss = 1.1292493343
epoch: 11/50, step 45/45, loss = 1.1223441362
epoch: 12/50, step 45/45, loss = 1.0467007160
epoch: 13/50, step 45/45, loss = 1.0759199858
epoch: 14/50, step 45/45, loss = 1.1342823505
epoch: 15/50, step 45/45, loss = 1.0485947132
epoch: 16/50, step 45/45, loss = 1.0238354206
epoch: 17/50, step 45/45, loss = 1.0256578922
epoch: 18/50, step 45/45, loss = 1.1159292459
epoch: 19/50, step 45/45, loss = 1.0746421814
epoch: 20/50, step 45/45, loss = 1.0072985888
epoch: 21/50, step 45/45, loss = 1.0742896795
epoch: 22/50, step 45/45, loss = 1.04196870

In [188]:
model(dataset.x).argmax(dim=1).eq(dataset.y).sum() / 178

tensor(0.5281)