<a href="https://colab.research.google.com/github/Mechanics-Mechatronics-and-Robotics/PytorchBasics/blob/main/09_dataset_and_dataloader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

In [9]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math

## Initialization

## Data set

In [12]:
class WineDataset(Dataset):
  """Map-style dataset that serves rows of a CSV file as tensor pairs.

  The whole file is read eagerly in the constructor. The first row is
  skipped (skiprows=1), column 0 is used as the label and every remaining
  column as a feature. All values are stored as float32.
  """

  # File read when no explicit path is passed to the constructor.
  DEFAULT_PATH = '/content/drive/MyDrive/Colab Notebooks/pytorchTutorial-master/pytorchTutorial-master/data/wine/wine.csv'

  def __init__(self, path=DEFAULT_PATH):
    """Load the comma-separated file at ``path``.

    Args:
      path: File to read; defaults to ``DEFAULT_PATH``.
    """
    # ndmin=2 keeps the array two-dimensional even when the file holds a
    # single data row, so the column slicing below always works.
    xy = np.loadtxt(path, delimiter=',', dtype=np.float32, skiprows=1, ndmin=2)
    self.x = torch.from_numpy(xy[:, 1:])
    # Indexing with the list [0] keeps a trailing axis: shape (n_samples, 1).
    self.y = torch.from_numpy(xy[:, [0]])
    self.n_samples = xy.shape[0]

  def __getitem__(self, index):
    """Return the ``(features, label)`` pair stored at ``index``."""
    return self.x[index], self.y[index]

  def __len__(self):
    """Return the number of data rows that were loaded."""
    return self.n_samples

In [15]:
# Build the dataset and look at its first (features, label) pair.
dataset = WineDataset()
first_data = dataset[0]
# Trailing expression so the notebook displays the pair.
first_data


(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
         3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
         1.0650e+03]), tensor([1.]))

In [16]:
# Batches of 4 samples, shuffled, loaded with 2 worker subprocesses.
dataloader = DataLoader(
  dataset=dataset,
  batch_size=4,
  shuffle=True,
  num_workers=2,
)

In [18]:
# Pull a single batch from the loader to inspect it.
dataiter = iter(dataloader)
# Use the built-in next(): the iterator implements __next__, and recent
# PyTorch releases no longer expose a .next() method on it.
data = next(dataiter)
features, labels = data
features, labels

(tensor([[1.4210e+01, 4.0400e+00, 2.4400e+00, 1.8900e+01, 1.1100e+02, 2.8500e+00,
          2.6500e+00, 3.0000e-01, 1.2500e+00, 5.2400e+00, 8.7000e-01, 3.3300e+00,
          1.0800e+03],
         [1.3670e+01, 1.2500e+00, 1.9200e+00, 1.8000e+01, 9.4000e+01, 2.1000e+00,
          1.7900e+00, 3.2000e-01, 7.3000e-01, 3.8000e+00, 1.2300e+00, 2.4600e+00,
          6.3000e+02],
         [1.3400e+01, 4.6000e+00, 2.8600e+00, 2.5000e+01, 1.1200e+02, 1.9800e+00,
          9.6000e-01, 2.7000e-01, 1.1100e+00, 8.5000e+00, 6.7000e-01, 1.9200e+00,
          6.3000e+02],
         [1.2330e+01, 9.9000e-01, 1.9500e+00, 1.4800e+01, 1.3600e+02, 1.9000e+00,
          1.8500e+00, 3.5000e-01, 2.7600e+00, 3.4000e+00, 1.0600e+00, 2.3100e+00,
          7.5000e+02]]), tensor([[1.],
         [2.],
         [3.],
         [2.]]))

In [22]:
# Dummy training loop: walk over every batch and report progress.
num_epochs = 2
total_samples = len(dataset)
# Read the batch size from the loader itself so the step count cannot drift
# from the value the DataLoader was built with.
n_iterations = math.ceil(total_samples / dataloader.batch_size)

for epoch in range(num_epochs):
  for i, (inputs, labels) in enumerate(dataloader):
    # forward and backward passes would go here
    if (i + 1) % 5 == 0:
      print(f'epoch {epoch+1}/{num_epochs}, step {i+1}/{n_iterations}, inputs {inputs.shape}')

epoch 1/2, step 5/45, inputs torch.Size([4, 13])
epoch 1/2, step 10/45, inputs torch.Size([4, 13])
epoch 1/2, step 15/45, inputs torch.Size([4, 13])
epoch 1/2, step 20/45, inputs torch.Size([4, 13])
epoch 1/2, step 25/45, inputs torch.Size([4, 13])
epoch 1/2, step 30/45, inputs torch.Size([4, 13])
epoch 1/2, step 35/45, inputs torch.Size([4, 13])
epoch 1/2, step 40/45, inputs torch.Size([4, 13])
epoch 1/2, step 45/45, inputs torch.Size([2, 13])
epoch 2/2, step 5/45, inputs torch.Size([4, 13])
epoch 2/2, step 10/45, inputs torch.Size([4, 13])
epoch 2/2, step 15/45, inputs torch.Size([4, 13])
epoch 2/2, step 20/45, inputs torch.Size([4, 13])
epoch 2/2, step 25/45, inputs torch.Size([4, 13])
epoch 2/2, step 30/45, inputs torch.Size([4, 13])
epoch 2/2, step 35/45, inputs torch.Size([4, 13])
epoch 2/2, step 40/45, inputs torch.Size([4, 13])
epoch 2/2, step 45/45, inputs torch.Size([2, 13])


## Model prediction

In [3]:
# Layer sizes: one input per feature, a single output.
input_size, output_size = n_features, 1

# Approach 1: a bare linear layer
# model = torch.nn.Linear(input_size, output_size)

# Approach 2: a custom module
class LogisticRegression(torch.nn.Module):
  """A single linear layer followed by a sigmoid.

  Args:
    n_input_features: Size of the last dimension of the input tensor.
  """

  def __init__(self, n_input_features):
    super().__init__()
    # One output unit: forward() yields a single value per sample.
    self.linear = torch.nn.Linear(n_input_features, 1)

  def forward(self, x):
    """Return ``sigmoid(linear(x))``; every output lies between 0 and 1."""
    return torch.sigmoid(self.linear(x))

# Instantiate the custom module with one input per feature.
model = LogisticRegression(n_input_features=input_size)

## Loss: binary cross-entropy (BCE)

In [4]:
# Binary cross-entropy between predicted probabilities and 0/1 targets.
# Referenced through `torch`, which this file imports; `nn` is not imported.
loss = torch.nn.BCELoss()


# Training

In [7]:
# Hyperparameters: SGD step size and number of passes over X_train.
lr = 0.01
n = 100

optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

for epoch in range(n):
  # forward pass
  y_pred = model(X_train)
  # loss of the predictions against the training targets
  L = loss(y_pred, Y_train)
  # backward pass
  L.backward()
  # parameter update
  optimizer.step()
  # clear the gradients so they do not accumulate into the next iteration
  optimizer.zero_grad()

  # report on every 10th epoch
  if not (epoch + 1) % 10:
    print(f'epoch {epoch+1}: loss = {L.item():.4f}')

# Evaluation needs no gradient tracking.
with torch.no_grad():
  y_predicted = model(X_test)
  # round each output to the nearest integer to get a class label
  y_predicted_cls = torch.round(y_predicted)
  # fraction of predictions that equal the test targets
  acc = y_predicted_cls.eq(Y_test).sum() / float(Y_test.size(0))
  print(f'accuracy = {acc:.4f}')

epoch 10: loss = 0.2230
epoch 20: loss = 0.2148
epoch 30: loss = 0.2075
epoch 40: loss = 0.2009
epoch 50: loss = 0.1948
epoch 60: loss = 0.1893
epoch 70: loss = 0.1843
epoch 80: loss = 0.1796
epoch 90: loss = 0.1753
epoch 100: loss = 0.1713
accuracy = 0.9123


## Visualization

In [6]:
# plt.plot(X_numpy, Y_numpy, 'ro')
# plt.plot(X_numpy, predicted, 'b')