<a href="https://colab.research.google.com/github/TimS-ml/My-ML/blob/main/Intro/0x03_DataLoader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DataLoader

https://pytorch.org/docs/stable/data.html?highlight=dataloader#torch.utils.data.DataLoader

https://pytorch.org/docs/stable/data.html?highlight=dataset#torch.utils.data.Dataset


## go through dataset

A `DataLoader` is basically a data viewer: it iterates over a `Dataset` and yields mini-batches.

In each epoch, every batch it yields is already wrapped as tensors.

https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class

In [13]:
from torch.utils.data import Dataset, DataLoader
from torch import from_numpy, tensor
import numpy as np
import io, requests
import gzip

In [16]:
url = 'https://github.com/TimS-ml/DataMining/blob/master/z_Other/diabetes.csv.gz?raw=true'
# Download the gzip-compressed CSV once. The timeout keeps the cell from
# hanging forever if the server stops responding.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error (404, 5xx, ...) instead of letting an error page
# fail later with a confusing gzip/parse error.
response.raise_for_status()
# `f` holds the raw downloaded bytes; DiabetesDataset reads it when it is built.
f = response.content

In [17]:
class DiabetesDataset(Dataset):  # Dataset is from torch too
    """Diabetes dataset held fully in memory as tensors.

    Every column except the last is treated as a feature; the last column is
    the label.
    """

    def __init__(self, gz_bytes=None):
        """Parse a gzip-compressed, comma-separated table into tensors.

        Args:
            gz_bytes: raw bytes of the gzip-compressed CSV, e.g. the content
                of './data/diabetes.csv.gz' read in binary mode. When omitted,
                the module-level ``f`` (the downloaded file content) is used,
                so ``DiabetesDataset()`` keeps working as before.
        """
        if gz_bytes is None:
            gz_bytes = f

        # The context manager closes the gzip stream as soon as parsing is done.
        with gzip.open(io.BytesIO(gz_bytes), 'rt') as csv_file:
            # ndmin=2 keeps a one-row file two-dimensional, so the column
            # slicing below works for any number of rows >= 1.
            xy = np.loadtxt(csv_file, delimiter=',', dtype=np.float32, ndmin=2)

        # Number of samples (rows).
        self.len = xy.shape[0]
        # Features: all columns but the last.
        self.x_data = from_numpy(xy[:, 0:-1])
        # Labels: last column, indexed with a list so it stays 2-D (rows, 1).
        self.y_data = from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the (features, label) pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.len

In [18]:
# Build the in-memory dataset, then wrap it in a loader that serves shuffled
# mini-batches of 32 samples.
dataset = DiabetesDataset()
train_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=2,  # number of subprocesses used for data loading
)

In [19]:
# Nothing interesting here: just iterate over the whole dataset two times.
for epoch in range(2):
    for i, data in enumerate(train_loader, 0):
        # Each batch is an (inputs, labels) pair. The dataset already stores
        # tensors, so the batches arrive as tensors and need no extra wrapping.
        # Calling tensor(...) on them again would copy every batch and make
        # PyTorch emit its copy-construct UserWarning.
        inputs, labels = data

        # Run your training process
        # NOTE: `i` is the batch index within the current epoch, even though
        # the message is labelled "Epoch"; the text is kept unchanged so it
        # matches the recorded output.
        if i % 5 == 0:
            print(f'Epoch: {i}')

Epoch: 0
Epoch: 5
Epoch: 10
Epoch: 15
Epoch: 20


  


Epoch: 0
Epoch: 5
Epoch: 10
Epoch: 15
Epoch: 20


## regression

https://pytorch.org/docs/stable/torch.html?highlight=from_numpy#torch.from_numpy

https://pytorch.org/docs/stable/optim.html?highlight=optim%20sgd#torch.optim.SGD

https://pytorch.org/docs/stable/nn.html?highlight=nn%20bceloss#torch.nn.BCELoss


In [20]:
# References
# https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
# http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class
from torch import nn, optim

In [21]:
class Model(nn.Module):
    """Small fully connected network: 8 inputs -> 6 -> 4 -> 1 output.

    A sigmoid follows every linear layer, including the last one.
    """

    def __init__(self):
        """Create the three linear layers and the shared sigmoid activation."""
        super().__init__()
        self.l1 = nn.Linear(8, 6)
        self.l2 = nn.Linear(6, 4)
        self.l3 = nn.Linear(4, 1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through l1, l2 and l3 in order, with a sigmoid after each.

        Args:
            x: input tensor whose last dimension is 8 (the in_features of l1).

        Returns:
            The sigmoid-activated output of l3; its last dimension is 1.
        """
        activation = x
        for layer in (self.l1, self.l2, self.l3):
            activation = self.sigmoid(layer(activation))
        return activation

In [22]:
# Instantiate the network; its parameters are handed to the optimizer and
# updated by the training loop below.
model = Model()

In [23]:
# Optimizer and loss. model.parameters() hands SGD the learnable weights and
# biases of the nn.Linear layers registered on the model; the loss is binary
# cross-entropy summed over each batch.
optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.BCELoss(reduction='sum')

# Training loop: two passes over the mini-batches.
for epoch in range(2):
    for i, (inputs, labels) in enumerate(train_loader):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: predictions and loss for this batch.
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)

        # Report every sixth batch (i is zero-based, the message is one-based).
        if i % 6 == 5:
            print(f'Epoch {epoch + 1} | Batch: {i+1} | Loss: {loss.item():.4f}')

        # Backward pass, then update the weights.
        loss.backward()
        optimizer.step()

Epoch 1 | Batch: 6 | Loss: 24.1880
Epoch 1 | Batch: 12 | Loss: 19.9337
Epoch 1 | Batch: 18 | Loss: 25.5831
Epoch 1 | Batch: 24 | Loss: 14.7560
Epoch 2 | Batch: 6 | Loss: 20.9187
Epoch 2 | Batch: 12 | Loss: 27.4578
Epoch 2 | Batch: 18 | Loss: 22.2178
Epoch 2 | Batch: 24 | Loss: 19.4152
