# DataLoader

https://pytorch.org/docs/stable/data.html?highlight=dataloader#torch.utils.data.DataLoader

https://pytorch.org/docs/stable/data.html?highlight=dataset#torch.utils.data.Dataset


## go through dataset

basically is a data viewer

wrap in tensor in each epoach

In [1]:
# References
# https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
# http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class
from torch.utils.data import Dataset, DataLoader
from torch import from_numpy, tensor
import numpy as np

In [2]:
class DiabetesDataset(Dataset):  # Dataset is from torch too
    """ Diabetes dataset."""

    # Initialize your data, download, etc.
    def __init__(self):
        xy = np.loadtxt('./data/diabetes.csv.gz',
                        delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]
        self.x_data = from_numpy(xy[:, 0:-1])
        self.y_data = from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [3]:
dataset = DiabetesDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=32,
                          shuffle=True,
                          num_workers=2)  # how many subprocesses to use for data loading

In [4]:
# nothing interesting, just go over the data 2 times
for epoch in range(2):
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data
        
        # wrap them in Variable
        inputs, labels = tensor(inputs), tensor(labels)
        
        # Run your training process
        if i % 5 == 0:
            print(f'Epoch: {i}')
            # print(f'Inputs {inputs.data} | Labels {labels.data}')

Epoch: 0
Epoch: 5
Epoch: 10
Epoch: 15
Epoch: 20


  inputs, labels = tensor(inputs), tensor(labels)


Epoch: 0
Epoch: 5
Epoch: 10
Epoch: 15
Epoch: 20


## regression

https://pytorch.org/docs/stable/torch.html?highlight=from_numpy#torch.from_numpy

https://pytorch.org/docs/stable/optim.html?highlight=optim%20sgd#torch.optim.SGD

https://pytorch.org/docs/stable/nn.html?highlight=nn%20bceloss#torch.nn.BCELoss


In [5]:
# References
# https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
# http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class
from torch import nn, optim

In [6]:
class Model(nn.Module):

    def __init__(self):
        """
        In the constructor we instantiate two nn.Linear module
        """
        super(Model, self).__init__()
        self.l1 = nn.Linear(8, 6)
        self.l2 = nn.Linear(6, 4)
        self.l3 = nn.Linear(4, 1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and we must return
        a Variable of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Variables.
        """
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred

In [7]:
# our model
model = Model()

In [10]:
# Construct our loss function and an Optimizer. The call to model.parameters()
# in the SGD constructor will contain the learnable parameters of the two
# nn.Linear modules which are members of the model.
criterion = nn.BCELoss(reduction='sum')
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Training loop
for epoch in range(2):
    for i, data in enumerate(train_loader, 0):
        # get the inputs
        inputs, labels = data

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(inputs)

        # Compute and print loss
        loss = criterion(y_pred, labels)
        if (i+1) % 6 == 0:
            print(f'Epoch {epoch + 1} | Batch: {i+1} | Loss: {loss.item():.4f}')

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

Epoch 1 | Batch: 6 | Loss: 21.7653
Epoch 1 | Batch: 12 | Loss: 19.8895
Epoch 1 | Batch: 18 | Loss: 22.2112
Epoch 1 | Batch: 24 | Loss: 15.4154
Epoch 2 | Batch: 6 | Loss: 21.6797
Epoch 2 | Batch: 12 | Loss: 23.5177
Epoch 2 | Batch: 18 | Loss: 18.6194
Epoch 2 | Batch: 24 | Loss: 14.7989


# Softmax

## loss

In [11]:
from torch import nn, tensor, max
import numpy as np

In [21]:
# Cross entropy example (np only)
# One hot
# 0: 1 0 0
# 1: 0 1 0
# 2: 0 0 1
Y = np.array([1, 0, 0])
Y_pred1 = np.array([0.7, 0.2, 0.1])  # small loss
Y_pred2 = np.array([0.1, 0.3, 0.6])  # large loss

# calculate loss manually
print(f'Loss1: {np.sum(-Y * np.log(Y_pred1)):.4f}')
print(f'Loss2: {np.sum(-Y * np.log(Y_pred2)):.4f}')

Loss1: 0.3567
Loss2: 2.3026


In [13]:
# Softmax + CrossEntropy (logSoftmax + NLLLoss)
loss = nn.CrossEntropyLoss()

In [14]:
# target is of size nBatch
# each element in target has to have 0 <= value < nClasses (0-2)
# Input is class, not one-hot!!!
Y = tensor([0], requires_grad=False)  # means class 0 is correct

# input is of size nBatch x nClasses = 1 x 4
# Y_pred are logits (not softmax)!!!
Y_pred1 = tensor([[2.0, 1.0, 0.1]])  # should be small, since [0] is larger than the rest
Y_pred2 = tensor([[0.5, 2.0, 0.3]])

l1 = loss(Y_pred1, Y)
l2 = loss(Y_pred2, Y)

print(f'PyTorch Loss1: {l1.item():.4f} \nPyTorch Loss2: {l2.item():.4f}')
print(f'Y_pred1: {max(Y_pred1.data, 1)[1].item()}')
print(f'Y_pred2: {max(Y_pred2.data, 1)[1].item()}')

PyTorch Loss1: 0.4170 
PyTorch Loss2: 1.8406
Y_pred1: 0
Y_pred2: 1


In [20]:
# target is of size nBatch
# each element in target has to have 0 <= value < nClasses (0-2)
# Input is class, not one-hot
Y = tensor([2, 0, 1], requires_grad=False)

# input is of size nBatch x nClasses = 2 x 4
# Y_pred are logits (not softmax)
Y_pred1 = tensor([[0.1, 0.2, 0.9],
                  [1.1, 0.1, 0.2],
                  [0.2, 2.1, 0.1]])  # should be small

Y_pred2 = tensor([[0.8, 0.2, 0.3],
                  [0.2, 0.3, 0.5],
                  [0.2, 0.2, 0.5]])  # should be large

l1 = loss(Y_pred1, Y)
l2 = loss(Y_pred2, Y)
print(f'Batch Loss1:  {l1.item():.4f} \nBatch Loss2: {l2.data:.4f}')

Batch Loss1:  0.4966 
Batch Loss2: 1.2389


## MNIST

http://yann.lecun.com/exdb/mnist/

http://cs231n.github.io/neural-networks-3/#sgd

https://blog.csdn.net/u010089444/article/details/76725843

https://pytorch.org/docs/stable/torchvision/transforms.html?highlight=transforms

https://pytorch.org/docs/stable/nn.functional.html?highlight=functional%20relu#torch.nn.functional.relu

https://pytorch.org/docs/stable/nn.html#torch.nn.ReLU


In [1]:
# https://github.com/pytorch/examples/blob/master/mnist/main.py
from __future__ import print_function
from torch import nn, optim, cuda
from torch.utils import data
from torchvision import datasets, transforms  # transforms: common image transformations
import torch.nn.functional as F
import time

In [2]:
# Training settings
batch_size = 64
device = 'cuda' if cuda.is_available() else 'cpu'
# device = 'cpu'
print(f'Training MNIST Model on {device}\n{"=" * 44}')

Training MNIST Model on cuda


In [3]:
# MNIST Dataset
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='./data/',
                              train=False,
                              transform=transforms.ToTensor())

In [4]:
# Data Loader (Input Pipeline)
train_loader = data.DataLoader(dataset=train_dataset,
                               batch_size=batch_size,  # 64
                               shuffle=True)

test_loader = data.DataLoader(dataset=test_dataset,
                              batch_size=batch_size,
                              shuffle=False)

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 520)
        self.l2 = nn.Linear(520, 320)
        self.l3 = nn.Linear(320, 240)
        self.l4 = nn.Linear(240, 120)
        self.l5 = nn.Linear(120, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the data (n, 1, 28, 28)-> (n, 784)
        x = F.relu(self.l1(x))  # activation function
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)

In [6]:
model = Net()
model.to(device)
criterion = nn.CrossEntropyLoss()
# SGD with momentum is method which helps accelerate gradients vectors in the right directions
# thus leading to faster converging
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [8]:
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)  # Cross Entropy Loss
        loss.backward()
        optimizer.step()
        if batch_idx % 200 == 0:
            print('Train Epoch: {} | Batch Status: {}/{} ({:.0f}%) | Loss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # sum up batch loss
        test_loss += criterion(output, target).item()
        # get the index of the max
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print(f'===========================\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
          f'({100. * correct / len(test_loader.dataset):.0f}%)')


In [9]:
since = time.time()
for epoch in range(1, 10):
    epoch_start = time.time()
    train(epoch)
    m, s = divmod(time.time() - epoch_start, 60)
    print(f'Training time: {m:.0f}m {s:.0f}s')
    test()
    m, s = divmod(time.time() - epoch_start, 60)
    print(f'Testing time: {m:.0f}m {s:.0f}s')

m, s = divmod(time.time() - since, 60)
print(f'Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {device}!')

Train Epoch: 1 | Batch Status: 0/60000 (0%) | Loss: 2.295890
Train Epoch: 1 | Batch Status: 12800/60000 (21%) | Loss: 2.307435
Train Epoch: 1 | Batch Status: 25600/60000 (43%) | Loss: 2.282576
Train Epoch: 1 | Batch Status: 38400/60000 (64%) | Loss: 2.260816
Train Epoch: 1 | Batch Status: 51200/60000 (85%) | Loss: 2.049404
Training time: 0m 7s
Test set: Average loss: 0.0227, Accuracy: 5633/10000 (56%)
Testing time: 0m 8s
Train Epoch: 2 | Batch Status: 0/60000 (0%) | Loss: 1.280056
Train Epoch: 2 | Batch Status: 12800/60000 (21%) | Loss: 0.837074
Train Epoch: 2 | Batch Status: 25600/60000 (43%) | Loss: 0.550821
Train Epoch: 2 | Batch Status: 38400/60000 (64%) | Loss: 0.487448
Train Epoch: 2 | Batch Status: 51200/60000 (85%) | Loss: 0.378238
Training time: 0m 7s
Test set: Average loss: 0.0068, Accuracy: 8741/10000 (87%)
Testing time: 0m 8s
Train Epoch: 3 | Batch Status: 0/60000 (0%) | Loss: 0.452543
Train Epoch: 3 | Batch Status: 12800/60000 (21%) | Loss: 0.423461
Train Epoch: 3 | Batch 

# CNN

https://pytorch.org/docs/stable/nn.html?highlight=maxpool2d#torch.nn.MaxPool2d

https://pytorch.org/docs/stable/nn.html?highlight=conv2d#torch.nn.Conv2d


## basic example

In [13]:
# https://github.com/pytorch/examples/blob/master/mnist/main.py
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [14]:
# Training settings
batch_size = 64

# MNIST Dataset
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='./data/',
                              train=False,
                              transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [19]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.mp = nn.MaxPool2d(2)  # kernel size
        self.fc = nn.Linear(320, 10)  # avoid size mismatch, in: 64 * 320

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))  # max pool, print x.size() to check out
        x = F.relu(self.mp(self.conv2(x)))
        x = x.view(in_size, -1)  # flatten the tensor
        x = self.fc(x)
        # It is almost always you will need the last dimension when you compute the cross-entropy
        return F.log_softmax(x, -1)  # add dim -1 to avoid warning...


model = Net()

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [22]:
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 200 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))


# https://discuss.pytorch.org/t/two-small-questions-about-with-torch-no-grad/27571
@torch.no_grad()
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data), Variable(target)  # change Variable(data, volatile=True)
        output = model(data)
        # sum up batch loss
        test_loss += F.nll_loss(output, target, size_average=False).data
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [23]:
for epoch in range(1, 10):
    train(epoch)
    test()


Test set: Average loss: 0.0939, Accuracy: 9730/10000 (97%)


Test set: Average loss: 0.0745, Accuracy: 9758/10000 (97%)


Test set: Average loss: 0.0726, Accuracy: 9769/10000 (97%)


Test set: Average loss: 0.0637, Accuracy: 9796/10000 (97%)


Test set: Average loss: 0.0589, Accuracy: 9801/10000 (98%)


Test set: Average loss: 0.0599, Accuracy: 9803/10000 (98%)


Test set: Average loss: 0.0517, Accuracy: 9835/10000 (98%)


Test set: Average loss: 0.0528, Accuracy: 9834/10000 (98%)


Test set: Average loss: 0.0512, Accuracy: 9830/10000 (98%)



## advance example

In the context of convolutional neural networks, kernel = filter = feature detector

http://www.wildml.com/2015/11/understanding-convolutional-neural-networks-for-nlp/

https://www.youtube.com/watch?v=VxhSouuSZDY

https://buzzrobot.com/whats-happening-inside-the-convolutional-neural-network-the-answer-is-convolution-2c22075dc68d

https://docs.python.org/3/library/argparse.html

In [24]:
# https://github.com/pytorch/examples/blob/master/mnist/main.py
from __future__ import print_function
import argparse  # interesting package indeed
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [25]:
# Training settings
batch_size = 64

# MNIST Dataset
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='./data/',
                              train=False,
                              transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [26]:
# make comparison of each convolution
class InceptionA(nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        self.branch3x3dbl_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3dbl_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3dbl_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)


    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3dbl = self.branch3x3dbl_1(x)
        branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
        branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)

        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
        return torch.cat(outputs, 1)

In [32]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)

        self.incept1 = InceptionA(in_channels=10)
        self.incept2 = InceptionA(in_channels=20)

        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(1408, 10)


    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = self.incept1(x)  # inceptions
        x = F.relu(self.mp(self.conv2(x)))
        x = self.incept2(x)
        x = x.view(in_size, -1)  # flatten the tensor
        x = self.fc(x)
        # Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.
        return F.log_softmax(x, -1)


model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [35]:
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 200 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data))


@torch.no_grad()
def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data), Variable(target)
        output = model(data)
        # sum up batch loss
        test_loss += F.nll_loss(output, target, size_average=False).data
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
for epoch in range(1, 10):
    train(epoch)
    test()






Test set: Average loss: 0.0895, Accuracy: 9714/10000 (97%)


Test set: Average loss: 0.0791, Accuracy: 9754/10000 (97%)


Test set: Average loss: 0.0634, Accuracy: 9800/10000 (98%)

