# Neural Network Programming – Deep Learning with PyTorch
PART 2: NEURAL NETWORK TRAINING

Link: http://deeplizard.com/learn/video/v5cngxo4mIg

# Importance of Data in Deep Learning - Fashion MNIST for Artificial Intelligence

Link: http://deeplizard.com/learn/video/EqpzfvxBx30

# ETL with the PyTorch ```Dataset``` and ```DataLoader``` classes

Link: http://deeplizard.com/learn/video/8n-TGaBZnk4

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
# Check blog post on deeplizard.com for any version related updates
# This notebook runs with the version below
print(torch.__version__)
print(torchvision.__version__)

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [None]:
train_loader = torch.utils.data.DataLoader(train_set)

# PyTorch Datasets and DataLoaders - Training Set Exploration for Deep Learning and AI

Link: http://deeplizard.com/learn/video/mUueSPmcOBc

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=10
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

torch.set_printoptions(linewidth=120)

Working with the training set:

In [None]:
len(train_set)

In [None]:
train_set.targets

In [None]:
train_set.targets.bincount()

In [None]:
sample = next(iter(train_set))

In [None]:
len(sample)

In [None]:
type(sample)

In [None]:
image, label = sample

In [None]:
type(image)

In [None]:
type(label) # Note that this used to be a tensor as well. Changed as of torchvision version 0.2.2

See the release notes for this change: https://github.com/pytorch/vision/releases/tag/v0.2.2

In [None]:
image.shape

In [None]:
torch.tensor(label).shape

In [None]:
plt.imshow(image.squeeze(), cmap='gray')
print('label:', label)

Working with the data loader:

In [None]:
batch = next(iter(train_loader))

In [None]:
len(batch)

In [None]:
type(batch)

In [None]:
images, labels = batch

In [None]:
images.shape

In [None]:
labels.shape

In [None]:
grid = torchvision.utils.make_grid(images, nrow=10)

plt.figure(figsize=(15,15))
plt.imshow(grid.permute(1,2,0))

print('labels:', labels)

In [None]:
# Show the first `how_many_to_plot` samples of a shuffled pass over the
# training set, one image per subplot, titled with its class name.
how_many_to_plot = 20

train_loader = torch.utils.data.DataLoader(
    train_set, shuffle=True, batch_size=1
)

plt.figure(figsize=(50, 50))
for i, batch in enumerate(train_loader, start=1):
    image, label = batch
    plt.subplot(10, 10, i)  # i-th slot (1-based) of a 10x10 grid
    plt.imshow(image.reshape(28, 28), cmap='gray')
    plt.axis('off')
    plt.title(train_set.classes[label.item()], fontsize=28)
    if i >= how_many_to_plot:
        break
plt.show()

# Build PyTorch CNN - Object Oriented Neural Networks

Link: http://deeplizard.com/learn/video/k4jY9L8H89U

Object Oriented Programming Review

In [None]:
class Lizard:
    """Tiny example class: one attribute (`name`) and one method to change it."""

    def __init__(self, name):
        # Go through the setter so `name` is assigned in a single place.
        self.set_name(name)

    def set_name(self, name):
        """Replace the current name with `name`."""
        self.name = name

In [None]:
lizard = Lizard('deep')
print(lizard.name)

In [None]:
lizard.set_name('lizard')
print(lizard.name)

PyTorch Network

In [None]:
class Network:
    """Bare skeleton of a network: one layer slot and a forward method."""

    def __init__(self):
        # Placeholder only; `forward` calls whatever is stored here.
        self.layer = None

    def forward(self, t):
        """Apply `self.layer` to `t` and return the result."""
        return self.layer(t)

In [None]:
import torch
import torch.nn as nn

In [None]:
class Network(nn.Module):
    """CNN skeleton: the layers are declared, the forward pass is still a stub."""

    def __init__(self):
        super().__init__()
        # Two convolutional layers with 5x5 kernels: 1 -> 6 -> 12 channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Three linear layers narrowing 12*4*4 features down to 10 outputs.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Not implemented yet: hands `t` back untouched."""
        return t

In [None]:
network = Network()
network

# PyTorch CNN Weights - Learnable Parameters in Neural Networks

Link: http://deeplizard.com/learn/video/stWU37L91Yc

In [None]:
import torch
import torch.nn as nn

In [None]:
class Network(nn.Module):
    """Layer container used to inspect learnable parameters; forward is a stub."""

    def __init__(self):
        super().__init__()
        # Convolutional layers (5x5 kernels): 1 -> 6 -> 12 channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Linear layers: 12*4*4 -> 120 -> 60 -> 10 features.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Forward pass is not implemented here; `t` is returned as given."""
        return t

In [None]:
network = Network()

In [None]:
print(network)

In [None]:
class Network:  # deliberately NOT derived from nn.Module in this cell
    """Same layers as before, but held by a plain Python object."""

    def __init__(self):
        # There is no nn.Module base here, hence no super().__init__() call.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Forward pass is not implemented here; `t` is returned as given."""
        return t

In [None]:
network = Network()

In [None]:
print(network)

In [None]:
class Network:  # deliberately NOT derived from nn.Module in this cell
    """Plain-object network that supplies its own string representation."""

    def __init__(self):
        # There is no nn.Module base here, hence no super().__init__() call.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def __repr__(self):
        # This is the text shown by print(network) and repr(network).
        return "lizardnet"

    def forward(self, t):
        """Forward pass is not implemented here; `t` is returned as given."""
        return t

In [None]:
network = Network()

In [None]:
print(network)

In [None]:
# Restore the nn.Module-based definition and build a fresh instance of it.
class Network(nn.Module):
    """nn.Module-based network with declared layers and a stub forward pass."""

    def __init__(self):
        super().__init__()
        # Convolutional layers (5x5 kernels): 1 -> 6 -> 12 channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Linear layers: 12*4*4 -> 120 -> 60 -> 10 features.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Forward pass is not implemented here; `t` is returned as given."""
        return t


network = Network()

Accessing the Layers

In [None]:
network.conv1

In [None]:
network.conv2

In [None]:
network.fc1

In [None]:
network.fc2

In [None]:
network.out

In [None]:
network

Accessing the Weights

In [None]:
network.conv1.weight

In [None]:
network.conv1.weight.shape

In [None]:
network.conv2.weight.shape

In [None]:
network.conv2.weight[0].shape

In [None]:
network.fc1.weight.shape

In [None]:
network.fc2.weight.shape

In [None]:
network.out.weight.shape

Looking at the linear layers.

In [None]:
network

In [None]:
network.fc1.weight.shape

In [None]:
network.fc2.weight.shape

In [None]:
network.out.weight.shape

Matrix Multiplication

In [None]:
in_features = torch.tensor([1,2,3,4], dtype=torch.float32)

In [None]:
weight_matrix = torch.tensor([
    [1,2,3,4],
    [2,3,4,5],
    [3,4,5,6]
], dtype=torch.float32)

In [None]:
weight_matrix.matmul(in_features)

Accessing the Parameters

In [None]:
for param in network.parameters():
    print(param.shape)

In [None]:
for name, param in network.named_parameters():
    print(name, '\t\t', param.shape)

# PyTorch Callable Neural Networks – Linear Layer in Depth

In [None]:
import torch.nn as nn

In [None]:
in_features = torch.tensor([1,2,3,4], dtype=torch.float32)

In [None]:
weight_matrix = torch.tensor([
    [1,2,3,4],
    [2,3,4,5],
    [3,4,5,6]
], dtype=torch.float32)

In [None]:
weight_matrix.matmul(in_features)

In [None]:
fc = nn.Linear(in_features=4, out_features=3)

In [None]:
fc.weight = nn.Parameter(weight_matrix)

In [None]:
fc(in_features)

In [None]:
fc = nn.Linear(in_features=4, out_features=3, bias=False)
fc.weight = nn.Parameter(weight_matrix)
fc(in_features)

# Build PyTorch CNN - Forward Method Implementation

In [None]:
class Network(nn.Module):
    """CNN with two conv layers and three linear layers producing 10 scores.

    The layer sizes assume batched single-channel 28x28 inputs: two rounds
    of 5x5 convolution followed by 2x2 max pooling leave 12 feature maps of
    size 4x4, which is the 12 * 4 * 4 input width of ``fc1``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return unnormalized class scores, one row of 10 per input sample.

        Args:
            t: batch of images shaped (batch_size, 1, height, width).

        Raises:
            RuntimeError: if the flattened per-sample feature count does not
                match the input width of ``fc1``.
        """
        # (1) input layer: the input tensor is used as given

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        # Flatten each sample separately. reshape(-1, 12 * 4 * 4) could move
        # elements across the batch dimension, so a wrongly sized input might
        # silently come out with a different number of rows.
        t = t.flatten(start_dim=1)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer
        # Softmax is deliberately not applied; raw scores are returned.
        t = self.out(t)

        return t

# Forward Propagation Explained - Pass Image to PyTorch Neural Network

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [None]:
class Network(nn.Module):
    """CNN with two conv layers and three linear layers producing 10 scores.

    The layer sizes assume batched single-channel 28x28 inputs, which leave
    12 feature maps of size 4x4 after the second pooling step.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return unnormalized class scores, one row of 10 per input sample.

        Args:
            t: batch of images shaped (batch_size, 1, height, width).

        Raises:
            RuntimeError: if the flattened per-sample feature count does not
                match the input width of ``fc1``.
        """
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # Flatten per sample; reshape(-1, 12*4*4) could silently regroup
        # elements across the batch dimension when the input size is wrong.
        t = F.relu(self.fc1(t.flatten(start_dim=1)))
        t = F.relu(self.fc2(t))
        t = self.out(t)

        return t

This tells PyTorch to disable gradient tracking:

In [None]:
torch.set_grad_enabled(False)

*Passing a single image to the network*

In [None]:
network = Network()

In [None]:
sample = next(iter(train_set))
image, label = sample
image.shape

Neural networks usually accept batches of inputs.  
Thus, the image tensor's shape needs to be in the form (batch_size × in_channels × height × width)

In [None]:
image.unsqueeze(0).shape # This gives us a batch with size 1

In [None]:
pred = network(image.unsqueeze(0)) # image shape needs to be (batch_size × in_channels × H × W)

In [None]:
pred.shape

In [None]:
pred

In [None]:
label

In [None]:
pred.argmax(dim=1)

In [None]:
F.softmax(pred, dim=1)

In [None]:
F.softmax(pred, dim=1).sum()

Different instances of our network have different weights.

In [None]:
net1 = Network()

In [None]:
net1(image.unsqueeze(0))

In [None]:
net2 = Network()

In [None]:
net2(image.unsqueeze(0))

# Neural Network Batch Processing - Pass Image Batch to PyTorch Network

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)

In [None]:
# Check blog post on deeplizard.com for any version related updates
print(torch.__version__)
print(torchvision.__version__)

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [None]:
class Network(nn.Module):
    """CNN with two conv layers and three linear layers producing 10 scores.

    The layer sizes assume batched single-channel 28x28 inputs, which leave
    12 feature maps of size 4x4 after the second pooling step.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return unnormalized class scores, one row of 10 per input sample.

        Args:
            t: batch of images shaped (batch_size, 1, height, width).

        Raises:
            RuntimeError: if the flattened per-sample feature count does not
                match the input width of ``fc1``.
        """
        # (1) input layer: the input tensor is used as given

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        # Flatten each sample on its own so the batch dimension is preserved;
        # reshape(-1, 12 * 4 * 4) could silently change the number of rows
        # when the input size is wrong.
        t = t.flatten(start_dim=1)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer
        # Softmax is deliberately not applied; raw scores are returned.
        t = self.out(t)

        return t

In [None]:
torch.set_grad_enabled(False)

In [None]:
network = Network()

In [None]:
data_loader = torch.utils.data.DataLoader(train_set, batch_size=10)

In [None]:
batch = next(iter(data_loader))
images, labels = batch

In [None]:
images.shape

In [None]:
labels.shape

In [None]:
preds = network(images)

In [None]:
preds.shape

In [None]:
preds

*Checking batch predictions against the label tensor*

In [None]:
preds.argmax(dim=1)

In [None]:
labels

In [None]:
preds.argmax(dim=1).eq(labels)

In [None]:
preds.argmax(dim=1).eq(labels).sum()

In [None]:
def get_num_correct(preds, labels):
    """Count the rows of `preds` whose highest-scoring index equals the label."""
    predicted = preds.argmax(dim=1)
    matches = predicted.eq(labels)
    return matches.sum().item()

In [None]:
get_num_correct(preds, labels)

# Training a PyTorch CNN - Calculate Loss, Gradient & Update Weights

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120) # Display options for output
torch.set_grad_enabled(True) # Already on by default

In [None]:
print(torch.__version__)
print(torchvision.__version__)

In [None]:
def get_num_correct(preds, labels):
    """Return how many predictions (argmax over dim 1) match `labels`."""
    is_correct = preds.argmax(dim=1) == labels
    return is_correct.sum().item()

In [None]:
class Network(nn.Module):
    """CNN with two conv layers and three linear layers producing 10 scores.

    The layer sizes assume batched single-channel 28x28 inputs, which leave
    12 feature maps of size 4x4 after the second pooling step.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return unnormalized class scores, one row of 10 per input sample.

        Args:
            t: batch of images shaped (batch_size, 1, height, width).

        Raises:
            RuntimeError: if the flattened per-sample feature count does not
                match the input width of ``fc1``.
        """
        # (1) input layer: the input tensor is used as given

        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        # Flatten each sample on its own so the batch dimension is preserved;
        # reshape(-1, 12 * 4 * 4) could silently change the number of rows
        # when the input size is wrong.
        t = t.flatten(start_dim=1)
        t = self.fc1(t)
        t = F.relu(t)

        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)

        # (6) output layer
        # Softmax is deliberately not applied; raw scores are returned.
        t = self.out(t)

        return t

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [None]:
network = Network()

In [None]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
batch = next(iter(train_loader))
images, labels = batch

### Calculating the Loss

In [None]:
preds = network(images)
loss = F.cross_entropy(preds, labels) # Calculating the loss
loss.item()

### Calculating the Gradients

In [None]:
print(network.conv1.weight.grad)

In [None]:
loss.backward() # Calculating the gradients

In [None]:
network.conv1.weight.grad.shape

In [None]:
network.conv1.weight.shape

### Updating the Weights

In [None]:
optimizer = optim.Adam(network.parameters(), lr=0.01)

In [None]:
loss.item()

In [None]:
get_num_correct(preds, labels)

In [None]:
optimizer.step() # Updating the weights

In [None]:
preds = network(images)
loss = F.cross_entropy(preds, labels)

In [None]:
loss.item()

In [None]:
get_num_correct(preds, labels)

# CNN Training Loop - Neural Network Programming

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120) # Display options for output
torch.set_grad_enabled(True) # Already on by default

In [None]:
print(torch.__version__)
print(torchvision.__version__)

In [None]:
def get_num_correct(preds, labels):
    """Number of samples whose arg-max prediction along dim 1 equals the label."""
    hits = labels.eq(preds.argmax(dim=1))
    return hits.sum().item()

In [None]:
class Network(nn.Module):
    """CNN with two conv layers and three linear layers producing 10 scores.

    The layer sizes assume batched single-channel 28x28 inputs, which leave
    12 feature maps of size 4x4 after the second pooling step.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        self.fc1 = nn.Linear(in_features=12 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Return unnormalized class scores, one row of 10 per input sample.

        Args:
            t: batch of images shaped (batch_size, 1, height, width).

        Raises:
            RuntimeError: if the flattened per-sample feature count does not
                match the input width of ``fc1``.
        """
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # Flatten per sample; reshape(-1, 12 * 4 * 4) could silently regroup
        # elements across the batch dimension when the input size is wrong.
        t = t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))

        t = F.relu(self.fc2(t))

        t = self.out(t)

        return t

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

### Training with a single batch: Review

In [None]:
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=100)
optimizer = optim.Adam(network.parameters(), lr=0.01)

batch = next(iter(train_loader)) # Get Batch
images, labels = batch

preds = network(images) # Pass Batch
loss = F.cross_entropy(preds, labels) # Calculate Loss

loss.backward() # Calculate Gradients
optimizer.step() # Update Weights

#------------------------------------------

print('loss1:', loss.item())
preds = network(images)
loss = F.cross_entropy(preds, labels)
print('loss2:', loss.item())

### Training with all batches: A single epoch

In [None]:
# One full pass over the training set with a freshly created network.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=1000)
optimizer = optim.Adam(network.parameters(), lr=0.01)

total_loss, total_correct = 0, 0

for batch in train_loader:
    images, labels = batch

    # Clear the gradients accumulated by the previous batch.
    optimizer.zero_grad()

    preds = network(images)
    loss = F.cross_entropy(preds, labels)

    loss.backward()
    optimizer.step()

    total_correct += get_num_correct(preds, labels)
    total_loss += loss.item()

print(f"epoch: 0 total_correct: {total_correct} loss: {total_loss}")

In [None]:
total_correct / len(train_set)

### Training with multiple epochs: The complete training loop

In [None]:
# Complete training loop: an outer loop over epochs, an inner loop over batches.
network = Network()

train_loader = torch.utils.data.DataLoader(train_set, batch_size=1000)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(1):

    # Per-epoch running totals.
    total_loss, total_correct = 0, 0

    for batch in train_loader:
        images, labels = batch

        # Clear the gradients accumulated by the previous batch.
        optimizer.zero_grad()

        preds = network(images)
        loss = F.cross_entropy(preds, labels)

        loss.backward()
        optimizer.step()

        total_correct += get_num_correct(preds, labels)
        total_loss += loss.item()

    print(f"epoch: {epoch} total_correct: {total_correct} loss: {total_loss}")

In [None]:
total_correct / len(train_set)

# Analyzing CNN Results - Building and Plotting a Confusion Matrix

In [None]:
len(train_set)

In [None]:
len(train_set.targets)

### Getting predictions for the entire training set

In [None]:
def get_all_preds(model, loader):
    """Run `model` on every batch of `loader` and stack the outputs along dim 0.

    Gradient tracking is not changed here; wrap the call in
    ``torch.no_grad()`` if the result should not carry a graph.

    Args:
        model: callable applied to the images of each batch.
        loader: iterable yielding ``(images, labels)`` pairs; the labels are
            ignored.

    Returns:
        One tensor holding all batch outputs concatenated along dim 0, with
        the dtype and device of the model outputs. An empty loader yields an
        empty tensor.
    """
    # Collect the batches first and concatenate once, instead of re-copying
    # the growing result tensor on every iteration.
    batch_preds = [model(images) for images, _ in loader]
    if not batch_preds:
        return torch.tensor([])
    return torch.cat(batch_preds, dim=0)

In [None]:
prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=10000)
train_preds = get_all_preds(network, prediction_loader)

In [None]:
train_preds.shape

In [None]:
print(train_preds.requires_grad)

In [None]:
train_preds.grad

In [None]:
train_preds.grad_fn

In [None]:
with torch.no_grad():
    prediction_loader = torch.utils.data.DataLoader(train_set, batch_size=10000)
    train_preds = get_all_preds(network, prediction_loader)

In [None]:
print(train_preds.requires_grad)

In [None]:
train_preds.grad

In [None]:
train_preds.grad_fn

In [None]:
preds_correct = get_num_correct(train_preds, train_set.targets)

print('total correct:', preds_correct)
print('accuracy:', preds_correct / len(train_set))

### Building a confusion matrix

In [None]:
train_set.targets

In [None]:
train_preds.argmax(dim=1)

In [None]:
stacked = torch.stack(
    (
        train_set.targets
        ,train_preds.argmax(dim=1)
    )
    ,dim=1
)

In [None]:
stacked.shape

In [None]:
stacked

In [None]:
stacked[0].tolist()

In [None]:
cmt = torch.zeros(10,10, dtype=torch.int32)
cmt

In [None]:
for p in stacked:
    tl, pl = p.tolist()
    cmt[tl, pl] = cmt[tl, pl] + 1

In [None]:
cmt

### Plotting a confusion matrix

In [None]:
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
from resources.plotcm import plot_confusion_matrix

In [None]:
cm = confusion_matrix(train_set.targets, train_preds.argmax(dim=1))
print(type(cm))
cm

In [None]:
plt.figure(figsize=(10,10))
plot_confusion_matrix(cmt, train_set.classes)

# Using TensorBoard with PyTorch - Deep Learning Metrics

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120) # Display options for output
torch.set_grad_enabled(True) # Already on by default

from torch.utils.tensorboard import SummaryWriter

In [None]:
print(torch.__version__)
print(torchvision.__version__)

In [None]:
def get_num_correct(preds, labels):
    """Count predictions whose arg-max along dim 1 agrees with `labels`."""
    top_class = preds.argmax(dim=1)
    agreement = top_class == labels
    return agreement.sum().item()

In [None]:
class Network(nn.Module):
    """CNN with two conv layers and three linear layers producing 10 scores."""

    def __init__(self):
        super().__init__()
        # Convolutional layers (5x5 kernels): 1 -> 6 -> 12 channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        # Linear layers: 12*4*4 -> 120 -> 60 -> 10 features.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Return unnormalized class scores for the batch `t`."""
        # First conv block: convolution, ReLU, 2x2 max pooling.
        t = self.conv1(t)
        t = F.max_pool2d(F.relu(t), kernel_size=2, stride=2)

        # Second conv block, same pattern.
        t = self.conv2(t)
        t = F.max_pool2d(F.relu(t), kernel_size=2, stride=2)

        # Flatten everything except the batch dimension for the linear layers.
        t = t.flatten(1)
        t = self.fc1(t)
        t = F.relu(t)

        t = self.fc2(t)
        t = F.relu(t)

        return self.out(t)

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [None]:
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

### Starting out with TensorBoard (Network Graph and Images)

In [None]:
tb = SummaryWriter()

network = Network()
images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

tb.add_image('images', grid)
tb.add_graph(network, images)
tb.close()

### The Training Loop Review

In [None]:
# Plain training loop (no TensorBoard logging) over shuffled batches of 1000.
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=1000, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

for epoch in range(1):

    # Per-epoch running totals.
    total_loss, total_correct = 0, 0

    for batch in train_loader:
        images, labels = batch

        # Clear the gradients accumulated by the previous batch.
        optimizer.zero_grad()

        preds = network(images)
        loss = F.cross_entropy(preds, labels)

        loss.backward()
        optimizer.step()

        total_correct += get_num_correct(preds, labels)
        total_loss += loss.item()

    print(f"epoch {epoch} total_correct: {total_correct} loss: {total_loss}")

### Training Loop with TensorBoard

In [None]:
# Training loop that logs images, the model graph, per-epoch scalars and
# conv1 histograms to TensorBoard.
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=1000, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

# The context manager closes the writer even if training raises part-way.
with SummaryWriter() as tb:
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(1):

        total_loss = 0
        total_correct = 0

        for batch in train_loader: # Get Batch
            images, labels = batch

            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss

            optimizer.zero_grad()
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            total_loss += loss.item()
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        tb.add_histogram('conv1.bias', network.conv1.bias, epoch)
        tb.add_histogram('conv1.weight', network.conv1.weight, epoch)
        tb.add_histogram('conv1.weight.grad', network.conv1.weight.grad, epoch)

        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

# CNN Training Hyperparameters - Neural Networks

In [None]:
# Training loop that logs a histogram for every named parameter and its
# gradient, instead of listing the conv1 tensors by hand.
network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=1000, shuffle=True)
optimizer = optim.Adam(network.parameters(), lr=0.01)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

# The context manager closes the writer even if training raises part-way.
with SummaryWriter() as tb:
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(1):

        total_loss = 0
        total_correct = 0

        for batch in train_loader: # Get Batch
            images, labels = batch

            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss

            optimizer.zero_grad()
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            total_loss += loss.item()
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        # One histogram per parameter tensor and one for its gradient.
        for name, weight in network.named_parameters():
            tb.add_histogram(name, weight, epoch)
            tb.add_histogram(f'{name}.grad', weight.grad, epoch)

        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

In [None]:
for name, weight in network.named_parameters():
    print(name, weight.shape)

In [None]:
for name, weight in network.named_parameters():
    print(f'{name}.grad', weight.grad.shape)

### Parameterize Hyperparameters

In [None]:
# Training run with the hyperparameters pulled out into variables; the run
# is tagged with those values through the SummaryWriter comment.
batch_size = 1000
lr = 0.01

network = Network()
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
optimizer = optim.Adam(network.parameters(), lr=lr)

images, labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(images)

comment = f' batch_size={batch_size} lr={lr}'
# The context manager closes the writer even if training raises part-way.
with SummaryWriter(comment=comment) as tb:
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(1):
        total_loss = 0
        total_correct = 0
        for batch in train_loader:
            images, labels = batch # Get Batch
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            # The loss is a per-batch mean, so weight it by the number of
            # samples actually in this batch; the last batch can be shorter
            # than batch_size.
            total_loss += loss.item() * len(images)
            total_correct += get_num_correct(preds, labels)

        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        for name, param in network.named_parameters():
            tb.add_histogram(name, param, epoch)
            tb.add_histogram(f'{name}.grad', param.grad, epoch)

        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

### Iterating on Parameter Values Part 1

In [None]:
# Try every (batch_size, lr) combination with nested loops; each combination
# gets a fresh network and its own TensorBoard run.
batch_size_list = [1000, 10000]
lr_list = [.01]

for batch_size in batch_size_list:
    for lr in lr_list:
        network = Network()
        train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size)
        optimizer = optim.Adam(network.parameters(), lr=lr)

        images, labels = next(iter(train_loader))
        grid = torchvision.utils.make_grid(images)

        comment=f' batch_size={batch_size} lr={lr}'
        # The context manager closes this run's writer even if training raises.
        with SummaryWriter(comment=comment) as tb:
            tb.add_image('images', grid)
            tb.add_graph(network, images)

            for epoch in range(1):
                total_loss = 0
                total_correct = 0
                for batch in train_loader:
                    images, labels = batch # Get Batch
                    preds = network(images) # Pass Batch
                    loss = F.cross_entropy(preds, labels) # Calculate Loss
                    optimizer.zero_grad() # Zero Gradients
                    loss.backward() # Calculate Gradients
                    optimizer.step() # Update Weights

                    # Weight the per-batch mean loss by the real batch
                    # length; the last batch can be shorter than batch_size.
                    total_loss += loss.item() * len(images)
                    total_correct += get_num_correct(preds, labels)

                tb.add_scalar('Loss', total_loss, epoch)
                tb.add_scalar('Number Correct', total_correct, epoch)
                tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

                for name, param in network.named_parameters():
                    tb.add_histogram(name, param, epoch)
                    tb.add_histogram(f'{name}.grad', param.grad, epoch)

                print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)

### Iterating on Parameter Values Part 2

In [None]:
from itertools import product

In [None]:
parameters = dict(
    lr = [.01, .001]
    ,batch_size = [100, 1000]
    ,shuffle = [False]
)

In [None]:
param_values = [v for v in parameters.values()]
param_values

In [None]:
for lr, batch_size, shuffle in product(*param_values): 
    print (lr, batch_size, shuffle)

In [None]:
# One training run per combination in the Cartesian product of the
# hyperparameter value lists.
for lr, batch_size, shuffle in product(*param_values): 
    # The comment labels this run's TensorBoard output with its settings
    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'

    # Fresh model, loader and optimizer so runs do not share any state
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    # One batch is pulled up front only to log sample images and the graph
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)
    tb = SummaryWriter(comment=comment)
    tb.add_image('images', grid)
    tb.add_graph(network, images)
    for epoch in range(1):
        total_loss = 0
        total_correct = 0
        for batch in train_loader:
            images, labels = batch # Get Batch
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            # Weight the batch-averaged loss by the number of samples that
            # are actually in this batch. A final partial batch is smaller
            # than batch_size and would otherwise be over-counted.
            total_loss += loss.item() * images.shape[0]
            total_correct += get_num_correct(preds, labels)

        # Per-epoch scalars
        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)

        # Per-epoch distributions of every parameter and its gradient
        for name, param in network.named_parameters():
            tb.add_histogram(name, param, epoch)
            tb.add_histogram(f'{name}.grad', param.grad, epoch)

        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)  
    tb.close()

# Stack vs Concat in PyTorch, TensorFlow & NumPy

Link: https://deeplizard.com/learn/video/kF2AlpykJGY

Adding a dimension

In [None]:
import torch

In [None]:
t1 = torch.tensor([1,1,1])

In [None]:
t1.unsqueeze(dim=0)

In [None]:
t1.unsqueeze(dim=1)

In [None]:
print(t1.shape)
print(t1.unsqueeze(dim=0).shape)
print(t1.unsqueeze(dim=1).shape)

## PyTorch: Stack vs Cat

In [None]:
import torch

In [None]:
t1 = torch.tensor([1,1,1])
t2 = torch.tensor([2,2,2])
t3 = torch.tensor([3,3,3])

In [None]:
torch.cat(
    (t1,t2,t3)
    ,dim=0
)

In [None]:
torch.stack(
    (t1,t2,t3)
    ,dim=0
)

In [None]:
torch.cat(
    (
         t1.unsqueeze(0)
        ,t2.unsqueeze(0)
        ,t3.unsqueeze(0)
    )
    ,dim=0
)

In [None]:
torch.stack(
    (t1,t2,t3)
    ,dim=1
)

In [None]:
torch.cat(
    ( 
        t1.unsqueeze(1)
        ,t2.unsqueeze(1)
        ,t3.unsqueeze(1)
    )
    ,dim=1
)

### Stacking along the second axis

In [None]:
import torch
t1 = torch.tensor([1,1,1])
t2 = torch.tensor([2,2,2])
t3 = torch.tensor([3,3,3])

In [None]:
t1.unsqueeze(1)

In [None]:
t2.unsqueeze(1)

In [None]:
t3.unsqueeze(1)

## TensorFlow: Stack vs Concat

In [None]:
# pip install tensorflow==2.0.0-rc1
import tensorflow as tf

In [None]:
t1 = tf.constant([1,1,1])
t2 = tf.constant([2,2,2])
t3 = tf.constant([3,3,3])

In [None]:
tf.concat(
    (t1,t2,t3)
    ,axis=0
)

In [None]:
tf.stack(
    (t1,t2,t3)
    ,axis=0
)

In [None]:
tf.concat(
    (
         tf.expand_dims(t1, 0)
        ,tf.expand_dims(t2, 0)
        ,tf.expand_dims(t3, 0)
    )
    ,axis=0
)

In [None]:
tf.stack(
    (t1,t2,t3)
    ,axis=1
)

In [None]:
tf.concat(
    (
         tf.expand_dims(t1, 1)
        ,tf.expand_dims(t2, 1)
        ,tf.expand_dims(t3, 1)
    )    
    ,axis=1
)

## NumPy: Stack vs Concatenate

In [None]:
import numpy as np

In [None]:
t1 = np.array([1,1,1])
t2 = np.array([2,2,2])
t3 = np.array([3,3,3])

In [None]:
np.concatenate(
    (t1,t2,t3)
    ,axis=0
)

In [None]:
np.stack(
    (t1,t2,t3)
    ,axis=0
)

In [None]:
np.concatenate(
    (
         np.expand_dims(t1, 0)
        ,np.expand_dims(t2, 0)
        ,np.expand_dims(t3, 0)
    )
    ,axis=0
)

In [None]:
np.stack(
    (t1,t2,t3)
    ,axis=1
)

In [None]:
np.concatenate(
    (
         np.expand_dims(t1, 1)
        ,np.expand_dims(t2, 1)
        ,np.expand_dims(t3, 1)
    )
    ,axis=1
)

## Real World Example

### Joining images into a single batch

In [None]:
import torch
t1 = torch.zeros(3,28,28)
t2 = torch.zeros(3,28,28)
t3 = torch.zeros(3,28,28)
torch.stack(
    (t1,t2,t3)
    ,dim=0
).shape

### Joining batches into a single batch

In [None]:
import torch
t1 = torch.zeros(1,3,28,28)
t2 = torch.zeros(1,3,28,28)
t3 = torch.zeros(1,3,28,28)
torch.cat(
    (t1,t2,t3)
    ,dim=0
).shape

### Joining images with an existing batch

In [None]:
import torch
batch = torch.zeros(3,3,28,28)
t1 = torch.zeros(3,28,28)
t2 = torch.zeros(3,28,28)
t3 = torch.zeros(3,28,28)

torch.cat(
    (
        batch
        ,torch.stack(
            (t1,t2,t3)
            ,dim=0
        )
    )
    ,dim=0
).shape

In [None]:
import torch
batch = torch.zeros(3,3,28,28)
t1 = torch.zeros(3,28,28)
t2 = torch.zeros(3,28,28)
t3 = torch.zeros(3,28,28)

torch.cat(
    (
        batch
        ,t1.unsqueeze(0)
        ,t2.unsqueeze(0)
        ,t3.unsqueeze(0)
    )
    ,dim=0
).shape

# Looking at torch.autograd

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120) # Display options for output
torch.set_grad_enabled(True) # Already on by default

In [None]:
a = nn.Parameter(torch.ones(1,3))
b = nn.Parameter(torch.tensor([1.,2,3]))
c = a + b[0] * 2
d = c.sum()

In [None]:
a

In [None]:
b

In [None]:
c

In [None]:
d

In [None]:
d.backward()

In [None]:
a.grad

In [None]:
b.grad

In [None]:
c.grad

In [None]:
d.grad

# Training Loop Run Builder - Neural Network Experimentation

In [None]:
from collections import OrderedDict
from collections import namedtuple
from itertools import product

In [None]:
class RunBuilder():
    """Expands a mapping of hyperparameter value lists into individual runs."""

    @staticmethod
    def get_runs(params):
        """Return one ``Run`` named tuple per combination of parameter values.

        ``params`` maps each parameter name to an iterable of candidate
        values. The keys become the fields of ``Run`` and the result lists
        the Cartesian product of the values, in ``itertools.product`` order.
        """
        run_type = namedtuple('Run', params.keys())
        return [run_type(*combo) for combo in product(*params.values())]

In [None]:
params = OrderedDict(
    lr = [.01, .001]
    ,batch_size = [1000, 10000]
)

In [None]:
runs = RunBuilder.get_runs(params)
runs

In [None]:
run = runs[0]
run

In [None]:
print(run.lr, run.batch_size)

In [None]:
for run in runs:
    print(run, run.lr, run.batch_size)

In [None]:
params = OrderedDict(
    lr = [.01, .001]
    ,batch_size = [1000, 10000]
    ,device = ["cuda", "cpu"]
)

In [None]:
runs = RunBuilder.get_runs(params)
runs

## How to build the RunBuilder

In [None]:
params = OrderedDict(
    lr = [.01, .001]
    ,batch_size = [1000, 10000]
)

In [None]:
params.keys()

In [None]:
params.values()

In [None]:
Run = namedtuple('Run', params.keys())

In [None]:
runs = []
for v in product(*params.values()):
    runs.append(Run(*v))
runs

In [None]:
for run in RunBuilder.get_runs(params):
    comment = f'-{run}'
    print(f'comment:{comment} lr={run.lr}, batch_size={run.batch_size}')

# CNN Training Loop Refactoring - Simultaneous Hyperparameter Testing

Link: https://deeplizard.com/learn/video/ycxulUVoNbk

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict

In [None]:
class Network(nn.Module):
    """CNN with two convolution/pooling stages followed by three linear layers.

    ``fc1`` takes 12*4*4 features: 12 channels from ``conv2`` with a 4x4
    spatial size, which is what two 5x5 convolutions and two 2x2 poolings
    leave of a 28x28 input.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order: under a fixed random seed the
        # creation order decides which initial weights each layer receives.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch ``t`` through the network and return the output scores."""
        # Convolutional stages: convolution -> ReLU -> 2x2 max pooling
        for conv in (self.conv1, self.conv2):
            t = F.max_pool2d(F.relu(conv(t)), kernel_size=2, stride=2)

        # Collapse everything except the batch dimension
        t = t.flatten(start_dim=1)

        # Hidden linear layers, each followed by ReLU
        for hidden in (self.fc1, self.fc2):
            t = F.relu(hidden(t))

        # The output layer is returned as-is; no activation is applied here
        return self.out(t)

In [None]:
class RunBuilder():
    """Builds the list of runs described by a hyperparameter grid."""

    @staticmethod
    def get_runs(params):
        """Return every combination of the values in ``params`` as a ``Run``.

        The keys of ``params`` become the field names of the ``Run`` named
        tuple; each returned tuple holds one value per key.
        """
        Run = namedtuple('Run', params.keys())
        value_grid = product(*params.values())
        return [Run(*values) for values in value_grid]

In [None]:
class RunManager():
    """Collects training statistics per epoch and per run.

    Each finished epoch is logged to TensorBoard, appended to ``run_data``
    and displayed as a DataFrame. ``save`` writes ``run_data`` to CSV and
    JSON files.
    """

    def __init__(self):
        # Per-epoch state, reset by begin_epoch()
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run state; run_data gets one record per finished epoch and is
        # never cleared between runs
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # Set by begin_run()
        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start a new run: store its objects and open a TensorBoard writer.

        The first batch from ``loader`` is logged as an image grid and is
        also used as the example input for the network graph.
        """
        self.run_start_time = time.time()

        self.run_count += 1
        self.run_params, self.network, self.loader = run, network, loader
        self.tb = SummaryWriter(comment=f'-{run}')

        images, _ = next(iter(self.loader))
        self.tb.add_image('images', torchvision.utils.make_grid(images))

        # Runs without a 'device' field fall back to the CPU
        target_device = getattr(run, 'device', 'cpu')
        self.tb.add_graph(self.network, images.to(target_device))

    def end_run(self):
        """Close the TensorBoard writer and restart the epoch numbering."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start timing a new epoch and clear its accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = self.epoch_num_correct = 0

    def end_epoch(self):
        """Log, record and display the results of the epoch that just ended."""
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time

        # Both metrics are averaged over the size of the whole dataset
        sample_count = len(self.loader.dataset)
        loss = self.epoch_loss / sample_count
        accuracy = self.epoch_num_correct / sample_count

        step = self.epoch_count
        self.tb.add_scalar('Loss', loss, step)
        self.tb.add_scalar('Accuracy', accuracy, step)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, step)
            self.tb.add_histogram(f'{name}.grad', param.grad, step)

        # Fixed columns first, then one column per run parameter
        results = OrderedDict([
            ("run", self.run_count),
            ("epoch", self.epoch_count),
            ('loss', loss),
            ("accuracy", accuracy),
            ('epoch duration', epoch_duration),
            ('run duration', run_duration),
        ])
        results.update(self.run_params._asdict())
        self.run_data.append(results)

        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        # Replace the previously displayed table with the updated one
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch):
        """Add one batch's loss to the epoch total, scaled by the batch length.

        The batch length is the first dimension of ``batch[0]``.
        """
        loss_value = loss.item()
        batch_len = batch[0].shape[0]
        self.epoch_loss += loss_value * batch_len

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in a batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    def _get_num_correct(self, preds, labels):
        """Count the rows of ``preds`` whose argmax along dim 1 equals the label."""
        predicted = preds.argmax(dim=1)
        return predicted.eq(labels).sum().item()

    def save(self, fileName):
        """Write ``run_data`` to ``<fileName>.csv`` and ``<fileName>.json``."""
        table = pd.DataFrame.from_dict(self.run_data, orient='columns')
        table.to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [None]:
# Hyperparameter grid: RunBuilder turns it into one run per combination
params = OrderedDict(
    lr = [.01]
    ,batch_size = [1000]
    ,shuffle = [True]
)
# A single manager is shared by all runs, so its run_data covers every run
m = RunManager()
for run in RunBuilder.get_runs(params):

    # Fresh model, loader and optimizer for each run
    network = Network()
    loader = DataLoader(train_set, batch_size=run.batch_size, shuffle=run.shuffle)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            
            images, labels = batch
            preds = network(images) # Forward pass on the batch
            loss = F.cross_entropy(preds, labels) # Loss for this batch
            optimizer.zero_grad() # Clear gradients left from the previous step
            loss.backward() # Compute gradients
            optimizer.step() # Update the weights
            
            # Accumulate this batch's statistics into the epoch totals
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)  
        m.end_epoch()
    m.end_run()
# Writes results.csv and results.json
m.save('results')

# PyTorch DataLoader num_workers Test

Link: https://deeplizard.com/learn/video/kWVgvsejXsE

In [None]:
# Grid for the num_workers experiment. Only one combination is active; the
# trailing comments list further values that can be enabled.
params = OrderedDict(
    lr = [.01]
    ,batch_size = [100]  # further values to try: 1000, 10000
    ,num_workers = [0]  # further values to try: 1, 2, 4, 8, 16
    # optional extra dimension: ,shuffle = [True, False]
)
# A single manager is shared by all runs, so its run_data covers every run
m = RunManager()
for run in RunBuilder.get_runs(params):

    # Fresh model, loader and optimizer for each run
    network = Network()
    loader = DataLoader(train_set, batch_size=run.batch_size, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            
            images, labels = batch
            preds = network(images) # Forward pass on the batch
            loss = F.cross_entropy(preds, labels) # Loss for this batch
            optimizer.zero_grad() # Clear gradients left from the previous step
            loss.backward() # Compute gradients
            optimizer.step() # Update the weights
            
            # Accumulate this batch's statistics into the epoch totals
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)  
        m.end_epoch()
    m.end_run()
# Writes results.csv and results.json
m.save('results')

# PyTorch and the GPU: CUDA

Link: https://deeplizard.com/learn/video/Bs1mdHZiAS8

## Moving to GPU

In [None]:
t = torch.ones(1,1,28,28)
network = Network()

In [None]:
t = t.cuda()
network = network.cuda()

In [None]:
gpu_pred = network(t)
gpu_pred.device

## Moving to CPU

In [None]:
t = t.cpu()
network = network.cpu()

In [None]:
cpu_pred = network(t)
cpu_pred.device

## Working with Tensors

In [None]:
t1 = torch.tensor([
    [1,2],
    [3,4]
])

t2 = torch.tensor([
    [5,6],
    [7,8]
])

In [None]:
t1.device, t2.device

In [None]:
t1 = t1.to('cuda')
t1.device

In [None]:
try: t1 + t2
except Exception as e: print(e)

In [None]:
try: t2 + t1
except Exception as e: print(e)

In [None]:
t2 = t2.to('cuda')

In [None]:
t1 + t2

## Working with Neural Network Modules

In [None]:
network = Network()

In [None]:
for name, param in network.named_parameters():
    print(name, '\t\t\t', param.shape)

In [None]:
for n, p in network.named_parameters():
    print(p.device, '', n)

In [None]:
network.to('cuda')

In [None]:
for n, p in network.named_parameters():
    print(p.device, '', n)

In [None]:
sample = torch.ones(1,1,28,28)
sample.shape

In [None]:
try: network(sample)
except Exception as e: print(e)

In [None]:
try:
    pred = network(sample.to('cuda'))
    print(pred)
except Exception as e:
    print(e)

## Checking for GPU

In [None]:
torch.cuda.is_available()

## Using the GPU: Test

In [None]:
# Grid for the GPU vs CPU timing comparison. 'cuda' is only requested when a
# GPU is actually available, so on a CPU-only machine the cell still runs
# (with the CPU run alone) instead of failing when the network is moved.
params = OrderedDict(
    lr = [.01]
    ,batch_size = [20000]
    , num_workers = [1]
    , device = ['cuda', 'cpu'] if torch.cuda.is_available() else ['cpu']
)
# A single manager is shared by all runs, so its run_data covers every run
m = RunManager()
for run in RunBuilder.get_runs(params):

    # The network is moved to the run's device once, before training
    device = torch.device(run.device)
    network = Network().to(device)
    loader = DataLoader(train_set, batch_size=run.batch_size, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            
            # Each batch has to be moved to the same device as the network
            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights
            
            # Accumulate this batch's statistics into the epoch totals
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
# Writes results.csv and results.json
m.save('results')

In [None]:
pd.DataFrame.from_dict(m.run_data).sort_values('epoch duration')

# Dataset Normalization

Link: https://deeplizard.com/learn/video/lu7TCu7HeYc

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
        # normalize
    ])
)

## Easy way

In [None]:
loader = DataLoader(train_set, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))
data[0].mean(), data[0].std()

## Harder way

In [None]:
# Two passes over the data in batches, so the whole dataset never has to be
# held in a single tensor: the first pass gives the mean, the second the std.
loader = DataLoader(train_set, batch_size=1000, num_workers=1)

# Pass 1: sum every pixel value and count the pixels from the batches
# themselves, rather than assuming a fixed 28x28 single-channel image size.
total_sum = 0
num_of_pixels = 0
for batch in loader:
    total_sum += batch[0].sum()
    num_of_pixels += batch[0].numel()
mean = total_sum / num_of_pixels

# Pass 2: accumulate the squared deviations from the mean. Dividing by the
# full pixel count makes this the population standard deviation.
sum_of_squared_error = 0
for batch in loader: sum_of_squared_error += ((batch[0] - mean).pow(2)).sum()
std = torch.sqrt(sum_of_squared_error / num_of_pixels)

mean, std

## Plotting the Values

In [None]:
plt.hist(data[0].flatten())
plt.axvline(data[0].mean())

## Using the `mean` and `std` values

Note that we use the same mean and std values for training, validation, and testing.

In [None]:
train_set_normal = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
          transforms.ToTensor()
        , transforms.Normalize(mean, std)
    ])
)

In [None]:
# Load the whole normalized set as a single batch. The batch size is taken
# from the dataset that is actually being loaded.
loader = DataLoader(train_set_normal, batch_size=len(train_set_normal), num_workers=1)
data = next(iter(loader))
# Mean and std of the image tensor after normalization
data[0].mean(), data[0].std()

In [None]:
plt.hist(data[0].flatten())
plt.axvline(data[0].mean())

In [None]:
trainsets = {
    'not_normal': train_set
    ,'normal': train_set_normal
}

In [None]:
# Grid comparing training on the raw and the normalized dataset. The GPU is
# used when one is available; otherwise the runs fall back to the CPU
# instead of failing when the network is moved to 'cuda'.
params = OrderedDict(
    lr = [.01]
    , batch_size = [1000]
    , num_workers = [1]
    , device = ['cuda' if torch.cuda.is_available() else 'cpu']
    , trainset = ['not_normal', 'normal']
)
# A single manager is shared by all runs, so its run_data covers every run
m = RunManager()
for run in RunBuilder.get_runs(params):

    # The network is moved to the run's device once, before training
    device = torch.device(run.device)
    network = Network().to(device)
    # run.trainset is a key into the trainsets dict, selecting the dataset
    loader = DataLoader(trainsets[run.trainset], batch_size=run.batch_size, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:
            
            # Each batch has to be moved to the same device as the network
            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights
            
            # Accumulate this batch's statistics into the epoch totals
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
# Writes results.csv and results.json
m.save('results')

In [None]:
pd.DataFrame.from_dict(m.run_data).sort_values('accuracy', ascending=False)

# PyTorch Sequential Models - Neural Networks Made Easy

Link: https://deeplizard.com/learn/video/bH9Nkg7G8S0

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import math

from collections import OrderedDict

torch.set_printoptions(linewidth=150)

In [None]:
train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

In [None]:
image, label = train_set[0]
image.shape

In [None]:
plt.imshow(image.squeeze(), cmap='gray')

In [None]:
train_set.classes

In [None]:
in_features = image.numel()
in_features

In [None]:
out_features = math.floor(in_features / 2)
out_features

In [None]:
out_classes = len(train_set.classes)
out_classes

In [None]:
network1 = nn.Sequential(
     nn.Flatten(start_dim=1)
    ,nn.Linear(in_features, out_features)
    ,nn.Linear(out_features, out_classes)
)

network1

In [None]:
network1[1]

In [None]:
image = image.unsqueeze(0)
image.shape

In [None]:
network1(image)

In [None]:
layers = OrderedDict([
     ('flat', nn.Flatten(start_dim=1))
    ,('hidden', nn.Linear(in_features, out_features))
    ,('output', nn.Linear(out_features, out_classes))
])

network2 = nn.Sequential(layers)
network2

In [None]:
network2(image)

In [None]:
torch.manual_seed(50)
network1 = nn.Sequential(
     nn.Flatten(start_dim=1)
    ,nn.Linear(in_features, out_features)
    ,nn.Linear(out_features, out_classes)
)

torch.manual_seed(50)
layers = OrderedDict([
     ('flat', nn.Flatten(start_dim=1))
    ,('hidden', nn.Linear(in_features, out_features))
    ,('output', nn.Linear(out_features, out_classes))
])

network2 = nn.Sequential(layers)

In [None]:
network1(image), network2(image)

In [None]:
torch.manual_seed(50)
network3 = nn.Sequential()
network3.add_module('flat', nn.Flatten(start_dim=1))
network3.add_module('hidden', nn.Linear(in_features, out_features))
network3.add_module('output', nn.Linear(out_features, out_classes))
network3

In [None]:
network1(image), network2(image), network3(image)

## Building a Network Class

In [None]:
class Network(nn.Module):
    """Class-based CNN: two conv/pool stages and three linear layers.

    ``fc1`` takes 12*4*4 features: 12 channels from ``conv2`` with a 4x4
    spatial size, which is what two 5x5 convolutions and two 2x2 poolings
    leave of a 28x28 input.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in a fixed order: under a fixed random seed the
        # creation order decides which initial weights each layer receives.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)

        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch ``t`` through the network and return the output scores."""
        # Stage 1: convolution -> ReLU -> 2x2 max pooling
        stage1 = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # Stage 2: same pattern with the second convolution
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=2, stride=2)

        # Collapse everything except the batch dimension
        flat = stage2.flatten(start_dim=1)

        # Two hidden linear layers with ReLU, then the output layer as-is
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        return self.out(hidden)

In [None]:
torch.manual_seed(50)
network = Network()
network

### Building the Same Network Using the Sequential Class

In [None]:
torch.manual_seed(50)
sequential1 = nn.Sequential(
      nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size=2, stride=2)
    , nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
    , nn.ReLU()
    , nn.MaxPool2d(kernel_size=2, stride=2)
    , nn.Flatten(start_dim=1)  
    , nn.Linear(in_features=12*4*4, out_features=120)
    , nn.ReLU()
    , nn.Linear(in_features=120, out_features=60)
    , nn.ReLU()
    , nn.Linear(in_features=60, out_features=10)
)

In [None]:
torch.manual_seed(50)
layers = OrderedDict([
     ('conv1', nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5))
    ,('relu1', nn.ReLU())
    ,('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))
    
    ,('conv2', nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5))
    ,('relu2', nn.ReLU())
    ,('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))
    
    ,('flatten', nn.Flatten(start_dim=1)  )
    ,('fc1', nn.Linear(in_features=12*4*4, out_features=120))
    ,('relu3', nn.ReLU())
    
    ,('fc2', nn.Linear(in_features=120, out_features=60))
    ,('relu4', nn.ReLU())
    ,('out', nn.Linear(in_features=60, out_features=10))
])

sequential2 = nn.Sequential(layers)

In [None]:
torch.manual_seed(50)
sequential3 = nn.Sequential()
sequential3.add_module('conv1', nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5))
sequential3.add_module('relu1', nn.ReLU())
sequential3.add_module('maxpool1', nn.MaxPool2d(kernel_size=2, stride=2))
    
sequential3.add_module('conv2', nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5))
sequential3.add_module('relu2', nn.ReLU())
sequential3.add_module('maxpool2', nn.MaxPool2d(kernel_size=2, stride=2))
    
sequential3.add_module('flatten', nn.Flatten(start_dim=1))
sequential3.add_module('fc1', nn.Linear(in_features=12*4*4, out_features=120))
sequential3.add_module('relu3', nn.ReLU())
    
sequential3.add_module('fc2', nn.Linear(in_features=120, out_features=60))
sequential3.add_module('relu4', nn.ReLU())
sequential3.add_module('out', nn.Linear(in_features=60, out_features=10))

In [None]:
sequential1

In [None]:
sequential2

In [None]:
sequential3

In [None]:
network(image), sequential1(image), sequential2(image), sequential3(image)

## `RunManager` Updates for Next Lesson
(Removing TensorBoard comment and graph)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output
import pandas as pd
import time
import json

from itertools import product
from collections import namedtuple
from collections import OrderedDict

In [None]:
class RunBuilder():
    """Turns a hyperparameter grid into a list of concrete runs."""

    @staticmethod
    def get_runs(params):
        """Return a list with one ``Run`` named tuple per value combination.

        ``params`` maps parameter names to iterables of values. The names
        become the fields of ``Run``; combinations follow the order produced
        by ``itertools.product`` over the values.
        """
        Run = namedtuple('Run', params.keys())
        return [Run._make(choice) for choice in product(*params.values())]

In [None]:
class RunManager():
    """Collects training statistics across runs and epochs.

    During an epoch the manager accumulates the summed loss and the number of
    correct predictions. At the end of each epoch it logs loss, accuracy and
    parameter/gradient histograms to TensorBoard, appends a result row to
    ``run_data`` and re-displays all rows as a DataFrame. ``save`` writes the
    collected rows to CSV and JSON files.
    """

    def __init__(self):
        # Per-epoch accumulators; reset by begin_epoch().
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None

        # Per-run bookkeeping; run_data keeps one row per finished epoch
        # across all runs.
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        # Objects belonging to the run currently in progress.
        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start a new run.

        Stores the run's parameters (a namedtuple, since ``end_epoch`` calls
        ``_asdict()`` on it), the network and the data loader, increments the
        run counter and opens a fresh ``SummaryWriter``.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader

        # Image-grid and graph logging are intentionally disabled, so no
        # sample batch is pulled from the loader here.
        self.tb = SummaryWriter()

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Start timing a new epoch and clear the per-epoch accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0

    def end_epoch(self):
        """Finalize the epoch: log to TensorBoard, record and display results.

        Loss and accuracy are the accumulated totals divided by the number of
        samples in the loader's dataset.
        """
        now = time.time()
        epoch_duration = now - self.epoch_start_time
        run_duration = now - self.run_start_time

        sample_count = len(self.loader.dataset)
        loss = self.epoch_loss / sample_count
        accuracy = self.epoch_num_correct / sample_count

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter that never received a gradient (e.g. frozen or
            # unused) has grad None, which cannot be turned into a histogram.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results['loss'] = loss
        results["accuracy"] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        # Append the run's hyperparameters as extra columns.
        results.update(self.run_params._asdict())
        self.run_data.append(results)

        df = pd.DataFrame.from_dict(self.run_data, orient='columns')

        # Replace the previously displayed table instead of stacking outputs.
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch):
        """Add this batch's loss to the epoch total.

        The scalar ``loss`` is multiplied by the number of samples in the
        batch (``batch[0].shape[0]``) before it is accumulated.
        NOTE(review): this assumes ``loss`` is a per-batch mean -- confirm
        against the loss function used by the caller.
        """
        self.epoch_loss += loss.item() * batch[0].shape[0]

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in this batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose argmax along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write the collected rows to ``<fileName>.csv`` and ``<fileName>.json``."""
        pd.DataFrame.from_dict(
            self.run_data
            ,orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

# Using BatchNorm

Link: https://deeplizard.com/learn/video/bCQ2cNhUWQ8

In [None]:
# Baseline CNN without any batch-normalization layers. The seed is fixed
# right before construction so the initial weights are reproducible.
torch.manual_seed(50)
network1 = nn.Sequential(
    # Convolutional stage 1
    nn.Conv2d(1, 6, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # Convolutional stage 2
    nn.Conv2d(6, 12, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # Fully connected head
    nn.Flatten(start_dim=1),
    nn.Linear(12 * 4 * 4, 120),
    nn.ReLU(),
    nn.Linear(120, 60),
    nn.ReLU(),
    nn.Linear(60, 10),
)

In [None]:
# Same architecture as the baseline, plus a BatchNorm2d layer after the first
# pooling stage and a BatchNorm1d layer after the first fully connected
# activation. The seed is fixed right before construction.
torch.manual_seed(50)
network2 = nn.Sequential(
    # Convolutional stage 1, followed by 2-D batch norm over its 6 channels
    nn.Conv2d(1, 6, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    nn.BatchNorm2d(6),
    # Convolutional stage 2
    nn.Conv2d(6, 12, kernel_size=5),
    nn.ReLU(),
    nn.MaxPool2d(2, stride=2),
    # Fully connected head, with 1-D batch norm over the 120 hidden features
    nn.Flatten(start_dim=1),
    nn.Linear(12 * 4 * 4, 120),
    nn.ReLU(),
    nn.BatchNorm1d(120),
    nn.Linear(120, 60),
    nn.ReLU(),
    nn.Linear(60, 10),
)

In [None]:
# Fashion-MNIST training split stored under ./data (download enabled), with
# every image converted to a tensor and no normalization applied.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [None]:
# Load the entire training set as one batch so the statistics below are
# computed over all of its values at once.
loader = DataLoader(dataset=train_set, batch_size=len(train_set), num_workers=1)
data = next(iter(loader))

# data[0] is the image part of the batch; mean()/std() without a dim argument
# reduce over every element.
mean, std = data[0].mean(), data[0].std()
mean, std

In [None]:
# Same training split, but each image tensor is additionally normalized with
# the dataset-wide mean and standard deviation computed above.
train_set_normal = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]),
)

In [None]:
# Lookup table so a run can select its training set by name.
trainsets = dict(
    not_normal=train_set,
    normal=train_set_normal,
)

In [None]:
# Lookup table so a run can select its network by name.
networks = dict(
    no_batch_norm=network1,
    batch_norm=network2,
)

In [None]:
# Hyperparameter grid: one run is created for every combination of values.
params = OrderedDict(
    lr = [.01]
    , batch_size = [1000]
    , num_workers = [1]
    # Prefer the GPU, but fall back to the CPU so this cell also runs on
    # machines without CUDA.
    , device = ['cuda' if torch.cuda.is_available() else 'cpu']
    , trainset = ['normal']
    , network = list(networks.keys())
)
m = RunManager()
for run in RunBuilder.get_runs(params):

    # Resolve the run's named choices into concrete objects.
    device = torch.device(run.device)
    network = networks[run.network].to(device)
    loader = DataLoader(trainsets[run.trainset], batch_size=run.batch_size, num_workers=run.num_workers)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)

    m.begin_run(run, network, loader)
    for epoch in range(1):
        m.begin_epoch()
        for batch in loader:

            images = batch[0].to(device)
            labels = batch[1].to(device)
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            # Accumulate the statistics reported at the end of the epoch.
            m.track_loss(loss, batch)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
# Persist the collected rows as results.csv and results.json.
m.save('results')

In [None]:
# Show all recorded epoch rows, ordered from highest to lowest accuracy.
pd.DataFrame.from_dict(m.run_data).sort_values('accuracy', ascending=False)

# THE END OF PART 2

# Automation Code

In [None]:
%%script powershell
# Automated cleanup: when a runs directory exists, delete results.csv and
# results.json, list the contents of runs, then remove runs recursively.
if(test-path runs) { rm results.csv, results.json; ls runs; rm runs -recurse -force; }