<a href="https://colab.research.google.com/github/MengOonLee/BertelsmannAITrack/blob/Lesson05/NetworkArchitectures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from torch import nn

In [0]:
class Network(nn.Module):
  """Fully-connected classifier with a single hidden layer.

  Every stage is stored as a sub-module: a 784 -> 256 linear layer, a
  sigmoid, a 256 -> 10 linear layer, and a softmax taken over dim 1.
  """

  def __init__(self):
    super().__init__()
    # Linear stages first, activations second: this registration order is
    # what the module's printed representation lists
    self.hidden = nn.Linear(784, 256)
    self.output = nn.Linear(256, 10)
    self.sigmoid = nn.Sigmoid()
    self.softmax = nn.Softmax(dim=1)

  def forward(self, x):
    """Return softmax(output(sigmoid(hidden(x)))).

    Assumes ``x`` is 2-D with 784 features in its last dimension (the
    softmax normalizes along dim 1) - TODO confirm against callers.
    """
    hidden_activation = self.sigmoid(self.hidden(x))
    return self.softmax(self.output(hidden_activation))

In [0]:
# Look at the network's text representation: leaving the bare name as the
# cell's last expression makes the notebook display the module's repr
model = Network()
model

Network(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
  (sigmoid): Sigmoid()
  (softmax): Softmax(dim=1)
)

In [0]:
import torch
import torch.nn.functional as F

class Network(nn.Module):
  """Functional-style single-hidden-layer classifier.

  Only the two linear layers are stored as sub-modules; the sigmoid and
  softmax activations are applied as plain function calls in ``forward``.
  """

  def __init__(self):
    super().__init__()
    # Inputs to hidden layer linear transformation
    self.hidden = nn.Linear(784, 256)
    # Output layer, 10 units - one for each digit
    self.output = nn.Linear(256, 10)

  def forward(self, x):
    """Return softmax(output(sigmoid(hidden(x)))), normalized over dim 1.

    Assumes ``x`` is 2-D with 784 features in its last dimension - TODO
    confirm against callers.
    """
    # Hidden layer with sigmoid activation. torch.sigmoid is used instead of
    # F.sigmoid, which raised a deprecation UserWarning on PyTorch 1.x
    x = torch.sigmoid(self.hidden(x))
    # Output layer with softmax activation
    x = F.softmax(self.output(x), dim=1)
    return x

In [0]:
# Instantiate the most recently defined Network; the bare name on the last
# line makes the notebook display the module's repr
model = Network()
model

Network(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [0]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch import optim

In [0]:
# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.5 and standard deviation 0.5 (one value each)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST training split, downloaded on first use into the given directory
trainset = datasets.MNIST(
    "~/.pytorch/MNIST_data/",
    download=True,
    train=True,
    transform=transform,
)
# Shuffled loader over the training split, 64 samples per batch
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

In [0]:
# Feed-forward stack 784 -> 128 -> 64 -> 10 with ReLU between the linear
# layers; the last layer has no activation, so the model emits raw scores
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)
# Loss applied directly to those raw scores
criterion = nn.CrossEntropyLoss()

# Take one batch from the loader and flatten every image into a single row
images, labels = next(iter(trainloader))
images = images.view(images.size(0), -1)

# Forward pass to logits, then score the logits against the labels
logits = model(images)
loss = criterion(logits, labels)

print(loss)

tensor(2.3165, grad_fn=<NllLossBackward>)


In [16]:
# Random 2x2 tensor that autograd is asked to track
x = torch.randn((2, 2), requires_grad=True)
print(x)
# Gradient slot of x, printed before any backward pass has been run
print(x.grad)

tensor([[ 0.6539,  1.0537],
        [ 2.8084, -1.7500]], requires_grad=True)
None


In [17]:
# Element-wise square of x
y = x.pow(2)
print(y, "\n")
# grad_fn is the autograd node recording the operation that produced y
print(y.grad_fn)

tensor([[0.4276, 1.1103],
        [7.8870, 3.0626]], grad_fn=<PowBackward0>) 

<PowBackward0 object at 0x7f67d26d0ac8>


In [18]:
# Collapse y to a single scalar so backward() can be called with no arguments
z = y.mean()
print(z)

# Backpropagate from z; the resulting gradient is stored on x
z.backward()
print(x.grad)

# Reference value: z is the mean of the four entries of x**2, so dz/dx = 2x/4 = x/2
print(x / 2)

tensor(3.1219, grad_fn=<MeanBackward0>)
tensor([[ 0.3270,  0.5269],
        [ 1.4042, -0.8750]])
tensor([[ 0.3270,  0.5269],
        [ 1.4042, -0.8750]], grad_fn=<DivBackward0>)


In [3]:
# Build a feed-forward network
model = nn.Sequential(nn.Linear(784,128),
                      nn.ReLU(),
                      nn.Linear(128,64),
                      nn.ReLU(),
                      nn.Linear(64,10),
                      nn.LogSoftmax(dim=1))
# Define the loss
criterion = nn.NLLLoss()
# Get our data
images, labels = next(iter(trainloader))
# Flatten images
images = images.view(images.shape[0],-1)
# Forward pass, get our logits
logps = model(images)
# Calculate the loss with the logits and the labels
loss = criterion(logps, labels)

print("Before backward pass: \n", model[0].weight.grad)
loss.backward()
print("After backward pass: \n", model[0].weight.grad)

Before backward pass: 
 None
After backward pass: 
 tensor([[-0.0057, -0.0057, -0.0057,  ..., -0.0057, -0.0057, -0.0057],
        [-0.0017, -0.0017, -0.0017,  ..., -0.0017, -0.0017, -0.0017],
        [ 0.0033,  0.0033,  0.0033,  ...,  0.0033,  0.0033,  0.0033],
        ...,
        [-0.0067, -0.0067, -0.0067,  ..., -0.0067, -0.0067, -0.0067],
        [ 0.0027,  0.0027,  0.0027,  ...,  0.0027,  0.0027,  0.0027],
        [-0.0007, -0.0007, -0.0007,  ..., -0.0007, -0.0007, -0.0007]])


In [0]:
# Optimizers require the parameters to optimize and a learning rate.
# Here: SGD over every parameter of `model`, with a learning rate of 0.01
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [5]:
print("Initial weights -", model[0].weight)
images, labels = next(iter(trainloader))
# Flatten each image into one row. view() takes the batch size from the
# tensor itself, unlike the in-place resize_(64, 784) it replaces, which
# hard-coded 64 and would not match a batch of any other size
images = images.view(images.shape[0], -1)
# Clear the gradients, do this because gradients are accumulated
optimizer.zero_grad()
# Forward pass, then backward pass (no weights are changed in this cell).
# The module is called directly rather than through .forward() so that any
# registered hooks run
output = model(images)
loss = criterion(output, labels)
loss.backward()
print("Gradient -", model[0].weight.grad)

Initial weights - Parameter containing:
tensor([[-0.0290, -0.0058,  0.0170,  ..., -0.0247, -0.0344,  0.0207],
        [-0.0334,  0.0156,  0.0071,  ...,  0.0041, -0.0291,  0.0255],
        [ 0.0292,  0.0074,  0.0222,  ...,  0.0122,  0.0020, -0.0172],
        ...,
        [ 0.0181, -0.0189, -0.0020,  ..., -0.0180,  0.0259, -0.0039],
        [ 0.0346, -0.0135, -0.0160,  ...,  0.0183,  0.0242, -0.0307],
        [-0.0262,  0.0107, -0.0336,  ..., -0.0230, -0.0125,  0.0187]],
       requires_grad=True)
Gradient - tensor([[-3.4955e-03, -3.4955e-03, -3.4955e-03,  ..., -3.4955e-03,
         -3.4955e-03, -3.4955e-03],
        [-4.1204e-04, -4.1204e-04, -4.1204e-04,  ..., -4.1204e-04,
         -4.1204e-04, -4.1204e-04],
        [-8.5489e-04, -8.5489e-04, -8.5489e-04,  ..., -8.5489e-04,
         -8.5489e-04, -8.5489e-04],
        ...,
        [ 1.0793e-03,  1.0793e-03,  1.0793e-03,  ...,  1.0793e-03,
          1.0793e-03,  1.0793e-03],
        [-2.8738e-05, -2.8738e-05, -2.8738e-05,  ..., -2.8738e-

In [6]:
# Take an update step and get the new weights. The values printed below
# differ slightly from the earlier "Initial weights" printout
optimizer.step()
print("Updated weights -", model[0].weight)

Updated weights - Parameter containing:
tensor([[-0.0289, -0.0058,  0.0171,  ..., -0.0247, -0.0343,  0.0207],
        [-0.0334,  0.0156,  0.0071,  ...,  0.0041, -0.0291,  0.0255],
        [ 0.0292,  0.0074,  0.0222,  ...,  0.0122,  0.0020, -0.0172],
        ...,
        [ 0.0181, -0.0189, -0.0020,  ..., -0.0180,  0.0259, -0.0039],
        [ 0.0346, -0.0135, -0.0160,  ...,  0.0183,  0.0242, -0.0307],
        [-0.0262,  0.0107, -0.0336,  ..., -0.0230, -0.0126,  0.0187]],
       requires_grad=True)
