In [1]:
import torch

# Naive Implementation of Feed Forward Layers

## Neural Network

In [2]:
# dimension
D = 3
K = 5

In [3]:
# specify the parameters of the neural network
## the first layer
W1 = torch.randn((K, D), requires_grad=True)
b1 = torch.randn((K, 1), requires_grad=True)

In [4]:
print(W1)

tensor([[-0.8932,  0.2623, -2.7496],
        [ 1.1011, -0.4415, -2.2481],
        [ 0.9644, -0.7362,  0.7305],
        [ 0.3782, -1.0921,  0.3289],
        [-0.0778, -1.0564, -2.0712]], requires_grad=True)


In [5]:
print(b1)

tensor([[-1.1006],
        [-0.3950],
        [ 0.3838],
        [-0.6522],
        [ 2.0503]], requires_grad=True)


In [6]:
## the second layer
W2 = torch.randn((K, K), requires_grad=True)
b2 = torch.randn((K, 1), requires_grad=True)

In [7]:
## the third layer
W3 = torch.randn((K, K), requires_grad=True)
b3 = torch.randn((K, 1), requires_grad=True)

In [8]:
## the last layer
W4 = torch.randn((1, K), requires_grad=True)
b4 = torch.randn((1,), requires_grad=True)

In [9]:
def sigmoid(x):
    return 1 / (1 + torch.exp(-x))

## Forward

In [18]:
# input
x = torch.randn((D, 1), requires_grad=True)

# ground truth
y_ground = torch.randn((1,))

print(x)
print(y_ground)

tensor([[ 0.6686],
        [ 1.3910],
        [-2.4452]], requires_grad=True)
tensor([-1.0027])


In [19]:
# forward
## the first layer
z1 = torch.matmul(W1, x) + b1
a1 = sigmoid(z1)

print(a1)

tensor([[0.9955],
        [0.9946],
        [0.1441],
        [0.0616],
        [0.9963]], grad_fn=<MulBackward0>)


In [20]:
## the second layer
z2 = torch.matmul(W2, a1) + b2
a2 = sigmoid(z2)

print(a2)

tensor([[0.8472],
        [0.0949],
        [0.7196],
        [0.7907],
        [0.0670]], grad_fn=<MulBackward0>)


In [21]:
## the third layer
z3 = torch.matmul(W3, a2) + b3
a3 = sigmoid(z3)

print(a3)

tensor([[0.3788],
        [0.9726],
        [0.5198],
        [0.7039],
        [0.4933]], grad_fn=<MulBackward0>)


In [22]:
## the last layer
z4 = torch.matmul(W4, a3) + b4
y_pred = sigmoid(z4)

print(y_pred)

tensor([[0.5010]], grad_fn=<MulBackward0>)


In [23]:
# calculate the loss
J = (y_pred - y_ground) ** 2
print(J)

tensor([[2.2612]], grad_fn=<PowBackward0>)


## Backward

In [24]:
print('the gradient of W2:')
print(W2.grad)
print('the gradient of b2:')
print(b2.grad)

the gradient of W2:
None
the gradient of b2:
None


In [25]:
# Compute gradients
J.backward()

In [26]:
print('the gradient of W2:')
print(W2.grad)
print('the gradient of b2:')
print(b2.grad)

the gradient of W2:
tensor([[ 3.5393e-02,  3.5364e-02,  5.1236e-03,  2.1918e-03,  3.5423e-02],
        [ 1.4044e-02,  1.4032e-02,  2.0330e-03,  8.6968e-04,  1.4055e-02],
        [-8.3885e-02, -8.3816e-02, -1.2143e-02, -5.1948e-03, -8.3955e-02],
        [-3.2176e-02, -3.2150e-02, -4.6579e-03, -1.9926e-03, -3.2203e-02],
        [-1.5706e-03, -1.5693e-03, -2.2736e-04, -9.7264e-05, -1.5719e-03]])
the gradient of b2:
tensor([[ 0.0356],
        [ 0.0141],
        [-0.0843],
        [-0.0323],
        [-0.0016]])


## Update the Parameter

W <- W - lr * W_gradient

In [27]:
lr = 0.1

In [28]:
print('the weight of W2 before updation:')
print(W2)

the weight of W2 before updation:
tensor([[ 3.1237e-01, -1.8969e+00, -6.3974e-01,  7.8099e-01,  2.4940e+00],
        [-1.2192e+00, -2.2249e-03,  1.3047e+00, -1.6586e+00, -3.0698e-01],
        [ 1.3101e-01,  1.2648e-01,  8.8051e-01,  6.0813e-01,  1.5349e+00],
        [ 7.2325e-01, -6.9043e-01,  8.3009e-01,  1.9712e+00,  1.5277e+00],
        [-1.7916e+00, -8.0821e-01, -2.3108e-01,  1.0899e+00,  1.1378e-01]],
       requires_grad=True)


In [29]:
expected_new_W2 = W2 - lr * W2.grad
print(expected_new_W2)

tensor([[ 0.3088, -1.9004, -0.6403,  0.7808,  2.4905],
        [-1.2206, -0.0036,  1.3045, -1.6587, -0.3084],
        [ 0.1394,  0.1349,  0.8817,  0.6087,  1.5433],
        [ 0.7265, -0.6872,  0.8306,  1.9714,  1.5309],
        [-1.7914, -0.8080, -0.2311,  1.0899,  0.1139]], grad_fn=<SubBackward0>)


In [30]:
# specify an optimizer
optimizer = torch.optim.SGD([W1, b1, W2, b2, W3, b3, W4, b4], lr=lr)

In [31]:
# Update parameters
optimizer.step()

In [32]:
print('the weight of W2 after updation:')
print(W2)

the weight of W2 after updation:
tensor([[ 0.3088, -1.9004, -0.6403,  0.7808,  2.4905],
        [-1.2206, -0.0036,  1.3045, -1.6587, -0.3084],
        [ 0.1394,  0.1349,  0.8817,  0.6087,  1.5433],
        [ 0.7265, -0.6872,  0.8306,  1.9714,  1.5309],
        [-1.7914, -0.8080, -0.2311,  1.0899,  0.1139]], requires_grad=True)


In [33]:
# check if two tensors, W2 and expected_new_W2, are element-wise equal
torch.eq(W2, expected_new_W2).all()

tensor(True)

# Pytorch Implementation of FFN

## FFN Layer

```
W1 = torch.randn((K, D), requires_grad=True)

b1 = torch.randn((K, 1), requires_grad=True)

def sigmoid(x):
    ...
```

In [34]:
# define
layer1 = torch.nn.Linear(in_features=D, out_features=K, bias=True)

In [35]:
# input
x = torch.randn((D,))

```
z1 = torch.matmul(W1, x) + b1
a1 = sigmoid(z1)
```

In [36]:
# forward
z1 = layer1(x)
a1 = torch.nn.functional.sigmoid(z1)

## Neural Network

In [37]:
class neural_network(torch.nn.Module):
    def __init__(self, D, K):
        super(neural_network, self).__init__()
        self.layer1 = torch.nn.Linear(in_features=D, out_features=K, bias=True)
        self.layer2 = torch.nn.Linear(in_features=K, out_features=K, bias=True)
        self.layer3 = torch.nn.Linear(in_features=K, out_features=K, bias=True)
        self.layer4 = torch.nn.Linear(in_features=K, out_features=1, bias=True)

    def forward(self, x):
        # the first layer
        z1 = self.layer1(x)
        a1 = torch.nn.functional.sigmoid(z1)
        
        # the second layer
        z2 = self.layer2(a1)
        a2 = torch.nn.functional.sigmoid(z2)
        
        # the third layer
        z3 = self.layer3(a2)
        a3 = torch.nn.functional.sigmoid(z3)
        
        # the fourth layer
        z4 = self.layer4(a3)
        a4 = torch.nn.functional.sigmoid(z4)
        return a4

In [38]:
# specify the network and the optimizer
network = neural_network(D, K)

optimizer = torch.optim.SGD(network.parameters(), lr=lr)

In [39]:
# input
x = torch.randn((D,))

# ground truth
y_ground = torch.randn((1,))

In [40]:
# forward
y_pred = network(x)

In [41]:
# calculate the loss
J = (y_pred - y_ground) ** 2
print(J)

tensor([0.1314], grad_fn=<PowBackward0>)


In [42]:
# backward
J.backward()

In [43]:
# update the parameters
optimizer.step()

In [44]:
[x.grad for x in network.parameters()]

[tensor([[ 1.9532e-04,  2.5752e-05,  5.7299e-05],
         [ 1.9530e-04,  2.5749e-05,  5.7293e-05],
         [-4.7596e-05, -6.2753e-06, -1.3963e-05],
         [ 1.9874e-05,  2.6204e-06,  5.8305e-06],
         [ 1.7986e-05,  2.3714e-06,  5.2764e-06]]),
 tensor([ 1.2927e-04,  1.2925e-04, -3.1500e-05,  1.3154e-05,  1.1904e-05]),
 tensor([[-7.2583e-05, -8.7198e-05, -4.9712e-05, -7.4146e-05, -7.9270e-05],
         [ 6.4507e-04,  7.7495e-04,  4.4181e-04,  6.5896e-04,  7.0450e-04],
         [ 4.4140e-04,  5.3028e-04,  3.0232e-04,  4.5091e-04,  4.8207e-04],
         [-5.1966e-04, -6.2429e-04, -3.5591e-04, -5.3085e-04, -5.6753e-04],
         [ 2.7265e-04,  3.2755e-04,  1.8674e-04,  2.7852e-04,  2.9777e-04]]),
 tensor([-0.0002,  0.0014,  0.0010, -0.0011,  0.0006]),
 tensor([[-0.0052, -0.0090, -0.0071, -0.0079, -0.0077],
         [ 0.0042,  0.0073,  0.0057,  0.0064,  0.0062],
         [ 0.0002,  0.0003,  0.0002,  0.0003,  0.0003],
         [-0.0025, -0.0044, -0.0035, -0.0039, -0.0038],
         [

In [45]:
optimizer.zero_grad()

In [46]:
[x.grad for x in network.parameters()]

[None, None, None, None, None, None, None, None]

## Loading data from numpy  

In [47]:
# Create a numpy array.
import numpy as np
x = np.array([[1, 2], [3, 4]])

In [48]:
# Convert the numpy array to a torch tensor.
y = torch.from_numpy(x)

In [49]:
print(y)

tensor([[1, 2],
        [3, 4]], dtype=torch.int32)


In [50]:
# Convert the torch tensor to a numpy array.
z = y.numpy()

In [51]:
print(z)

[[1 2]
 [3 4]]


## Input pipeline 

In [68]:
# Download and construct CIFAR-10 dataset.
import torchvision
import torchvision.transforms as transforms
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True, 
                                             transform=transforms.ToTensor(),
                                             download=True)

Files already downloaded and verified


In [53]:
# Fetch one data pair (read data from disk).
image, label = train_dataset[0]
print (image.size())
print (label)

torch.Size([3, 32, 32])
6


In [54]:
# Data loader (this provides queues and threads in a very simple way).
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64, 
                                           shuffle=True)

In [55]:
# When iteration starts, queue and thread start to load data from files.
data_iter = iter(train_loader)

In [56]:
# Get a mini-batch of images and labels
images, labels = next(data_iter)

In [None]:
for images, labels in train_loader:
    # Training code should be written here.
    # Zero the gradients
    optimizer.zero_grad()

    # Reshape images to match the input size expected by the network
    images = images.view(images.size(0), -1)[:, :D]

    # Forward pass
    outputs = network(images)

    # Calculate the loss
    loss = torch.nn.functional.mse_loss(outputs, labels.float().view(-1, 1))

    # Backward pass
    loss.backward()

    # Update the parameters
    optimizer.step()

In [70]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn.functional as F

# Define the CNN architecture
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 10)
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Define the transformations for the training and test sets
transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

# Load the CIFAR-10 dataset 
# The first time you run this, it will download the dataset (../../data/ is the directory to save the dataset)
train_dataset = torchvision.datasets.CIFAR10(root='../../data/', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)

test_dataset = torchvision.datasets.CIFAR10(root='../../data/', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Initialize the network, loss function, and optimizer
network = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(network.parameters(), lr=0.001)

# Training loop
for epoch in range(10):  # number of epochs
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = network(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:  # print every 100 mini-batches
            print(f'[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

print('Finished Training')

# Save the trained model
torch.save(network.state_dict(), 'cnn_cifar10.pth')

# Evaluate the network on the test data
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = network(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

Files already downloaded and verified
Files already downloaded and verified
[Epoch 1, Batch 100] loss: 2.013
[Epoch 1, Batch 200] loss: 1.746
[Epoch 1, Batch 300] loss: 1.613
[Epoch 1, Batch 400] loss: 1.539
[Epoch 1, Batch 500] loss: 1.477
[Epoch 1, Batch 600] loss: 1.417
[Epoch 1, Batch 700] loss: 1.347
[Epoch 2, Batch 100] loss: 1.299
[Epoch 2, Batch 200] loss: 1.265
[Epoch 2, Batch 300] loss: 1.218
[Epoch 2, Batch 400] loss: 1.167
[Epoch 2, Batch 500] loss: 1.197
[Epoch 2, Batch 600] loss: 1.166
[Epoch 2, Batch 700] loss: 1.112
[Epoch 3, Batch 100] loss: 1.075
[Epoch 3, Batch 200] loss: 1.062
[Epoch 3, Batch 300] loss: 1.045
[Epoch 3, Batch 400] loss: 1.057
[Epoch 3, Batch 500] loss: 1.012
[Epoch 3, Batch 600] loss: 1.029
[Epoch 3, Batch 700] loss: 0.999
[Epoch 4, Batch 100] loss: 0.977
[Epoch 4, Batch 200] loss: 0.959
[Epoch 4, Batch 300] loss: 0.920
[Epoch 4, Batch 400] loss: 0.938
[Epoch 4, Batch 500] loss: 0.925
[Epoch 4, Batch 600] loss: 0.891
[Epoch 4, Batch 700] loss: 0.919


In [78]:
# Load the saved model state dictionary
state_dict = torch.load('cnn_cifar10.pth')
print(state_dict.keys())

# Initialize the model architecture
model = CNN()
print(model)

# Load the state dictionary into the model
model.load_state_dict(state_dict)

# Set the model to evaluation mode
model.eval()

odict_keys(['conv1.weight', 'conv1.bias', 'conv2.weight', 'conv2.bias', 'conv3.weight', 'conv3.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias'])
CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


  state_dict = torch.load('cnn_cifar10.pth')


CNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2048, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [65]:
correct = 0
total = 0

# Disable gradient calculation for evaluation
with torch.no_grad():
    for images, labels in train_loader:
        # Reshape images to match the input size expected by the network
        images = images.view(images.size(0), -1)[:, :D]

        # Forward pass
        outputs = network(images)

        # Convert outputs to predicted labels
        predicted = outputs.round().view(-1).long()

        # Update the total and correct counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate accuracy
accuracy = 100 * correct / total
print(f'Accuracy of the network on the CIFAR-10 training dataset: {accuracy:.2f}%')

Accuracy of the network on the CIFAR-10 training dataset: 10.00%


In [63]:
# You can build your custom dataset as below.
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self):
        # Download and construct CIFAR-10 dataset.
        self.dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                                    train=True, 
                                                    transform=transforms.ToTensor(),
                                                    download=True)
    def __getitem__(self, index):
        # Fetch one data pair (image and label).
        image, label = self.dataset[index]
        return image, label

    def __len__(self):
        # Return the total size of the dataset.
        return len(self.dataset)

# You can then use the prebuilt data loader. 
custom_dataset = CustomDataset()
print(len(custom_dataset))

train_loader = torch.utils.data.DataLoader(dataset=custom_dataset,
                                           batch_size=64, 
                                           shuffle=True)

Files already downloaded and verified
50000
