## PyTorch exercises

### Tensors

1. Make a tensor of size (2, 17)
2. Make a torch.FloatTensor of size (3, 1)
3. Make a torch.LongTensor of size (5, 2, 1)
  - fill the entire tensor with 7s
4. Make a torch.ByteTensor of size (5,)
  - fill the middle 3 indices with ones such that it records [0, 1, 1, 1, 0]
5. Perform a matrix multiplication of two tensors of size (2, 4) and (4, 2). Then do it in-place.
6. Do element-wise multiplication of two randomly filled $(n_1,n_2,n_3)$ tensors. Then store the result in a NumPy array.

### Forward-prop/backward-prop
1. Create a Tensor of size (5, 5) that has `requires_grad` set to `True`.
2. Sum the values in the Tensor.
3. Multiply the tensor by 2 and assign the result to a new python variable (i.e. `x = result`)
4. Sum the variable's elements and assign to a new python variable
5. Print the gradients of all the variables
6. Now perform a backward pass on the last variable (NOTE: for each new python variable that you define, call `.retain_grad()`)
7. Print all gradients again

In [1]:
import torch

# Task 1: a (2, 17) tensor. torch.zeros yields the same float32 zeros as
# spelling out two 17-element rows by hand.
tensor1 = torch.zeros(2, 17)
print(" Tensor of size (2, 17):")
print(tensor1)


 Tensor of size (2, 17):
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])


In [2]:
# Task 2: a float32 (torch.FloatTensor) tensor of size (3, 1).
# The contents are uninitialized memory, so the printed values are arbitrary.
tensor2 = torch.empty(3, 1, dtype=torch.float32)
print(" FloatTensor of size (3, 1):")
print(tensor2)


 FloatTensor of size (3, 1):
tensor([[0.],
        [0.],
        [0.]])


In [3]:
# Task 3: an int64 (torch.LongTensor) tensor of size (5, 2, 1) where every
# entry is 7, allocated and filled in a single call.
tensor3 = torch.full((5, 2, 1), 7, dtype=torch.long)
print(" LongTensor of size (5, 2, 1) filled with 7s:")
print(tensor3)


 LongTensor of size (5, 2, 1) filled with 7s:
tensor([[[7],
         [7]],

        [[7],
         [7]],

        [[7],
         [7]],

        [[7],
         [7]],

        [[7],
         [7]]])


In [4]:
# Task 4: a uint8 (torch.ByteTensor) tensor of size (5,). Start from zeros and
# set the middle three positions (indices 1..3) to one -> [0, 1, 1, 1, 0].
tensor4 = torch.zeros(5, dtype=torch.uint8)
tensor4[1:4] = 1
print(" ByteTensor of size (5,) with [0, 1, 1, 1, 0]:")
print(tensor4)


 ByteTensor of size (5,) with [0, 1, 1, 1, 0]:
tensor([0, 1, 1, 1, 0], dtype=torch.uint8)


In [5]:
# Task 5: multiply a (2, 4) tensor by a (4, 2) tensor, then repeat the
# multiplication in-place.
A = torch.randn(2, 4)
B = torch.randn(4, 2)
result = torch.matmul(A, B)

print(" Matrix multiplication result (2x4) * (4x2):")
print(result)

# `A = torch.matmul(A, B)` is not in-place: it allocates a new tensor and only
# rebinds the name A. The (2, 2) product also cannot be stored in A itself,
# whose shape is (2, 4). The in-place form writes the product into an existing
# tensor of the right shape through the `out=` argument.
inplace_result = torch.empty(2, 2)
torch.matmul(A, B, out=inplace_result)
print(" In-place matrix multiplication written into a preallocated tensor:")
print(inplace_result)


 Matrix multiplication result (2x4) * (4x2):
tensor([[-0.8889, -0.2717],
        [ 0.5498, -0.2543]])
 In-place overwrite of A with result:
tensor([[-0.8889, -0.2717],
        [ 0.5498, -0.2543]])


In [6]:
import numpy as np

# Task 6: element-wise product of two uniformly random (n1, n2, n3) tensors,
# then the same values exposed as a NumPy array.
n1, n2, n3 = 2, 3, 4
tensor_a = torch.rand((n1, n2, n3))
tensor_b = torch.rand((n1, n2, n3))
elementwise_result = torch.mul(tensor_a, tensor_b)
# Tensor.numpy() on a CPU tensor returns an ndarray view of the same data.
numpy_result = elementwise_result.numpy()

print(" Element-wise multiplication result:")
print(elementwise_result)
print(" Result converted to NumPy array:")
print(numpy_result)


 Element-wise multiplication result:
tensor([[[1.6274e-04, 2.1577e-03, 5.6251e-01, 3.6718e-02],
         [4.2516e-01, 2.0396e-02, 1.0948e-01, 3.0705e-01],
         [8.7753e-01, 5.4295e-01, 8.4559e-01, 2.1721e-01]],

        [[4.0951e-01, 1.8030e-01, 5.1912e-01, 4.9944e-01],
         [1.3409e-01, 5.2260e-01, 9.6824e-01, 3.4192e-01],
         [1.5617e-01, 3.5520e-01, 1.4171e-01, 3.8616e-03]]])
 Result converted to NumPy array:
[[[1.6273654e-04 2.1577068e-03 5.6250918e-01 3.6718372e-02]
  [4.2515942e-01 2.0395765e-02 1.0947785e-01 3.0704728e-01]
  [8.7752855e-01 5.4294688e-01 8.4558940e-01 2.1720695e-01]]

 [[4.0951216e-01 1.8030490e-01 5.1911980e-01 4.9944228e-01]
  [1.3409378e-01 5.2259552e-01 9.6824056e-01 3.4191865e-01]
  [1.5616520e-01 3.5519552e-01 1.4170951e-01 3.8616296e-03]]]


In [7]:
# Forward/backward Task 1: a (5, 5) leaf tensor that tracks gradients.
tensor = torch.randn(5, 5).requires_grad_()
print(" Tensor with requires_grad=True:")
print(tensor)


 Tensor with requires_grad=True:
tensor([[-0.3628, -0.8236, -1.0490,  1.0515, -0.8683],
        [-1.4116, -0.3529, -1.6874, -1.0593,  2.5627],
        [ 0.7779,  0.8759, -0.2971, -1.5095,  2.0342],
        [-0.9027, -2.4532, -2.3985, -0.1613,  1.2084],
        [ 0.6173, -1.5067, -1.2930,  0.4448,  1.0610]], requires_grad=True)


In [8]:
# Forward/backward Task 2: reduce the tensor to a scalar by summing it.
sum_tensor = torch.sum(tensor)
# Non-leaf results drop their .grad unless asked to keep it.
sum_tensor.retain_grad()
print("Sum of all elements in the tensor:")
print(sum_tensor)


Sum of all elements in the tensor:
tensor(-7.5032, grad_fn=<SumBackward0>)


In [9]:
# Forward/backward Task 3: scale the tensor by 2 and bind the result to x.
x = torch.mul(tensor, 2)
# x is a non-leaf tensor; keep its gradient so it can be inspected later.
x.retain_grad()
print("Tensor multiplied by 2:")
print(x)


Tensor multiplied by 2:
tensor([[-0.7256, -1.6472, -2.0979,  2.1030, -1.7366],
        [-2.8232, -0.7058, -3.3748, -2.1187,  5.1255],
        [ 1.5557,  1.7518, -0.5942, -3.0190,  4.0684],
        [-1.8055, -4.9064, -4.7970, -0.3226,  2.4167],
        [ 1.2345, -3.0134, -2.5860,  0.8897,  2.1221]], grad_fn=<MulBackward0>)


In [10]:
# Forward/backward Task 4: sum x into the scalar y, the end of the graph.
y = torch.sum(x)
# Keep y's own gradient available after the backward pass.
y.retain_grad()
print("Sum of elements after multiplication (final output):")
print(y)


Sum of elements after multiplication (final output):
tensor(-15.0065, grad_fn=<SumBackward0>)


In [11]:
# Task 5
# Forward/backward Task 5: inspect .grad on every variable created so far.
# No backward pass has run yet, so each of them is still None.
print(" Gradients before backward:")
print("Grad of tensor:", tensor.grad)
print("Grad of x:", x.grad)
print("Grad of y:", y.grad)
print("Grad of sum_tensor:", sum_tensor.grad)


 Gradients before backward:
Grad of tensor: None
Grad of x: None
Grad of y: None
Grad of sum_tensor: None


In [12]:
# Task 6
# Forward/backward Task 6: backpropagate from y. y is a scalar (the result of
# x.sum()), so backward() is called without an explicit gradient argument.
y.backward()
print(" Backward pass done.")


 Backward pass done.


In [13]:
# Task 7
# Forward/backward Task 7: inspect the gradients again after y.backward().
# With y = sum(2 * tensor): dy/dtensor is 2 everywhere, dy/dx is 1 everywhere
# and dy/dy is 1. sum_tensor was computed from tensor but y does not depend on
# it, so the backward pass never reaches it and its grad stays None.
print("Gradients after backward:")
print("Grad of tensor:\n", tensor.grad)
print("Grad of x:\n", x.grad)
print("Grad of y:\n", y.grad)
print("Grad of sum_tensor:\n", sum_tensor.grad)


Gradients after backward:
Grad of tensor:
 tensor([[2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2.]])
Grad of x:
 tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
Grad of y:
 tensor(1.)
Grad of sum_tensor:
 None


### Deep feed-forward NNs
1. Look at Lab 3. In Exercise 12 there, you had to build an $L$-layer neural network with the following structure: *[LINEAR -> RELU]$\times$(L-1) -> LINEAR -> SIGMOID*. Reimplement the manual code in PyTorch.
2. Compare test accuracy using different optimizers: SGD, Adam, Momentum.

In [15]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Layer widths handed to DeepNet, input first: 12288 input features (the
# column count of train_x/test_x below), hidden widths 20, 7 and 5, and a
# single output unit.
layers_dims = [12288, 20, 7, 5, 1]

class DeepNet(nn.Module):
    """Fully connected net: [Linear -> ReLU] x (L-1), then Linear -> sigmoid.

    Args:
        layers_dims: Sequence of layer widths, input size first and output
            size last. Each consecutive pair defines one ``nn.Linear``.
    """

    def __init__(self, layers_dims):
        super().__init__()
        modules = []
        # Every layer except the last is followed by a ReLU.
        hidden_pairs = zip(layers_dims[:-2], layers_dims[1:-1])
        for fan_in, fan_out in hidden_pairs:
            modules += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Output layer; its sigmoid is applied in forward().
        modules.append(nn.Linear(layers_dims[-2], layers_dims[-1]))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Return the sigmoid of the stacked layers' output for input ``x``."""
        return torch.sigmoid(self.net(x))


def train_model(model, train_loader, optimizer, criterion, epochs=1000):
    """Train ``model`` by taking one optimizer step per batch.

    Args:
        model: Module to train; it is switched to training mode first.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates the model's parameters.
        criterion: Callable computing the loss from ``(output, targets)``.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        The same ``model`` object, updated in place.
    """
    model.train()
    for _ in range(epochs):
        for inputs, targets in train_loader:
            # Clear gradients accumulated by the previous step.
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

    return model

def evaluate_model(model, X_test, Y_test):
    """Return the fraction of rounded predictions that equal the labels.

    Args:
        model: Module producing probabilities; it is switched to eval mode.
        X_test: Input tensor passed to ``model``.
        Y_test: 0/1 labels holding one value per prediction. Both ``(N, 1)``
            and flat ``(N,)`` layouts are accepted.

    Returns:
        Accuracy as a Python float.

    Raises:
        RuntimeError: If ``Y_test`` does not contain as many elements as the
            model output, so it cannot be aligned with the predictions.
    """
    model.eval()
    with torch.no_grad():
        preds = model(X_test).round()
        # Align labels with the predictions before comparing. Comparing an
        # (N, 1) prediction with (N,) labels would broadcast to an (N, N)
        # matrix and produce a meaningless count.
        targets = Y_test.reshape(preds.shape)
        accuracy = (preds.eq(targets).sum().float() / targets.numel()).item()
    return accuracy

# Fixed seed so the synthetic data and every training run are reproducible.
SEED = 0
torch.manual_seed(SEED)

# Synthetic stand-in data: Gaussian inputs with labels drawn independently of
# them, so the test accuracy of any optimizer is expected to sit near chance.
train_x = torch.randn(209, 12288)
train_y = torch.randint(0, 2, (209, 1)).float()
test_x = torch.randn(50, 12288)
test_y = torch.randint(0, 2, (50, 1)).float()

train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=32, shuffle=True)

# Factories rather than instances: each optimizer must be bound to the
# parameters of the fresh model created for its own run.
optimizers = {
    'SGD': lambda params: optim.SGD(params, lr=0.0075),
    'Momentum': lambda params: optim.SGD(params, lr=0.0075, momentum=0.9),
    'Adam': lambda params: optim.Adam(params, lr=0.0075)
}

# DeepNet already applies the sigmoid, so the loss takes probabilities.
criterion = nn.BCELoss()

for name, opt_fn in optimizers.items():
    # Reset the global RNG before each run so every optimizer starts from the
    # same initial weights; otherwise the comparison would mix optimizer
    # effects with differences in random initialisation.
    torch.manual_seed(SEED)
    model = DeepNet(layers_dims)
    optimizer = opt_fn(model.parameters())
    model = train_model(model, train_loader, optimizer, criterion, epochs=1000)
    acc = evaluate_model(model, test_x, test_y)
    print(f"Test accuracy with {name}: {acc:.4f}")


Test accuracy with SGD: 0.4400
Test accuracy with Momentum: 0.4400
Test accuracy with Adam: 0.5400
