In [1]:
import numpy as np
import torch

# Part 1 Warm up

In [5]:
# Part 1.1
# Build the flat vector 1..8, then derive a 2x4 arrangement of the same data.
a = np.arange(1, 9)
shape_a = np.reshape(a, (2, 4))

# Only the flat vector is displayed; the 2x4 form stays available in shape_a.
print(a)


[1 2 3 4 5 6 7 8]


In [7]:
# Part 1.2
# Element-wise arithmetic and dot products on two float32 vectors.

a = torch.tensor([1.0, 3.0, 5.0, 6.0], dtype=torch.float32)
b = torch.tensor([5.0, 6.0, 8.0, 9.0], dtype=torch.float32)

# Element-wise sum, product and power (a_i raised to b_i).
addition_result = torch.add(a, b)
multiplication_result = torch.mul(a, b)
power_result = torch.pow(a, b)

# Plain dot product, then the dot product of exp(a) with log(b).
dot_product = a.dot(b)
special_dot_prod = a.exp().dot(b.log())

# Show the results in the order they were computed.
for outcome in (
    addition_result,
    multiplication_result,
    power_result,
    dot_product,
    special_dot_prod,
):
    print(outcome)

tensor([ 6.,  9., 13., 15.])
tensor([ 5., 18., 40., 54.])
tensor([1.0000e+00, 7.2900e+02, 3.9062e+05, 1.0078e+07])
tensor(117.)
tensor(1235.4036)


# Part 1.3 Written sections....


$$\frac{dg}{dx} = e^x (2x + x^2)$$
$$\frac{dg}{dy} = 3e^y (2y + y^2)$$
$$\frac{dg}{dz} = 5e^z (2z + z^2)$$
$$\frac{dg}{dk} = 6e^k (2k + k^2)$$

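Evaluating at $(x, y, z, k) = (5, 6, 8, 9)$:
$$\left.\frac{dg}{dx}\right|_{x=5} = e^5 \times 35$$
$$\left.\frac{dg}{dy}\right|_{y=6} = 3e^6 \times 48$$
$$\left.\frac{dg}{dz}\right|_{z=8} = 5e^8 \times 80$$
$$\left.\frac{dg}{dk}\right|_{k=9} = 6e^9 \times 99$$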

In [16]:
# part 1.3 (a)

def g_function(x, y, z, k):
    """Evaluate g = e^x x^2 + 3 e^y y^2 + 5 e^z z^2 + 6 e^k k^2.

    Args:
        x, y, z, k: torch tensors (each is passed to ``torch.exp``).

    Returns:
        The tensor holding the sum of the four terms.
    """
    return torch.exp(x) * x**2 + 3 * torch.exp(y) * y**2 + 5 * torch.exp(z) * z**2 + 6 * torch.exp(k) * k**2

# Initialize variables with autograd enabled
x = torch.tensor(5.0, requires_grad=True)
y = torch.tensor(6.0, requires_grad=True)
z = torch.tensor(8.0, requires_grad=True)
k = torch.tensor(9.0, requires_grad=True)

# Compute the function value
g_value = g_function(x, y, z, k)

# Compute gradients; this populates .grad on each of the four leaf tensors
g_value.backward()

# Output gradients. Pair each label with its tensor directly instead of
# looking the tensor up by name through locals().
for name, variable in zip(("x", "y", "z", "k"), (x, y, z, k)):
    print(f"dg/d{name}: {variable.grad.item()}")

dg/dx: 5194.4609375
dg/dy: 58093.75
dg/dz: 1192383.25
dg/dk: 4813232.0


# Part 1.3 (b)
$$f\left(A\right)\ =\ \log\left( \lVert A^T AB^T AA^T AB^T \rVert_2^2 \right)$$
$$f(A) = \log \left( \lVert (A^T A)^2 B^T B \rVert_2^2 \right)$$


In [None]:
# 1.3 part (b)

# Enable autograd for tensor A; B is a constant and needs no gradient
A = torch.tensor([[4.0, 3.0], [7.0, 9.0]], requires_grad=True)
B = torch.tensor([[3.0, 5.0], [1.0, 11.0]], requires_grad=False)

# Transposes, computed once and reused in the product below
AT = A.t()
BT = B.t()

# term = A^T A B^T A A^T B, multiplied left to right
term = AT @ A @ BT @ A @ AT @ B

# Squared 2-norm of the flattened product, i.e. the sum of squared entries.
# torch.linalg.vector_norm replaces the deprecated torch.norm(term, p=2),
# which also flattened the matrix before taking the vector 2-norm.
# NOTE(review): if the matrix (spectral) 2-norm is what the exercise intends,
# use torch.linalg.matrix_norm(term, ord=2) instead - confirm.
L2_norm_squared = torch.linalg.vector_norm(term, ord=2) ** 2
f_A = torch.log(L2_norm_squared)

# Compute the gradient of f with respect to A
f_A.backward()

# Output the gradient
print(A.grad)

# 1.3 (C)

$$F\left(x,y\right)\ =\ \tanh\left(x\right)\ +\ \tanh\left(y\right)$$

Take the following derivatives: $$\frac{dF}{dx}=1-\tanh^2\left(x\right)$$
$$\frac{dF}{dy}=1-\tanh^2\left(y\right)$$

At the point ($x=3.0, y=7.0$) these derivatives become quite small: $\tanh$ is already very close to 1 there, so $1-\tanh^{2}(x)$ and $1-\tanh^{2}(y)$ are close to zero.

For the values when x = 3.0 we get: $$\frac{dF}{dx}=1-\tanh^2(3)=1-0.9950547536867305^2\approx0.0099$$
For the values when y = 7.0 we get: $$\frac{dF}{dy}=1-\tanh^2(7)=1-0.9999983369439451^2\approx0.00000333$$

(Similar to what our code below is generating using pytorch)



In [18]:
# Leaf tensors at the evaluation point (3, 7), tracked by autograd
x = torch.tensor(3.0, requires_grad=True)
y = torch.tensor(7.0, requires_grad=True)

# F(x, y) = tanh(x) + tanh(y)
F = x.tanh() + y.tanh()

# Backpropagate from F so that x.grad and y.grad are populated
F.backward()

# Pull the two partial derivatives out as plain Python floats
grad_x, grad_y = (leaf.grad.item() for leaf in (x, y))

print(f"The gradient with respect to x is {grad_x}")
print(f"The gradient with respect to y is {grad_y}")

The gradient with respect to x is 0.009865999221801758
The gradient with respect to y is 3.337860107421875e-06


# Part 1.4 

* Let a be a torch integer tensor containing the values [1, 2, 3].
* Convert a to a numpy array and store it under a new variable b
* convert a into a float tensor

In [20]:

# Integer (int32) tensor 'a'
a = torch.tensor([1, 2, 3], dtype=torch.int32)

# NumPy array 'b' obtained from 'a'
b = a.numpy()

# Float32 version of 'a'
a_float = a.float()

# Report all three objects
print(f"Original tensor a: {a}")
print(f"Numpy array b: {b}")
print(f"Float tensor a_float: {a_float}")

Original tensor a: tensor([1, 2, 3], dtype=torch.int32)
Numpy array b: [1 2 3]
Float tensor a_float: tensor([1., 2., 3.])


# 1.5 

* Answer the following questions using the package Numpy:
    * What is the product of the matrices [[1, 3, 5], [2, 1, 5]] and [[8, 4], [3, 6], [2, 7]]?


    * What is the Frobenius norm of the 1 x 3 matrix [100, 2, 1]?
    The Frobenius norm is given by $\sqrt{\mathrm{trace}(A^TA)}$, which here is
    $$\sqrt{100^2+2^2+1^2}$$
    

In [28]:
# Product of a 2x3 matrix with a 3x2 matrix
A = np.array([[1, 3, 5], [2, 1, 5]])
B = np.array([[8, 4], [3, 6], [2, 7]])
result = A @ B
print(f"{result} This is the product of the matrices given")

# Frobenius norm of [100, 2, 1], stored as a 1x3 (two-dimensional) matrix
C = np.array([[100, 2, 1]])
frobenius_norm = np.linalg.norm(C, "fro")
print(f"{frobenius_norm} This is the Frobenius Norm of the 1x3 matrix.")

# The same computed value is reported again alongside the written derivation
print(f"{frobenius_norm} This is the Frobenius Norm as calculated (See above for exact calculation)")

[[27 57]
 [29 49]] This is the produce of the matrices given
100.024996875781 This is the Frobenius Norm of the 1x3 matrix.
100.024996875781 This is the Frobenius Norm as calculated (See above for exact calculation)
