In [3]:
%matplotlib inline
import numpy as np
# import pandas as pd
# import seaborn as sns
import matplotlib as plt 
from matplotlib_inline import backend_inline

In [6]:
def f(x):
    return 3 * x ** 2- 4 * x
f(1)

-1

In [8]:
f(1) , f(2), f(5)

(-1, 4, 55)

In [9]:
10.0**np.arange(-1,-6,-1)

array([1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05])

In [10]:
for h in 10.0**np.arange(-1,-6,-1):
    print(f'h={h:.5f}, numerical limit={(f(1+h)-f(1))/h:.5f}')
 

h=0.10000, numerical limit=2.30000
h=0.01000, numerical limit=2.03000
h=0.00100, numerical limit=2.00300
h=0.00010, numerical limit=2.00030
h=0.00001, numerical limit=2.00003


In [11]:
 def use_svg_display(): #@save
     backend_inline.set_matplotlib_formats('svg')
 """Use the svg format to display a plot in Jupyter."""

'Use the svg format to display a plot in Jupyter.'

In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [2]:
import torch

# 2.5 Automatic Differentiation

### 2.5.1 A Simple Function

In [29]:
x = torch.arange(4.0)
x

tensor([0., 1., 2., 3.])

In [30]:
# Can also create x = torch.arange(4.0, requires_grad=True)
x.requires_grad_(True)
x.grad # The gradient is None by default

In [31]:
y = 2 * torch.dot(x, x) # 2X^TX  # dot product and then multiplied by 2
y

tensor(28., grad_fn=<MulBackward0>)

In [32]:
 # Wecannowtake the gradient of y with respect to x by calling its backward method. Next,
 # we can access the gradient via x‚Äôs grad attribute.
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [33]:
x.grad == 4*x

tensor([True, True, True, True])

# 2.5.2 Backward for Non-Scalar Variables

In [34]:
x.grad.zero_() # Reset the gradient
y = x.sum()
y.backward()
x.grad   

tensor([1., 1., 1., 1.])

# 2.5.3 Detaching Computation

In [35]:
x.grad.zero_()
y = x * x
u = y.detach()
# print(u)
z = u * x
z.sum().backward()

In [36]:
x.grad == u

tensor([True, True, True, True])

In [37]:
x.grad

tensor([0., 1., 4., 9.])

In [38]:
u

tensor([0., 1., 4., 9.])

In [39]:
x.grad.zero_()
y.sum().backward()
x.grad == 2 * x

tensor([True, True, True, True])

# 2.5.4 Gradients and Python Control Flow

In [41]:
def fun(a):
    b = a * 2
    while b.norm() < 1000:
        b = b*2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c

In [43]:
a = torch.randn(size=(), requires_grad=True)
a
d = fun(a)
d.backward()

In [44]:
a.grad

tensor(819200.)

In [45]:
a.grad == d / a

tensor(True)

In [47]:
d/a

tensor(819200., grad_fn=<DivBackward0>)

In [49]:
a = torch.randn(size=(), requires_grad=True)
a

tensor(0.8130, requires_grad=True)

In [50]:
a.norm()

tensor(0.8130, grad_fn=<LinalgVectorNormBackward0>)

In [51]:
b=a*2
b.norm()

tensor(1.6260, grad_fn=<LinalgVectorNormBackward0>)

In [56]:
torch.arange(4) #.norm()

tensor([0, 1, 2, 3])

# Vector Norms (L1, L2, and Infinity Norm)

<img src="images/norms.png" width="3000">

### L2 Norm (Euclidean Norm)

In [59]:
import torch
v = torch.tensor([3.0, -4.0, 0.0])

# Calculated as: sqrt(3^2 + (-4)^2 + 0^2) = sqrt(9 + 16) = 5.0
l2_norm = torch.norm(v) 
# l2_norm will be tensor(5.)
print(l2_norm)

tensor(5.)


In [60]:
l1_norm = torch.norm(v, p=1)
# Calculated as: |3| + |-4| + |0| = 7.0
# l1_norm will be tensor(7.)
print(l1_norm)

tensor(7.)


In [61]:
inf_norm = torch.norm(v, p=float('inf'))
# Calculated as: max(|3|, |-4|, |0|) = 4.0
# inf_norm will be tensor(4.)
print(inf_norm)

tensor(4.)


## Matrix Norms (Calculating Norms over Dimensions)

In [62]:
M = torch.tensor([[1.0, 2.0, 3.0], 
                  [4.0, 5.0, 6.0]])

In [65]:
# Norm over Columns (dim=0)
col_norms = torch.norm(M, dim=0)
# Column 1: sqrt(1^2 + 4^2) = sqrt(17) ‚âà 4.12
# Column 2: sqrt(2^2 + 5^2) = sqrt(29) ‚âà 5.39
# Column 3: sqrt(3^2 + 6^2) = sqrt(45) ‚âà 6.71
# col_norms will be tensor([4.1231, 5.3852, 6.7082])
col_norms

tensor([4.1231, 5.3852, 6.7082])

In [69]:
type(col_norms), col_norms.shape

(torch.Tensor, torch.Size([3]))

In [67]:
row_norms = torch.norm(M, dim=1)
# Row 1: sqrt(1^2 + 2^2 + 3^2) = sqrt(14) ‚âà 3.74
# Row 2: sqrt(4^2 + 5^2 + 6^2) = sqrt(77) ‚âà 8.77
# row_norms will be tensor([3.7417, 8.7750])
print(row_norms)

tensor([3.7417, 8.7750])


In [68]:
row_norms.shape

torch.Size([2])

In [71]:
# Equivalent to torch.norm(M)
fro_norm = torch.norm(M, p='fro')
# Calculated as: sqrt(1^2 + ... + 6^2) = sqrt(14 + 77) = sqrt(91) ‚âà 9.54
# fro_norm will be tensor(9.5394)
print(fro_norm)

tensor(9.5394)
