In [1]:
import numpy as np
import torch

# Tensors

### Initializing a tensor

In [2]:
# Build a 2x2 tensor directly from a nested Python list.
data = [
    [1, 2],
    [3, 4],
]
tensor = torch.tensor(data)

# Show the Python type first, then the tensor contents (one per line).
print(type(tensor), tensor, sep="\n")

<class 'torch.Tensor'>
tensor([[1, 2],
        [3, 4]])


In [3]:
# Convert the nested list `data` to a NumPy array, then create a tensor from that array.
# The next cell modifies `np_array` in place and prints both objects to compare them.
np_array = np.array(data)
tensor_np = torch.from_numpy(np_array)
print(tensor_np)

tensor([[1, 2],
        [3, 4]])


In [4]:
# Increment the NumPy array in place (no new array is allocated), then print both
# the array and the tensor that was created from it so they can be compared.
np_array += 1
print(f"numpy: {np_array}")
print(f"tensor: {tensor_np}")

numpy: [[2 3]
 [4 5]]
tensor: tensor([[2, 3],
        [4, 5]])


In [5]:
# One shape (2 rows, 3 columns) shared by all three factory functions.
shape = (2, 3)

# Uniform random values, all ones, and all zeros, in that order.
rand_tensor, ones_tensor, zeros_tensor = (
    torch.rand(shape),
    torch.ones(shape),
    torch.zeros(shape),
)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.2529, 0.5166, 0.1033],
        [0.2668, 0.9063, 0.8086]]) 

Ones Tensor: 
 tensor([[1., 1., 1.],
        [1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0.],
        [0., 0., 0.]])


### Attributes of a tensor

In [6]:
# A random 3x4 tensor whose basic attributes are inspected below.
tensor = torch.rand((3, 4))

# Report shape, element type and the device holding the data, one per line.
print(
    f"Shape of tensor: {tensor.shape}",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
    sep="\n",
)

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


## Operations on tensors

### operation locations

In [7]:
# Move the tensor to the GPU when CUDA is available; otherwise keep it unchanged.
tensor = tensor.to("cuda") if torch.cuda.is_available() else tensor

# Same report as before, so the device line can be compared.
print(
    f"Shape of tensor: {tensor.shape}",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
    sep="\n",
)

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cuda:0


### indexing and slicing

In [8]:
# A 4x3 integer tensor used to demonstrate NumPy-style indexing.
tensor = torch.tensor(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12],
    ]
)
print(f"{tensor}")
print(f"Shape of tensor: {tensor.shape}")
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")

# Slice assignment writes in place: every entry of the second column becomes 0.
tensor[:, 1] = 0
print(tensor)


tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]])
Shape of tensor: torch.Size([4, 3])
First row: tensor([1, 2, 3])
First column: tensor([ 1,  4,  7, 10])
Last column: tensor([ 3,  6,  9, 12])
tensor([[ 1,  0,  3],
        [ 4,  0,  6],
        [ 7,  0,  9],
        [10,  0, 12]])


In [9]:
# Start from a (2, 3, 4) tensor of ones and a random (2, 4) patch.
x_t = torch.ones(2, 3, 4)
sub_tensor = torch.randn(2, 4)

# Overwrite rows 1 and 2 of the first 3x4 block with the patch; shapes match exactly.
x_t[0, 1:3] = sub_tensor
print(x_t)

tensor([[[ 1.0000,  1.0000,  1.0000,  1.0000],
         [-0.6588,  2.8546,  1.0704,  1.6985],
         [ 0.4942,  0.9827, -0.6635, -0.6075]],

        [[ 1.0000,  1.0000,  1.0000,  1.0000],
         [ 1.0000,  1.0000,  1.0000,  1.0000],
         [ 1.0000,  1.0000,  1.0000,  1.0000]]])


In [10]:
# Reset the previously overwritten rows back to 1 (a scalar fills the whole slice).
x_t[0, 1:3] = 1
print(x_t)

# Broadcasting: a single (1, 4) row is copied into both target rows,
# so rows 1 and 2 of the first block end up identical.
sub_tensor = torch.randn(1, 4)
x_t[0, 1:3] = sub_tensor
print(x_t)

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])
tensor([[[ 1.0000,  1.0000,  1.0000,  1.0000],
         [ 0.9485, -0.6543,  1.6534,  0.0303],
         [ 0.9485, -0.6543,  1.6534,  0.0303]],

        [[ 1.0000,  1.0000,  1.0000,  1.0000],
         [ 1.0000,  1.0000,  1.0000,  1.0000],
         [ 1.0000,  1.0000,  1.0000,  1.0000]]])


### joining

In [11]:
tensor = torch.tensor(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12],
    ]
)

# Concatenate three copies along dim=1 (columns): (4, 3) x 3 -> (4, 9).
t1 = torch.cat((tensor, tensor, tensor), dim=1)
print(t1)

tensor([[ 1,  2,  3,  1,  2,  3,  1,  2,  3],
        [ 4,  5,  6,  4,  5,  6,  4,  5,  6],
        [ 7,  8,  9,  7,  8,  9,  7,  8,  9],
        [10, 11, 12, 10, 11, 12, 10, 11, 12]])


### arithmetic operations

In [12]:
c = 10
x_t = torch.tensor([1, 2, 3], dtype=torch.float32)

# Broadcasting: the Python scalar is applied to every element of the tensor.
x_t = c * x_t
print(x_t)

tensor([10., 20., 30.])


In [13]:
# Element-wise addition of two tensors with the same (1, 2) shape.
# The first operand is named x1_t (digit one) to pair with x2_t; the earlier
# spelling used a lowercase letter "l", which is easy to misread.
x1_t = torch.ones((1, 2))
x2_t = torch.ones((1, 2))

# Last expression of the cell, so the notebook displays the sum.
x1_t + x2_t


tensor([[2., 2.]])

In [14]:
x1_t = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)        # shape (2, 2)
x2_t = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)  # shape (2, 3)

# Three equivalent spellings of the matrix product;
# each one yields tensor([[9, 12, 15], [19, 26, 33]]).
y1 = torch.matmul(x1_t, x2_t)
y2 = x1_t.matmul(x2_t)
y3 = x1_t @ x2_t

# Fourth variant: write the product into an existing tensor through `out=`.
y4 = torch.rand_like(y1)
torch.matmul(x1_t, x2_t, out=y4)

# All four results print identically.
print(y1, y2, y3, y4, sep="\n")


tensor([[ 9., 12., 15.],
        [19., 26., 33.]])
tensor([[ 9., 12., 15.],
        [19., 26., 33.]])
tensor([[ 9., 12., 15.],
        [19., 26., 33.]])
tensor([[ 9., 12., 15.],
        [19., 26., 33.]])


In [15]:
tensor = torch.tensor(
    [
        [1, 2],
        [3, 4],
    ],
    dtype=torch.float32,
)

# Element-wise product written three ways; z1, z2 and z3 end up with the same values.
z1 = tensor * tensor     # operator form
z2 = tensor.mul(tensor)  # method form

# Function form, writing into an existing tensor through `out=`.
# Kept as the last expression so the notebook displays the result.
z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

tensor([[ 1.,  4.],
        [ 9., 16.]])

In [16]:
# Sum over dim 0 (one total per column), then extract the first total
# as a plain Python number with .item().
agg = tensor.sum(dim=0)
agg_item = agg[0].item()
print(agg, agg_item)

tensor([4., 6.]) 4.0


# Automatic Differentiation (autograd)

In [17]:
# Fixed data: neither tensor requires gradients.
x = torch.ones(5)   # input, shape (5,)
y = torch.zeros(3)  # expected output, shape (3,)

# Learnable parameters: autograd tracks every operation that uses them.
w = torch.randn(5, 3, requires_grad=True)  # weight matrix, shape (5, 3)
b = torch.randn(3, requires_grad=True)     # bias, shape (3,)

# Forward pass: linear layer followed by binary cross-entropy on the logits.
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

<img src="https://pytorch.org/tutorials/_images/comp-graph.png" alt="image" width="600" height="auto">


In the forward pass, autograd does two things:
* run the requested operation to compute a resulting tensor
* maintain the operation’s gradient function in the DAG (directed acyclic graph).

The backward pass kicks off when .backward() is called on the DAG root. autograd then:
* computes the gradients from each .grad_fn,
* accumulates them in the respective tensor’s .grad attribute
* using the chain rule, propagates all the way to the leaf tensors.

In [18]:
# Inspect the grad_fn attribute of the tensors built in the previous code cell.
print(z)
# x and w were created directly rather than computed from other tensors;
# both print None in the output below.
print(f"Gradient function for x = {x.grad_fn}")
print(f"Gradient function for w = {w.grad_fn}")
print(f"Gradient function for z = {z.grad_fn}") # this function knows how to compute the gradient in the backward direction
print(f"Gradient function for loss = {loss.grad_fn}")

tensor([-2.7929,  0.0471,  1.6694], grad_fn=<AddBackward0>)
Gradient function for x = None
Gradient function for w = None
Gradient function for z = <AddBackward0 object at 0x7f5e86e50fd0>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7f5e86e512d0>


In [19]:
# Backpropagate from the loss; the resulting gradients are then read
# from the .grad attribute of w and b.
loss.backward()
print(w.grad, b.grad, sep="\n")

tensor([[0.0192, 0.1706, 0.2805],
        [0.0192, 0.1706, 0.2805],
        [0.0192, 0.1706, 0.2805],
        [0.0192, 0.1706, 0.2805],
        [0.0192, 0.1706, 0.2805]])
tensor([0.0192, 0.1706, 0.2805])


In [20]:
# Computed normally: the result requires gradients (prints True below).
z = x @ w + b
print(z.requires_grad)

# Same computation inside torch.no_grad(): the result does not require
# gradients (prints False below).
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

True
False


In [28]:
# f = x^2 + y^2 + z^2, so df/dx = 2x, df/dy = 2y, df/dz = 2z.
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)
z = torch.tensor(1.5, requires_grad=True)
f = x**2 + y**2 + z**2

f.backward()
print("grads: ", x.grad, y.grad, z.grad)

# A second backward pass over the same graph fails: by default, backward can
# be run only once for a given graph. The error is the point of this cell, so
# it is caught and displayed instead of aborting a Restart & Run All.
try:
    f.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")

grads:  tensor(4.) tensor(6.) tensor(3.)


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [29]:
# Same function as in the previous cell: f = x^2 + y^2 + z^2.
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(3.0, requires_grad=True)
z = torch.tensor(1.5, requires_grad=True)
f = x**2 + y**2 + z**2

# retain_graph=True lets backward run more than once on the same graph.
f.backward(retain_graph=True)
print("grads on First call: ", x.grad, y.grad, z.grad)

# PyTorch accumulates gradients in .grad across backward calls, which is why
# the second printout is double the first. (In real training the gradients are
# typically cleared between steps, e.g. with the optimizer's zero_grad().)
f.backward(retain_graph=True)
print("grads on Second call: ", x.grad, y.grad, z.grad)

grads on First call:  tensor(4.) tensor(6.) tensor(3.)
grads on Second call:  tensor(8.) tensor(12.) tensor(6.)
