# 1. Data Manipulation

Update: May 23, 2024

Author: Languisher Lin

In [76]:
import torch

## Tensor

### Basic Attributes

In [77]:
### --- Tensor attributes and characteristics --- ###

# Two ways to build a tensor that already holds values:
#   * x  -- a 1-D float32 range 0., 1., ..., 11.
#   * xx -- a 2 x 5 integer tensor built from a nested Python list
x = torch.arange(0, 12, dtype=torch.float32)
xx = torch.tensor(
    [
        [2, 3, 4, 5, 6],
        [1, 2, 3, 4, 5],
    ]
)

# Display both tensors as the cell output
(x, xx)

(tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.]),
 tensor([[2, 3, 4, 5, 6],
         [1, 2, 3, 4, 5]]))

In [85]:
# Shape of xx: the first entry counts the outermost lists (the rows), the
# second entry counts the elements inside each of them (the columns).
xx_shape = xx.shape

# Total number of scalar elements stored in x
num_elements = x.numel()

(num_elements, xx_shape)

(12, torch.Size([2, 5]))

In [79]:
# reshape changes only the layout, never the number of elements or their
# values: here the 12-element vector (12,) becomes a (3, 4) matrix.
X = x.reshape((3, 4))

# Passing -1 for ONE component of the shape lets PyTorch infer it from the
# remaining ones (12 elements / 4 columns -> 3 rows), so Y matches X.
Y = x.reshape((-1, 4))

(X, Y)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]),
 tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]))

In [80]:
# Tensors filled entirely with 0s or 1s; the argument is the desired shape
zero_tensor = torch.zeros((2, 3, 4))
one_tensor = torch.ones((2, 5))

# Tensor whose entries are drawn from a standard Gaussian (normal)
# distribution with mean 0 and standard deviation 1.
# A dedicated, explicitly seeded generator makes the sample identical on
# every "Restart & Run All" without modifying the global RNG state.
normal_tensor = torch.randn((3, 4), generator=torch.Generator().manual_seed(0))

zero_tensor, one_tensor, normal_tensor

(tensor([[[0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.]],
 
         [[0., 0., 0., 0.],
          [0., 0., 0., 0.],
          [0., 0., 0., 0.]]]),
 tensor([[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]),
 tensor([[ 0.7742, -0.8226, -0.0597,  1.4078],
         [-0.8552,  0.1352, -0.8441,  0.6571],
         [-1.4065, -0.5721, -0.1290, -2.4971]]))

In [81]:
# Indexing: works like (nested) Python lists, negative indices and slices included

# Reading values out of the tensor
last_line = X[-1, :]          # the final row
last_two_lines = X[1:3, :]    # rows 1 and 2; the stop index 3 is excluded

# Writing values: work on a clone ("deep copy") so that X itself stays untouched
Y = X.clone()
Y[1, 2] = 17    # assign a single element
Y[:2] = 12      # assign the same value to every element of rows 0 and 1
# NOTE: the slice assignment also covers position [1, 2], so the 17 written
# just above is overwritten and does not appear in the displayed Y.

(X, last_line, last_two_lines, Y)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]),
 tensor([ 8.,  9., 10., 11.]),
 tensor([[ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]),
 tensor([[12., 12., 12., 12.],
         [12., 12., 12., 12.],
         [ 8.,  9., 10., 11.]]))

In [82]:
# Unary operations: a scalar function f: R -> R is applied elementwise, so
# the result has the same shape as the input tensor.
exp_x = torch.exp(x)

# Binary operations between tensors are applied elementwise as well.
a = torch.tensor([2, 3])
b = torch.tensor([3, 4])
# `**` is exponentiation (a[i] raised to the power b[i]), NOT multiplication:
# [2**3, 3**4] == [8, 81]. The variable is named accordingly.
a_pow_b = a ** b

# Elementwise equality test: one boolean per position
aequalb = a == b

x, exp_x, a_pow_b, aequalb

(tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.]),
 tensor([1.0000e+00, 2.7183e+00, 7.3891e+00, 2.0086e+01, 5.4598e+01, 1.4841e+02,
         4.0343e+02, 1.0966e+03, 2.9810e+03, 8.1031e+03, 2.2026e+04, 5.9874e+04]),
 tensor([ 8, 81]),
 tensor([False, False]))

In [83]:
# Concatenating multiple tensors end to end with torch.cat:
#   dim=0 concatenates along rows    -> row counts add up    (3 + 3 = 6 rows)
#   dim=1 concatenates along columns -> column counts add up (4 + 4 = 8 columns)

X = torch.arange(12, dtype=torch.float32).reshape(3, 4)
Y = torch.tensor(
    [
        [2.0, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1],
    ]
)
row_concate = torch.cat([X, Y], dim=0)
col_concate = torch.cat([X, Y], dim=1)

(row_concate, col_concate)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [84]:
# Broadcasting
a = torch.arange(3)[:, None]    # column vector, shape (3, 1)
b = torch.arange(2)[None, :]    # row vector, shape (1, 2)

# a and b have different shapes, so before the elementwise addition both are
# broadcast to the common shape (3, 2), as if:
#   a: (3, 1) -> (3, 2) by copying its single column twice
#   b: (1, 2) -> (3, 2) by copying its single row three times

(a, b, a + b)

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]),
 tensor([[0, 1],
         [1, 2],
         [2, 3]]))

In [91]:
# Memory saving
# Goal: update the values of an existing tensor by overwriting (replacing) its
# current contents, instead of binding the name to a newly created result.

Z = torch.zeros(3, 4)
id_orig = id(Z)

X = torch.ones(3, 4)
Y = X    # X and Y are two names for the same tensor object
# The slice assignment "[:]" writes the result of X + Y into the tensor that Z
# already refers to; a plain `Z = X + Y` would rebind Z to a different object.
Z[:] = X + Y
id_new = id(Z)

# True -> Z is still the very same object after the update
id_orig == id_new

True

### Conversion to other Python objects

In [98]:
X = torch.arange(12, dtype=torch.float32).reshape(3, 4)

# Converting between PyTorch tensors and NumPy arrays.
# Attention: the two directions are spelled DIFFERENTLY:
#   Torch -> NumPy: a method called on the tensor   -> X.numpy()
#   NumPy -> Torch: a function of the torch module  -> torch.from_numpy(A)
# (Per the PyTorch documentation, the converted object shares memory with
# its source rather than copying it.)
A = X.numpy()
B = torch.from_numpy(A)

(type(A), type(B))

(numpy.ndarray, torch.Tensor)

## References

- [2.1 Data Manipulation](https://d2l.ai/chapter_preliminaries/ndarray.html)