In [1]:
# https://github.com/deep-learning-with-pytorch/dlwpt-code/blob/master/p1ch3/1_tensors.ipynb

# Why a Python list is a suboptimal way to store a vector
1. Numbers in Python are full-fledged objects, so every value is boxed and carries per-object overhead.
![figure](pictures/tensor_unboxed.png)
2. Lists in Python are meant for sequential collections of objects: they define no vector operations, have no optimized memory layout, and are one-dimensional.
3. The Python interpreter is slow compared with optimized, compiled code.

In [5]:
# A plain Python list holding three floats.
a = [1.0, 2.0, 1.0]

# Read the first element by position.
print(a[0])

# Overwrite the last element (index 2 of a three-item list) and show the result.
a[-1] = 3.0
print(a)

1.0
[1.0, 2.0, 3.0]


# 2.1 Introduce dedicated data structures: PyTorch tensors

In [6]:
import torch

In [8]:
# Build a one-dimensional tensor containing three ones.
a = torch.ones(3)
print(a)

# Indexing returns a zero-dimensional tensor; .item() unwraps it into a Python float.
print(a[1])
print(a[1].item())

# Assign through an index (the last of three elements), just like with a list.
a[-1] = 2.0
print(a)

tensor([1., 1., 1.])
tensor(1.)
1.0
tensor([1., 1., 2.])


In [13]:
# Option 1: allocate a zero-filled tensor, then overwrite the zeros one by one.
points = torch.zeros(6)
points[0], points[1], points[2] = 1.0, 4.0, 2.0
points[3], points[4], points[5] = 1.0, 3.0, 5.0

# Option 2: hand the values to the constructor as a Python list.
points = torch.tensor([1.0, 4.0, 2.0, 1.0, 3.0, 5.0])
print(points)

# Coordinates of the first point, unwrapped into Python floats.
print(points[0].item(), points[1].item())

tensor([1., 4., 2., 1., 3., 5.])
1.0 4.0


In [20]:
# Pass a list of lists to build a 2-D tensor (one inner list per point).
points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])
print(points)
# .shape and .size() both report the extent along every dimension.
print(points.shape)
print(points.size())

# Initialize a zero-filled tensor of a given size.
points = torch.zeros(3, 2)
print(points)

# Access elements.
# torch.tensor with an explicit dtype replaces the legacy torch.FloatTensor
# constructor: same float32 result, consistent with the other cells.
points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]], dtype=torch.float32)
print(points)
# Two indices select a single element (row 0, column 1) ...
print(points[0, 1])
# ... one index selects a whole row (the first point).
print(points[0])

tensor([[1., 4.],
        [2., 1.],
        [3., 5.]])
torch.Size([3, 2])
torch.Size([3, 2])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 4.],
        [2., 1.],
        [3., 5.]])
tensor(4.)
tensor([1., 4.])


# 2.2 Tensors and storages
- A storage is a one-dimensional array of numerical data, such as a contiguous block of memory containing numbers of a given type, perhaps a float or int32.
- Multiple tensors can index the same storage even if they index into the data differently.
![figure](pictures/storage.png)

In [34]:
points = torch.tensor([
    [1.0, 4.0],
    [2.0, 1.0],
    [3.0, 5.0],
])

# The storage is the flat, one-dimensional buffer behind the 2-D tensor.
points_storage = points.storage()
print(points_storage)

# Storage elements are addressed with a single index.
print(points_storage[0])
print(points_storage[1])

 1.0
 4.0
 2.0
 1.0
 3.0
 5.0
[torch.FloatStorage of size 6]
1.0
4.0


In [24]:
# replace elements
points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]]) 
points_storage = points.storage()
points_storage[0] = 2.0
print(points)

tensor([[2., 4.],
        [2., 1.],
        [3., 5.]])


# 2.3 Size, storage offset, and strides
- Size: tuple indicating how many elements across each dimension the tensor represents.
- Storage offset: index in the storage that corresponds to the first element in the tensor.
![figure](pictures/size_storage_stride.png)
- Stride: number of elements in the storage that need to be skipped to obtain the next element along each dimension.

In [35]:
points = torch.tensor([
    [1.0, 4.0],
    [2.0, 1.0],
    [3.0, 5.0],
])

# Indexing a row gives a tensor whose first element sits at a non-zero
# position in the storage of `points`.
second_point = points[1]
print(second_point.storage_offset())

print(second_point)
# .size() and .shape return the same torch.Size object type.
print(second_point.size())
print(second_point.shape)
print(torch.Size([2]))

# Strides: storage elements to skip per step along each dimension.
print(points.stride(), second_point.stride(), sep="\n")

2
tensor([2., 1.])
torch.Size([2])
torch.Size([2])
torch.Size([2])
(2, 1)
(1,)


In [38]:
points = torch.tensor([
    [1.0, 4.0],
    [2.0, 1.0],
    [3.0, 5.0],
])

# Indexing returns a view: writing through it replaces the value in `points`.
second_point = points[1]
second_point[0] = 10.0
print(points)

# clone() makes an independent copy: writing to it leaves `points` untouched.
# (`points` still shows 10. here only because of the write through the view above.)
second_point = points[1].clone()
second_point[0] = 10.0
print(points)

tensor([[ 1.,  4.],
        [10.,  1.],
        [ 3.,  5.]])
tensor([[ 1.,  4.],
        [10.,  1.],
        [ 3.,  5.]])


In [45]:
# Transpose: t() returns a new tensor over the same data with swapped strides.
points = torch.tensor([[1.0, 4.0], [2.0, 1.0], [3.0, 5.0]])
print(points)

points_t = points.t()
print(points_t)

# Verify that the two tensors share one storage.
# Compare the address of the underlying buffer, not id(...) of the storage
# objects: the two .storage() results are temporaries, so their id() values can
# coincide just because the first object was freed before the second was built.
# That comparison says nothing about the memory being wrapped.
print(points.storage().data_ptr() == points_t.storage().data_ptr())
print(points.storage())
print(points_t.storage())
# Same storage, but the strides differ.
print(points.stride(), points_t.stride())

tensor([[1., 4.],
        [2., 1.],
        [3., 5.]])
tensor([[1., 2., 3.],
        [4., 1., 5.]])
True
 1.0
 4.0
 2.0
 1.0
 3.0
 5.0
[torch.FloatStorage of size 6]
 1.0
 4.0
 2.0
 1.0
 3.0
 5.0
[torch.FloatStorage of size 6]
(2, 1) (1, 2)


In [53]:
# `points_t` is the transposed tensor created in the previous cell.
print(points_t)
# Obtain a new contiguous tensor from a non-contiguous one with the contiguous() method.
# The content of the tensor stays the same, but the stride changes, as does the storage.
points_t_cont = points_t.contiguous() 
print(points_t_cont)
print(points_t_cont.stride())

# The storage has been reshuffled so the elements are laid out row by row.
print(points_t_cont.storage())

tensor([[1., 2., 3.],
        [4., 1., 5.]])
tensor([[1., 2., 3.],
        [4., 1., 5.]])
(3, 1)
 1.0
 2.0
 3.0
 4.0
 1.0
 5.0
[torch.FloatStorage of size 6]


# 2.4 Numeric types
![figure](pictures/dtypes.png)

In [61]:
# Option 1: choose the dtype when the tensor is constructed.
double_points = torch.ones(10, 2, dtype=torch.double)
short_points = torch.tensor([[1, 2], [3, 4]], dtype=torch.short)

print(double_points.dtype)
print(short_points.dtype)

# Option 2: cast an existing tensor with the dtype-named method.
double_points = torch.zeros(10, 2).double()
short_points = torch.ones(10, 2).short()

# Printed here as well so a re-run reproduces all four dtype lines of the
# recorded output (the cell previously printed only the first pair).
print(double_points.dtype)
print(short_points.dtype)

# Option 3: convert with the more general to() method.
double_points = torch.zeros(10, 2).to(torch.double)
short_points = torch.ones(10, 2).to(dtype=torch.short)

torch.float64
torch.int16
torch.float64
torch.int16


# 2.5 Indexing tensors

# 2.6 Numpy interoperability

In [64]:
points = torch.ones((3, 4))

# Tensor -> NumPy array (the tensor lives in CPU memory).
points_np = points.numpy()
print(points_np)

# NumPy array -> tensor.
points = torch.from_numpy(points_np)
print(points)

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


# 2.7 Serializing tensors

PyTorch uses pickle under the hood to serialize the tensor object, as well as dedicated serialization code for the storage. 

In [None]:
# Save the `points` tensor. torch.save is called twice to show both accepted
# targets: a file path, and an already opened binary file object.
# NOTE(review): assumes the ../data/p1ch3/ directory already exists — confirm.
torch.save(points, '../data/p1ch3/ourpoints.t')
with open('../data/p1ch3/ourpoints.t','wb') as f:
    torch.save(points, f)
    
# Read the tensor back, again once by path and once through a file object.
# The file is unpickled on load, so only load files from a trusted source.
points = torch.load('../data/p1ch3/ourpoints.t')
with open('../data/p1ch3/ourpoints.t','rb') as f:
    points = torch.load(f)



In [None]:
# h5py: save the points tensor by converting it to a NumPy array.
# h5py is used below but not imported in any cell visible in this notebook.
import h5py

# save
# The `with` block closes the file even if create_dataset raises.
with h5py.File('../data/p1ch3/ourpoints.hdf5', 'w') as f:
    dset = f.create_dataset('coords', data=points.numpy())

# read
# The two statements were previously fused on one line (a syntax error), and the
# read handle was never closed.
with h5py.File('../data/p1ch3/ourpoints.hdf5', 'r') as f:
    dset = f['coords']
    # Slice while the file is still open; the slice is read into an in-memory
    # NumPy array, so `last_points` remains usable after the file is closed.
    last_points = dset[1:]

# Moving tensors to GPU

In [None]:
# Create a tensor directly on the GPU through the constructor's device argument.
points_gpu = torch.tensor(
    [[1.0, 4.0], [2.0, 1.0], [3.0, 4.0]],
    device='cuda',
)

# Copy the existing `points` tensor (defined in an earlier cell) to the GPU.
points_gpu = points.to(device='cuda')
# The same, naming the device by index.
points_gpu = points.to(device='cuda:0')

In [None]:
# NOTE: this rebinds `points` to twice its previous value, so every re-run of
# the cell doubles it again. `points` comes from an earlier cell.
points = 2 * points
# Move to the GPU and multiply; the product is bound to points_gpu.
points_gpu = 2 * points.to(device='cuda')
# Adding to points_gpu produces a new tensor that replaces the old binding.
points_gpu = points_gpu + 4
# Bring the result back with to(device='cpu').
points_cpu = points_gpu.to(device='cpu')

# Shorthand methods for the same transfers as the to(device=...) calls above.
points_gpu = points.cuda()
# NOTE(review): the argument 0 presumably selects GPU index 0 — confirm.
points_gpu = points.cuda(0)
points_cpu = points_gpu.cpu()