In [1]:
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tensors: Multidimensional arrays
# From Python lists to PyTorch tensors
# Let’s see list indexing in action so we can compare it to tensor indexing. Take a list
# of three numbers in Python
a = [1.0, 2.0, 3.0]
# We can access the first element of the list using the corresponding zero-based index:
a[0]

1.0

In [3]:
# Let’s construct our first PyTorch tensor and see what it looks like. It won’t be a particularly meaningful tensor for now, just three ones in a column:
a = torch.ones(3)
a

tensor([1., 1., 1.])

In [4]:
a[2]

tensor(1.)

In [5]:
float(a[1])

1.0

In [6]:
a[2] = 3.0
a

tensor([1., 1., 3.])

In [7]:
# Although on the surface this example doesn’t differ
# much from a list of number objects, under the hood things are completely different.
# Python lists or tuples of numbers are collections of Python objects that are individually
# allocated in memory. PyTorch tensors or NumPy
# arrays, on the other hand, are views over (typically) contiguous memory blocks containing unboxed C numeric types rather than Python objects. 

In [8]:
# Say we have a list of coordinates we’d like to use to represent a geometrical object: perhaps a 2D triangle with vertices at coordinates (4, 1), (5, 3), and (2, 1). 
# Instead of having coordinates as numbers in a Python list, as we did earlier, we can use a one-dimensional tensor by storing Xs in the even indices 
# and Ys in the odd indices,
# like this:
points = torch.zeros(6)
points

tensor([0., 0., 0., 0., 0., 0.])

In [9]:
# Fill the pre-allocated tensor in place with the flattened triangle coordinates
# (x0, y0, x1, y1, x2, y2) using a single slice assignment.
points[:] = torch.tensor([4.0, 1.0, 5.0, 3.0, 2.0, 1.0])

In [10]:
points

tensor([4., 1., 5., 3., 2., 1.])

In [11]:
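# We can also pass a Python list to the constructor. Note that the integer literals used here
# produce an int64 tensor; write the values as floats (4.0, 1.0, ...) to get the same float32 tensor as above: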
points = torch.tensor([4, 1, 5, 3, 2, 1])
points

tensor([4, 1, 5, 3, 2, 1])

In [12]:
# To get the coordinates of the first point, we do the following:
float(points[0]), float(points[1])

(4.0, 1.0)

In [13]:
# This is OK, although it would be practical to have the first index refer to individual 2D
# points rather than point coordinates. For this, we can use a 2D tensor:
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
points

tensor([[4., 1.],
        [5., 3.],
        [2., 1.]])

In [14]:
points[0]

tensor([4., 1.])

In [15]:
# Here, we pass a list of lists to the constructor. We can ask the tensor about its shape:
points.shape

torch.Size([3, 2])

In [16]:
# we can access an individual element in the tensor using two indices:
points[0, 0], points[1,1], points[2, 0]

(tensor(4.), tensor(3.), tensor(2.))

In [17]:
# Indexing tensors
# What if we need to obtain a tensor containing all points but the first? Range
# indexing notation handles that, and it works on standard Python lists too.
# As a reminder, start from a list holding the integers 0 through 5:
somelist = [*range(6)]
somelist

[0, 1, 2, 3, 4, 5]

In [18]:
somelist[1:]

[1, 2, 3, 4, 5]

In [19]:
somelist[1:4:2]   # From element 1 inclusive to element 4 exclusive, in steps of 2

[1, 3]

In [20]:
# we can use the same notation for PyTorch tensors, with the added
# benefit that, just as in NumPy and other Python scientific libraries, we can use range
# indexing for each of the tensor’s dimensions:
points

tensor([[4., 1.],
        [5., 3.],
        [2., 1.]])

In [21]:
points[1:, 0] # all rows after the first, first column

tensor([5., 2.])

In [22]:
points[None]  # add a dimension of size 1 - just like unsqueeze

tensor([[[4., 1.],
         [5., 3.],
         [2., 1.]]])

In [23]:
# Named tensors
# The dimensions (or axes) of our tensors usually index something like pixel locations
# or color channels. This means when we want to index into a tensor, we need to
# remember the ordering of the dimensions and write our indexing accordingly
# imagine that we have a 3D tensor like img_t from section
# 2.1.4 (we will use dummy data for simplicity here), and we want to convert it to grayscale
img_t = torch.randn(3, 5, 5)   # shape [channels, rows, columns]
img_t

tensor([[[-0.3548,  1.1896, -1.1210, -0.5517,  0.9556],
         [ 1.0760, -1.6869,  0.2269,  0.4788,  0.4645],
         [ 0.8012,  0.5099,  0.5212,  0.4574,  0.1634],
         [ 0.5559,  0.1012,  0.0905,  1.7435,  2.0226],
         [-1.0296, -0.2199, -0.4711, -0.2198,  2.6266]],

        [[-0.7974, -1.3237,  1.6206,  0.1641,  0.9851],
         [ 0.0720,  0.1864,  1.6281,  0.5755,  1.5257],
         [ 0.3105, -0.4134,  0.5031,  0.5214, -0.6756],
         [ 1.7749,  2.1326, -0.2099, -1.2695,  0.8215],
         [-0.7103, -2.3042, -1.1936,  2.2267, -0.6394]],

        [[-0.4040, -0.1682, -0.1198, -1.8118,  2.7900],
         [ 1.1330, -0.5533,  0.7714, -1.3545, -0.5088],
         [ 2.4578,  1.5919, -0.5967,  0.6212,  0.2458],
         [-0.0823,  0.1477, -1.1103, -0.1843, -0.4339],
         [-1.8364,  0.4226,  0.2673,  0.0290, -0.6807]]])

In [24]:
batch_t = torch.randn(2, 3, 5, 5) # shape [batch, channels, rows, columns]
batch_t

tensor([[[[ 1.0356, -1.0156, -1.6433, -0.2836,  1.7230],
          [ 1.2525,  0.4818, -0.4201, -0.1181, -0.2859],
          [ 1.8440,  1.8716, -1.3844, -1.1459, -0.7177],
          [-1.1183,  0.0586, -0.5382, -0.7843, -0.2889],
          [-0.5446,  0.1936,  0.7380,  1.1355,  0.8134]],

         [[ 0.3302,  0.1876,  0.0488,  1.1624, -1.0594],
          [ 0.1395, -0.0300, -0.6096,  0.5992, -1.7151],
          [ 0.1968, -0.4802,  0.8737,  1.0496, -1.1073],
          [ 0.4446,  0.4609, -0.5842, -2.2817,  0.0064],
          [-2.4679, -1.0416, -0.6903, -0.0493,  0.7675]],

         [[-0.2497, -0.1667, -2.2239, -0.5858, -0.1180],
          [ 0.1467, -1.1797, -0.6866, -0.4683,  0.2237],
          [-1.3110,  0.0627, -0.7885,  1.3973,  0.5915],
          [ 0.5110,  2.8850,  1.3243, -1.1376, -0.9300],
          [ 0.8158, -1.9224, -0.2683, -0.8228,  0.0860]]],


        [[[ 2.2045, -0.7154,  0.2302, -1.2521, -1.0561],
          [ 0.1737,  0.6896,  1.1324,  0.8446,  2.5281],
          [-1.0914,  0.

In [25]:
# So sometimes the RGB channels are in dimension 0, and sometimes they are in dimension 1. But we can generalize by counting from the end:
# they are always in dimension –3, the third from the end.

In [26]:
# The lazy, unweighted mean can thus be written as follows:
img_gray_naive = img_t.mean(-3)
batch_gray_naive = batch_t.mean(-3)
img_gray_naive.shape, batch_gray_naive.shape

(torch.Size([5, 5]), torch.Size([2, 5, 5]))

In [27]:
weights = torch.tensor([0.2126, 0.7152, 0.0722])
weights.shape

torch.Size([3])

In [28]:
# Append two trailing singleton dimensions so the per-channel weights get shape
# [3, 1, 1] and can broadcast against [..., channels, rows, columns] tensors.
unsqueeze_weights = weights[:, None, None]

In [29]:
# Scale every channel by its weight (broadcast over rows and columns), then
# collapse the channel dimension (third from the end) to get the weighted grayscale.
img_weights = torch.mul(img_t, unsqueeze_weights)
img_gray_weighted = torch.sum(img_weights, dim=-3)

# Same computation for the batch; the leading batch dimension is left untouched.
batch_weights = torch.mul(batch_t, unsqueeze_weights)
batch_gray_weighted = torch.sum(batch_weights, dim=-3)

In [30]:
batch_weights.shape, batch_t.shape, unsqueeze_weights.shape

(torch.Size([2, 3, 5, 5]), torch.Size([2, 3, 5, 5]), torch.Size([3, 1, 1]))

In [31]:
# Because this gets messy quickly—and for the sake of efficiency—the PyTorch function
# einsum (adapted from NumPy) specifies an indexing mini-language giving index
# names to dimensions for sums of such products. As often in Python, broadcasting—a
# form of summarizing unnamed things—is done using three dots '...'; but don’t worry
# too much about einsum, because we will not use it in the following.
img_gray_weighted_fancy = torch.einsum('...chw,c->...hw', img_t, weights)
batch_gray_weighted_fancy = torch.einsum('...chw,c->...hw', batch_t, weights)
batch_gray_weighted_fancy.shape

torch.Size([2, 5, 5])

In [32]:
# This is error-prone, especially when the locations where tensors are created and used are far apart in our code.
# PyTorch 1.3 added named tensors as an experimental feature (see
# https://pytorch.org/tutorials/intermediate/named_tensor_tutorial.html and
# https://pytorch.org/docs/stable/named_tensor.html). Tensor factory functions such as tensor and rand
# take a names argument. The names should be a sequence of strings:
weighted_name = torch.tensor([0.2126, 0.7152, 0.0722], names=['channels'])
weighted_name

  weighted_name = torch.tensor([0.2126, 0.7152, 0.0722], names=['channels'])


tensor([0.2126, 0.7152, 0.0722], names=('channels',))

In [33]:
# When we already have a tensor and want to add names (but not change existing
# ones), we can call the method refine_names on it. Similar to indexing, the ellipsis (…)
# allows you to leave out any number of dimensions
img_named = img_t.refine_names(..., 'channels', 'rows', 'columns')
batch_names = batch_t.refine_names(..., 'channels', 'rows', 'columns')
print('img named: ', img_named.shape, img_named.names)
print("batch named: ", batch_names.shape, batch_names.names)

img named:  torch.Size([3, 5, 5]) ('channels', 'rows', 'columns')
batch named:  torch.Size([2, 3, 5, 5]) (None, 'channels', 'rows', 'columns')


In [34]:
weighted_name.shape, 

(torch.Size([3]),)

In [35]:
# For operations with two inputs, in addition to the usual dimension checks—whether
# sizes are the same, or if one is 1 and can be broadcast to the other—PyTorch will now
# check the names for us. So far, it does not automatically align dimensions, so we need
# to do this explicitly. The method align_as returns a tensor with missing dimensions
# added and existing ones permuted to the right order:
weights_aligned = weighted_name.align_as(img_named)
weights_aligned.shape, weights_aligned.names

(torch.Size([3, 1, 1]), ('channels', 'rows', 'columns'))

In [36]:
# Functions accepting dimension arguments, like sum, also take named dimensions:
gray_named = (img_named * weights_aligned).sum('channels')
gray_named

tensor([[-0.6749, -0.7059,  0.9121, -0.1308,  1.1092],
        [ 0.3620, -0.2653,  1.2684,  0.4156,  1.1532],
        [ 0.5699, -0.0723,  0.4275,  0.5150, -0.4307],
        [ 1.3816,  1.5574, -0.2110, -0.5506,  0.9862],
        [-0.8595, -1.6642, -0.9345,  1.5479,  0.0520]],
       names=('rows', 'columns'))

In [37]:
gray_named.shape

torch.Size([5, 5])

In [38]:
img_named

tensor([[[-0.3548,  1.1896, -1.1210, -0.5517,  0.9556],
         [ 1.0760, -1.6869,  0.2269,  0.4788,  0.4645],
         [ 0.8012,  0.5099,  0.5212,  0.4574,  0.1634],
         [ 0.5559,  0.1012,  0.0905,  1.7435,  2.0226],
         [-1.0296, -0.2199, -0.4711, -0.2198,  2.6266]],

        [[-0.7974, -1.3237,  1.6206,  0.1641,  0.9851],
         [ 0.0720,  0.1864,  1.6281,  0.5755,  1.5257],
         [ 0.3105, -0.4134,  0.5031,  0.5214, -0.6756],
         [ 1.7749,  2.1326, -0.2099, -1.2695,  0.8215],
         [-0.7103, -2.3042, -1.1936,  2.2267, -0.6394]],

        [[-0.4040, -0.1682, -0.1198, -1.8118,  2.7900],
         [ 1.1330, -0.5533,  0.7714, -1.3545, -0.5088],
         [ 2.4578,  1.5919, -0.5967,  0.6212,  0.2458],
         [-0.0823,  0.1477, -1.1103, -0.1843, -0.4339],
         [-1.8364,  0.4226,  0.2673,  0.0290, -0.6807]]],
       names=('channels', 'rows', 'columns'))

In [39]:
img_named.shape

torch.Size([3, 5, 5])

In [40]:
img_named[..., :3]

tensor([[[-0.3548,  1.1896, -1.1210],
         [ 1.0760, -1.6869,  0.2269],
         [ 0.8012,  0.5099,  0.5212],
         [ 0.5559,  0.1012,  0.0905],
         [-1.0296, -0.2199, -0.4711]],

        [[-0.7974, -1.3237,  1.6206],
         [ 0.0720,  0.1864,  1.6281],
         [ 0.3105, -0.4134,  0.5031],
         [ 1.7749,  2.1326, -0.2099],
         [-0.7103, -2.3042, -1.1936]],

        [[-0.4040, -0.1682, -0.1198],
         [ 1.1330, -0.5533,  0.7714],
         [ 2.4578,  1.5919, -0.5967],
         [-0.0823,  0.1477, -1.1103],
         [-1.8364,  0.4226,  0.2673]]], names=('channels', 'rows', 'columns'))

In [41]:
weighted_name

tensor([0.2126, 0.7152, 0.0722], names=('channels',))

In [42]:
# If we try to combine dimensions with different names, we get an error.
# weighted_name has not been aligned to img_named (compare align_as earlier), so
# broadcasting from the right pairs its 'channels' dim with 'columns'.
# The failure is the point of this cell, so catch and display it: an uncaught
# exception would stop "Restart Kernel -> Run All" at this cell. The expression is
# deliberately not assigned, so the gray_named computed earlier stays intact.
try:
    (img_named * weighted_name).sum('channels')
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: Error when attempting to broadcast dims ['channels', 'rows', 'columns'] and dims ['channels']: dim 'columns' and dim 'channels' are at the same position from the right but do not match.

In [None]:
gray_named

In [43]:
# If we want to use tensors outside functions that operate on named tensors, we need to
# drop the names by renaming them to None. The following gets us back into the world
# of unnamed dimensions
gray_plain = gray_named.rename(None)
gray_plain.shape, gray_plain.names

(torch.Size([5, 5]), (None, None))

In [44]:
# Managing a tensor’s dtype attribute
# In order to allocate a tensor of the right numeric type, we can specify the proper
# dtype as an argument to the constructor. For example:
double_points = torch.ones(10, 2, dtype=torch.double)

In [45]:
double_points.dtype

torch.float64

In [46]:
short_points = torch.tensor([[1,2], [3,4]], dtype=torch.short)
short_points.dtype

torch.int16

In [47]:
# We can also cast the output of a tensor creation function to the right type using the
# corresponding casting method, such as
double_points = torch.zeros(10,2).double()
double_points

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=torch.float64)

In [48]:
short_points = torch.zeros(10,5).short()
short_points

tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]], dtype=torch.int16)

In [49]:
# or the more convenient `to` method:
double_points = torch.zeros(10,2).to(torch.double)
double_points

tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=torch.float64)

In [50]:
short_points = torch.zeros(10,5).to(torch.short)
short_points

tensor([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]], dtype=torch.int16)

In [51]:
# When mixing input types in operations, the inputs are converted to the larger type
# automatically. Thus, if we want 32-bit computation, we need to make sure all our
# inputs are (at most) 32-bit:
points_64 = torch.rand(5).double()
points_64

tensor([0.9175, 0.4239, 0.6690, 0.5684, 0.1272], dtype=torch.float64)

In [52]:
points_short = points_64.short()
points_short

tensor([0, 0, 0, 0, 0], dtype=torch.int16)

In [53]:
points_64 * points_short

tensor([0., 0., 0., 0., 0.], dtype=torch.float64)

In [54]:
# The tensor API

# the vast majority of operations on and between tensors are available in the
# torch module and can also be called as methods of a tensor object. For instance, the
# transpose function we encountered earlier can be used from the torch module
a = torch.rand(3,2)
a

tensor([[0.4221, 0.2350],
        [0.3681, 0.7925],
        [0.5724, 0.1722]])

In [55]:
a_t = torch.transpose(a, 0, 1)
a_t

tensor([[0.4221, 0.3681, 0.5724],
        [0.2350, 0.7925, 0.1722]])

In [56]:
# or as a method of the a tensor:
a = torch.rand(3,2).double()
a

tensor([[0.2951, 0.2059],
        [0.8454, 0.0831],
        [0.7378, 0.4983]], dtype=torch.float64)

In [57]:
a_t = a.transpose(0,1)
a_t

tensor([[0.2951, 0.8454, 0.7378],
        [0.2059, 0.0831, 0.4983]], dtype=torch.float64)

In [58]:
# Tensors: Scenic views of storage
# Indexing into storage
# Let’s see how indexing into the storage works in practice with our 2D points. The storage for a given tensor is accessible using the 
# .storage property:
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
points

tensor([[4., 1.],
        [5., 3.],
        [2., 1.]])

In [59]:
points.storage()

 4.0
 1.0
 5.0
 3.0
 2.0
 1.0
[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 6]

In [60]:
points[0][1]

tensor(1.)

In [61]:
# Even though the tensor reports itself as having three rows and two columns, the storage under the hood is a contiguous array of size 6. In this sense, the tensor just knows
# how to translate a pair of indices into a location in the storage.
# We can also index into a storage manually. For instance:
points_storage = points.storage()
points_storage[-1]

1.0

In [62]:
points.storage()[2]

5.0

In [63]:
# We can’t index a storage of a 2D tensor using two indices. The layout of a storage is
# always one-dimensional, regardless of the dimensionality of any and all tensors that
# might refer to it.
# Writing through the storage also changes the tensor that refers to it: after the
# assignment below, points[0, 0] reads 2.0 as well.
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
points_storage = points.storage()
points_storage[0] = 2  # overwrite the first storage element (previously 4.0)
points_storage, points

( 2.0
  1.0
  5.0
  3.0
  2.0
  1.0
 [torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 6],
 tensor([[2., 1.],
         [5., 3.],
         [2., 1.]]))

In [64]:
# a small
# number of operations exist only as methods of the Tensor object. They are recognizable from a trailing underscore in their name, like zero_, 
# which indicates that the method operates in place by modifying the input instead of creating a new output tensor
# and returning it. For instance, the zero_ method zeros out all the elements of the input.
# Any method without the trailing underscore leaves the source tensor unchanged and
# instead returns a new tensor:
a = torch.ones(3,2)
a

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])

In [65]:
a.zero_()
a

tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])

In [66]:
# Tensor metadata: Size, offset, and stride
# The storage offset is the index in the storage corresponding to the first element in the tensor.
# The stride is the number of elements in the storage that need to be skipped over to
# obtain the next element along each dimension
# Views of another tensor’s storage
# We can get the second point in the tensor by providing the corresponding index:
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
points

tensor([[4., 1.],
        [5., 3.],
        [2., 1.]])

In [67]:
second_point = points[1]
second_point.storage_offset()

2

In [68]:
# The resulting tensor has offset 2 in the storage (since we need to skip the first point,
# which has two items)

In [69]:
# size is an instance of the Size class containing one element, since the tensor is one-dimensional. It’s important to note that this is the
# same information contained in the shape property of tensor objects
second_point.size(), second_point.shape

(torch.Size([2]), torch.Size([2]))

In [70]:
points.stride()

(2, 1)

In [71]:
# The stride is a tuple indicating the number of elements in the storage that have to be
# skipped when the index is increased by 1 in each dimension. For instance, our points
# tensor has a stride of (2, 1)
points.stride()

(2, 1)

In [72]:
# here if we print the points:
points

tensor([[4., 1.],
        [5., 3.],
        [2., 1.]])

In [73]:
# We can see that to go from 4 to 5 (from the first row to the second) we have to advance 2 elements in the storage; that is why the first stride is 2.
# The second stride is column-wise: to go from column 1 to column 2 we advance just 1 element (the neighbouring one) in the storage; that is why it is 1.

In [74]:
# Accessing an element i, j in a 2D tensor results in accessing the storage_offset +
# stride[0] * i + stride[1] * j element in the storage. The offset will usually be
# zero; if this tensor is a view of a storage created to hold a larger tensor, the offset might
# be a positive value.
# This indirection between Tensor and Storage makes some operations inexpensive, like transposing a tensor or extracting a subtensor, because they do not lead to
# memory reallocations. Instead, they consist of allocating a new Tensor object with a
# different value for size, storage offset, or stride.
# We already extracted a subtensor when we indexed a specific point and saw the
# storage offset increasing. Let’s see what happens to the size and stride as well:

In [75]:
second_point = points[1]
second_point.size()

torch.Size([2])

In [77]:
second_point.storage_offset()

2

In [78]:
second_point.stride()

(1,)

In [79]:
# The bottom line is that the subtensor has one less dimension, as we would expect,
# while still indexing the same storage as the original points tensor. This also means
# changing the subtensor will have a side effect on the original tensor:
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
second_point = points[1]
second_point

tensor([5., 3.])

In [80]:
second_point[0] = 10.0
second_point

tensor([10.,  3.])

In [81]:
points

tensor([[ 4.,  1.],
        [10.,  3.],
        [ 2.,  1.]])

In [82]:
# This might not always be desirable, so we can eventually clone the subtensor into a
# new tensor:
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
second_point = points[1].clone()
second_point

tensor([5., 3.])

In [83]:
second_point[0] = 10
second_point

tensor([10.,  3.])

In [84]:
points

tensor([[4., 1.],
        [5., 3.],
        [2., 1.]])

In [85]:
# Transposing without copying
# Let’s take our points tensor, which has individual points in
# the rows and X and Y coordinates in the columns, and turn it around so that individual points are in the columns. We take this opportunity to introduce the t function, a
# shorthand alternative to transpose for two-dimensional tensors:

In [9]:
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
points

tensor([[4., 1.],
        [5., 3.],
        [2., 1.]])

In [89]:
# taking the transpose and storing it in a variable
points_t = points.t()
points_t

tensor([[4., 5., 2.],
        [1., 3., 1.]])

In [92]:
# We can easily verify that the two tensors share the same storage.
# Compare the address of the underlying data buffer instead of using id(): id()
# only identifies the temporary Python wrapper objects returned by each .storage()
# call, so equal ids do not prove that the memory is shared.
points.storage().data_ptr(), points_t.storage().data_ptr()

(2558251755472, 2558251755472)

In [95]:
# they differ only in shape and stride:
points.stride(), points_t.stride()

((2, 1), (1, 2))

In [98]:
points[1, 0]

tensor(5.)

In [103]:
# Transposing in higher dimensions
# Transposing in PyTorch is not limited to matrices. We can transpose a multidimensional array 
# by specifying the two dimensions along which transposing (flipping shape
# and stride) should occur:
some_t = torch.ones(3,4,5)
some_t

tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]])

In [104]:
some_t.shape

torch.Size([3, 4, 5])

In [107]:
transpose_t = some_t.transpose(0,1)
transpose_t

tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]])

In [108]:
transpose_t.shape

torch.Size([4, 3, 5])

In [109]:
some_t.stride(), transpose_t.stride()

((20, 5, 1), (5, 20, 1))

In [110]:
# Contiguous tensors
# Some tensor operations in PyTorch only work on contiguous tensors, such as view,
# which we’ll encounter in the next chapter. In that case, PyTorch will throw an informative exception 
# and require us to call contiguous explicitly. It’s worth noting that
# calling contiguous will do nothing (and will not hurt performance) if the tensor is
# already contiguous.
# In our case, points is contiguous, while its transpose is not:
points.is_contiguous(), points_t.is_contiguous()

(True, False)

In [111]:
# A non-contiguous tensor can be turned into a new contiguous one with the
# contiguous method: the content stays the same, while the stride and the
# storage change.
# First rebuild points and its transpose (a view with the two dimensions swapped):
points = torch.tensor([[4.0, 1.0], [5.0, 3.0], [2.0, 1.0]])
points_t = points.transpose(0, 1)
points, points_t

(tensor([[4., 1.],
         [5., 3.],
         [2., 1.]]),
 tensor([[4., 5., 2.],
         [1., 3., 1.]]))

In [117]:
points_t.storage(), points.storage(), points.stride(), points_t.stride()

( 4.0
  1.0
  5.0
  3.0
  2.0
  1.0
 [torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 6],
  4.0
  1.0
  5.0
  3.0
  2.0
  1.0
 [torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 6],
 (2, 1),
 (1, 2))

In [118]:
points_t_cont = points_t.contiguous()
points_t_cont

tensor([[4., 5., 2.],
        [1., 3., 1.]])

In [119]:
points_t_cont.stride()

(3, 1)

In [120]:
points_t_cont.storage()

 4.0
 5.0
 2.0
 1.0
 3.0
 1.0
[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 6]

In [8]:
# Moving tensors to the GPU
# PyTorch tensors also can be stored on a different kind of processor: a graphics
# processing unit (GPU). Every PyTorch tensor can be transferred to (one of) the
# GPU(s) in order to perform massively parallel, fast computations. All 
# operations that will be performed on the tensor will be carried out using 
# GPU-specific routines that come with PyTorch.
# Managing a tensor’s device attribute
# In addition to dtype, a PyTorch Tensor also has the notion of device, which is
# where on the computer the tensor data is placed. Here is how we can create a 
# tensor on the GPU by specifying the corresponding argument to the constructor:
# points_gpu = torch.tensor([[4, 1], [5, 3], [2, 1]], device='cuda').float()
# points_gpu      # here the cuda is not installed, it will throw error

In [11]:
# we can also create a copy of a tensor created on cpu on gpu
# points_gpu = points.to(device='cuda')

In [12]:
# If our machine has more than one GPU, we can also decide on which GPU we 
# allocate the tensor by passing a zero-based integer identifying the GPU on 
# the machine, such as
# points_gpu = points.to(device='cuda:0') # or 1, 2, 3 -- 


In [16]:
# We can also use the shorthand methods cpu and cuda instead of the to method to
# achieve the same goal:
# points_gpu = points.cuda() # defaults to GPU index 0
# points_gpu = points.cuda(0) # explicitly selects GPU index 0
# points_cpu = points_gpu.cpu()

In [17]:
# It’s also worth mentioning that by using the to method, we can change the 
# placement and the data type simultaneously by providing both device and 
# dtype as arguments.

In [23]:
# NumPy interoperability
# PyTorch tensors can be converted to NumPy arrays and vice versa very efficiently.
# To get a NumPy array out of our points tensor, we just call
points = torch.ones(3,4)
points_np = points.numpy()
points_np

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]], dtype=float32)

In [21]:
# the returned array shares the same underlying buffer with the tensor storage. 
# This means the numpy method can be effectively executed at
# basically no cost, as long as the data sits in CPU RAM.
# It also means modifying the NumPy array will lead to a change in the 
# originating tensor
# Conversely, we can obtain a PyTorch tensor from a NumPy array this way
points = torch.from_numpy(points_np)
points

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [24]:
# While the default numeric type in PyTorch is 32-bit floating-point, for
# NumPy it is 64-bit. As discussed in section 3.5.2, we usually want to use 32-bit
# floating-points, so we need to make sure we have tensors of dtype
# torch.float after converting.

In [32]:
# exercises
# Build a 1D tensor holding the integers 0..8 and inspect its stride,
# storage offset and size.
a = torch.arange(9)
a.stride(), a.storage_offset(), a.size()

((1,), 0, torch.Size([9]))

In [38]:
id(a)

2417431613488

In [34]:
b = a.view(3,3)
b

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [35]:
b.stride(), b.storage_offset(), b.size()

((3, 1), 0, torch.Size([3, 3]))

In [37]:
id(b)

2417313429232

In [46]:
c = b[1:, 1:]
c

tensor([[4, 5],
        [7, 8]])

In [44]:
c.stride(), c.storage_offset(), c.size()

((3, 1), 4, torch.Size([2, 2]))

In [47]:
c.storage()

 0
 1
 2
 3
 4
 5
 6
 7
 8
[torch.storage._TypedStorage(dtype=torch.int64, device=cpu) of size 9]

In [52]:
d = torch.sqrt(a)
d

tensor([0.0000, 1.0000, 1.4142, 1.7321, 2.0000, 2.2361, 2.4495, 2.6458, 2.8284])