In [None]:
# In this notebook, you learn:
#
# 1) What is the stride of a tensor?
# 2) How to access underlying tensor storage in PyTorch?
# 3) What is the difference between tensor.data_ptr() and tensor.storage().data_ptr()?

In [23]:
import torch
import ctypes

In [24]:
# Resources to go through before continuing further in this notebook:
#
# 1) https://martinlwx.github.io/en/how-to-reprensent-a-tensor-or-ndarray/
#       -- This blog post explains how a tensor is represented in memory and how its values are accessed.
# 2) https://discuss.pytorch.org/t/contigious-vs-non-contigious-tensor/30107/2
#      -- This forum post explains what a contiguous tensor is and how to make a tensor contiguous.

## [torch.tensor](https://pytorch.org/docs/stable/tensors.html#torch-tensor)

In [39]:
# A tensor of shape (2, 2, 3, 2) holding the integers 0..23 in order.
t1 = torch.tensor(
    data=[
        [
            [[0, 1], [2, 3], [4, 5]],
            [[6, 7], [8, 9], [10, 11]],
        ],
        [
            [[12, 13], [14, 15], [16, 17]],
            [[18, 19], [20, 21], [22, 23]],
        ],
    ],
    dtype=torch.int64,
)
print(t1)
print("shape: ", t1.shape)
# The stride is a tuple with one integer per dimension. Each integer is the
# number of storage elements to skip in order to reach the next element along
# that dimension.
#
# Start at the element 0 and take one step along each dimension in turn:
#   dim 0 -> the next element is 12, so 12 storage elements are skipped.
#   dim 1 -> the next element is 6,  so 6 storage elements are skipped.
#   dim 2 -> the next element is 2,  so 2 storage elements are skipped.
#   dim 3 -> the next element is 1,  so 1 storage element is skipped.
#
# Putting these together, the stride is (12, 6, 2, 1).
print("stride: ", t1.stride())

tensor([[[[ 0,  1],
          [ 2,  3],
          [ 4,  5]],

         [[ 6,  7],
          [ 8,  9],
          [10, 11]]],


        [[[12, 13],
          [14, 15],
          [16, 17]],

         [[18, 19],
          [20, 21],
          [22, 23]]]])
shape:  torch.Size([2, 2, 3, 2])
stride:  (12, 6, 2, 1)


In [40]:
# A tensor is not itself a multi-dimensional array, even though it is commonly
# thought of as one. It is a *view* of a storage, and a storage is a contiguous
# block of memory.
#
# Think of the tensor as a small object that records how the data in the
# storage should be interpreted. One storage can therefore be interpreted in
# several ways, i.e. be presented through several different tensors.
#
# Example: the 12-element array [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] can be
# viewed as a tensor of shape (2, 6) or as a tensor of shape (3, 4).
#
# The flip side: when two tensors share a storage, an operation on one of them
# affects the other. An example of this is shown further below.
t2 = torch.tensor(
    data=[
        [0, 1, 2, 3],
        [4, 5, 6, 7],
        [8, 9, 10, 11],
    ],
    dtype=torch.int64,
)
print(t2)
print("shape: ", t2.shape)
print("stride: ", t2.stride())
# is_contiguous() reports whether the tensor's elements are laid out
# sequentially inside the storage.
# Note: contiguous (True) means the elements sit one after another in the
# storage; non-contiguous means they do not. This says nothing about whether
# the storage itself is one continuous block of memory. The working assumption
# here is that the storage is never fragmented and is always a single
# continuous block.
print("is_contiguous: ", t2.is_contiguous())
print("storage: ", t2.storage())

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
shape:  torch.Size([3, 4])
stride:  (4, 1)
is_contiguous:  True
storage:   0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
[torch.storage.TypedStorage(dtype=torch.int64, device=cpu) of size 12]


In [41]:
# When does a tensor become non-contiguous? Typically after operations such as
# slicing or transposing. Here is an example using a transpose.
#
# This is the ordinary matrix transpose: rows become columns and columns
# become rows. The storage underneath is left untouched; only the view of it
# changes, which shows up as a different stride on the resulting tensor.
t3 = torch.transpose(t2, 0, 1)
print(t3)
print("shape: ", t3.shape)
print("stride: ", t3.stride())
# A transpose often breaks contiguity.
# (The label below is spelled 'is_continuous'; the value printed is the result
# of t3.is_contiguous().)
print("is_continuous: ", t3.is_contiguous())
print("storage: ", t3.storage())

tensor([[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]])
shape:  torch.Size([4, 3])
stride:  (1, 4)
is_continuous:  False
storage:   0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
[torch.storage.TypedStorage(dtype=torch.int64, device=cpu) of size 12]


In [42]:
# t2 and t3 are backed by the same storage, because the transposed tensor t3
# is only a view of the original tensor t2. Nothing has to be rearranged in
# memory to get t3 from t2: a different set of strides over the same storage
# is enough to produce a different view.
# How 'view' and 'reshape' work is covered in a separate notebook.
#
# tensor.storage().data_ptr() gives the memory address of the first element of
# the underlying storage, so equal addresses mean the storage is shared.
shares_storage = t2.storage().data_ptr() == t3.storage().data_ptr()
print(
    "Both t2 and t3 share the same storage."
    if shares_storage
    else "Both t2 and t3 do not share the same storage."
)

Both t2 and t3 share the same storage.


In [43]:
# Back to the original 4-D tensor t1, to inspect a few of its properties and
# to get hold of its underlying storage.

# data_ptr() is the address of the first element of the *tensor* (not of the
# storage -- the two can differ). It comes back as a plain Python int.
t1_data_ptr = t1.data_ptr()
print("dtype: ", t1.dtype)
print("tensor pointer type: ", type(t1_data_ptr))
print("stride: ", t1.stride())
# The storage is a low-level object representing a 1-D array of bytes in one
# contiguous block of memory. PyTorch interprets those bytes as a specific
# data type.
t1_storage = t1.storage()
# Size in bytes of a single element of the storage; torch.int64 takes
# 8 bytes (64 bits).
element_size = t1_storage.element_size()
print("storage: ", t1_storage)
print("element_size: ", element_size)


dtype:  torch.int64
tensor pointer type:  <class 'int'>
stride:  (12, 6, 2, 1)
storage:   0
 1
 2
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
[torch.storage.TypedStorage(dtype=torch.int64, device=cpu) of size 24]
element_size:  8


In [44]:
# Read elements directly from the underlying memory through raw pointers
# (the addresses of the elements).
# Handling pointers in Python is generally not recommended.
#
# t1 holds torch.int64 values (8 bytes each), so every read goes through a
# pointer to ctypes.c_int64. Reading through ctypes.c_int (usually 32 bits)
# would only look at part of each element: it happens to give the right answer
# for small non-negative values on little-endian machines, and the wrong one
# otherwise.
elem_at_index_0 = ctypes.cast(t1_data_ptr, ctypes.POINTER(ctypes.c_int64))[0]
print("element at index 0: ", elem_at_index_0)
# t1 is laid out contiguously (stride (12, 6, 2, 1)), so the element at flat
# index 4 lives `index * element_size` bytes past the first element.
index = 4
offset = index * element_size
element_data_ptr = t1_data_ptr + offset
element_at_index_4 = ctypes.cast(element_data_ptr, ctypes.POINTER(ctypes.c_int64))[0]
print("element at index 4: ", element_at_index_4)

element at index 0:  0
element at index 4:  4


In [45]:
# t1.data_ptr() and t1.storage().data_ptr() are not the same thing:
#
#   t1.data_ptr()            -> address of the first element of the tensor.
#   t1.storage().data_ptr()  -> address of the first element of the
#                               underlying storage.
#
# The storage is a contiguous block of memory and the tensor is a view of it.
# The first element of the storage is therefore not necessarily the first
# element of the tensor (the view).

t4 = torch.tensor(
    data=[
        [0, 1, 2],
        [3, 4, 5],
    ],
    dtype=torch.int64,
)
print("t4: ", t4)
print("shape: ", t4.shape)
print("stride: ", t4.stride())

t4:  tensor([[0, 1, 2],
        [3, 4, 5]])
shape:  torch.Size([2, 3])
stride:  (3, 1)


In [46]:
# Take column 1 of t4 (same result as the indexing expression t4[:, 1]).
# The result is a view into t4's storage rather than a copy.
t5 = t4.select(dim=1, index=1)
print("t5: ", t5)
print("shape: ", t5.shape)
print("stride: ", t5.stride())

t5:  tensor([1, 4])
shape:  torch.Size([2])
stride:  (3,)


In [47]:
# The data_ptr addresses of t4 and t5 differ: t4 starts at the element '0'
# while t5 starts at the element '1', and those two elements live at different
# addresses.
t4_data_ptr = t4.data_ptr()
t5_data_ptr = t5.data_ptr()
print("t4 data pointer: ", t4_data_ptr)
print("t5 data pointer: ", t5_data_ptr)

t4 data pointer:  94770823539648
t5 data pointer:  94770823539656


In [48]:
# The storage data_ptr addresses, by contrast, are identical for t4 and t5,
# because both tensors are backed by the same underlying storage.
t4_storage_data_ptr = t4.storage().data_ptr()
t5_storage_data_ptr = t5.storage().data_ptr()
print("t4 storage data pointer: ", t4_storage_data_ptr)
print("t5 storage data pointer: ", t5_storage_data_ptr)

t4 storage data pointer:  94770823539648
t5 storage data pointer:  94770823539648
