In [3]:
import open3d.core as o3c
import numpy as np

Tensor Creation

In [5]:
# Build a tensor directly from a Python list.
a = o3c.Tensor([0, 1, 2])
print(f"Created from list:\n{a}")

# Build a tensor from a NumPy array.
a = o3c.Tensor(np.array([0, 1, 2]))
print(f"\nCreated from numpy array:\n{a}")

# With no dtype/device arguments both are chosen automatically; the float
# literals below produced a Float64 tensor on CPU:0 in the recorded run.
a_float = o3c.Tensor([0.0, 1.0, 2.0])
print(f"\nDefault dtype and device:\n{a_float}")

# Request an explicit dtype: integer input stored as Float64.
a = o3c.Tensor(np.array([0, 1, 2]), dtype=o3c.Dtype.Float64)
print(f"\nSpecified data type:\n{a}")

# Explicit device placement is left disabled because CUDA is not installed yet.
# a = o3c.Tensor(np.array([0, 1, 2]), device=o3c.Device("CUDA:0"))
# print("\nSpecified device:\n{}".format(a))

Created from list:
[0 1 2]
Tensor[shape={3}, stride={1}, Int64, CPU:0, 0x364d680]

Created from numpy array:
[0 1 2]
Tensor[shape={3}, stride={1}, Int64, CPU:0, 0x3921db0]

Default dtype and device:
[0 1 2]
Tensor[shape={3}, stride={1}, Float64, CPU:0, 0x35f45d0]

Specified data type:
[0 1 2]
Tensor[shape={3}, stride={1}, Float64, CPU:0, 0x39cbe30]


In [6]:
# Shallow copy: plain assignment binds a second name (dst) to the same tensor as src.
vals = np.array([1, 2, 3])
src = o3c.Tensor(vals)
dst = src
src[0] += 10

# The in-place change made through src is visible through dst; the recorded
# output shows identical values and the same address for both.
print("Source tensor:\n{}".format(src))
print("\nTarget tensor:\n{}".format(dst))
# Stride is the jump needed to go from one element to the next along a given dimension.

Source tensor:
[11 2 3]
Tensor[shape={3}, stride={1}, Int64, CPU:0, 0x364f280]

Target tensor:
[11 2 3]
Tensor[shape={3}, stride={1}, Int64, CPU:0, 0x364f280]


Properties of a Tensor

In [13]:
# Integers 0-23 arranged as a 3D array of shape (2, 3, 4): 2 blocks, each with 3 rows and 4 columns.
vals = np.array((range(24))).reshape(2, 3, 4)
a = o3c.Tensor(vals, dtype=o3c.Dtype.Float64)  # convert the 3D NumPy array into a Float64 tensor
print(f"a.shape: {a.shape}")  # shape: size of the tensor along each dimension
# strides: how many ELEMENTS (not bytes) to skip to reach the next entry along each dimension.
# The recorded output is SizeVector[12, 4, 1]:
#   next block  (dim 0) -> skip 12 elements (3 rows * 4 columns)
#   next row    (dim 1) -> skip 4 elements
#   next column (dim 2) -> skip 1 element
# With 8-byte Float64 elements these correspond to byte offsets of 96, 32 and 8.
print(f"a.strides: {a.strides}")
print(f"a.dtype: {a.dtype}")
print(f"a.device: {a.device}")
print(f"a.ndim: {a.ndim}")

a.shape: SizeVector[2, 3, 4]
a.strides: SizeVector[12, 4, 1]
a.dtype: Float64
a.device: CPU:0
a.ndim: 3


Type Casting

In [14]:
# Float -> int cast. `to` hands back a separate Int32 tensor; in the recorded
# run the values 0.1, 1.5, 2.7 became 0, 1, 2 and `a` itself was unchanged.
a = o3c.Tensor([0.1, 1.5, 2.7])
b = a.to(o3c.Dtype.Int32)
print(a, b, sep="\n")

[0.1 1.5 2.7]
Tensor[shape={3}, stride={1}, Float64, CPU:0, 0x3921db0]
[0 1 2]
Tensor[shape={3}, stride={1}, Int32, CPU:0, 0x3903260]


In [15]:
# Int -> float cast. The Int64 source tensor is kept; `b` is a Float32 tensor
# holding the same values.
a = o3c.Tensor([1, 2, 3])
b = a.to(o3c.Dtype.Float32)
print(a, b, sep="\n")

[1 2 3]
Tensor[shape={3}, stride={1}, Int64, CPU:0, 0x38feb80]
[1 2 3]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x3721020]


Numpy I/O with direct memory map

In [16]:
# Using the constructor: the tensor does not share memory with the NumPy array.
np_a = np.ones((5,), dtype=np.int32)  # 1-D array of five ones; shape (5,) is one-dimensional, not 5x1
o3_a = o3c.Tensor(np_a)
print(f"np_a: {np_a}")
print(f"o3_a: {o3_a}")
print("")

# Changes to one object are not reflected in the other because memory is not shared:
# the recorded output shows 101 only in np_a and 201 only in o3_a.
np_a[0] += 100
o3_a[1] += 200
print(f"np_a: {np_a}")
print(f"o3_a: {o3_a}")

np_a: [1 1 1 1 1]
o3_a: [1 1 1 1 1]
Tensor[shape={5}, stride={1}, Int32, CPU:0, 0x39e9cf0]

np_a: [101   1   1   1   1]
o3_a: [1 201 1 1 1]
Tensor[shape={5}, stride={1}, Int32, CPU:0, 0x39e9cf0]


In [17]:
# From numpy: from_numpy wraps the existing NumPy buffer instead of copying it.
np_a = np.ones((5,), dtype=np.int32)
o3_a = o3c.Tensor.from_numpy(np_a)

# Changes to the NumPy array are reflected in the Open3D tensor and vice versa:
# after the two in-place updates both objects print 101 and 201.
np_a[0] += 100
o3_a[1] += 200
print(f"np_a: {np_a}")
print(f"o3_a: {o3_a}")

np_a: [101 201   1   1   1]
o3_a: [101 201 1 1 1]
Tensor[shape={5}, stride={1}, Int32, CPU:0, 0x35b5ea0]


In [19]:
# To numpy: numpy() exposes the tensor's data as a NumPy array.
o3_a = o3c.Tensor([1, 1, 1, 1, 1], dtype=o3c.Dtype.Int32)
np_a = o3_a.numpy()

# Changes to the NumPy array are reflected in the Open3D tensor and vice versa
# (both print 101 and 201 in the recorded output).
np_a[0] += 100
o3_a[1] += 200
print(f"np_a: {np_a}")
print(f"o3_a: {o3_a}")

# For a CUDA tensor, call cpu() before calling numpy().
# The tensor here is created on CPU:0, so this only illustrates the call pattern.
o3_a = o3c.Tensor([1, 1, 1, 1, 1], device=o3c.Device("CPU:0"))
print(f"\no3_a.cpu().numpy(): {o3_a.cpu().numpy()}")

np_a: [101 201   1   1   1]
o3_a: [101 201 1 1 1]
Tensor[shape={5}, stride={1}, Int32, CPU:0, 0x3522ab0]

o3_a.cpu().numpy(): [1 1 1 1 1]


PyTorch I/O with DLPack memory map

In [20]:
# DLPack is an open in-memory tensor structure for sharing tensors among frameworks.
import torch
import torch.utils.dlpack

# From PyTorch: export the torch tensor as a DLPack capsule and wrap it as an Open3D tensor.
th_a = torch.ones((5,))
o3_a = o3c.Tensor.from_dlpack(torch.utils.dlpack.to_dlpack(th_a))
print(f"th_a: {th_a}")
print(f"o3_a: {o3_a}")
print("")

# Changes to the PyTorch tensor are reflected in the Open3D tensor and vice versa
# (both print 100 and 200 in the recorded output).
th_a[0] = 100
o3_a[1] = 200
print(f"th_a: {th_a}")
print(f"o3_a: {o3_a}")

th_a: tensor([1., 1., 1., 1., 1.])
o3_a: [1 1 1 1 1]
Tensor[shape={5}, stride={1}, Float32, CPU:0, 0x3a21c40]

th_a: tensor([100., 200.,   1.,   1.,   1.])
o3_a: [100 200 1 1 1]
Tensor[shape={5}, stride={1}, Float32, CPU:0, 0x3a21c40]


In [22]:
# To PyTorch: export the Open3D tensor through DLPack and wrap it as a torch tensor.
o3_a = o3c.Tensor([1, 1, 1, 1, 1], device=o3c.Device("CPU:0"))
th_a = torch.utils.dlpack.from_dlpack(o3_a.to_dlpack())
# o3_a is then rebound to a tensor created back from th_a, again through DLPack.
o3_a = o3c.Tensor.from_dlpack(torch.utils.dlpack.to_dlpack(th_a))
print(f"th_a: {th_a}")
print(f"o3_a: {o3_a}")
print("")

# Changes to the PyTorch tensor are reflected in the Open3D tensor and vice versa
# (both print 100 and 200 in the recorded output).
th_a[0] = 100
o3_a[1] = 200
print(f"th_a: {th_a}")
print(f"o3_a: {o3_a}")

th_a: tensor([1, 1, 1, 1, 1])
o3_a: [1 1 1 1 1]
Tensor[shape={5}, stride={1}, Int64, CPU:0, 0x8815fc0]

th_a: tensor([100, 200,   1,   1,   1])
o3_a: [100 200 1 1 1]
Tensor[shape={5}, stride={1}, Int64, CPU:0, 0x8815fc0]


Binary element-wise operation

In [23]:
# Element-wise arithmetic between two Float32 tensors of the same shape.
a = o3c.Tensor([1, 1, 1], dtype=o3c.Dtype.Float32)
b = o3c.Tensor([2, 2, 2], dtype=o3c.Dtype.Float32)
print(f"a + b = {a + b}")
print(f"a - b = {a - b}")
print(f"a * b = {a * b}")
print(f"a / b = {a / b}")

a + b = [3 3 3]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x812e5f0]
a - b = [-1 -1 -1]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x3a21c40]
a * b = [2 2 2]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x8811740]
a / b = [0.5 0.5 0.5]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x889a850]


In [24]:
# Type casting: when two operands with different data types are used in an operation,
# one of them is automatically converted to match the other's type in a way that avoids data loss.

# Broadcasting: tensors of different shapes can be combined without explicitly reshaping them.
# The smaller tensor is "broadcast" to match the shape of the larger tensor.

# Automatic broadcasting.
a = o3c.Tensor.ones((2, 3), dtype=o3c.Dtype.Float32)
b = o3c.Tensor.ones((3,), dtype=o3c.Dtype.Float32) # 1D tensor
# b (shape (3,)) is broadcast against a (shape (2, 3)): it is effectively
# repeated row-wise to match the 2 rows of a.
print("a + b = \n{}\n".format(a + b))

# Automatic type casting. a is rebound to its first row (shape (3,)).
a = a[0]
print("a + 1 = {}".format(a + 1))  # Float + Int -> Float.
print("a + True = {}".format(a + True))  # Float + Bool -> Float.

# In-place: the tensor is modified without creating a new tensor.
# True acts as 1 here, so every element of a drops from 1 to 0 (see recorded output).
a -= True
print("a = {}".format(a))

a + b = 
[[2 2 2],
 [2 2 2]]
Tensor[shape={2, 3}, stride={3, 1}, Float32, CPU:0, 0x8811b80]

a + 1 = [2 2 2]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x896bcd0]
a + True = [2 2 2]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x60fd030]
a = [0 0 0]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x46a57e0]


Unary element-wise operation

In [25]:
# Element-wise unary math on a Float32 tensor; sqrt/sin/cos each return a new tensor.
a = o3c.Tensor([4, 9, 16], dtype=o3c.Dtype.Float32)
print("a = {}\n".format(a))
print("a.sqrt = {}\n".format(a.sqrt()))
print("a.sin = {}\n".format(a.sin()))
print("a.cos = {}\n".format(a.cos()))

# In-place variant (trailing underscore): a itself is overwritten. In the recorded
# output the address printed for a is the same before and after the call.
a.sqrt_()
print(a)

a = [4 9 16]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x8835600]

a.sqrt = [2 3 4]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x87f80a0]

a.sin = [-0.75680244 0.41211867 -0.2879029]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x87fd400]

a.cos = [-0.65364367 -0.9111302 -0.9576596]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x889a850]

[2 3 4]
Tensor[shape={3}, stride={1}, Float32, CPU:0, 0x8835600]


Reduction

In [26]:
# Reductions over every element: each call yields a 0-dimensional tensor
# (shape={} in the recorded output).
vals = np.array(range(24)).reshape((2, 3, 4))
a = o3c.Tensor(vals)
print(f"a.sum = {a.sum()}\n")
print(f"a.min = {a.min()}\n")
print(f"a.ArgMax = {a.argmax()}\n")

a.sum = 276
Tensor[shape={}, stride={}, Int64, CPU:0, 0x5ee73c0]

a.min = 0
Tensor[shape={}, stride={}, Int64, CPU:0, 0x8811740]

a.ArgMax = 23
Tensor[shape={}, stride={}, Int64, CPU:0, 0x896bcd0]



In [27]:
# Reductions restricted to chosen dimensions.
vals = np.array(range(24)).reshape((2, 3, 4))
a = o3c.Tensor(vals)

# dim=0 adds the two blocks together, leaving a (3, 4) tensor.
print(f"Along dim=0\n{a.sum(dim=0)}")
# dim=(0, 2) adds over blocks and columns, leaving one value per row.
print(f"Along dim=(0, 2)\n{a.sum(dim=(0, 2))}\n")

# keepdim=True keeps each reduced dimension with size 1, so the result still
# has as many dimensions as the input and lines up with the original shape.
print(f"Shape without retention : {a.sum(dim=(0, 2)).shape}")
print(f"Shape with retention : {a.sum(dim=(0, 2), keepdim=True).shape}")

Along dim=0
[[12 14 16 18],
 [20 22 24 26],
 [28 30 32 34]]
Tensor[shape={3, 4}, stride={4, 1}, Int64, CPU:0, 0x8811e20]
Along dim=(0, 2)
[60 92 124]
Tensor[shape={3}, stride={1}, Int64, CPU:0, 0x5fdf3c0]

Shape without retention : SizeVector[3]
Shape with retention : SizeVector[1, 3, 1]


Slicing, indexing, getitem, and setitem

In [28]:
# Basic indexing and slicing on a (2, 3, 4) tensor holding 0..23.
vals = np.array(range(24)).reshape((2, 3, 4))
a = o3c.Tensor(vals)
print(f"a = \n{a}\n")

# Integer indices: block 1, row 2.
print(f"a[1, 2] = {a[1, 2]}\n")

# Slice on the first dimension only.
print(f"a[1:] = \n{a[1:]}\n")

# Slice with a step: rows 0 and 2 of every block.
print(f"a[:, 0:3:2, :] = \n{a[:, 0:3:2, :]}\n")

# Slices and an integer index combined in one subscript.
print(f"a[:-1, 0:3:2, 2] = \n{a[:-1, 0:3:2, 2]}\n")

a = 
[[[0 1 2 3],
  [4 5 6 7],
  [8 9 10 11]],
 [[12 13 14 15],
  [16 17 18 19],
  [20 21 22 23]]]
Tensor[shape={2, 3, 4}, stride={12, 4, 1}, Int64, CPU:0, 0x89737f0]

a[1, 2] = [20 21 22 23]
Tensor[shape={4}, stride={1}, Int64, CPU:0, 0x8973890]

a[1:] = 
[[[12 13 14 15],
  [16 17 18 19],
  [20 21 22 23]]]
Tensor[shape={1, 3, 4}, stride={12, 4, 1}, Int64, CPU:0, 0x8973850]

a[:, 0:3:2, :] = 
[[[0 1 2 3],
  [8 9 10 11]],
 [[12 13 14 15],
  [20 21 22 23]]]
Tensor[shape={2, 2, 4}, stride={8, 4, 1}, Int64, CPU:0, 0x896c7a0]
Tensor[shape={2, 2, 4}, stride={12, 8, 1}, Int64, CPU:0, 0x89737f0]

a[:-1, 0:3:2, 2] = 
[[2 10]]
Tensor[shape={1, 2}, stride={2, 1}, Int64, CPU:0, 0x5fdf3c0]
Tensor[shape={1, 2}, stride={12, 8}, Int64, CPU:0, 0x8973800]



In [29]:
vals = np.array(range(24)).reshape((2, 3, 4))
a = o3c.Tensor(vals)

# A basic slice refers to the original data, so changes get reflected:
# after b[0] += 100 the recorded output shows 102 and 110 inside a as well.
b = a[:-1, 0:3:2, 2]
b[0] += 100
print("b = {}\n".format(b))
print("a = \n{}".format(a))

b = [[102 110]]
Tensor[shape={1, 2}, stride={2, 1}, Int64, CPU:0, 0x3a21c40]
Tensor[shape={1, 2}, stride={12, 8}, Int64, CPU:0, 0x896b0f0]

a = 
[[[0 1 102 3],
  [4 5 6 7],
  [8 9 110 11]],
 [[12 13 14 15],
  [16 17 18 19],
  [20 21 22 23]]]
Tensor[shape={2, 3, 4}, stride={12, 4, 1}, Int64, CPU:0, 0x896b0e0]


In [30]:
vals = np.array(range(24)).reshape((2, 3, 4))
a = o3c.Tensor(vals)

# Example __setitem__: add 100 in place to column 2 of every row in every block.
a[:, :, 2] += 100
print(a)

[[[0 1 102 3],
  [4 5 106 7],
  [8 9 110 11]],
 [[12 13 114 15],
  [16 17 118 19],
  [20 21 122 23]]]
Tensor[shape={2, 3, 4}, stride={12, 4, 1}, Int64, CPU:0, 0x89737f0]


Checking Broadcast Compatibility using Numpy

Let's assume two arrays A and B with shapes (m, n) and (p, q):

If n == q and m == p, they are directly compatible.

If n == q and m == 1, the first array (with shape (1, n)) will be broadcast across the rows.

If n == q and p == 1, the second array (with shape (1, q)) will be broadcast across the rows.

In [32]:
# numpy is already imported as `np` in the notebook's first cell.


def are_broadcast_compatible(*arrays):
    """Return True when all ``arrays`` can be broadcast to a common shape.

    Args:
        *arrays: array-like operands, as accepted by ``np.broadcast``.

    Returns:
        bool: True if ``np.broadcast`` accepts the operands, False if it
        raises ``ValueError`` because their shapes are incompatible.
    """
    try:
        np.broadcast(*arrays)  # only the shapes are inspected; no result is computed
    except ValueError:
        return False
    return True


# Define two arrays. Only their shapes matter for the check, not the random values.
A = np.random.rand(4, 2)
B = np.random.rand(3, 4, 5)

# Check broadcasting compatibility: trailing dimensions 2 and 5 differ and
# neither is 1, so these two shapes cannot be broadcast together.
if are_broadcast_compatible(A, B):
    print("The arrays are broadcast-compatible.")
else:
    print("The arrays are not broadcast-compatible.")


The arrays are not broadcast-compatible.


Advanced indexing

In [33]:
# Advanced indexing always returns a copy of the data (contrast with basic slicing, which returns a view).
# Integer array indexing allows selection of arbitrary items in the tensor based on their dimensional index.
# The index lists passed should be broadcast compatible.
vals = np.array(range(24)).reshape((2, 3, 4))
a = o3c.Tensor(vals)

# One index per dimension is taken from each list in turn:
# a[0, 1, 1] and a[1, 2, 0], i.e. 5 and 20 in the recorded output.
print("a[[0, 1], [1, 2], [1, 0]] = {}\n".format(a[[0, 1], [1, 2], [1, 0]]))

# Changes are not reflected because b is a copy: b[0] becomes 101 while the
# same selection on a still prints 1.
b = a[[0, 0], [0, 1], [1, 1]]
b[0] += 100
print("b = {}\n".format(b))
print("a[[0, 0], [0, 1], [1, 1]] = {}".format(a[[0, 0], [0, 1], [1, 1]]))

a[[0, 1], [1, 2], [1, 0]] = [5 20]
Tensor[shape={2}, stride={1}, Int64, CPU:0, 0x880df70]

b = [101 5]
Tensor[shape={2}, stride={1}, Int64, CPU:0, 0x8815b40]

a[[0, 0], [0, 1], [1, 1]] = [1 5]
Tensor[shape={2}, stride={1}, Int64, CPU:0, 0x8966f90]


Combining Basic and Advanced Indexing

In [34]:
vals = np.array(range(24)).reshape((2, 3, 4))
a = o3c.Tensor(vals)

# An integer index, a slice and an index list mixed in one subscript.
print("a[1, 0:2, [1, 2]] = \n{}\n".format(a[1, 0:2, [1, 2]]))

# Subtle difference between selection and advanced indexing:
# a[(0, 1)] is the same subscript as a[0, 1] and yields a single row (shape {4}),
# whereas the list in a[[0, 1]] selects entries 0 and 1 along the first
# dimension (shape {2, 3, 4} in the recorded output).
print("a[(0, 1)] = {}\n".format(a[(0, 1)]))
# Label fixed: the closing bracket of "a[[0, 1]]" was missing.
print("a[[0, 1]] = \n{}\n".format(a[[0, 1]]))

a = o3c.Tensor(np.array(range(120)).reshape((2, 3, 4, 5)))

# Interleaving slice and advanced indexing on a 4D tensor.
print("a[1, [[1, 2], [2, 1]], 0:4:2, [3, 4]] = \n{}\n".format(
    a[1, [[1, 2], [2, 1]], 0:4:2, [3, 4]]))

a[1, 0:2, [1, 2]] = 
[[13 17],
 [14 18]]
Tensor[shape={2, 2}, stride={2, 1}, Int64, CPU:0, 0x3a49390]

a[(0, 1)] = [4 5 6 7]
Tensor[shape={4}, stride={1}, Int64, CPU:0, 0x8973810]

a[[0, 1] = 
[[[0 1 2 3],
  [4 5 6 7],
  [8 9 10 11]],
 [[12 13 14 15],
  [16 17 18 19],
  [20 21 22 23]]]
Tensor[shape={2, 3, 4}, stride={12, 4, 1}, Int64, CPU:0, 0x39e2bc0]

a[1, [[1, 2], [2, 1]], 0:4:2, [3, 4]] = 
[[[83 93],
  [104 114]],
 [[103 113],
  [84 94]]]
Tensor[shape={2, 2, 2}, stride={4, 2, 1}, Int64, CPU:0, 0x8967a80]



Boolean Array Indexing

In [35]:
a = o3c.Tensor(np.array([1, -1, -2, 3]))
print("a = {}\n".format(a))

# Add a constant to all negative numbers: the mask a < 0 selects the entries
# to update in place (-1 and -2 become 19 and 18 in the recorded output).
a[a < 0] += 20
print("a = {}\n".format(a))

a = [1 -1 -2 3]
Tensor[shape={4}, stride={1}, Int64, CPU:0, 0x89717d0]

a = [1 19 18 3]
Tensor[shape={4}, stride={1}, Int64, CPU:0, 0x89717d0]



Logical Operations

In [36]:
# Element-wise logical operations on two boolean tensors.
a = o3c.Tensor(np.array([True, False, True, False]))
b = o3c.Tensor(np.array([True, True, False, False]))

print(f"a AND b = {a.logical_and(b)}")
print(f"a OR b = {a.logical_or(b)}")
print(f"a XOR b = {a.logical_xor(b)}")
print(f"NOT a = {a.logical_not()}\n")

# Only works for boolean tensors.
# any: True if at least one element is True; all: True only if every element is True.
print(f"a.any = {a.any()}")
print(f"a.all = {a.all()}\n")

# For non-boolean tensors, 0 is treated as False and any non-zero value as True.
# In the recorded output the result of logical_and is itself a Bool tensor.
c = o3c.Tensor(np.array([2.0, 0.0, 3.5, 0.0]))
d = o3c.Tensor(np.array([0.0, 3.0, 1.5, 0.0]))
print(f"c AND d = {c.logical_and(d)}")

a AND b = [True False False False]
Tensor[shape={4}, stride={1}, Bool, CPU:0, 0x3880890]
a OR b = [True True True False]
Tensor[shape={4}, stride={1}, Bool, CPU:0, 0x366c480]
a XOR b = [False True True False]
Tensor[shape={4}, stride={1}, Bool, CPU:0, 0x641cdc0]
NOT a = [False True False True]
Tensor[shape={4}, stride={1}, Bool, CPU:0, 0x3966020]

a.any = True
Tensor[shape={}, stride={}, Bool, CPU:0, 0x5fdf3c0]
a.all = False
Tensor[shape={}, stride={}, Bool, CPU:0, 0x60fd030]

c AND d = [False False True False]
Tensor[shape={4}, stride={1}, Bool, CPU:0, 0x8815b40]


In [37]:
# Two Float64 tensors that differ only slightly in their second element (2 vs 1.99999).
a = o3c.Tensor(np.array([1, 2, 3, 4]), dtype=o3c.Dtype.Float64)
b = o3c.Tensor(np.array([1, 1.99999, 3, 4]))

# allclose: True if the two tensors are element-wise equal within a tolerance.
# Throws an exception if the device/dtype is not the same.
# Returns False if the shape is not the same.
print("allclose : {}".format(a.allclose(b)))

# isclose: the element-wise version of the check; returns a Bool tensor.
# Throws an exception if the device/dtype/shape is not the same.
print("isclose : {}".format(a.isclose(b)))

# issame: True if and only if the two tensors are the same, including the same underlying memory.
# Returns False if the device/dtype/shape is not the same.
print("issame : {}".format(a.issame(b)))

allclose : True
isclose : [True True True True]
Tensor[shape={4}, stride={1}, Bool, CPU:0, 0x3966020]
issame : False


Comparison Operations

In [38]:
# Element-wise comparisons between two Int64 tensors; each yields a Bool tensor.
a = o3c.Tensor([0, 1, -1])
b = o3c.Tensor([0, 0, 0])

print("a > b = {}".format(a > b))
print("a >= b = {}".format(a >= b))
print("a < b = {}".format(a < b))
print("a <= b = {}".format(a <= b))
print("a == b = {}".format(a == b))
print("a != b = {}".format(a != b))

# Throws exception if device/dtype is not the same.
# If the shapes are not the same, the tensors must be broadcast compatible:
# here the single element b[0] is compared against every element of a.
# Label fixed to name the expression actually evaluated (it previously read "a > b").
print("a > b[0] = {}".format(a > b[0]))

a > b = [False True False]
Tensor[shape={3}, stride={1}, Bool, CPU:0, 0x880df70]
a >= b = [True True False]
Tensor[shape={3}, stride={1}, Bool, CPU:0, 0x60e6550]
a < b = [False False True]
Tensor[shape={3}, stride={1}, Bool, CPU:0, 0x4cc5340]
a <= b = [True False True]
Tensor[shape={3}, stride={1}, Bool, CPU:0, 0x812e5f0]
a == b = [True False False]
Tensor[shape={3}, stride={1}, Bool, CPU:0, 0x4cc5340]
a != b = [False True True]
Tensor[shape={3}, stride={1}, Bool, CPU:0, 0x812e5f0]
a > b = [False True False]
Tensor[shape={3}, stride={1}, Bool, CPU:0, 0x87f8980]


Nonzero operations

In [39]:
# Locate the non-zero entries of a 3x3 tensor.
a = o3c.Tensor([[3, 0, 0], [0, 4, 0], [5, 6, 0]])

print(f"a = \n{a}\n")
# In the recorded output the default call prints a list of two 1-D index
# tensors (row indices, then column indices), while as_tuple=1 prints a
# single 2x4 tensor holding the same indices.
print(f"a.nonzero() = \n{a.nonzero()}\n")
print(f"a.nonzero(as_tuple = 1) = \n{a.nonzero(as_tuple=1)}")

a = 
[[3 0 0],
 [0 4 0],
 [5 6 0]]
Tensor[shape={3, 3}, stride={3, 1}, Int64, CPU:0, 0x351dc10]

a.nonzero() = 
[[0 1 2 2]
Tensor[shape={4}, stride={1}, Int64, CPU:0, 0x896b820], [0 1 0 1]
Tensor[shape={4}, stride={1}, Int64, CPU:0, 0x3a49390]]

a.nonzero(as_tuple = 1) = 
[[0 1 2 2],
 [0 1 0 1]]
Tensor[shape={2, 4}, stride={4, 1}, Int64, CPU:0, 0x896bca0]
