In [1]:
import torch

In [2]:
import random
# One shared seed value for every RNG seeded in this cell.
seed = 42
# Seed Python's `random` module and PyTorch's global generator; the
# torch.randn calls further down draw from the latter.
random.seed(seed)
# Last expression of the cell, so the torch Generator it returns is displayed.
torch.manual_seed(seed)

<torch._C.Generator at 0x23fce45c330>

## Torch Matrix Multiplications

    - torch.mm
    - torch.mv
    - torch.bmm
    - torch.mul
    - torch.matmul
    - A*B (Here A and B are two tensors)

## 1. torch.mm

In [3]:
# Provenance (kept for reference, not executed): the literals below appear to
# be the values this snippet produced with the notebook seed -- TODO confirm.
#
#   torch.manual_seed(seed)
#   mat1 = torch.randint(-10, 10, (2, 3))
#   mat2 = torch.randint(-10, 10, (3, 3))

# Hard-coded integer operands, so the products shown below do not depend on
# the state of the random number generator.
mat1 = torch.tensor([[-8, -3, 6], [4, -4, 5]])  # shape (2, 3)
mat2 = torch.tensor([[-10, -6, 0], [3, 8, 4], [0, 4, 1]])  # shape (3, 3)

matrix multiplication using torch.mm

mat1 (2,3)  * mat2 (3,3) = mat3 (2,3)

In [4]:
# Matrix product of the two 2-D operands defined in the previous cell:
# (2, 3) @ (3, 3) -> (2, 3).
mat3 = torch.mm(mat1, mat2)

In [5]:
print(mat3)

tensor([[ 71,  48,  -6],
        [-52, -36, -11]])


## 2. torch.mv

In [6]:
# Left-hand operand: a 2x3 integer matrix.
mat = torch.tensor([[-8, -3, 6], [4, -4, 5]])  # shape (2, 3)

# The same three values in two layouts:
vec = torch.tensor([-10, -6, 0])  # 1-D vector, shape (3,)
vec2d = torch.tensor([[-10], [-6], [0]])  # 2-D column matrix, shape (3, 1)


In [7]:
# Show the ranks involved: `vec` is 1-D with shape (3,), while `vec2d` holds
# the same values as a 2-D column with shape (3, 1).
print("mat.shape : ", mat.shape)
print("vec.shape : ", vec.shape)
print("vec2d.shape : ", vec2d.shape)

mat.shape :  torch.Size([2, 3])
vec.shape :  torch.Size([3])
vec2d.shape :  torch.Size([3, 1])


matrix vector multiplication using torch.mv

mat (2,3) * vec (3,) = result (2,)

In [8]:
# Matrix-vector product with a 1-D right-hand side: (2, 3) @ (3,) -> (2,).
result = torch.mv(mat, vec)

In [9]:
print(result)

tensor([ 98, -16])


Using torch.mm with two 2-D matrices (the vector is written as a column matrix)

mat (2,3) * vec2d (3,1) = result (2,1)

In [10]:
# Same numbers as the torch.mv call, but both operands are 2-D, so the
# result keeps its column dimension: (2, 3) @ (3, 1) -> (2, 1).
result = torch.mm(mat, vec2d)

In [11]:
print(result)

tensor([[ 98],
        [-16]])


## 3. torch.bmm

In [12]:
# Two batches of 10 random float matrices: (10, 3, 4) and (10, 4, 5).
# NOTE: this rebinds `mat1` / `mat2`, replacing the 2-D integer tensors from
# the torch.mm section; re-running earlier cells after this one gives
# different results.
mat1 = torch.randn(10, 3, 4)
mat2 = torch.randn(10, 4, 5)

In [13]:
# print(mat1)

In [14]:
# print(mat2)

In [15]:
# Batched product of the tensors from the previous cell: for each of the 10
# batch entries, (3, 4) @ (4, 5) -> (3, 5).
res = torch.bmm(mat1, mat2)
# Last expression of the cell: displays the result shape (10, 3, 5).
res.size()

torch.Size([10, 3, 5])

## 4. torch.mul

- supports broadcasting
- element-wise multiplication

In [16]:
# Example 1: both operands have shape (2, 3), so entries are multiplied pairwise.
mat1 = torch.tensor([[-8, -3, 6], [4, -4, 5]])
mask = torch.tensor([[-10, -6, 0], [3, 8, 4]])

res = torch.mul(mat1, mask)
print(res)

tensor([[ 80,  18,   0],
        [ 12, -32,  20]])


In [17]:
# Example 2: the (1, 3) mask is broadcast across every row of the (3, 3) matrix.
mat1 = torch.tensor([[-8, -3, 6], [4, -4, 5], [3, 8, 4]])
mask = torch.tensor([[-10, -6, 0]])

res = torch.mul(mat1, mask)
print(res)

tensor([[ 80,  18,   0],
        [-40,  24,   0],
        [-30, -48,   0]])


## 5. A*B

- supports broadcasting
- element-wise multiplication (the `*` operator gives the same result as `torch.mul`)

In [18]:
# Example 1: the `*` operator on two tensors of the same shape (2, 3).
# It multiplies element-wise and gives the same result as torch.mul(mat1, mask).
mat1 = torch.tensor([
    [-8, -3,  6],
    [ 4, -4,  5]
    ])
mask = torch.tensor([
    [-10,  -6,   0],
    [  3,   8,   4]
    ])

# This section demonstrates A*B, so use the operator rather than torch.mul.
res = mat1 * mask
print(res)

tensor([[ 80,  18,   0],
        [ 12, -32,  20]])


In [19]:
# Example 2: the `*` operator with broadcasting. The (1, 3) mask is applied
# to every row of the (3, 3) matrix, exactly as torch.mul(mat1, mask) would.
mat1 = torch.tensor([
    [-8, -3,  6],
    [ 4, -4,  5],
    [ 3,  8,  4]
    ])
mask = torch.tensor([
    [-10,  -6,   0]
    ])

# This section demonstrates A*B, so use the operator rather than torch.mul.
res = mat1 * mask
print(res)

tensor([[ 80,  18,   0],
        [-40,  24,   0],
        [-30, -48,   0]])


## 6. torch.matmul

Matrix product of two tensors.

The behavior depends on the dimensionality of the tensors as follows:

If both tensors are 1-dimensional, the dot product (scalar) is returned.

If both arguments are 2-dimensional, the matrix-matrix product is returned.

If the first argument is 1-dimensional and the second argument is 2-dimensional, a 1 is prepended to its dimension for the purpose of the matrix multiply. After the matrix multiply, the prepended dimension is removed.

If the first argument is 2-dimensional and the second argument is 1-dimensional, the matrix-vector product is returned.

If both arguments are at least 1-dimensional and at least one argument is N-dimensional (where N > 2), then a batched matrix multiply is returned. If the first argument is 1-dimensional, a 1 is prepended to its dimension for the purpose of the batched matrix multiply and removed after. If the second argument is 1-dimensional, a 1 is appended to its dimension for the purpose of the batched matrix multiply and removed after. The non-matrix (i.e. batch) dimensions are broadcasted (and thus must be broadcastable). For example, if input is a $(j \times 1 \times n \times n)$ tensor and other is a $(k \times n \times n)$ tensor, out will be a $(j \times k \times n \times n)$ tensor.

Note that the broadcasting logic only looks at the batch dimensions when determining if the inputs are broadcastable, and not the matrix dimensions. For example, if input is a $(j \times 1 \times n \times m)$ tensor and other is a $(k \times m \times p)$ tensor, these inputs are valid for broadcasting even though the final two dimensions (i.e. the matrix dimensions) are different. out will be a $(j \times k \times n \times p)$ tensor.

In [20]:
# vector x vector
# Two 1-D tensors of length 3 -> dot product, returned as a 0-dim tensor
# (its shape prints as torch.Size([])).
tensor1 = torch.randn(3)
tensor2 = torch.randn(3)
res = torch.matmul(tensor1, tensor2)
print(res.shape)
print(res)

torch.Size([])
tensor(0.1348)


In [21]:
# matrix x vector
# (3, 4) @ (4,) -> (3,): one dot product per matrix row.
tensor1 = torch.randn(3, 4)
tensor2 = torch.randn(4)
res = torch.matmul(tensor1, tensor2)
print(res.shape)
print(res)

torch.Size([3])
tensor([-2.2125,  0.5829, -0.7331])


In [22]:
# batched matrix x broadcasted vector
# (4, 3, 4) @ (4,) -> (4, 3): the same length-4 vector is multiplied with each
# of the 4 (3, 4) matrices in the batch. The leading 4 is the batch size; the
# trailing 4 is the dimension contracted with the vector.
tensor1 = torch.randn(4, 3, 4)
print("tensor1: ",tensor1)
tensor2 = torch.randn(4)
print("tensor2: ",tensor2)
res = torch.matmul(tensor1, tensor2)
print(res.shape)
print(res)

tensor1:  tensor([[[-0.3182,  1.2154,  1.4200, -0.0547],
         [ 0.0247, -1.0641, -0.7602, -0.4075],
         [ 0.3105,  0.3715,  0.2697,  0.7900]],

        [[ 0.9446, -1.5824,  0.9871,  1.1457],
         [-2.7107, -0.6130,  1.3917,  0.2229],
         [ 0.6839, -1.3246, -0.5161,  0.6002]],

        [[ 0.4145,  1.3664,  1.3947, -0.7396],
         [-0.4833, -0.7403,  0.3143,  0.1416],
         [-0.3016, -1.4033, -1.3271, -0.9948]],

        [[-0.4940,  1.1366, -0.4618,  1.4200],
         [ 0.8211, -0.0675,  0.9491, -0.3983],
         [ 0.6899, -1.3129,  0.0378, -1.1702]]])
tensor2:  tensor([ 0.2344, -0.3407,  1.0171,  1.1458])
torch.Size([4, 3])
tensor([[ 0.8930, -0.8718,  1.1257],
        [ 3.0773,  1.2443,  0.7744],
        [ 0.2029,  0.6208, -2.0822],
        [ 0.6542,  0.7244, -0.6933]])


In [23]:
# batched matrix x batched matrix
# (4, 3, 2) @ (4, 2, 5) -> (4, 3, 5): batch entry i of tensor1 is multiplied
# with batch entry i of tensor2.
tensor1 = torch.randn(4, 3, 2)
tensor2 = torch.randn(4, 2, 5)
res = torch.matmul(tensor1, tensor2)
print(res.shape)
print(res)

torch.Size([4, 3, 5])
tensor([[[ 0.7550, -0.6106,  0.3191, -1.2923, -1.6739],
         [ 0.3687,  0.5774,  0.6953,  0.2538,  0.2253],
         [ 0.3361,  0.0796,  0.3585, -0.2201, -0.3266]],

        [[-0.5576, -0.8647, -0.9830, -2.7937,  0.7684],
         [ 0.1062,  0.0739,  0.0764, -0.2849,  0.0678],
         [-0.9999, -1.1283, -1.2473, -1.2096,  0.3820]],

        [[-4.4653, -0.9278, -1.8166,  0.4703, -0.4155],
         [ 2.1580,  0.8101,  0.8357, -0.2124,  0.5654],
         [-2.3756, -0.6346, -0.9500,  0.2444, -0.3632]],

        [[-0.5491, -1.5430, -1.1647, -0.9483, -2.0773],
         [-0.0210, -0.7448, -0.4289, -0.2335, -0.4470],
         [ 0.6148, -0.8586, -0.1454,  0.3180,  0.9396]]])


In [24]:
# batched matrix x broadcasted matrix
# (4, 3, 2) @ (2, 5) -> (4, 3, 5): the single (2, 5) matrix is reused for each
# of the 4 batch entries of tensor1.
tensor1 = torch.randn(4, 3, 2)
print("tensor1: ", tensor1)
tensor2 = torch.randn(2, 5)
print("tensor2: ", tensor2)
res = torch.matmul(tensor1, tensor2)
print(res.shape)
print(res)

tensor1:  tensor([[[-0.6228, -0.7892],
         [-0.1678,  1.6433],
         [ 0.5163,  1.6060]],

        [[-0.9815,  0.5361],
         [ 0.2337,  1.4083],
         [-2.0686,  0.8686]],

        [[-0.8032, -1.1209],
         [ 0.1956, -0.7815],
         [-0.6119,  0.6026]],

        [[-0.8362, -0.3333],
         [-0.4801, -1.2872],
         [ 0.7389,  0.0339]]])
tensor2:  tensor([[ 0.2387,  1.2485,  0.0510, -0.0177, -0.7572],
        [-1.6010,  0.3874, -0.5764,  0.2020,  1.0856]])
torch.Size([4, 3, 5])
tensor([[[ 1.1148, -1.0833,  0.4232, -0.1484, -0.3851],
         [-2.6710,  0.4270, -0.9558,  0.3349,  1.9110],
         [-2.4481,  1.2667, -0.8995,  0.3152,  1.3526]],

        [[-1.0925, -1.0177, -0.3591,  0.1256,  1.3251],
         [-2.1990,  0.8373, -0.7999,  0.2803,  1.3519],
         [-1.8843, -2.2462, -0.6061,  0.2120,  2.5093]],

        [[ 1.6029, -1.4370,  0.6052, -0.2122, -0.6086],
         [ 1.2979, -0.0585,  0.4605, -0.1613, -0.9965],
         [-1.1109, -0.5306, -0.3786,  0