# Matrix Aggregation 

Aggregation refers to extracting meaningful information from matrices or tensors
(such as the sum, mean, or maximum value).

It is essential for understanding patterns in the data and for reducing dimensionality.


In [1]:
import torch

In [2]:
# 3x3 float matrix used as the running example for the aggregation demos.
matrix = torch.tensor(
    [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
    ]
)

# Show the input before applying any aggregation.
print("Matrix : ", matrix)

Matrix :  tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])


In [3]:
# Global sum: every element of the matrix is added into a single scalar tensor.
print("Sum of elements : ", torch.sum(matrix))

Sum of elements :  tensor(45.)


In [4]:
# Global statistics: with no `dim` argument each call reduces the whole
# matrix to a single scalar tensor.

# Smallest and largest element
print("Min of matrix : ", torch.min(matrix))
print("Max of matrix : ", torch.max(matrix))

# Central tendency: arithmetic mean and median
print("Mean of matrix : ", torch.mean(matrix))
print("Median of matrix : ", torch.median(matrix))

Min of matrix :  tensor(1.)
Max of matrix :  tensor(9.)
Mean of matrix :  tensor(5.)
Median of matrix :  tensor(5.)


In [5]:
# Aggregations can be restricted to one dimension instead of the whole matrix:
#   dim=0 -> reduce over the rows, giving one result per column
#   dim=1 -> reduce over the columns, giving one result per row
# When `dim` is given, min/max return both the values and their indices.

# Sum over the rows (one total per column)
print("Sum of elements (on rows) : ", torch.sum(matrix, dim=0))

# Minimum over the rows, with the row index where each minimum was found
print("Min of matrix (on rows): ", torch.min(matrix, dim=0))
# Maximum over the rows, with the row index where each maximum was found
print("Max of matrix (on rows): ", torch.max(matrix, dim=0))


Sum of elements (on rows) :  tensor([12., 15., 18.])
Min of matrix (on rows):  torch.return_types.min(
values=tensor([1., 2., 3.]),
indices=tensor([0, 0, 0]))
Max of matrix (on rows):  torch.return_types.max(
values=tensor([7., 8., 9.]),
indices=tensor([2, 2, 2]))


In [6]:
# Same aggregations with dim=1: reduce over the columns, one result per row.

# Sum over the columns (one total per row)
print("Sum of elements (on cols) : ", torch.sum(matrix, dim=1))

# Minimum over the columns, with the column index of each minimum
print("Min of matrix (on cols): ", torch.min(matrix, dim=1))
# Maximum over the columns, with the column index of each maximum
print("Max of matrix (on cols): ", torch.max(matrix, dim=1))

Sum of elements (on cols) :  tensor([ 6., 15., 24.])
Min of matrix (on cols):  torch.return_types.min(
values=tensor([1., 4., 7.]),
indices=tensor([0, 0, 0]))
Max of matrix (on cols):  torch.return_types.max(
values=tensor([3., 6., 9.]),
indices=tensor([2, 2, 2]))


In [7]:
# Cumulative aggregation: the output keeps the input shape, and each entry
# holds the running total from the start of the dimension up to that index.

# dim=1: accumulate along each row, moving across the columns
torch.cumsum(matrix, dim=1)

tensor([[ 1.,  3.,  6.],
        [ 4.,  9., 15.],
        [ 7., 15., 24.]])

In [8]:
# dim=0: accumulate down each column, moving across the rows
torch.cumsum(matrix, dim=0)

tensor([[ 1.,  2.,  3.],
        [ 5.,  7.,  9.],
        [12., 15., 18.]])

In [9]:
# A cumulative product exists as well and works the same way.
# dim=0: running product down each column, moving across the rows
torch.cumprod(matrix, dim=0)

tensor([[  1.,   2.,   3.],
        [  4.,  10.,  18.],
        [ 28.,  80., 162.]])

In [None]:
# Advanced aggregation: conditional sum
# `matrix > 5` builds a boolean mask; indexing the matrix with that mask keeps
# only the matching values, which are then summed over the whole matrix.
torch.sum(matrix[matrix > 5])

tensor(30.)

In [14]:
# Counting non-zero elements
# nonzero() returns one (row, col) index pair per non-zero value; here every
# value is non-zero, so every position of the matrix is listed.
non_zero = torch.nonzero(matrix)
print(non_zero)
print(non_zero.shape)

# The first dimension of the index tensor is the number of non-zero values.
print(non_zero.shape[0])

tensor([[0, 0],
        [0, 1],
        [0, 2],
        [1, 0],
        [1, 1],
        [1, 2],
        [2, 0],
        [2, 1],
        [2, 2]])
torch.Size([9, 2])
9


In [15]:
# Same count on a matrix that actually contains zeros (two of them).
matrix = torch.tensor(
    [
        [1.0, 2.0, 0.0],
        [4.0, 5.0, 6.0],
        [7.0, 0.0, 9.0],
    ]
)
non_zero = torch.nonzero(matrix)
# Number of index pairs == number of non-zero values
print(non_zero.shape[0])

7


In [None]:
# Aggregates can also drive normalization: min-max scaling uses the global
# minimum and maximum to rescale every value.
matrix = torch.tensor(
    [
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
    ]
)

# Global extremes of the matrix
min_val = torch.min(matrix)
max_val = torch.max(matrix)

# Shift so the minimum becomes 0, then divide by the value range so the
# maximum becomes 1.
normalized_matrix = torch.div(matrix - min_val, max_val - min_val)
print(normalized_matrix)  # every value now lies in the range 0-1

tensor([[0.0000, 0.1250, 0.2500],
        [0.3750, 0.5000, 0.6250],
        [0.7500, 0.8750, 1.0000]])
