<a href="https://colab.research.google.com/github/Tensor-Reloaded/Advanced-Topics-in-Neural-Networks-Template-2023/blob/main/Lab2/TensorOperations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorboard

# Tensor

## Tensor Creation

In [None]:
# From Python lists (the dtype is inferred from the data, so integers give an integer tensor):
torch.tensor([1, 2, 3])

tensor([1, 2, 3])

In [None]:
# Random tensor with a given size; values are sampled uniformly from [0, 1)
torch.rand((4, 8))

tensor([[0.7334, 0.5990, 0.5604, 0.0939, 0.1298, 0.3652, 0.7000, 0.2137],
        [0.6621, 0.9500, 0.1914, 0.8693, 0.6357, 0.3245, 0.7121, 0.6523],
        [0.2313, 0.7098, 0.7683, 0.3794, 0.7030, 0.0241, 0.7564, 0.2906],
        [0.7373, 0.9153, 0.2873, 0.3964, 0.2253, 0.2775, 0.1097, 0.0178]])

In [None]:
# Zero tensor of shape (3, 3, 3); the result is floating point by default
torch.zeros((3, 3, 3))

tensor([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])

In [None]:
# Evenly spaced values from 5 (inclusive) up to 15 (exclusive) with step 2
torch.arange(5, 15, 2)

tensor([ 5,  7,  9, 11, 13])

In [None]:
# Tensor of shape (2, 10) with every element set to the fill value 5
torch.full((2, 10), 5)

tensor([[5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5]])

In [None]:
# 4x4 identity matrix: ones on the main diagonal, zeros elsewhere
torch.eye(4)

tensor([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

## Operations with Tensors

In [None]:
# Two random batches with identical shapes: 100 matrices of size 20x30 each
t1 = torch.rand((100, 20, 30))
t2 = torch.rand((100, 20, 30))
print(t1.shape, t2.shape)

torch.Size([100, 20, 30]) torch.Size([100, 20, 30])


In [None]:
# `*` is the element-wise (Hadamard) product; both operands have the same shape,
# so the result keeps that shape
print((t1 * t2).shape)
# torch.mul is the function form of `*`, so both give exactly the same tensor
torch.equal(t1 * t2, torch.mul(t1, t2))

torch.Size([100, 20, 30])


True

In [None]:
# Swap dimensions 1 and 2 so that each 20x30 matrix of t1 can be multiplied by a 30x20 matrix
t2t = t2.transpose(1, 2)
print(t2t.shape)
# `@` multiplies the matrices pairwise along the batch dimension: (20x30) @ (30x20) -> (20x20)
print((t1 @ t2t).shape)

torch.Size([100, 30, 20])
torch.Size([100, 20, 20])


(True, True)

In [None]:
# For two 3D tensors, torch.bmm (batched matrix multiplication) matches the `@` operator
torch.equal(t1 @ t2t, torch.bmm(t1, t2t))

True

In [None]:
# torch.matmul is the function form of the `@` operator
torch.equal(t1 @ t2t, torch.matmul(t1, t2t))

True

## Casting tensors to device

In [None]:
# Tensors are by default on CPU
# Please ensure that you have access to a GPU first (in Google Colab, change Runtime type to T4 GPU).
x = torch.arange(5, 15, 2)
# .to('cuda') returns a copy of the tensor on the GPU; x itself stays on the CPU
x_cuda = x.to('cuda')

In [None]:
# The printed representation includes the device the tensor is stored on
x_cuda

tensor([ 5,  7,  9, 11, 13], device='cuda:0')

In [None]:
# Tensors should be on the same device: adding a CUDA tensor to a CPU tensor
# raises a RuntimeError. The error is caught and printed so that this
# demonstration does not stop a "Restart & Run All" of the notebook.
try:
    x_cuda + x
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: ignored

In [None]:
# Fix the seed so that the random tensors are reproducible
torch.manual_seed(10)
# NOTE: each tensor holds 10000 * 200 * 300 = 6e8 values, i.e. about 2.4 GB
# with the default float32 dtype; make sure enough memory is available
a = torch.rand((10000, 200, 300))
b = torch.rand((10000, 200, 300))
print("a", a.shape)
print("b", b.shape)
print("b transposed", b.transpose(1, 2).shape)

a torch.Size([10000, 200, 300])
b torch.Size([10000, 200, 300])
b transposed torch.Size([10000, 300, 200])


In [None]:
%%timeit
# CPU baseline: element-wise product, batched matrix multiplication, then mean over dimension 1
c = (a * b) @ b.transpose(1, 2)
c.mean(axis=1)

5.63 s ± 788 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Copy both tensors to the GPU (requires a CUDA-capable device)
a_cuda = a.to('cuda')
b_cuda = b.to('cuda')

In [None]:
%%timeit
# Now it is much faster
c_cuda = (a_cuda * b_cuda) @ b_cuda.transpose(1, 2)
c_cuda.mean(axis=1)

The slowest run took 4.75 times longer than the fastest. This could mean that an intermediate result is being cached.
76 µs ± 59.3 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Tensor reshaping

In [None]:
# NOTE: this rebinds `a` (and `b` below), replacing the large tensors used in the timing cells
a = torch.rand((100, 20, 25))
print("a", a.shape, sep="\t\t\t")
# flatten() and reshape(-1) both collapse all dimensions into one: 100 * 20 * 25 = 50000
print("a.flatten()", a.flatten().shape, sep="\t\t")
print("a.reshape(-1)", a.reshape(-1).shape, sep="\t\t")
# -1 asks PyTorch to infer that dimension from the total number of elements
print("a.reshape(100, -1)", a.reshape(100, -1).shape, sep="\t")
# reshape only reinterprets the element order; unlike transpose it does not swap axes
print("a.reshape(100, 25, 20)", a.reshape(100, 25, 20).shape, sep="\t")
# view() also accepts -1 for an inferred dimension: 50000 / (5 * 5) = 2000
print("a.view(-1, 5, 5)", a.view(-1, 5, 5).shape, sep="\t")
# unsqueeze(i) inserts a new dimension of size 1 at position i
print("a.unsqueeze(0)", a.unsqueeze(0).shape, sep="\t\t")
print("a.unsqueeze(-1)", a.unsqueeze(-1).shape, sep="\t\t")

b = torch.tensor([[[1, 2]]])
print("b", b.shape, sep="\t\t\t")
# squeeze() with no argument removes every dimension of size 1
print("b.squeeze()", b.squeeze().shape, sep="\t\t")

a			torch.Size([100, 20, 25])
a.flatten()		torch.Size([50000])
a.reshape(-1)		torch.Size([50000])
a.reshape(100, -1)	torch.Size([100, 500])
a.reshape(100, 25, 20)	torch.Size([100, 25, 20])
a.view(-1, 5, 5)	torch.Size([2000, 5, 5])
a.unsqueeze(0)		torch.Size([1, 100, 20, 25])
a.unsqueeze(-1)		torch.Size([100, 20, 25, 1])
b			torch.Size([1, 1, 2])
b.squeeze()		torch.Size([2])


## Tensor concatenation and stacking

In [None]:
# Two tensors of the same shape (2, 4), easy to tell apart in the outputs below
x = torch.ones((2, 4))
y = torch.zeros((2, 4))

In [None]:
# cat joins tensors along an EXISTING dimension (dim=0 by default): (2, 4) and (2, 4) -> (4, 4)
torch.cat((x, y))

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# stack joins tensors along a NEW dimension (dim=0 by default): two (2, 4) tensors -> (2, 2, 4)
aux = torch.stack((x, y))
aux

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [None]:
# The new leading dimension has one entry per stacked tensor
aux.shape

torch.Size([2, 2, 4])

## More Tensor operations

In [None]:
a = torch.rand((5, 20))
# Without arguments, mean() reduces over all elements and returns a 0-dimensional tensor
a.mean()

tensor(0.4964)

In [None]:
# Reduce over dimension 1 (the 20 columns): one mean per row -> shape (5,)
a.mean(axis=1)

tensor([0.4728, 0.4986, 0.4916, 0.5373, 0.4817])

In [None]:
# Reduce over dimension 0 (the 5 rows): one mean per column -> shape (20,)
a.mean(axis=0)

tensor([0.5597, 0.3429, 0.4982, 0.3765, 0.5175, 0.5259, 0.5448, 0.5924, 0.3048,
        0.3993, 0.5347, 0.3125, 0.5278, 0.5737, 0.6224, 0.7941, 0.4389, 0.6098,
        0.3985, 0.4537])

In [None]:
# clip limits every value to the range [0.45, 0.55]; values inside the range are unchanged
a.mean(axis=0).clip(min=0.45, max=0.55)

tensor([0.5500, 0.4500, 0.4982, 0.4500, 0.5175, 0.5259, 0.5448, 0.5500, 0.4500,
        0.4500, 0.5347, 0.4500, 0.5278, 0.5500, 0.5500, 0.5500, 0.4500, 0.5500,
        0.4500, 0.4537])

In [None]:
# Without arguments, norm() is the 2-norm over all elements (the Frobenius norm of the matrix)
a.norm()

tensor(5.6000)

In [None]:
# A single integer indexes the first dimension: this is row 0
a[0]

tensor([0.8746, 0.3438, 0.3898, 0.4394, 0.7267, 0.4703, 0.8649, 0.4763, 0.3747,
        0.8247, 0.5802, 0.1777, 0.1182, 0.4460, 0.2637, 0.7817, 0.2338, 0.5566,
        0.4036, 0.1098])

In [None]:
# One index per dimension: the element at row 0, column 2
a[0, 2]

tensor(0.3898)

In [None]:
# A list of integers selects several rows at once: rows 0 and 2
a[[0, 2]]

tensor([[0.8746, 0.3438, 0.3898, 0.4394, 0.7267, 0.4703, 0.8649, 0.4763, 0.3747,
         0.8247, 0.5802, 0.1777, 0.1182, 0.4460, 0.2637, 0.7817, 0.2338, 0.5566,
         0.4036, 0.1098],
        [0.4715, 0.8232, 0.5346, 0.2440, 0.2562, 0.7093, 0.4590, 0.5074, 0.1488,
         0.5472, 0.5946, 0.0998, 0.3136, 0.7988, 0.7241, 0.3884, 0.4512, 0.7282,
         0.8089, 0.2229]])

In [None]:
# A list of booleans acts as a mask over the first dimension: keeps rows 0 and 2
a[[True, False, True, False, False]]

tensor([[0.8746, 0.3438, 0.3898, 0.4394, 0.7267, 0.4703, 0.8649, 0.4763, 0.3747,
         0.8247, 0.5802, 0.1777, 0.1182, 0.4460, 0.2637, 0.7817, 0.2338, 0.5566,
         0.4036, 0.1098],
        [0.4715, 0.8232, 0.5346, 0.2440, 0.2562, 0.7093, 0.4590, 0.5074, 0.1488,
         0.5472, 0.5946, 0.0998, 0.3136, 0.7988, 0.7241, 0.3884, 0.4512, 0.7282,
         0.8089, 0.2229]])

In [None]:
# Comparisons are element-wise and give a boolean tensor with the same shape as a
mask = a > 0.5
mask

tensor([[ True, False, False, False,  True, False,  True, False, False,  True,
          True, False, False, False, False,  True, False,  True, False, False],
        [ True, False, False,  True, False, False,  True,  True, False, False,
          True, False,  True,  True,  True,  True, False, False, False,  True],
        [False,  True,  True, False, False,  True, False,  True, False,  True,
          True, False, False,  True,  True, False, False,  True,  True, False],
        [ True, False,  True,  True,  True,  True,  True,  True, False, False,
         False,  True,  True, False,  True,  True,  True,  True, False,  True],
        [False, False,  True, False,  True, False, False,  True,  True, False,
          True, False,  True,  True,  True,  True, False,  True, False, False]])

In [None]:
# Boolean mask for the first row of a
mask[0]

tensor([ True, False, False, False,  True, False,  True, False, False,  True,
         True, False, False, False, False,  True, False,  True, False, False])

In [None]:
# nonzero() returns the positions of the True entries as a 2D tensor,
# one row per entry and one column per dimension of the input (here: shape (k, 1))
indices = mask[0].nonzero()
indices

tensor([[ 0],
        [ 4],
        [ 6],
        [ 9],
        [10],
        [15],
        [17]])

In [None]:
# Indexing with the (k, 1) index tensor gives a (k, 1) result;
# every selected entry is True because the indices come from nonzero()
mask[0][indices]

tensor([[True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True]])

In [None]:
# Reduce over dimension 0: a column is True only if it is True in every row
mask.all(axis=0)

tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False, False])

In [None]:
# Reduce over dimension 1: a row is True if at least one of its entries is True
mask.any(axis=1)

tensor([True, True, True, True, True])

In [None]:
# Indexing with a boolean mask returns a 1D tensor holding the selected values.
# The result is a copy, not a view, so later changes to b do not affect a.
b = a[mask]
b

tensor([0.8746, 0.7267, 0.8649, 0.8247, 0.5802, 0.7817, 0.5566, 0.5847, 0.5999,
        0.7767, 0.5080, 0.6828, 0.6198, 0.9024, 0.9161, 0.9114, 0.8034, 0.8232,
        0.5346, 0.7093, 0.5074, 0.5472, 0.5946, 0.7988, 0.7241, 0.7282, 0.8089,
        0.5752, 0.5510, 0.5811, 0.6735, 0.5853, 0.5603, 0.8697, 0.6276, 0.5944,
        0.5223, 0.9904, 0.8626, 0.9022, 0.7115, 0.7493, 0.8991, 0.6004, 0.5862,
        0.6724, 0.9932, 0.6119, 0.6859, 0.8985, 0.6501])

In [None]:
# Masked assignment modifies b in place: every value below 0.7 is set to 0
b[b < 0.7] = 0
b

tensor([0.8746, 0.7267, 0.8649, 0.8247, 0.0000, 0.7817, 0.0000, 0.0000, 0.0000,
        0.7767, 0.0000, 0.0000, 0.0000, 0.9024, 0.9161, 0.9114, 0.8034, 0.8232,
        0.0000, 0.7093, 0.0000, 0.0000, 0.0000, 0.7988, 0.7241, 0.7282, 0.8089,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.8697, 0.0000, 0.0000,
        0.0000, 0.9904, 0.8626, 0.9022, 0.7115, 0.7493, 0.8991, 0.0000, 0.0000,
        0.0000, 0.9932, 0.0000, 0.0000, 0.8985, 0.0000])

# Exercises

1. Write a function that receives a batch of gradients (a Tensor of shape [B, N, M]) and returns all gradients whose norm is within 1.5 standard deviations of the mean norm. If no gradient has a norm within 1.5 standard deviations of the mean, return None.


In [7]:
from typing import Union
from torch import Tensor
import torch


def get_normal_tensors(x: Tensor) -> Union[Tensor, None]:
    """Keep the gradients whose norm is within 1.5 standard deviations of the mean norm.

    Args:
        x: Batch of gradients of shape [B, N, M].

    Returns:
        A tensor of shape [K, N, M] holding the K gradients that satisfy the
        criterion, in their original order, or None if no gradient does
        (which includes an empty batch).
    """
    # An empty batch has no mean or standard deviation to compare against.
    if x.shape[0] == 0:
        return None

    # One norm per gradient: the 2-norm over all N * M entries (Frobenius norm).
    norms = torch.linalg.vector_norm(x.flatten(start_dim=1), dim=1)
    mean = norms.mean()
    # The sample standard deviation is undefined (NaN) for a single value;
    # treat it as 0 so that a lone gradient, whose norm equals the mean, is kept.
    std = norms.std() if norms.numel() > 1 else torch.zeros_like(mean)

    keep = (norms - mean).abs() <= 1.5 * std
    if not keep.any():
        return None
    return x[keep]


# Example call with a random batch of 100 gradients of shape 10x256
get_normal_tensors(torch.rand((100, 10, 256)))

References:
* [https://pytorch.org/docs/stable/torch.html#creation-ops](https://pytorch.org/docs/stable/torch.html#creation-ops)
* [https://pytorch.org/docs/stable/tensors.html#data-types](https://pytorch.org/docs/stable/tensors.html#data-types)
* [https://pytorch.org/docs/stable/torch.html#math-operations](https://pytorch.org/docs/stable/torch.html#math-operations)