# DEEP LEARNING
# Practice Lab -Tensors- PyTorch

In [None]:
%matplotlib inline


Tensors
--------------------------------------------

Tensors are a specialized data structure that is very similar to arrays
and matrices. In PyTorch, we use tensors to encode the inputs and
outputs of a model, as well as the model’s parameters.

Tensors are similar to NumPy’s ndarrays, except that tensors can run on
GPUs or other specialized hardware to accelerate computing. If you’re familiar with ndarrays, you’ll
be right at home with the Tensor API. If not, follow along in this quick
API walkthrough.




In [None]:
import torch # importing torch module
import numpy as np #importing numpy

### Tensor Initialization

Tensors can be initialized in various ways. Take a look at the following examples:

**Directly from data**

Tensors can be created directly from data. The data type is automatically inferred.



In [None]:
# Nested Python lists are turned into tensors; torch.tensor infers the dtype
# from the values it is given.
data = [[1, 2], [3, 4]]
d = [[1, 3], [5, 4], [2, 3]]  # practice: a 3x2 example

x_data = torch.tensor(data)
x_d = torch.tensor(d)

# A single print with a newline separator emits the same text as two prints.
print(x_data, x_d, sep="\n")

tensor([[1, 2],
        [3, 4]])
tensor([[1, 3],
        [5, 4],
        [2, 3]])


**From a NumPy array**

Tensors can be created from NumPy arrays (and vice versa; see the *Bridge with NumPy* section below).



In [None]:
np_array = np.array(data) # convert the nested list `data` to a NumPy array
x_np = torch.from_numpy(np_array) # create a tensor from that NumPy array

# practice: same conversion for `d`; note that np_array is rebound to a new array here
np_array = np.array(d)
d_np = torch.from_numpy(np_array)
print(d_np)

print(d_np.numpy()) # convert the tensor back to a NumPy array

tensor([[1, 3],
        [5, 4],
        [2, 3]])
[[1 3]
 [5 4]
 [2 3]]


**From another tensor:**

The new tensor retains the properties (shape, datatype) of the argument tensor, unless explicitly overridden.



In [None]:
x_ones = torch.ones_like(x_data) # retains the properties of x_data
# x_ones has the same shape and dtype as x_data, with every element set to one
print(f"Ones Tensor: \n {x_ones} \n")

x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
# x_rand has the same shape as x_data but holds random floating-point values
print(f"Random Tensor: \n {x_rand} \n")
print(x_rand.dtype) # confirms the overridden dtype

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.8087, 0.7314],
        [0.2827, 0.7291]]) 

torch.float32


**With random or constant values:**

``shape`` is a tuple of tensor dimensions. In the functions below, it determines the dimensionality of the output tensor.



In [None]:
# `shape` is (3, 4): every tensor below has 3 rows and 4 columns.
shape = (3,4,)
rand_tensor = torch.rand(shape)     # torch.rand() creates a tensor of the given shape (3, 4) with random values
ones_tensor = torch.ones(shape)     # torch.ones() creates a tensor of the given shape (3, 4) full of ones
zeros_tensor = torch.zeros(shape)   # torch.zeros() creates a tensor of the given shape (3, 4) full of zeros

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

Random Tensor: 
 tensor([[0.3817, 0.3508, 0.1552, 0.2815],
        [0.9732, 0.8914, 0.6358, 0.6483],
        [0.9047, 0.9208, 0.9152, 0.1587]]) 

Ones Tensor: 
 tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]) 

Zeros Tensor: 
 tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


--------------




### Tensor Attributes

Tensor attributes describe their shape, datatype, and the device on which they are stored.



In [None]:
# Inspect the three basic attributes of a freshly created 3x4 random tensor.
tensor = torch.rand(3,4)
print(tensor)
print(f"Shape of tensor: {tensor.shape}")   # shape is the size along each dimension (3 rows, 4 columns here), not the element count
print(f"Datatype of tensor: {tensor.dtype}")  # element type
print(f"Device tensor is stored on: {tensor.device}")  # device holding the tensor's data

tensor([[0.4551, 0.7329, 0.7171, 0.8822],
        [0.2666, 0.0958, 0.9873, 0.8850],
        [0.2309, 0.6633, 0.2378, 0.0573]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


--------------




### Tensor Operations

Over 100 tensor operations, including transposing, indexing, slicing,
mathematical operations, linear algebra, random sampling, and more are
comprehensively described
[here](https://pytorch.org/docs/stable/torch.html).

Each of them can be run on the GPU (at typically higher speeds than on a
CPU). If you’re using Colab, allocate a GPU by going to Edit > Notebook
Settings.




In [None]:
# By default, tensors are created on the CPU. They must be moved to the GPU
# explicitly with the .to method, after checking that a GPU is available.
# Move our tensor to the GPU if one is available; otherwise it stays where it is.
# Note: the indexing cell further down rebinds `tensor` to a new torch.rand(4, 4) tensor.
if torch.cuda.is_available():
  tensor = tensor.to('cuda')

Try out some of the operations from the list.
If you're familiar with the NumPy API, you'll find the Tensor API a breeze to use.




**Standard numpy-like indexing and slicing:**



In [None]:
tensor = torch.rand(4, 4)
tensor[:,2] = 0 # set every element of the third column to zero
print(tensor)
print(tensor[1,:3]) # second row, first three columns
print(tensor[:3,1]) # second column, first three rows
print(tensor[2:3,1]) # second column of the third row, kept as a 1-element tensor by the 2:3 slice

# practice
t = torch.rand(3,3)
t[1,:] = 0 # All second row elements are zero
print(f"\n\n{t}")
print(t[2,2:3]) # third row third column element
print(t[:,1]) # second column elements

tensor([[0.7540, 0.6046, 0.0000, 0.4025],
        [0.3668, 0.7603, 0.0000, 0.2815],
        [0.9901, 0.9484, 0.0000, 0.5971],
        [0.2871, 0.6856, 0.0000, 0.6752]])
tensor([0.3668, 0.7603, 0.0000])
tensor([0.6046, 0.7603, 0.9484])
tensor([0.9484])


tensor([[0.8276, 0.6937, 0.3250],
        [0.0000, 0.0000, 0.0000],
        [0.8910, 0.0893, 0.2729]])
tensor([0.2729])
tensor([0.6937, 0.0000, 0.0893])


**Joining tensors** You can use ``torch.cat`` to concatenate a sequence of tensors along a given dimension.
See also [torch.stack](https://pytorch.org/docs/stable/generated/torch.stack.html),
another tensor joining op that is subtly different from ``torch.cat``.



In [None]:
# torch.cat joins the tensors along an existing dimension: three 4x4 tensors
# concatenated along dim=1 give one 4x12 tensor.
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)
print(f"Shape of t1 tensor: {t1.shape}\n\n")

# torch.stack joins the tensors along a NEW dimension inserted at dim=1,
# so the result has one more dimension than the inputs.
t2 = torch.stack([tensor, tensor, tensor], dim=1)
print(t2)
print(f"Shape of t2 tensor: {t2.shape}")

tensor([[0.2323, 0.9469, 0.0000, 0.6924, 0.2323, 0.9469, 0.0000, 0.6924, 0.2323,
         0.9469, 0.0000, 0.6924],
        [0.5277, 0.9597, 0.0000, 0.8637, 0.5277, 0.9597, 0.0000, 0.8637, 0.5277,
         0.9597, 0.0000, 0.8637],
        [0.8422, 0.1734, 0.0000, 0.3032, 0.8422, 0.1734, 0.0000, 0.3032, 0.8422,
         0.1734, 0.0000, 0.3032],
        [0.9549, 0.4886, 0.0000, 0.2563, 0.9549, 0.4886, 0.0000, 0.2563, 0.9549,
         0.4886, 0.0000, 0.2563]])
Shape of t1 tensor: torch.Size([4, 12])


tensor([[[0.2323, 0.9469, 0.0000, 0.6924],
         [0.2323, 0.9469, 0.0000, 0.6924],
         [0.2323, 0.9469, 0.0000, 0.6924]],

        [[0.5277, 0.9597, 0.0000, 0.8637],
         [0.5277, 0.9597, 0.0000, 0.8637],
         [0.5277, 0.9597, 0.0000, 0.8637]],

        [[0.8422, 0.1734, 0.0000, 0.3032],
         [0.8422, 0.1734, 0.0000, 0.3032],
         [0.8422, 0.1734, 0.0000, 0.3032]],

        [[0.9549, 0.4886, 0.0000, 0.2563],
         [0.9549, 0.4886, 0.0000, 0.2563],
         [0.9549, 

**Multiplying tensors**



In [None]:
# This computes the element-wise product
print(f"tensor.mul(tensor) \n {tensor.mul(tensor)} \n")
# Alternative syntax:
print(f"tensor * tensor \n {tensor * tensor}\n\n\n")

# practice: the same two spellings applied to the 3x3 tensor t
print(f"t.mul(t) \n {t.mul(t)}")
print(f"t * t \n {t * t}")

tensor.mul(tensor) 
 tensor([[0.5686, 0.3656, 0.0000, 0.1620],
        [0.1346, 0.5781, 0.0000, 0.0793],
        [0.9803, 0.8994, 0.0000, 0.3566],
        [0.0824, 0.4701, 0.0000, 0.4559]]) 

tensor * tensor 
 tensor([[0.5686, 0.3656, 0.0000, 0.1620],
        [0.1346, 0.5781, 0.0000, 0.0793],
        [0.9803, 0.8994, 0.0000, 0.3566],
        [0.0824, 0.4701, 0.0000, 0.4559]])



t.mul(t) 
 tensor([[0.6849, 0.4812, 0.1056],
        [0.0000, 0.0000, 0.0000],
        [0.7939, 0.0080, 0.0745]])
t * t 
 tensor([[0.6849, 0.4812, 0.1056],
        [0.0000, 0.0000, 0.0000],
        [0.7939, 0.0080, 0.0745]])


This computes the matrix multiplication between two tensors



In [None]:
# Matrix product of tensor with its own transpose
print(f"tensor.matmul(tensor.T) \n {tensor.matmul(tensor.T)} \n")
# Alternative syntax:
print(f"tensor @ tensor.T \n {tensor @ tensor.T}\n\n\n")

# practice: the same two spellings applied to the 3x3 tensor t
print(f"t.matmul(t.T) \n {t.matmul(t.T)}")
print(f"t @ t.T \n {t @t.T}")

tensor.matmul(tensor.T) 
 tensor([[1.0962, 0.8496, 1.5603, 0.9028],
        [0.8496, 0.7919, 1.2523, 0.8167],
        [1.5603, 1.2523, 2.2363, 1.3377],
        [0.9028, 0.8167, 1.3377, 1.0084]]) 

tensor @ tensor.T 
 tensor([[1.0962, 0.8496, 1.5603, 0.9028],
        [0.8496, 0.7919, 1.2523, 0.8167],
        [1.5603, 1.2523, 2.2363, 1.3377],
        [0.9028, 0.8167, 1.3377, 1.0084]])



t.matmul(t.T) 
 tensor([[1.2718, 0.0000, 0.8881],
        [0.0000, 0.0000, 0.0000],
        [0.8881, 0.0000, 0.8764]])
t @ t.T 
 tensor([[1.2718, 0.0000, 0.8881],
        [0.0000, 0.0000, 0.0000],
        [0.8881, 0.0000, 0.8764]])


**In-place operations**
Operations that have a ``_`` suffix are in-place. For example: ``x.copy_(y)``, ``x.t_()``, will change ``x``.



In [None]:
print(tensor, "\n")
tensor.add_(5)  # in-place: adds 5 to every element of tensor itself (re-running the cell adds 5 again)
print(tensor)

tensor([[0.0242, 0.7602, 0.0000, 0.6087],
        [0.9032, 0.3869, 0.0000, 0.9505],
        [0.6046, 0.8818, 0.0000, 0.5558],
        [0.8368, 0.0237, 0.0000, 0.8123]]) 

tensor([[5.0242, 5.7602, 5.0000, 5.6087],
        [5.9032, 5.3869, 5.0000, 5.9505],
        [5.6046, 5.8818, 5.0000, 5.5558],
        [5.8368, 5.0237, 5.0000, 5.8123]])


In [None]:
# practice
print(t,"\n")
# t_() transposes t in place, so t itself is changed; re-running the cell transposes it back.
print(f"Transpose of t: \n {t.t_()}") # prints transpose of tensor t

tensor([[0.8276, 0.6937, 0.3250],
        [0.0000, 0.0000, 0.0000],
        [0.8910, 0.0893, 0.2729]]) 

Transpose of t: 
 tensor([[0.8276, 0.0000, 0.8910],
        [0.6937, 0.0000, 0.0893],
        [0.3250, 0.0000, 0.2729]])


<div class="alert alert-info"><h4>Note</h4><p>In-place operations save some memory, but can be problematic when computing derivatives because of an immediate loss
     of history. Hence, their use is discouraged.</p></div>



--------------





### Bridge with NumPy
Tensors on the CPU and NumPy arrays can share their underlying memory
locations, and changing one will change the other.



#### Tensor to NumPy array



In [None]:
t = torch.ones(5)
print(f"t: {t}")
n = t.numpy() # NumPy view of the CPU tensor t; the next cell shows that the two share memory
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


A change in the tensor is reflected in the NumPy array.



In [None]:
t.add_(1) # in-place add on the tensor ...
print(f"t: {t}")
print(f"n: {n}") # ... is visible through the NumPy array created from it

t: tensor([2., 2., 2., 2., 2.])
n: [2. 2. 2. 2. 2.]


In [None]:
# practice
# assumes `tensor` is on the CPU at this point (the markdown above describes memory sharing for CPU tensors)
N = tensor.numpy()
print(f"N: \n{N}\n\n")
tensor.sub_(0.1) # subtracting 0.1 from tensor, in place
print(f"tensor: \n{tensor}\n\n")
print(f"N: \n{N}") # the change is reflected in the NumPy array

N: 
[[0.7540403  0.3668148  0.9900768  0.2871253 ]
 [0.6046128  0.7602976  0.9483932  0.6856206 ]
 [0.         0.         0.         0.        ]
 [0.4025194  0.28151608 0.5971245  0.67521626]]


tensor: 
tensor([[ 0.6540,  0.2668,  0.8901,  0.1871],
        [ 0.5046,  0.6603,  0.8484,  0.5856],
        [-0.1000, -0.1000, -0.1000, -0.1000],
        [ 0.3025,  0.1815,  0.4971,  0.5752]])


N: 
[[ 0.6540403   0.2668148   0.89007676  0.1871253 ]
 [ 0.5046128   0.6602976   0.8483932   0.5856206 ]
 [-0.1        -0.1        -0.1        -0.1       ]
 [ 0.3025194   0.18151608  0.49712452  0.57521623]]


#### NumPy array to Tensor



In [None]:
n = np.ones(5)
t = torch.from_numpy(n) # tensor built from the NumPy array; the next cell shows that changes to n appear in t

Changes in the NumPy array are reflected in the tensor.



In [None]:
np.add(n, 1, out=n) # out=n writes the result back into n instead of allocating a new array
print(f"t: {t}")
print(f"n: {n}")

t: tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
n: [2. 2. 2. 2. 2.]


In [None]:
# practice
N = np.ones(4)
T = torch.from_numpy(N)
print(T,"\n\n")

np.multiply(N, 0.1, out = N) # in-place multiply: the result is written back into N
print(f"N: {N}")
print(f"T: {T}") # multiplying the NumPy array by 0.1 is reflected in the tensor

tensor([1., 1., 1., 1.], dtype=torch.float64) 


[0.1 0.1 0.1 0.1]
tensor([0.1000, 0.1000, 0.1000, 0.1000], dtype=torch.float64)


In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

## Initialise tensors

In [None]:
# Build a 3x2 tensor with each factory in turn (ones, zeros, random values)
# and show it; x ends up bound to the random tensor.
for factory in (torch.ones, torch.zeros, torch.rand):
    x = factory(3, 2)
    print(x)

In [None]:
# torch.empty allocates a 3x2 tensor without initializing it (per the PyTorch
# docs), so the printed values are arbitrary and differ between runs.
x = torch.empty(3, 2)
print(x)
y = torch.zeros_like(x) # zeros with the same shape and dtype as x
print(y)

In [None]:
x = torch.linspace(0, 1, steps=5) # 5 evenly spaced values from 0 to 1
print(x)

In [None]:
# 3x2 integer tensor built from a nested list; reused by the slicing and reshaping cells below.
x = torch.tensor([[1, 2],
                 [3, 4],
                 [5, 6]])
print(x)

## Slicing tensors

In [None]:
print(x.size()) # size along each dimension
print(x[:, 1]) # second column
print(x[0, :]) # first row

In [None]:
y = x[1, 1] # indexing a single element still yields a tensor
print(y)
print(y.item()) # .item() extracts the plain Python number

## Reshaping tensors

In [None]:
print(x)
y = x.view(2, 3) # same six elements, read in row-major order, arranged as 2 rows x 3 columns
print(y)

In [None]:
y = x.view(6,-1) # -1 lets PyTorch infer the remaining dimension: 6 elements -> shape (6, 1)
print(y)

## Simple Tensor Operations

In [None]:
# Two independent 3x2 tensors of ones, combined element-wise by addition,
# subtraction and multiplication in turn; z keeps the last result (x * y).
x, y = torch.ones([3, 2]), torch.ones([3, 2])
for combine in (torch.add, torch.sub, torch.mul):
    z = combine(x, y)
    print(z)

In [None]:
z = y.add(x) # out-of-place add: the sum goes to a new tensor z
print(z)
print(y) # y is unchanged

In [None]:
z = y.add_(x) # in-place add (trailing underscore): y itself is modified; re-running the cell adds x again
print(z)
print(y) # y now holds the sum as well

## Numpy <> PyTorch

In [None]:
x_np = x.numpy() # NumPy view of the tensor x (rebinds the x_np name used earlier in the notebook)
print(type(x), type(x_np))
print(x_np)

In [None]:
a = np.random.randn(5) # 5 samples from a standard normal distribution
print(a)
a_pt = torch.from_numpy(a) # tensor built from the NumPy array a
print(type(a), type(a_pt))
print(a_pt)

In [None]:
np.add(a, 1, out=a) # in-place add on the NumPy array (out=a)
print(a)
print(a_pt) # the tensor created from a shows the same updated values

In [None]:
%%time
# NumPy baseline: 100 rounds of drawing two 100x100 standard-normal matrices
# and forming their matrix product.
side = 100
for i in range(100):
  a, b = np.random.randn(side, side), np.random.randn(side, side)
  c = a @ b

In [None]:
%%time
# PyTorch (CPU) counterpart of the NumPy benchmark in the previous cell:
# 100 rounds of two fresh 100x100 standard-normal tensors and their matrix product.
for i in range(100):
  a = torch.randn(100, 100)
  b = torch.randn(100, 100)
  c = a.matmul(b)

In [None]:
%%time
# NumPy element-wise add benchmark on 10000x10000 matrices (10 rounds).
# The timing includes generating the random matrices, not only the addition.
# NOTE(review): NumPy's default here is float64 while torch.randn defaults to
# float32, so the NumPy and PyTorch timings are not like-for-like - confirm intended.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  c = a + b

In [None]:
%%time
# PyTorch (CPU) element-wise add benchmark on 10000x10000 tensors (10 rounds).
# The timing includes generating the random tensors, not only the addition.
for i in range(10):
  a = torch.randn([10000, 10000])
  b = torch.randn([10000, 10000])
  c = a + b

## CUDA support

In [None]:
# Number of CUDA devices PyTorch can see; the cells below use device 0.
print(torch.cuda.device_count())

In [None]:
# NOTE(review): torch.cuda.device is a context-manager class, so the first
# print shows an object repr rather than device details - confirm this is intended.
print(torch.cuda.device(0))
# Human-readable name of CUDA device 0; presumably raises when no CUDA device is present - verify.
print(torch.cuda.get_device_name(0))

In [None]:
# Handle for the first CUDA device; passed as device=cuda0 by the GPU cells below.
cuda0 = torch.device('cuda:0')

In [None]:
# Create both operands directly on the CUDA device (requires cuda0 from the previous cell).
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b # both operands were created on cuda0
print(c)

In [None]:
# Show the tensor that the previous cell created on cuda0.
print(a)

In [None]:
%%time
# NumPy add benchmark on 10000x10000 matrices (10 rounds); timing includes data generation.
# NOTE(review): np.add(b, a) returns a new array that is discarded, whereas the
# PyTorch cells below add in place with add_ - pass out=b if an in-place
# comparison is intended.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)

In [None]:
%%time
# PyTorch CPU in-place add benchmark on 10000x10000 tensors (10 rounds); timing includes data generation.
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  b_cpu.add_(a_cpu) # in-place: result is written into b_cpu

In [None]:
%%time
# PyTorch GPU in-place add benchmark on 10000x10000 tensors (10 rounds).
# CUDA kernels are launched asynchronously, so without an explicit synchronize
# the timer can stop while work is still queued on the device and under-report
# the real cost.
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  b.add_(a)  # in-place: result is written into b
torch.cuda.synchronize(cuda0)  # wait for all queued kernels before %%time stops

In [None]:
%%time
# NumPy matrix-multiply benchmark on 10000x10000 matrices (10 rounds);
# the product is discarded and the timing includes data generation.
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)

In [None]:
%%time
# PyTorch CPU matrix-multiply benchmark on 10000x10000 tensors (10 rounds);
# the product is discarded and the timing includes data generation.
for i in range(10):
  a_cpu = torch.randn([10000, 10000])
  b_cpu = torch.randn([10000, 10000])
  torch.matmul(a_cpu, b_cpu)

In [None]:
%%time
# PyTorch GPU matrix-multiply benchmark on 10000x10000 tensors (10 rounds).
# CUDA kernels run asynchronously with respect to the Python thread, so the
# device must be synchronized before the timer stops; otherwise %%time mostly
# measures kernel-launch overhead rather than the multiplications.
for i in range(10):
  a = torch.randn([10000, 10000], device=cuda0)
  b = torch.randn([10000, 10000], device=cuda0)
  torch.matmul(a, b)  # product is discarded; only the cost matters here
torch.cuda.synchronize(cuda0)  # wait for all queued kernels before %%time stops

## Autodiff

In [None]:
# requires_grad=True asks autograd to track operations on x so gradients with respect to it can be computed.
x = torch.ones([3, 2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [None]:
y = x + 5 # y is derived from x, so it carries a grad_fn recording the addition
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [None]:
z = y*y + 1 # element-wise: z_i = y_i^2 + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [None]:
t = torch.sum(z) # reduce to a scalar so backward() can be called without arguments
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [None]:
# Back-propagate from the scalar t; the resulting gradient with respect to x is stored in x.grad.
t.backward()

In [None]:
# dt/dx_i = 2 * y_i = 2 * (x_i + 5), which is 12 at x_i = 1 (derivation in the markdown below).
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


$t = \sum_i z_i, z_i = y_i^2 + 1, y_i = x_i + 5$

$\frac{\partial t}{\partial x_i} = \frac{\partial z_i}{\partial x_i} = \frac{\partial z_i}{\partial y_i} \frac{\partial y_i}{\partial x_i} = 2y_i \times 1$


At x = 1, y = 6, $\frac{\partial t}{\partial x_i} = 12$

In [None]:
# Same pattern with a logistic (sigmoid) function of y = x + 5.
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y)) # logistic sigmoid written out explicitly
print(r)
s = torch.sum(r) # scalar, so backward() needs no argument
s.backward()
print(x.grad) # ds/dx

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [None]:
# Same computation without the explicit sum: r is not a scalar, so backward()
# is given the upstream gradient ds/dr explicitly (see the markdown below).
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2]) # ds/dr for s = sum(r) is all ones
r.backward(a)
print(x.grad) # matches the result of the previous cell

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


$\frac{\partial{s}}{\partial{x}} = \frac{\partial{s}}{\partial{r}} \cdot \frac{\partial{r}}{\partial{x}}$

For the above code $a$ represents $\frac{\partial{s}}{\partial{r}}$ and then $x.grad$ gives directly $\frac{\partial{s}}{\partial{x}}$



## Autodiff example that looks like what we have been doing

In [None]:
# Synthetic regression data: 20 inputs and noise-free targets on the line y = 3x - 2.
# The inputs are plain data, not parameters, so they are created without
# requires_grad; only w and b (defined in the next cell) need gradients.
# This matches the training-loop cell further down and keeps the targets out
# of the autograd graph.
x = torch.randn([20, 1])
y = 3*x - 2

In [None]:
# Learnable parameters of the model y_hat = w*x + b, both starting at 1.
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

y_hat = w*x + b # prediction; w and b broadcast against x

loss = torch.sum((y_hat - y)**2) # summed squared error over all samples

In [None]:
# Initial loss for w = b = 1; the value varies between runs because x is random.
print(loss)

tensor(318.2823, grad_fn=<SumBackward0>)


In [None]:
# Compute d(loss)/dw and d(loss)/db; the results are stored in w.grad and b.grad.
loss.backward()

In [None]:
# Gradients of the loss with respect to w and b, as filled in by loss.backward().
print(w.grad, b.grad)

tensor([-106.4956]) tensor([141.1912])


## Do it in a loop

In [None]:
# Gradient-descent fit of y_hat = w*x + b to samples of the line y = 3x - 2.
learning_rate = 0.01

# Both parameters start at 1 and are tracked by autograd.
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())

for i in range(10):
  # A fresh batch of 20 inputs with noise-free targets is drawn every step.
  x = torch.randn([20, 1])
  y = 3*x - 2

  # Forward pass and summed squared error.
  y_hat = w*x + b
  loss = (y_hat - y).pow(2).sum()

  loss.backward()

  # Parameter updates must not be recorded in the autograd graph.
  with torch.no_grad():
    for param in (w, b):
      param.sub_(learning_rate * param.grad)
      # Clear the gradient after the update so every step starts from zero.
      param.grad.zero_()

  print(w.item(), b.item())


1.0 1.0
1.694516658782959 -0.32816600799560547
2.5244972705841064 -0.9011859893798828
2.6990771293640137 -1.3381099700927734
2.7810328006744385 -1.5905817747116089
2.821857213973999 -1.7378290891647339
2.943121910095215 -1.868725061416626
2.9525837898254395 -1.9191371202468872
2.9741718769073486 -1.9551563262939453
2.9911296367645264 -1.972025752067566
2.994936943054199 -1.9838125705718994


## Do it for a large problem

In [None]:
%%time
# CPU timing benchmark: the same update loop as before, but with an
# N-dimensional weight vector and a single scalar prediction per epoch.
# NOTE(review): with N = 1e7 and this learning rate the updates look likely to
# diverge; the cell appears to be used for timing only - confirm.
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):

  # One random N-dimensional sample per epoch; the target uses weights that are all 3 and offset -2.
  x = torch.randn([N])
  y = torch.dot(3*torch.ones([N]), x) - 2

  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)

  loss.backward()

  # Update the parameters outside the autograd graph, then reset the gradients.
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

#   print(torch.mean(w).item(), b.item())


CPU times: user 36.7 s, sys: 443 ms, total: 37.2 s
Wall time: 37.2 s


In [None]:
%%time
# GPU timing benchmark: same loop as the CPU version, with every tensor
# created on cuda0 (defined in the CUDA support section).
learning_rate = 0.001
N = 10000000
epochs = 200

w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.ones([1], requires_grad=True, device=cuda0)

# print(torch.mean(w).item(), b.item())

for i in range(epochs):

  # One random N-dimensional sample per epoch; the target uses weights that are all 3 and offset -2.
  x = torch.randn([N], device=cuda0)
  y = torch.dot(3*torch.ones([N], device=cuda0), x) - 2

  y_hat = torch.dot(w, x) + b
  loss = torch.sum((y_hat - y)**2)

  loss.backward()

  # Update the parameters outside the autograd graph, then reset the gradients.
  with torch.no_grad():
    w -= learning_rate * w.grad
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  #print(torch.mean(w).item(), b.item())

# Nothing in the loop copies a result back to the host, and CUDA kernels are
# queued asynchronously, so wait for the device to finish before %%time stops;
# otherwise the reported time can exclude work that is still pending.
torch.cuda.synchronize(cuda0)


CPU times: user 467 ms, sys: 305 ms, total: 772 ms
Wall time: 784 ms
