<a href="https://colab.research.google.com/github/Arpitkamal/Deep-learning-107-Introduction-to-Pytorch/blob/master/PytochIntroduction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Outline


1.   PyTorch
2.   What are tensors
3.   Initialisation, slicing, reshaping tensors
4.   Numpy and PyTorch interfacing
5.   GPU support for PyTorch + Enabling GPUs on Google Colab
6.   Speed comparisons, Numpy -- PyTorch -- PyTorch on GPU
7.   Autograd concepts and application
8.   Writing a basic learning loop using utograd




In [0]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Initialise tensors

In [0]:
x = torch.ones(3, 2)
print(x)
x = torch.zeros(3, 2)
print(x)
x = torch.rand(3, 2)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[0.1177, 0.4077],
        [0.9510, 0.5064],
        [0.5405, 0.3287]])


In [0]:
x = torch.empty(3, 2)
print(x)
y = torch.zeros_like(x)
print(y)

tensor([[9.4965e-36, 0.0000e+00],
        [3.3631e-44, 0.0000e+00],
        [       nan, 0.0000e+00]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])


In [0]:
x = torch.linspace(0, 1, steps=5)
print(x)

tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [0]:
x = torch.tensor([[1, 2],
                  [3, 4],
                  [5, 6]])
print(x)

tensor([[1, 2],
        [3, 4],
        [5, 6]])


# Slicing tensors

In [0]:
print(x.size())
print(x[:, 0])
print(x[0, :])

torch.Size([3, 2])
tensor([1, 3, 5])
tensor([1, 2])


In [0]:
y = x[1, 0]
print(y)
print(y.item())

tensor(3)
3


# Reshaping tensors

In [0]:
print(x)
y = x.view(2, 3)
print(y)

tensor([[1, 2],
        [3, 4],
        [5, 6]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [0]:
y = x.view(6, -1)
print(y)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6]])


# Simple Tensor Operations


In [0]:
x = torch.ones([3, 2])
y = torch.ones([3, 2])
z = x + y
print(z)
z = x - y
print(z)
z = x * y
print(z)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [0]:
z = y.add(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[1., 1.],
        [1., 1.],
        [1., 1.]])


In [0]:
z = y.add_(x)
print(z)
print(y)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])
tensor([[2., 2.],
        [2., 2.],
        [2., 2.]])


# Numpy <> PyTorch

In [0]:
x_np = x.numpy()
print(type(x), type(x_np))
print(x_np)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
[[1. 1.]
 [1. 1.]
 [1. 1.]]


In [0]:
a = np.random.randn(5)
print(a)
a_pt = torch.from_numpy(a)
print(type(a), type(a_pt))
print(a_pt)

[-1.02886155  1.41561835 -0.69787448 -1.69084527  2.60505143]
<class 'numpy.ndarray'> <class 'torch.Tensor'>
tensor([-1.0289,  1.4156, -0.6979, -1.6908,  2.6051], dtype=torch.float64)


In [0]:
np.add(a, 1, out=a)
print(a)
print(a_pt)

[-0.02886155  2.41561835  0.30212552 -0.69084527  3.60505143]
tensor([-0.0289,  2.4156,  0.3021, -0.6908,  3.6051], dtype=torch.float64)


In [0]:
%%time
for i in range(100):
  a = np.random.randn(100,100)
  b = np.random.randn(100,100)
  c = np.matmul(a, b)

CPU times: user 156 ms, sys: 124 ms, total: 279 ms
Wall time: 158 ms


In [0]:
%%time
for i in range(100):
  a = torch.randn(100,100)
  b = torch.randn(100,100)
  c = torch.matmul(a, b)

CPU times: user 66.9 ms, sys: 88.1 ms, total: 155 ms
Wall time: 105 ms


In [0]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  c = a + b

CPU times: user 1min 34s, sys: 654 ms, total: 1min 35s
Wall time: 1min 35s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000,10000)
  b = torch.randn(10000,10000)
  c = a + b

CPU times: user 26.2 s, sys: 7.92 ms, total: 26.2 s
Wall time: 26.2 s


# CUDA support

In [21]:
print(torch.cuda.device_count())

1


In [22]:
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))

<torch.cuda.device object at 0x7f73439e1fd0>
Tesla T4


In [0]:
cuda0 = torch.device('cuda:0')

In [24]:
a = torch.ones(3, 2, device=cuda0)
b = torch.ones(3, 2, device=cuda0)
c = a + b
print(c)

tensor([[2., 2.],
        [2., 2.],
        [2., 2.]], device='cuda:0')


In [0]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.add(b, a)

CPU times: user 1min 36s, sys: 185 ms, total: 1min 36s
Wall time: 1min 36s


In [0]:
%%time
for i in range(10):
  a_cpu = torch.randn(10000,10000)
  b_cpu = torch.randn(10000,10000)
  b_cpu.add_(a_cpu)

CPU times: user 26.2 s, sys: 13.9 ms, total: 26.2 s
Wall time: 26.2 s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000,10000, device=cuda0)
  b = torch.randn(10000,10000, device=cuda0)
  b.add_(a)

CPU times: user 1.84 ms, sys: 4.01 ms, total: 5.86 ms
Wall time: 12.9 ms


In [0]:
%%time
for i in range(10):
  a = np.random.randn(10000,10000)
  b = np.random.randn(10000,10000)
  np.matmul(b, a)

CPU times: user 19min 51s, sys: 4.83 s, total: 19min 56s
Wall time: 10min 56s


In [0]:
%%time
for i in range(10):
  a_cpu = torch.randn(10000,10000)
  b_cpu = torch.randn(10000,10000)
  b_cpu.matmul(a_cpu)

CPU times: user 4min 45s, sys: 127 ms, total: 4min 45s
Wall time: 4min 45s


In [0]:
%%time
for i in range(10):
  a = torch.randn(10000,10000, device=cuda0)
  b = torch.randn(10000,10000, device=cuda0)
  b.matmul(a)

CPU times: user 7.31 ms, sys: 6.99 ms, total: 14.3 ms
Wall time: 17.5 ms


# Autograd

In [0]:
x = torch.ones([3, 2], requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.],
        [1., 1.]], requires_grad=True)


In [0]:
y = x + 5
print(y)

tensor([[6., 6.],
        [6., 6.],
        [6., 6.]], grad_fn=<AddBackward0>)


In [0]:
z = y*y + 1
print(z)

tensor([[37., 37.],
        [37., 37.],
        [37., 37.]], grad_fn=<AddBackward0>)


In [0]:
t = torch.sum(z)
print(t)

tensor(222., grad_fn=<SumBackward0>)


In [0]:
t.backward()

In [0]:
print(x.grad)

tensor([[12., 12.],
        [12., 12.],
        [12., 12.]])


In [0]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
print(r)
s = torch.sum(r)
s.backward()
print(x.grad)

tensor([[0.9975, 0.9975],
        [0.9975, 0.9975],
        [0.9975, 0.9975]], grad_fn=<MulBackward0>)
tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


In [0]:
x = torch.ones([3, 2], requires_grad=True)
y = x + 5
r = 1/(1 + torch.exp(-y))
a = torch.ones([3, 2])
r.backward(a)
print(x.grad)

tensor([[0.0025, 0.0025],
        [0.0025, 0.0025],
        [0.0025, 0.0025]])


# Autograd example that looks like what we have been doing

In [0]:
x = torch.randn([20, 1], requires_grad=True)
y = 3*x - 2

In [0]:
w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

y_hat = w*x + b

loss = torch.sum((y_hat - y)**2)

In [4]:
print(loss)

tensor(272.0872, grad_fn=<SumBackward0>)


In [0]:
loss.backward()

In [6]:
print(w.grad, b.grad)

tensor([-76.6144]) tensor([130.3153])


when we increase w then loss decrease and when we decrease b then loss increase

If w.grad returns a positive number, what does that say about the value of w

ANS- The selected w is higher than the optimal value

# Do it in a loop

In [7]:
learning_rate = 0.01

w = torch.tensor([1.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

print(w.item(), b.item())

for  i in range(10):

  x = torch.randn([20, 1]) #forward propagation
  y = 3*x - 2

  y_hat = w*x + b 
  loss = torch.sum((y_hat - y)**2) # loss- squared error

  loss.backward()                #backward propagation

  with torch.no_grad():      
    w -= learning_rate * w.grad  #updating parameters
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  print(w.item(), b.item())

1.0 1.0
1.8852834701538086 -0.3429412841796875
2.3043291568756104 -1.0878775119781494
2.766594409942627 -1.5056363344192505
2.8524563312530518 -1.6897825002670288
2.865809202194214 -1.7924087047576904
2.911557674407959 -1.8694320917129517
2.9465408325195312 -1.9208853244781494
2.964899778366089 -1.9482589960098267
2.9876933097839355 -1.9739067554473877
2.9924943447113037 -1.9851192235946655


# Do it for a large problem

In [26]:
%%time
learning_rate = 0.001
N = 1000000
epochs = 2000

w = torch.rand([N], requires_grad=True)
b = torch.ones([1], requires_grad=True)

#print(torch.mean(w).item(), b.item())

for  i in range(epochs):

  x = torch.randn([N])                   #forward propagation
  y = torch.dot(3*torch.ones([N]), x) - 2

  y_hat = torch.dot(w, x) + b 
  loss = torch.sum((y_hat - y)**2) # loss- squared error

  loss.backward()                #backward propagation

  with torch.no_grad():      
    w -= learning_rate * w.grad  #updating parameters
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  #print(torch.mean(w).item(), b.item())

CPU times: user 27.5 s, sys: 1.01 s, total: 28.5 s
Wall time: 28.6 s


In [28]:
%%time
learning_rate = 0.001
N = 1000000
epochs = 2000

w = torch.rand([N], requires_grad=True, device=cuda0)
b = torch.ones([1], requires_grad=True, device=cuda0)

#print(torch.mean(w).item(), b.item())

for  i in range(epochs):

  x = torch.randn([N], device=cuda0)            #forward propagation
  y = torch.dot(3*torch.ones([N], device=cuda0), x) - 2

  y_hat = torch.dot(w, x) + b 
  loss = torch.sum((y_hat - y)**2) # loss- squared error

  loss.backward()                #backward propagation

  with torch.no_grad():      
    w -= learning_rate * w.grad  #updating parameters
    b -= learning_rate * b.grad

    w.grad.zero_()
    b.grad.zero_()

  #print(torch.mean(w).item(), b.item())

CPU times: user 1.32 s, sys: 202 ms, total: 1.52 s
Wall time: 1.53 s
