<a href="https://colab.research.google.com/github/Ezzaldin97/pytorch-coding/blob/main/Tensors_AutoGrad_in_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch
import torchvision

torch.__version__

'2.0.0+cu118'

In [2]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
# The recorded run had no GPU, so every tensor below lives on the CPU and no
# explicit transfer to a GPU is needed.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
# Print tensors with five digits of precision.
torch.set_printoptions(precision=5)
device

device(type='cpu')

In [3]:
# torch.empty only allocates storage: the printed values are whatever happened
# to be in memory and carry no meaning.
empty_tensor = torch.empty((2, 2), dtype=torch.float32)
print(empty_tensor.shape)
print(empty_tensor)
print(empty_tensor.dtype)

torch.Size([2, 2])
tensor([[8.35276e-03, 2.89576e+32],
        [3.49703e-38, 1.01412e+31]])
torch.float32


In [4]:
# Draw a 2x3 tensor from the standard normal distribution in half precision.
random_tensor = torch.randn(2, 3, dtype=torch.float16)
print(random_tensor.shape)
print(random_tensor)
print(random_tensor.dtype)

torch.Size([2, 3])
tensor([[ 0.01910,  0.62402, -0.64648],
        [ 0.18530,  1.19434, -0.85498]], dtype=torch.float16)
torch.float16


In [5]:
# Convert the torch tensor to a NumPy array and confirm the resulting type.
random_arr = random_tensor.numpy()
type(random_arr)

numpy.ndarray

In [6]:
# Be careful: a tensor and the NumPy array converted from it (or vice versa)
# share memory, so changing one of them changes the other as well.
random_arr

array([[ 0.0191,  0.624 , -0.6465],
       [ 0.1853,  1.194 , -0.855 ]], dtype=float16)

In [7]:
# A length-5 NumPy vector of ones (float64 by default).
ones_arr = np.ones((5,))
print(ones_arr.shape)
print(ones_arr)

(5,)
[1. 1. 1. 1. 1.]


In [8]:
# Convert the NumPy array to a torch tensor; the array's dtype (float64) is
# carried over to the tensor.
ones_tensor = torch.from_numpy(ones_arr)
ones_tensor

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

In [9]:
# Shared memory again: writing to the NumPy array is visible through the
# tensor that was created from it with torch.from_numpy.
ones_arr[0] = 5
ones_tensor

tensor([5., 1., 1., 1., 1.], dtype=torch.float64)

In [10]:
# Create a 1x4 random tensor to demonstrate in-place modification.
tensor1 = torch.randn((1,4))
tensor1

tensor([[-0.51564, -0.66723, -0.71623,  0.49457]])

In [11]:
# let's transpose tensor1
tensor1.t()

tensor([[-0.51564],
        [-0.66723],
        [-0.71623],
        [ 0.49457]])

In [12]:
# Transpose again, this time in place: the trailing underscore in t_() means
# tensor1 itself is modified, so no assignment is needed.
tensor1.t_()

tensor([[-0.51564],
        [-0.66723],
        [-0.71623],
        [ 0.49457]])

In [13]:
tensor1

tensor([[-0.51564],
        [-0.66723],
        [-0.71623],
        [ 0.49457]])

In [14]:
# add three to all of elements and modify tensor1...
tensor1.add_(3)

tensor([[2.48436],
        [2.33277],
        [2.28377],
        [3.49457]])

In [15]:
# tensor1 in CPU... 
tensor1.device

device(type='cpu')

In [16]:
# torch.device objects are used to move tensors between the CPU and the GPU.
# Move tensor1 to the selected device (the GPU when CUDA is available).
tensor1 = tensor1.to(device)
tensor1.device

device(type='cpu')

In [17]:
tensor1 = tensor1.to("cpu")
tensor1.device

device(type='cpu')

In [18]:
# Download the CIFAR-10 training split into ./data and inspect the shape of
# the raw image array.
data_dir = './data'
cifar = torchvision.datasets.CIFAR10(data_dir, train = True, download = True)
cifar.data.shape

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 78960200.31it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


(50000, 32, 32, 3)

In [19]:
# We now have 50000 images of 32 x 32 pixels (height, width) with 3 channels
# (r, g, b), stored channels-last: (N, H, W, C).
# permute can be used to reorder the dimensions of the data.
# cifar.data is a NumPy ndarray.
type(cifar.data[0])

numpy.ndarray

In [20]:
# Convert the image array to a float tensor and move the channel axis forward:
# (N, H, W, C) -> (N, C, H, W).
X = torch.from_numpy(cifar.data).permute(0, 3, 1, 2).float()
# Scale pixel values from [0, 255] to [0, 1].
X = X / 255.
print(X.type(), X.size(), X.min().item(), X.max().item())

torch.FloatTensor torch.Size([50000, 3, 32, 32]) 0.0 1.0


In [21]:
# requires_grad defaults to False, in which case autograd does not track
# operations on the tensor and no gradient is computed for it.
# Setting requires_grad=True makes autograd record the operations applied to
# the tensor so that backward() can compute gradients with respect to it.
w = torch.tensor([0.5], requires_grad = True)
b = torch.tensor([2.0], requires_grad=True)
xx = torch.tensor([0.5], requires_grad=True)

In [22]:
xx.requires_grad

True

In [23]:
def loss_func(x, y, weight, b):
  """Sum of squared errors of the model ``exp(weight * x + b)`` against ``y``.

  Args:
    x: input tensor.
    y: target tensor, broadcastable against the prediction.
    weight: multiplicative parameter applied to ``x``.
    b: additive bias parameter.

  Returns:
    A scalar tensor holding ``sum((exp(weight * x + b) - y) ** 2)``.

  Side effects:
    Prints the prediction tensor before returning.
  """
  prediction = (weight * x + b).exp()
  print(prediction)
  residual = prediction - y
  return residual.pow(2).sum()

In [24]:
# Random target with the same shape as xx; evaluate the loss against it
# (loss_func also prints the prediction y_hat).
ystar = torch.randn_like(xx)
l = loss_func(xx, ystar, w, b)

tensor([9.48774], grad_fn=<ExpBackward0>)


In [25]:
l

tensor(81.05015, grad_fn=<SumBackward0>)

In [26]:
l.requires_grad

True

In [27]:
# No gradient has been computed yet: .grad stays None until the backward pass
# has been run.
print(w.grad)

None


In [28]:
l.backward(retain_graph=True)

In [29]:
print(xx.grad)
print(w.grad)

tensor([85.41605])
tensor([85.41605])


In [30]:
# Calling backward a second time accumulates into .grad (the values double).
# backward normally frees the computational graph after it is used, so a
# repeated call would raise an error; passing retain_graph=True keeps the
# graph alive so that backward can be called again.
l.backward(retain_graph=True)
print(xx.grad)
print(w.grad)

tensor([170.83211])
tensor([170.83211])


In [31]:
# Gradients accumulate across backward calls, so they must be reset to zero
# manually. Zero the accumulated gradients (.grad) in place -- not .data,
# which would overwrite the parameter values themselves and leave the
# gradients untouched.
xx.grad.zero_()
w.grad.zero_()
b.grad.zero_()

tensor([0.])

In [32]:
l.backward(retain_graph=True)

In [34]:
l.backward()
print(xx.grad)
print(w.grad)

tensor([170.83211])
tensor([170.83211])
