Deep Learning is a branch of Machine Learning in which algorithms are modeled loosely on the way the human brain works. The most commonly used deep learning libraries are TensorFlow and PyTorch. PyTorch is an open-source deep learning framework with Python and C++ interfaces. In Python, PyTorch is imported through the torch module. In PyTorch, the data to be processed is supplied in the form of tensors.

To install the CUDA library:

In [22]:
# Windows 11 x86_64: 

# https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64&target_version=11&target_type=exe_local


In [None]:
# Linux Ubuntu 24.04 arm64-sbsa :

# wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-ubuntu2404.pin
# sudo mv cuda-ubuntu2404.pin /etc/apt/preferences.d/cuda-repository-pin-600
# wget https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda-repo-ubuntu2404-12-6-local_12.6.3-560.35.05-1_arm64.deb
# sudo dpkg -i cuda-repo-ubuntu2404-12-6-local_12.6.3-560.35.05-1_arm64.deb
# sudo cp /var/cuda-repo-ubuntu2404-12-6-local/cuda-*-keyring.gpg /usr/share/keyrings/
# sudo apt-get update
# sudo apt-get -y install cuda-toolkit-12-6

# sudo apt-get install -y cuda-drivers

In [1]:
import torch
import numpy as np


In [4]:
# Check available devices
print(torch.cuda.device_count())  # Number of GPUs
# print(torch.cuda.current_device())  # Current GPU ID
# print(torch.cuda.device(torch.cuda.current_device()))  # Current GPU properties

# Check CPU
print(torch.device("cpu"))

0
cpu


In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [6]:
# Set device to GPU (if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [7]:
device

device(type='cpu')

In [8]:
# Set device to CPU
device = torch.device("cpu")

In [9]:
device

device(type='cpu')

In [10]:
# Create an example tensor (it is moved to the selected device in a later cell)
data = torch.randn(3, 3)  # Example tensor  multi-dimensional array similar to NumPy arrays.

In [11]:
data

tensor([[-0.0922,  2.2213,  1.2276],
        [ 0.7965, -0.9639,  0.0869],
        [-0.8715, -1.3944,  0.1125]])

In [67]:
# list of values to be stored as tensor
data1 = [1, 2, 3, 4, 5, 6]
data2 = np.array([1.5, 3.4, 6.8,
                9.3, 7.0, 2.8])
 
# creating tensors and printing 
t1 = torch.tensor(data1)
t2 = torch.Tensor(data1)
t3 = torch.as_tensor(data2)
t4 = torch.from_numpy(data2)

In [12]:
# `data` is already a tensor, so copy it with detach().clone() instead of
# torch.tensor(data), which emits a UserWarning when given an existing tensor.
d_tensor = data.detach().clone()  # Example


In [14]:
d_tensor

tensor([[-0.0922,  2.2213,  1.2276],
        [ 0.7965, -0.9639,  0.0869],
        [-0.8715, -1.3944,  0.1125]])

In [20]:
data = data.to(device) 

In [21]:
data

tensor([[-0.0922,  2.2213,  1.2276],
        [ 0.7965, -0.9639,  0.0869],
        [-0.8715, -1.3944,  0.1125]])

In [39]:
x = torch.tensor([1, 2, 3])

In [40]:
y = x + 2  # broadcast

In [25]:
y

tensor([3, 4, 5])

In [41]:
z = y * 3 # broadcast

In [42]:
z

tensor([ 9, 12, 15])

In [49]:
import torch.nn as nn #

In [50]:
# Initialization:
# The __init__ method is the constructor of the class, which is called when an instance of the class is created.
# Super Function:
# The super(SimpleNN, self).__init__() line calls the constructor of the parent class (nn.Module) to ensure proper initialization.
# Defining a Linear Layer:
# The self.fc1 = nn.Linear(10, 5) line defines a linear (fully connected) layer with the following properties:
# 10 is the number of input features.
# 5 is the number of output features.

In [51]:
class SimpleNN(nn.Module):
    """Minimal network that owns a single fully connected layer (10 inputs -> 5 outputs)."""

    def __init__(self):
        # __init__ is the constructor: Python calls it automatically whenever an
        # instance is created, and it is where the instance attributes are set up.
        super().__init__()  # initialize the nn.Module parent class first
        self.fc1 = nn.Linear(in_features=10, out_features=5)  # attribute holding the layer
 

In [54]:
NN = SimpleNN()

In [55]:
NN

SimpleNN(
  (fc1): Linear(in_features=10, out_features=5, bias=True)
)

In [56]:
model = NN.fc1

In [57]:
model

Linear(in_features=10, out_features=5, bias=True)

In [61]:
inputs = torch.randn(3,10)

In [62]:
inputs

tensor([[-0.7969,  0.3970, -0.5916,  0.3226, -0.4522,  1.6186,  0.4672,  0.1621,
         -1.2545,  0.0917],
        [-0.2732,  0.2622,  0.2583, -1.1032, -1.3730,  0.2344, -0.0325,  0.1791,
         -0.0940,  0.3402],
        [ 0.2753,  0.7800,  0.1288, -1.3370, -0.6918,  0.5433,  0.1228,  0.7991,
         -0.1958,  1.1173]])

A method can be added to an existing class explicitly, using the following approaches:

In [65]:
def forward(self, x):  # method
    """Pass ``x`` through the ``fc1`` layer of ``self`` and return the result."""
    layer = self.fc1
    return layer(x)

setattr(SimpleNN, "forward", forward)


In [66]:
NN.forward(inputs)

tensor([[-0.4305,  0.2883, -0.0256, -0.8229, -0.2528],
        [-0.1999, -0.1669, -0.2008, -0.5687, -0.4268],
        [-0.0696, -0.0399, -0.3899, -0.1732, -0.6606]],
       grad_fn=<AddmmBackward0>)

decorator  

A decorator is a design pattern in Python that lets you wrap a function or class in order to extend its behavior without permanently modifying it.

In [None]:
# def decorator(func):
#     def wrapper(*args, **kwargs):
#         # Code to be executed before the original function
#         result = func(*args, **kwargs)
#         # Code to be executed after the original function
#         return result
#     return wrapper

In [69]:
def add_method(cls):
    """Build a decorator that attaches the decorated function to ``cls``.

    The function is stored on ``cls`` under its own ``__name__`` and is
    returned unchanged, so it stays usable as a plain function too.
    """
    def attach(fn):
        method_name = fn.__name__
        setattr(cls, method_name, fn)
        return fn

    return attach

In [68]:
# yield -> generator: generators are a type of iterable, like lists or tuples. However, unlike lists, 
# generators do not store all their values in memory; instead, they produce each value on the fly.

In [70]:
@add_method(SimpleNN)
def custom_forward(self, x):
    """Apply the ``fc1`` layer to ``x`` and pass the result through ReLU."""
    linear_out = self.fc1(x)
    return torch.relu(linear_out)

In [71]:
NN.custom_forward(inputs)

tensor([[0.0000, 0.2883, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000]], grad_fn=<ReluBackward0>)

In [73]:
def my_decorator(func):
    """Wrap ``func`` so that a message is printed before and after each call.

    The wrapper forwards all positional and keyword arguments to ``func`` and
    returns whatever ``func`` returns, so functions of any signature can be
    decorated without losing their result.
    """
    import functools  # local import keeps this cell self-contained

    @functools.wraps(func)  # keep func.__name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        print("Something is happening before the function is called.")
        result = func(*args, **kwargs)
        print("Something is happening after the function is called.")
        return result
    return wrapper

@my_decorator
def say_hello():
    """Print a greeting (used to demonstrate the decorator)."""
    print("Hello!")

say_hello()

Something is happening before the function is called.
Hello!
Something is happening after the function is called.


Threading

Threading is a way to achieve concurrency in Python, allowing multiple threads to run concurrently within a single process.

In [None]:
# What is Threading in CPU?
# Threading in CPU refers to the ability of a central processing unit (CPU) to execute multiple threads or flows of instructions concurrently, improving overall system performance and responsiveness.
# Types of Threading in CPU
# Simultaneous Multithreading (SMT): SMT allows multiple threads to share the same physical core, improving resource utilization.
# Multi-Threading: Multi-threading allows a single core to execute multiple threads, improving responsiveness and system utilization.

In [74]:
import threading
import time

def print_numbers(count=10, delay=1):
    """Print the integers ``0 .. count - 1``, one per line.

    Args:
        count: How many numbers to print (defaults to 10).
        delay: Seconds to sleep before each print (defaults to 1).
    """
    for i in range(count):
        time.sleep(delay)
        print(i)

def print_letters(letters='abcdefghij', delay=1):
    """Print each character of ``letters`` on its own line.

    Args:
        letters: Iterable of items to print (defaults to 'abcdefghij').
        delay: Seconds to sleep before each print (defaults to 1).
    """
    for letter in letters:
        time.sleep(delay)
        print(letter)

# Build one thread per worker function
thread1 = threading.Thread(target=print_numbers)
thread2 = threading.Thread(target=print_letters)
workers = (thread1, thread2)

# Launch both threads so they run concurrently
for worker in workers:
    worker.start()

# Block the main thread until both workers are done
for worker in workers:
    worker.join()

0
a
1
b
2
c
3
d
4
e
5
f
6
g
7
h
8
i
9
j


In [75]:
inputs

tensor([[-0.7969,  0.3970, -0.5916,  0.3226, -0.4522,  1.6186,  0.4672,  0.1621,
         -1.2545,  0.0917],
        [-0.2732,  0.2622,  0.2583, -1.1032, -1.3730,  0.2344, -0.0325,  0.1791,
         -0.0940,  0.3402],
        [ 0.2753,  0.7800,  0.1288, -1.3370, -0.6918,  0.5433,  0.1228,  0.7991,
         -0.1958,  1.1173]])

In [80]:
inputs.shape

torch.Size([3, 10])

In [77]:
reshape_input = inputs.reshape(5,6)

In [79]:
reshape_input.shape

torch.Size([5, 6])

In [82]:
# Non-in-place Tensor.resize() is deprecated and warns; reshape() gives the same
# (6, 5) layout for this 30-element tensor.
resize_input = reshape_input.reshape(6, 5)



In [83]:
resize_input

tensor([[-0.7969,  0.3970, -0.5916,  0.3226, -0.4522],
        [ 1.6186,  0.4672,  0.1621, -1.2545,  0.0917],
        [-0.2732,  0.2622,  0.2583, -1.1032, -1.3730],
        [ 0.2344, -0.0325,  0.1791, -0.0940,  0.3402],
        [ 0.2753,  0.7800,  0.1288, -1.3370, -0.6918],
        [ 0.5433,  0.1228,  0.7991, -0.1958,  1.1173]])

In [84]:
resize_input.shape

torch.Size([6, 5])

In [90]:
inputs

tensor([[-0.7969,  0.3970, -0.5916,  0.3226, -0.4522,  1.6186,  0.4672,  0.1621,
         -1.2545,  0.0917],
        [-0.2732,  0.2622,  0.2583, -1.1032, -1.3730,  0.2344, -0.0325,  0.1791,
         -0.0940,  0.3402],
        [ 0.2753,  0.7800,  0.1288, -1.3370, -0.6918,  0.5433,  0.1228,  0.7991,
         -0.1958,  1.1173]])

In [89]:
inputs.shape

torch.Size([3, 10])

In [None]:
# What does transpose do?
# The transpose method in PyTorch swaps the dimensions of a tensor. It takes two arguments, dim0 and dim1, which specify the dimensions to be swapped.
# What do the indices (1, 0) mean?
# In PyTorch, tensor dimensions are indexed starting from 0. The indices (1, 0) mean:
# 0 refers to the first dimension (usually the batch size)
# 1 refers to the second dimension (usually row no or sequence length)
# What happens when you call inputs.transpose(1, 0)?
# When you call inputs.transpose(1, 0), PyTorch swaps the first and second dimensions of the inputs tensor.
# Example
# Suppose inputs is a tensor with shape (batch_size, sequence_length, embedding_dim), where:
# batch_size is the number of samples in the batch (1st axis, dim 0)
# sequence_length is the length of each sequence, i.e. the number of rows (2nd axis, dim 1)
# embedding_dim is the number of columns in each row (3rd axis, dim 2)
# If inputs has shape (32, 10, 128), calling inputs.transpose(1, 0) would result in a tensor with shape (10, 32, 128).

In [86]:
inputs.transpose(1 ,0 ) # axis 

tensor([[-0.7969, -0.2732,  0.2753],
        [ 0.3970,  0.2622,  0.7800],
        [-0.5916,  0.2583,  0.1288],
        [ 0.3226, -1.1032, -1.3370],
        [-0.4522, -1.3730, -0.6918],
        [ 1.6186,  0.2344,  0.5433],
        [ 0.4672, -0.0325,  0.1228],
        [ 0.1621,  0.1791,  0.7991],
        [-1.2545, -0.0940, -0.1958],
        [ 0.0917,  0.3402,  1.1173]])

In [87]:
inputs.transpose(1 ,0 ).shape

torch.Size([10, 3])

In [92]:
inputs.transpose(0, 1) # axis

tensor([[-0.7969, -0.2732,  0.2753],
        [ 0.3970,  0.2622,  0.7800],
        [-0.5916,  0.2583,  0.1288],
        [ 0.3226, -1.1032, -1.3370],
        [-0.4522, -1.3730, -0.6918],
        [ 1.6186,  0.2344,  0.5433],
        [ 0.4672, -0.0325,  0.1228],
        [ 0.1621,  0.1791,  0.7991],
        [-1.2545, -0.0940, -0.1958],
        [ 0.0917,  0.3402,  1.1173]])

In [96]:
data3d = torch.randn(3, 10 , 5) # no of batch = 3, row in each batch = 10, col in each row = 5

In [97]:
data3d

tensor([[[-0.1404,  1.7859, -0.0808, -0.3776,  0.6276],
         [-0.5920, -0.2097,  0.2644, -1.8041, -1.2006],
         [ 1.9811,  0.4113, -0.3142,  2.2199, -1.0522],
         [ 1.6184, -0.8286, -0.4455, -0.9556,  1.0425],
         [-0.7034,  0.1163,  0.1383, -1.1998, -0.2440],
         [ 0.2585, -0.1913,  0.3366,  0.4548,  0.4323],
         [-2.1225,  0.9055, -2.0818, -0.5666, -0.8725],
         [-0.2414,  0.2855, -0.0700,  0.1300,  0.5473],
         [-1.0568,  1.6392, -0.8053, -0.6102,  0.7528],
         [-0.1919, -1.0545, -1.8967,  0.5810,  0.5674]],

        [[ 0.3482,  2.1756,  2.0345,  1.1289,  0.8111],
         [-0.3351,  1.1704,  0.2937,  0.9796, -0.7959],
         [ 0.8224,  0.3938,  1.0226, -0.4507, -0.3953],
         [ 1.1159, -1.3554,  1.3403, -1.1574,  1.6949],
         [-0.2567,  0.4700,  0.4530, -1.0499,  2.1902],
         [-0.2344,  1.3081, -0.8250, -0.3584, -0.4154],
         [-1.0589, -0.9091, -0.2665,  1.1712,  0.9454],
         [-1.0306,  0.1125, -1.8777, -1.7650, 

In [98]:
data3d.shape

torch.Size([3, 10, 5])

In [99]:
data3d.transpose(0,1)  #  axis batch and row

tensor([[[-0.1404,  1.7859, -0.0808, -0.3776,  0.6276],
         [ 0.3482,  2.1756,  2.0345,  1.1289,  0.8111],
         [ 1.1382, -0.6247,  0.4739,  0.0366, -0.3726]],

        [[-0.5920, -0.2097,  0.2644, -1.8041, -1.2006],
         [-0.3351,  1.1704,  0.2937,  0.9796, -0.7959],
         [ 0.1050,  0.1121,  0.4793,  1.5431,  0.5562]],

        [[ 1.9811,  0.4113, -0.3142,  2.2199, -1.0522],
         [ 0.8224,  0.3938,  1.0226, -0.4507, -0.3953],
         [ 0.0473, -1.2149, -1.8568, -0.1641,  0.6274]],

        [[ 1.6184, -0.8286, -0.4455, -0.9556,  1.0425],
         [ 1.1159, -1.3554,  1.3403, -1.1574,  1.6949],
         [ 0.7754,  0.1308,  0.3213, -0.0047, -1.6980]],

        [[-0.7034,  0.1163,  0.1383, -1.1998, -0.2440],
         [-0.2567,  0.4700,  0.4530, -1.0499,  2.1902],
         [ 2.0351,  0.4026,  0.3492, -1.6231, -1.0142]],

        [[ 0.2585, -0.1913,  0.3366,  0.4548,  0.4323],
         [-0.2344,  1.3081, -0.8250, -0.3584, -0.4154],
         [-1.1630, -0.3773, -1.5198,  

In [100]:
data3d.transpose(0,1).shape

torch.Size([10, 3, 5])

In [101]:
data3d.transpose(1,2) # axis row and column

tensor([[[-0.1404, -0.5920,  1.9811,  1.6184, -0.7034,  0.2585, -2.1225,
          -0.2414, -1.0568, -0.1919],
         [ 1.7859, -0.2097,  0.4113, -0.8286,  0.1163, -0.1913,  0.9055,
           0.2855,  1.6392, -1.0545],
         [-0.0808,  0.2644, -0.3142, -0.4455,  0.1383,  0.3366, -2.0818,
          -0.0700, -0.8053, -1.8967],
         [-0.3776, -1.8041,  2.2199, -0.9556, -1.1998,  0.4548, -0.5666,
           0.1300, -0.6102,  0.5810],
         [ 0.6276, -1.2006, -1.0522,  1.0425, -0.2440,  0.4323, -0.8725,
           0.5473,  0.7528,  0.5674]],

        [[ 0.3482, -0.3351,  0.8224,  1.1159, -0.2567, -0.2344, -1.0589,
          -1.0306,  1.3953, -0.3560],
         [ 2.1756,  1.1704,  0.3938, -1.3554,  0.4700,  1.3081, -0.9091,
           0.1125, -0.9257,  0.2427],
         [ 2.0345,  0.2937,  1.0226,  1.3403,  0.4530, -0.8250, -0.2665,
          -1.8777,  0.7415,  0.3066],
         [ 1.1289,  0.9796, -0.4507, -1.1574, -1.0499, -0.3584,  1.1712,
          -1.7650,  0.9343, -0.4379],

In [102]:
data3d.transpose(1,2).shape

torch.Size([3, 5, 10])

In [107]:
op_data1 = torch.randn(3, 5)

In [108]:
op_data1

tensor([[-0.3187,  1.4922,  0.3375,  0.3446,  0.7711],
        [-0.3616,  0.0701,  0.6352,  0.0218,  0.9515],
        [-0.4573, -0.3381,  0.1886, -0.2862, -0.0316]])

In [109]:
op_data2 = torch.randn(5,3)

In [110]:
op_data2

tensor([[ 0.9052, -0.1994, -0.8003],
        [ 0.5386, -0.4892, -0.2091],
        [-0.8759, -0.5123, -0.9673],
        [-1.5221, -0.1855, -1.5102],
        [-0.3614,  0.0030, -1.6511]])

In [112]:
torch.mul(op_data1,op_data2.T) # elementwise multiplication

tensor([[-0.2885,  0.8037, -0.2956, -0.5246, -0.2787],
        [ 0.0721, -0.0343, -0.3254, -0.0040,  0.0029],
        [ 0.3659,  0.0707, -0.1824,  0.4322,  0.0521]])

In [113]:
torch.div(op_data1,op_data2.T) # elementwise division

tensor([[-3.5205e-01,  2.7703e+00, -3.8527e-01, -2.2642e-01, -2.1336e+00],
        [ 1.8134e+00, -1.4333e-01, -1.2399e+00, -1.1756e-01,  3.1407e+02],
        [ 5.7140e-01,  1.6175e+00, -1.9495e-01,  1.8951e-01,  1.9121e-02]])

In [114]:
op_data1 @ op_data2 # matrix multiplication 3x5 * 5x3 = 3x3

tensor([[-0.5836, -0.9009, -2.1770],
        [-1.2230, -0.2887, -1.9437],
        [-0.3142,  0.2130,  0.7386]])

Pytorch Modules

The PyTorch library modules are essential to create and train neural networks. The three main library modules are Autograd, Optim, and nn.

In [None]:
class Model(nn.Module):
    """Single linear layer mapping ``x`` input features to ``y`` output features."""

    def __init__(self, x, y):
        super().__init__()
        self.linear = nn.Linear(in_features=x, out_features=y)

    def forward(self, x):
        """Return the linear layer's prediction for the input batch ``x``."""
        return self.linear(x)

Autograd

Autograd is a reverse automatic differentiation system. Conceptually, autograd records a graph recording all of the operations that created the data as you execute operations, giving you a directed acyclic graph whose leaves are the input tensors and roots are the output tensors. By tracing this graph from roots to leaves, you can automatically compute the gradients using the chain rule.

In [None]:
# Saved tensors x to compute gradient

x = torch.randn(5, requires_grad=True)
y = x.pow(2)

In [118]:
x

tensor([ 0.6967,  0.1186, -0.1475, -0.7892, -0.2692], requires_grad=True)

In [119]:
y # y = x^2

tensor([0.4855, 0.0141, 0.0218, 0.6228, 0.0725], grad_fn=<PowBackward0>)

In [None]:
# y.grad_fn is the gradient function associated with the computation of y. In this case, since y is computed as the 
# square of x, the gradient function is an instance of PowBackward.
# _saved_self is an attribute of the gradient function that stores the original input tensor x. 
# This is done to facilitate gradient computation during the backward pass.

The equal() method checks whether two tensors have the same shape, size, and values.

In [120]:
print(x.equal(y.grad_fn._saved_self))  # True
print(x is y.grad_fn._saved_self)  # True
# y.grad_fn._saved_self refers to the same Tensor object as x

True
True


In [None]:
# In PyTorch, when you compute a new tensor y as a function of an existing tensor x, autograd may save x on 
# y.grad_fn because it is needed to compute the gradient during the backward pass.
# In this specific case y = x.pow(2), whose gradient depends on x, so y.grad_fn keeps a reference to the 
# original tensor x. Therefore, x and y.grad_fn._saved_self refer to the same object in memory.

In [121]:
x = torch.randn(5, requires_grad=True)
y = x.exp()


In [122]:
x

tensor([0.9626, 0.5535, 2.1479, 0.1702, 1.0008], requires_grad=True)

In [123]:
y

tensor([2.6186, 1.7394, 8.5665, 1.1856, 2.7204], grad_fn=<ExpBackward0>)

In [None]:
# y.grad_fn is the gradient function associated with the computation of y. In this case, since y is computed as the exponential of x, the gradient function is an instance of ExpBackward.
# _saved_result is an attribute of the gradient function that stores the result of the forward pass, which is the tensor y itself.

The equal() method checks whether two tensors have the same shape, size, and values.

In [124]:
print(y.equal(y.grad_fn._saved_result))  # True
print(y is y.grad_fn._saved_result)  # False

True
False


In [None]:
# Although y and y.grad_fn._saved_result have the same values and shape, they are not the same Python object. To avoid a reference cycle (y -> grad_fn -> y), PyTorch packs the tensor when it is saved and unpacks it into a different tensor object when _saved_result is read.
# In other words, y and y.grad_fn._saved_result are two separate tensor objects that share the same underlying storage, which is why they have the same values and shape.
# Why does y.equal(y.grad_fn._saved_result) return True?
# Although y and y.grad_fn._saved_result are not the same object, they have the same values and shape. Therefore, the equal method returns True, indicating that the two tensors are equal in value, even if they are not the same object.

In [138]:
x = torch.ones(5, 5, requires_grad=True)

In [139]:
x

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True)

In [140]:
y = (x + 3) * (x + 4) * 0.5

In [141]:
y

tensor([[10., 10., 10., 10., 10.],
        [10., 10., 10., 10., 10.],
        [10., 10., 10., 10., 10.],
        [10., 10., 10., 10., 10.],
        [10., 10., 10., 10., 10.]], grad_fn=<MulBackward0>)

In [148]:
y.sum()

tensor(250., grad_fn=<SumBackward0>)

In [130]:
# Define a train function to be used in different threads
def train_fn():
    """Run one forward/backward pass on a fresh 5x5 tensor of ones and print each step.

    Every call creates its own ``x``, so the gradients it prints belong to
    that call only. Returns None.
    """
    x = torch.ones(5, 5, requires_grad=True)
    print('x', x)
    # forward
    y = (x + 3) * (x + 4) * 0.5
    print('y', y)
    # backward: Tensor.backward() returns None, so keep the scalar loss under its
    # own name instead of binding (and printing) the result of backward()
    loss = y.sum()
    loss.backward()
    # potential optimizer update

    print('loss', loss)

    print('x.grad', x.grad)


In [None]:
# Creates a tensor x with shape (5, 5) and requires gradient.
# Performs a forward pass by computing y using a simple neural network formula.
# Performs a backward pass by computing the gradients of the loss with respect to the inputs using y.sum().backward().
# Optionally, an optimizer update can be performed to update the model parameters.

In [133]:
# User write their own threading code to drive the train_fn
threads = []
for _ in range(10):
    p = threading.Thread(target=train_fn, args=())
    p.start()
    threads.append(p)   

x x x x x x x x x x 

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True)
tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True)
y y tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True)
y tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True)
y tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]], requires_grad=True)
y tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1.

In [None]:
# Creates an empty list threads to store the thread objects.
# Loops 10 times, creating a new thread each time that targets the train_fn function with no arguments.
# Starts each thread using the start() method.
# Appends each thread object to the threads list.
# Loops through the threads list, joining each thread using the join() method (this is done in a separate cell further below). This ensures that the main thread waits for all the child threads to finish before continuing.
# What's Happening Under the Hood
# When you run this code, the following happens:
# The train_fn function is executed in multiple threads, each with its own copy of the tensor x.
# Each thread performs the forward and backward passes independently, computing the gradients of the loss with respect to the inputs.
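# Because x is created inside train_fn, each thread accumulates gradients only into its own local x.grad; nothing is shared or summed across threads.
# The main thread cannot read these gradients afterwards, since x is local to each call; each thread prints its own x.grad instead.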
# Note that this code assumes that the train_fn function is thread-safe, meaning that it does not access any shared resources that could cause conflicts between threads.

In [134]:
threads

[<Thread(Thread-349 (train_fn), stopped 6044)>,
 <Thread(Thread-350 (train_fn), stopped 15656)>,
 <Thread(Thread-351 (train_fn), stopped 13272)>,
 <Thread(Thread-352 (train_fn), stopped 6688)>,
 <Thread(Thread-353 (train_fn), stopped 8640)>,
 <Thread(Thread-354 (train_fn), stopped 17680)>,
 <Thread(Thread-355 (train_fn), stopped 6856)>,
 <Thread(Thread-356 (train_fn), stopped 19548)>,
 <Thread(Thread-357 (train_fn), stopped 16756)>,
 <Thread(Thread-358 (train_fn), stopped 19572)>]

In [135]:
for p in threads:
    p.join()

Building Neural Network with PyTorch

In [143]:
# training input (X) and output (y)
import torch.nn as nn
X = torch.Tensor([[1], [2], [3],
                [4], [5], [6]])
y = torch.Tensor([[5], [10], [15],
                  [20], [25], [30]])

# the output corresponding to 1 is 5
# the output corresponding to 2 is 10
# the output corresponding to 3 is 15
# the output corresponding to 4 is 20
# the output corresponding to 5 is 25
# the output corresponding to 6 is 30


In [144]:
class Model(torch.nn.Module):
    """One-feature linear regression model: a single ``Linear(1, 1)`` layer."""

    def __init__(self):
        # set up the only layer of the network
        super().__init__()
        self.linear = torch.nn.Linear(in_features=1, out_features=1)

    def forward(self, x):
        """Forward pass: return the linear layer's prediction for ``x``."""
        return self.linear(x)

In [145]:
model = Model()

In [146]:
lin_model = model.linear

In [147]:
# defining loss function and optimizer
loss_fn = torch.nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01 )

In [152]:
for epoch in range(1000):

    # predicting y using the current weights
    # (lin_model is model.linear, the layer whose parameters the optimizer updates)
    y_pred = lin_model(X)
    # X is plain input data, so it does not need requires_grad: only the model's
    # parameters have to be tracked for backpropagation, and they require
    # gradients by default.

    # loss calculation
    loss = loss_fn(y_pred, y)
    # loss_fn is torch.nn.L1Loss, i.e. the mean absolute error (MAE) between
    # the predictions and the targets.

    # calculating gradients
    loss.backward()
    # backward() is a PyTorch method that computes the gradients of the loss with respect to the model's parameters.
    # The gradients are stored in the grad attribute of each parameter.

    # updating weights
    optimizer.step()
    # optimizer is an instance of PyTorch's Optimizer, which updates the model's parameters based on the gradients
    # computed during backpropagation.
    # step() updates the parameters using the gradients and the optimizer's hyperparameters (e.g., learning rate).

    optimizer.zero_grad()
    # zero_grad() resets the gradients of all parameters.
    # This is necessary because PyTorch accumulates gradients by default. If we don't reset the gradients,
    # they will be added to the gradients computed in the next iteration, leading to incorrect updates.

    if (epoch + 1) % 10 == 0:
        print('epoch: ', epoch+1, 'loss: ', loss.item())

epoch:  10 loss:  6.810786724090576
epoch:  20 loss:  6.3607869148254395
epoch:  30 loss:  5.910787105560303
epoch:  40 loss:  5.460788249969482
epoch:  50 loss:  5.010788440704346
epoch:  60 loss:  4.586870193481445
epoch:  70 loss:  4.232003211975098
epoch:  80 loss:  3.8901708126068115
epoch:  90 loss:  3.5519790649414062
epoch:  100 loss:  3.2141811847686768
epoch:  110 loss:  2.875676393508911
epoch:  120 loss:  2.536119222640991
epoch:  130 loss:  2.2020175457000732
epoch:  140 loss:  1.970568060874939
epoch:  150 loss:  1.7606498003005981
epoch:  160 loss:  1.5572971105575562
epoch:  170 loss:  1.355275273323059
epoch:  180 loss:  1.2316750288009644
epoch:  190 loss:  1.1496325731277466
epoch:  200 loss:  1.0751460790634155
epoch:  210 loss:  1.0027934312820435
epoch:  220 loss:  0.986588180065155
epoch:  230 loss:  0.9734523892402649
epoch:  240 loss:  0.9591490626335144
epoch:  250 loss:  0.9443218111991882
epoch:  260 loss:  0.9291975498199463
epoch:  270 loss:  0.91386002302

In [153]:
# testing on new data    
X = torch.Tensor([[7], [8]])
predicted = lin_model(X)
print(predicted)

tensor([[34.9980],
        [39.9977]], grad_fn=<AddmmBackward0>)
