In [1]:
import torch

In [2]:
torch.__version__

'2.5.1+cpu'

# Tensors

## 1. Creating Tensors

In [3]:
# scalar - tensor with zero dimension
tensor0 = torch.tensor(1)
tensor0

tensor(1)

In [4]:
tensor0.ndim

0

In [5]:
# vector - tensor with one dimension
tensor1 = torch.tensor([6, 8, 0, 1, 2])

In [6]:
# matrix - tensor with two dimensions
tensor2 = torch.tensor(([0, 1, 7], [4, 2, 4]))

In [7]:
# Dimension and shape of a tensor
print(f'Vector:\n {tensor1}\t No. of dimensions: {tensor1.ndim}\t Shape: {tensor1.shape}\n')
print(f'Matrix:\n {tensor2}\t No. of dimensions: {tensor2.ndim}\t Shape: {tensor2.size()}\n')

Vector:
 tensor([6, 8, 0, 1, 2])	 No. of dimensions: 1	 Shape: torch.Size([5])

Matrix:
 tensor([[0, 1, 7],
        [4, 2, 4]])	 No. of dimensions: 2	 Shape: torch.Size([2, 3])



In [8]:
# Alternate ways
size = (3, 4)
tensor4 = torch.empty(size)
tensor4

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [9]:
tensor5 = torch.rand(size)
tensor5

tensor([[0.2782, 0.2462, 0.3170, 0.7151],
        [0.9492, 0.7424, 0.8451, 0.1560],
        [0.5498, 0.9105, 0.4239, 0.2150]])

In [10]:
tensor6 = torch.zeros(size)
tensor6

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [11]:
tensor7 = torch.ones(size)
tensor7

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [12]:
# Check the datatype of a tensor
tensor4 = torch.rand(1,2)
print(tensor4)
tensor4.dtype

tensor([[0.9491, 0.1878]])


torch.float32

In [13]:
# Create a tensor with a specific datatype
tensor5 = torch.rand(1, 2, dtype = torch.float16)
print(tensor5)

tensor([[0.6948, 0.9688]], dtype=torch.float16)


In [14]:
# Changing the datatype of a tensor
tensor4.type(torch.double)

tensor([[0.9491, 0.1878]], dtype=torch.float64)

In [15]:
# Creating tensors from a numpy array
import numpy as np

example_array = np.array([[9, 3], [0, 4]])
tensor8 =torch.from_numpy(example_array)

tensor9 = torch.tensor(example_array)
print(example_array)
print(tensor8)
print(tensor9)

[[9 3]
 [0 4]]
tensor([[9, 3],
        [0, 4]], dtype=torch.int32)
tensor([[9, 3],
        [0, 4]], dtype=torch.int32)


In [16]:
example_array*= 3
print(example_array)
print(tensor8)
print(tensor9)

[[27  9]
 [ 0 12]]
tensor([[27,  9],
        [ 0, 12]], dtype=torch.int32)
tensor([[9, 3],
        [0, 4]], dtype=torch.int32)


In [17]:
# Crearing a tensor from another tensor

tensor10 = torch.ones_like(tensor8)
tensor10

tensor([[1, 1],
        [1, 1]], dtype=torch.int32)

In [18]:
# Device configuration

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tensor11 = torch.ones(3, 7).to(device)

tensor11 = torch.zeros(3, 7, device = device)

## 2. Accessing elements in a tensor

In [19]:
tensor2

tensor([[0, 1, 7],
        [4, 2, 4]])

In [20]:
tensor2.dim()

2

In [21]:
tensor2.size()

torch.Size([2, 3])

In [22]:
tensor2[0]

tensor([0, 1, 7])

In [23]:
tensor2[1, 0]

tensor(4)

In [24]:
# Slicing
tensor2[:, 2] # this will guive us all the rows and only column 2

tensor([7, 4])

In [25]:
tensor2[0, :] # this will give us only row 0 along with all columns 

tensor([0, 1, 7])

## 3. Basic Tensor Operations

In [26]:
tensor12 = torch.ones(2, 3)
tensor13 = torch.rand(2, 3)

print(tensor12)
print(tensor13)

tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.8585, 0.3469, 0.9472],
        [0.2172, 0.0511, 0.9250]])


In [27]:
# Elementwise addition
tensor14 = tensor12 + tensor13
# torch.add(tensor12, tensor13)
print(tensor14)

# Elementwise subtraction
tensor15 = tensor12 - tensor13
# torch.sub(tensor12, tensor13)
print(tensor15)

# Elementwise multiplication
tensor16 = tensor12 * tensor13
# torch.mul(tensor12, tensor13)
print(tensor16)

# Elementwise division
tensor17 = tensor12 / tensor12
# torch.div(tensor12, tensor13)
print(tensor17)

tensor([[1.8585, 1.3469, 1.9472],
        [1.2172, 1.0511, 1.9250]])
tensor([[0.1415, 0.6531, 0.0528],
        [0.7828, 0.9489, 0.0750]])
tensor([[0.8585, 0.3469, 0.9472],
        [0.2172, 0.0511, 0.9250]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])


## 4. Manipulating a Tensor

In [28]:
x = torch.randint(0, 3, (4, 5))
x

tensor([[0, 2, 2, 1, 2],
        [0, 2, 2, 0, 0],
        [2, 0, 0, 1, 1],
        [0, 1, 0, 1, 1]])

In [29]:
y = x.view(20)
z = x.view(-1, 10)

In [30]:
print(x.size(), y.size(), z.size())

torch.Size([4, 5]) torch.Size([20]) torch.Size([2, 10])


In [31]:
a = torch.arange(9)
a = a.reshape(3, 3)
a

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [32]:
b = torch.randint(0, 9, (3, 3)) ## torch.randint(low = 0, high, size)
b

tensor([[1, 8, 6],
        [2, 8, 6],
        [3, 8, 8]])

In [33]:
c = torch.cat((a, b), dim = 1)
c

tensor([[0, 1, 2, 1, 8, 6],
        [3, 4, 5, 2, 8, 6],
        [6, 7, 8, 3, 8, 8]])

In [34]:
d = torch.cat((a, b), dim = 0)
d

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8],
        [1, 8, 6],
        [2, 8, 6],
        [3, 8, 8]])

In [35]:
a

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [36]:
p = torch.randint(0, 9, (2, 3, 5))
p

tensor([[[0, 7, 3, 6, 6],
         [8, 6, 5, 5, 6],
         [3, 2, 1, 1, 2]],

        [[6, 2, 7, 4, 3],
         [8, 1, 5, 7, 4],
         [3, 3, 0, 5, 6]]])

In [37]:
p.sum()

tensor(125)

In [38]:
p.sum(dim = 0)

tensor([[ 6,  9, 10, 10,  9],
        [16,  7, 10, 12, 10],
        [ 6,  5,  1,  6,  8]])

In [39]:
p.sum(dim= 1)

tensor([[11, 15,  9, 12, 14],
        [17,  6, 12, 16, 13]])

In [40]:
p.sum(dim = 1).shape

torch.Size([2, 5])

# Autograd

In [41]:
import torch 

x = torch.tensor(2.0)
x.requires_grad, x.is_leaf

(False, True)

In [42]:
y = 3 * torch.sigmoid(x) + 5
y.requires_grad, y.is_leaf

(False, True)

In [43]:
import torch

x = torch.tensor(2.0, requires_grad = True)

x.requires_grad, x.is_leaf

(True, True)

In [44]:
y = 3 * torch.sigmoid(x) + 5
y

tensor(7.6424, grad_fn=<AddBackward0>)

In [45]:
y.requires_grad, y.is_leaf

(True, False)

In [46]:
print(x.grad_fn)

None


In [47]:
print(y.grad_fn)

<AddBackward0 object at 0x0000021219718640>


In [48]:
print(x.grad)
y.backward()
print(x.grad) #dy/dx

None
tensor(0.3150)


In [49]:
# x.grad.zero_()
y = 3 * torch.sigmoid(x) + 5
y.backward()
x.grad

tensor(0.6300)

In [50]:
a = torch.rand(2, 5, requires_grad = True)
a

tensor([[0.4074, 0.2376, 0.2749, 0.9183, 0.6811],
        [0.5607, 0.9501, 0.0027, 0.1777, 0.1407]], requires_grad=True)

In [51]:
b = a * a + a + 5
b

tensor([[5.5735, 5.2941, 5.3504, 6.7617, 6.1449],
        [5.8751, 6.8528, 5.0027, 5.2092, 5.1605]], grad_fn=<AddBackward0>)

In [52]:
c = b.mean()
c

tensor(5.7225, grad_fn=<MeanBackward0>)

In [53]:
a.is_leaf, b.is_leaf, c.is_leaf

(True, False, False)

In [54]:
b.retain_grad()

In [55]:
print(a.grad) # Before gradient computation
c.backward()
print(a.grad) # After gradient computation dc/da

None
tensor([[0.1815, 0.1475, 0.1550, 0.2837, 0.2362],
        [0.2121, 0.2900, 0.1005, 0.1355, 0.1281]])


In [56]:
b.grad

tensor([[0.1000, 0.1000, 0.1000, 0.1000, 0.1000],
        [0.1000, 0.1000, 0.1000, 0.1000, 0.1000]])

# Gradient Descent

In [57]:
# Generate train data # y = 5 * x + 3
x = torch.linspace(0.0, 1.0, 15).reshape(15, 1)
w = torch.tensor([5])
b = torch.tensor([3])
y = w * x + b


In [58]:
# Parameter Initialization
w = torch.randn(size = (1, 1), requires_grad = True)
b = torch.randn(size = (1, 1), requires_grad = True)

def forward(x):
    return w * x + b

def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()

print('w: ', w)
print('b: ', b)

w:  tensor([[-0.0503]], requires_grad=True)
b:  tensor([[0.8294]], requires_grad=True)


In [59]:
# Define hyper-parameters
learning_rate = 0.03
num_epochs = 180

#Train the model
for epoch in range(num_epochs):
    y_pred = forward(x)
    
    l = loss(y, y_pred)
    l.backward()
    
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
        
    w.grad.zero_()
    b.grad.zero_()
    
    if (epoch + 1) % 10 == 0:
        print(f'epoch {epoch + 1}: w = {w.item() :.3f}, b = {b.item():.3f}, loss = {l.item():.3f}')

epoch 10: w = 1.204, b = 2.834, loss = 6.471
epoch 20: w = 1.838, b = 3.705, loss = 1.896
epoch 30: w = 2.189, b = 4.065, loss = 0.909
epoch 40: w = 2.409, b = 4.195, loss = 0.662
epoch 50: w = 2.566, b = 4.222, loss = 0.571
epoch 60: w = 2.693, b = 4.205, loss = 0.514
epoch 70: w = 2.804, b = 4.168, loss = 0.469
epoch 80: w = 2.905, b = 4.124, loss = 0.428
epoch 90: w = 2.999, b = 4.078, loss = 0.391
epoch 100: w = 3.088, b = 4.032, loss = 0.357
epoch 110: w = 3.172, b = 3.987, loss = 0.326
epoch 120: w = 3.253, b = 3.944, loss = 0.298
epoch 130: w = 3.330, b = 3.902, loss = 0.273
epoch 140: w = 3.404, b = 3.863, loss = 0.249
epoch 150: w = 3.474, b = 3.825, loss = 0.228
epoch 160: w = 3.542, b = 3.788, loss = 0.208
epoch 170: w = 3.606, b = 3.754, loss = 0.190
epoch 180: w = 3.667, b = 3.720, loss = 0.174


# Neural Networks

In [60]:
import torch
import torch.nn as nn
# import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets, transforms

In [61]:
print(dir(datasets))

['CIFAR10', 'CIFAR100', 'CLEVRClassification', 'CREStereo', 'Caltech101', 'Caltech256', 'CarlaStereo', 'CelebA', 'Cityscapes', 'CocoCaptions', 'CocoDetection', 'Country211', 'DTD', 'DatasetFolder', 'EMNIST', 'ETH3DStereo', 'EuroSAT', 'FER2013', 'FGVCAircraft', 'FakeData', 'FallingThingsStereo', 'FashionMNIST', 'Flickr30k', 'Flickr8k', 'Flowers102', 'FlyingChairs', 'FlyingThings3D', 'Food101', 'GTSRB', 'HD1K', 'HMDB51', 'INaturalist', 'ImageFolder', 'ImageNet', 'Imagenette', 'InStereo2k', 'KMNIST', 'Kinetics', 'Kitti', 'Kitti2012Stereo', 'Kitti2015Stereo', 'KittiFlow', 'LFWPairs', 'LFWPeople', 'LSUN', 'LSUNClass', 'MNIST', 'Middlebury2014Stereo', 'MovingMNIST', 'Omniglot', 'OxfordIIITPet', 'PCAM', 'PhotoTour', 'Places365', 'QMNIST', 'RenderedSST2', 'SBDataset', 'SBU', 'SEMEION', 'STL10', 'SUN397', 'SVHN', 'SceneFlowStereo', 'Sintel', 'SintelStereo', 'StanfordCars', 'UCF101', 'USPS', 'VOCDetection', 'VOCSegmentation', 'VisionDataset', 'WIDERFace', '__all__', '__builtins__', '__cached__',

In [62]:
# Hyper-parameters
hidden_size = 400
num_epochs = 8
batch_size = 32
learning_rate = 0.0001

In [63]:
# Load the MNIST dataset
train_dataset = datasets.MNIST(root = './data', train = True, download = True, transform = transforms.ToTensor())
test_dataset = datasets.MNIST(root = './data',train = False, download = True, transform = transforms.ToTensor())

In [64]:
# Training Data
print(train_dataset.classes)
print(train_dataset.data.shape)
print(train_dataset.targets.shape)

['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
torch.Size([60000, 28, 28])
torch.Size([60000])


In [65]:
# Test Data
print(test_dataset.classes)
print(test_dataset.data.shape)
print(test_dataset.targets.shape)

['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
torch.Size([10000, 28, 28])
torch.Size([10000])


In [66]:
in_features = 784 # Input size = 28 * 28
out_features = 10 # no. of classes

In [67]:
train_dataloader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
test_dataloader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

In [68]:
import matplotlib.pyplot as plt

data = iter(train_dataloader)
imgs, labels = next(data)
print(imgs.shape)
print(labels.shape)

# for i in range(5):
#     plt.subplot(1, 5, i + 1)
#     plt.imshow(imgs[i][0], cmap = 'gray')
#     plt.xlabel(f'Label = {labels[i].item()}')
# plt.show()

torch.Size([32, 1, 28, 28])
torch.Size([32])


In [70]:
class BasicNeuralNet(nn.Module):
    def __init__(self, hidden_size):
        super(BasicNeuralNet, self).__init__()
        self.hidden_size = hidden_size
        self.layer1 = nn.Linear(in_features, self.hidden_size)
        self.layer2= nn.Linear(self.hidden_size, out_features)
        
    def forward(self, x):
        out = self.layer1(x)
        out = torch.relu(out)
        out= self.layer2(out)
        return out
    
model = BasicNeuralNet(hidden_size).to(device)

In [73]:
w1, b1, w2, b2 = list(model.parameters())

In [74]:
# First Linear Layer
print(w1, b1)
print(w1.shape)
print(b1.shape)

Parameter containing:
tensor([[ 0.0191, -0.0104,  0.0085,  ..., -0.0356, -0.0333, -0.0328],
        [-0.0022, -0.0173,  0.0166,  ..., -0.0025, -0.0306, -0.0029],
        [ 0.0140, -0.0006, -0.0231,  ..., -0.0154, -0.0312, -0.0223],
        ...,
        [ 0.0297, -0.0067,  0.0258,  ...,  0.0097, -0.0066,  0.0111],
        [-0.0091,  0.0330,  0.0032,  ...,  0.0093, -0.0127,  0.0266],
        [ 0.0004,  0.0122, -0.0206,  ...,  0.0176, -0.0087, -0.0266]],
       requires_grad=True) Parameter containing:
tensor([ 2.4458e-02,  8.4473e-03, -1.9365e-02, -9.8366e-03,  3.4209e-02,
         3.5538e-02, -4.2835e-03,  3.3435e-03,  2.2688e-02, -2.4025e-02,
        -3.0502e-02,  7.6866e-03,  1.4917e-02, -1.4436e-02, -2.7291e-02,
         1.5776e-02,  3.3038e-02, -4.6843e-03, -1.9234e-03,  1.9936e-02,
         4.3431e-03, -2.1976e-02, -2.7865e-02,  3.1759e-02,  3.4590e-02,
         2.2292e-02, -7.5622e-03, -1.2102e-02, -9.9585e-03, -2.4771e-02,
         3.3942e-03,  1.7069e-02,  9.9901e-03, -3.4927e-0

In [75]:
# Second Linear layer
print(w2, b2)
print(w2.shape)
print(b2.shape)

Parameter containing:
tensor([[-0.0250, -0.0186,  0.0436,  ...,  0.0292,  0.0097, -0.0055],
        [-0.0106, -0.0273, -0.0027,  ..., -0.0334, -0.0217, -0.0299],
        [-0.0333,  0.0100,  0.0101,  ..., -0.0272, -0.0158, -0.0210],
        ...,
        [ 0.0382,  0.0094, -0.0367,  ..., -0.0483,  0.0055, -0.0150],
        [ 0.0277, -0.0151,  0.0094,  ..., -0.0213, -0.0403, -0.0445],
        [ 0.0415, -0.0298,  0.0416,  ...,  0.0410, -0.0203, -0.0103]],
       requires_grad=True) Parameter containing:
tensor([ 0.0165, -0.0140,  0.0107,  0.0414, -0.0411, -0.0084,  0.0361, -0.0178,
        -0.0399,  0.0463], requires_grad=True)
torch.Size([10, 400])
torch.Size([10])


In [76]:
criterion = nn.CrossEntropyLoss() # Loss
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [79]:
# Training
total_steps = len(train_dataloader)

for epoch in range(num_epochs):
    for i, (images, lables) in enumerate(train_dataloader):
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        
        loss = criterion(outputs, labels)
        
        # Backpropagation
        loss.backward()
        
        optimizer.step() # Parameter update
        optimizer.zero_grad()
        
        if i % 300 == 0:
            print(f'Epoch {epoch}, Step {i}/{total_steps}, Loss: {loss.item():.3f}')

Epoch 0, Step 0/1875, Loss: 2.295
Epoch 0, Step 300/1875, Loss: 2.256
Epoch 0, Step 600/1875, Loss: 2.254
Epoch 0, Step 900/1875, Loss: 2.249
Epoch 0, Step 1200/1875, Loss: 2.249
Epoch 0, Step 1500/1875, Loss: 2.301
Epoch 0, Step 1800/1875, Loss: 2.243
Epoch 1, Step 0/1875, Loss: 2.251
Epoch 1, Step 300/1875, Loss: 2.271
Epoch 1, Step 600/1875, Loss: 2.259
Epoch 1, Step 900/1875, Loss: 2.266
Epoch 1, Step 1200/1875, Loss: 2.273
Epoch 1, Step 1500/1875, Loss: 2.258
Epoch 1, Step 1800/1875, Loss: 2.256
Epoch 2, Step 0/1875, Loss: 2.251
Epoch 2, Step 300/1875, Loss: 2.277
Epoch 2, Step 600/1875, Loss: 2.268
Epoch 2, Step 900/1875, Loss: 2.252
Epoch 2, Step 1200/1875, Loss: 2.250
Epoch 2, Step 1500/1875, Loss: 2.273
Epoch 2, Step 1800/1875, Loss: 2.254
Epoch 3, Step 0/1875, Loss: 2.280
Epoch 3, Step 300/1875, Loss: 2.260
Epoch 3, Step 600/1875, Loss: 2.248
Epoch 3, Step 900/1875, Loss: 2.259
Epoch 3, Step 1200/1875, Loss: 2.249
Epoch 3, Step 1500/1875, Loss: 2.265
Epoch 3, Step 1800/1875, 

In [80]:
# test the model

with torch.no_grad():
    correct = 0
    num_samples = len(test_dataloader.dataset)
    
    for imgs, labels in test_dataloader:
        imgs = imgs.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        
        outputs = model(imgs)
        
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        
    acc = correct / num_samples
    print(f'Accuracy: {100 * acc} %')

Accuracy: 10.280000000000001 %
