In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision.transforms import transforms

In [2]:
## Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [3]:
## Hyperparameters

## Network dimensions
input_size = 28 * 28   ## each 28 x 28 image is flattened into one vector of 784 values for the linear layer
hidden_size = 500
num_classes = 10

## Optimisation settings
num_epochs = 10
batch_size = 100
learning_rate = 1e-3

In [4]:
## MNIST training split: fetched into ./mnist_folder when not already present,
## with every image converted to a tensor on access.
training_dataset = torchvision.datasets.MNIST(
    root='./mnist_folder',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

## MNIST test split: read from the same folder, no download attempted here.
testing_dataset = torchvision.datasets.MNIST(
    root='./mnist_folder',
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)

In [5]:
## Batched iterators over the two splits.
## Training batches are shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=training_dataset,
    shuffle=True,
    batch_size=batch_size,
)

test_loader = torch.utils.data.DataLoader(
    dataset=testing_dataset,
    shuffle=False,
    batch_size=batch_size,
)

In [2]:
# Optional preview of the first six test images (kept disabled; uncomment to run).
# import matplotlib.pyplot as plt

# examples = iter(test_loader)
# example_data, example_targets = next(examples)

# for i in range(6):
#     plt.subplot(2,3,i+1)
#     plt.imshow(example_data[i][0], cmap='gray')
# plt.show()

In [7]:
## Fully connected neural network with one hidden layer


class NeuralNet(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Architecture: Linear(input_size -> hidden_size) -> ReLU -> Linear(hidden_size -> num_class).
    The output is the raw score per class; no activation or softmax is applied
    after the last layer.

    Args:
        input_size: number of features per sample expected by the first layer.
        hidden_size: number of units in the hidden layer.
        num_class: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_class):
        super().__init__()
        self.input_size = input_size
        ## a linear layer is defined by the size of its input and the size of its output
        self.l1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(in_features=hidden_size, out_features=num_class)

    def forward(self, x):
        """Compute the class scores for a batch.

        Args:
            x: tensor whose last dimension equals ``input_size``, or a tensor
                with more than two dimensions (e.g. ``[batch, 1, 28, 28]``)
                whose per-sample elements flatten to ``input_size`` values.

        Returns:
            Tensor of raw class scores with ``num_class`` values in its last
            dimension.
        """
        ## Accept un-flattened image batches: keep the batch dimension and merge
        ## the rest. Inputs already ending in input_size are passed through
        ## untouched, so existing callers see no change.
        if x.dim() > 2 and x.shape[-1] != self.input_size:
            x = x.flatten(start_dim=1)

        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        ## no activation and no softmax at the end
        return out

## Build the network from the hyperparameters, then move it to the selected device.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)

In [8]:
## Loss function and optimizer

## the loss compares the model's raw class scores with the target labels
criterion = nn.CrossEntropyLoss()

## Adam updates all parameters of the model using the configured learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
## train the model
model.train()   ## put the model in training mode explicitly, in case an earlier cell switched it to eval mode

n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (image, label) in enumerate(train_loader):
        ## each batch of images arrives as [100, 1, 28, 28]
        ##   ( 100 => images per batch , 1 => one color channel , 28 x 28 => pixels )
        ## the model expects [100, 784], so every image is flattened to input_size values
        ## (input_size comes from the hyperparameter cell instead of a repeated 28*28)
        image = image.reshape(-1, input_size).to(device)
        label = label.to(device)

        ## forward pass
        output = model(image)
        loss = criterion(output, label)

        ## backward and optimize: clear old gradients, compute new ones, take one step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        ## report the current batch loss every 100 steps
        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/10], Step [100/600], Loss: 0.3090
Epoch [1/10], Step [200/600], Loss: 0.4456
Epoch [1/10], Step [300/600], Loss: 0.2881
Epoch [1/10], Step [400/600], Loss: 0.3643
Epoch [1/10], Step [500/600], Loss: 0.0881
Epoch [1/10], Step [600/600], Loss: 0.1473
Epoch [2/10], Step [100/600], Loss: 0.1235
Epoch [2/10], Step [200/600], Loss: 0.1690
Epoch [2/10], Step [300/600], Loss: 0.1937
Epoch [2/10], Step [400/600], Loss: 0.1187
Epoch [2/10], Step [500/600], Loss: 0.0959
Epoch [2/10], Step [600/600], Loss: 0.0360
Epoch [3/10], Step [100/600], Loss: 0.0384
Epoch [3/10], Step [200/600], Loss: 0.0976
Epoch [3/10], Step [300/600], Loss: 0.0852
Epoch [3/10], Step [400/600], Loss: 0.0122
Epoch [3/10], Step [500/600], Loss: 0.0711
Epoch [3/10], Step [600/600], Loss: 0.0512
Epoch [4/10], Step [100/600], Loss: 0.0367
Epoch [4/10], Step [200/600], Loss: 0.0811
Epoch [4/10], Step [300/600], Loss: 0.0540
Epoch [4/10], Step [400/600], Loss: 0.0132
Epoch [4/10], Step [500/600], Loss: 0.0716
Epoch [4/10

In [10]:
## test the model
model.eval()   ## evaluation mode; gradients are not needed below either

n_correct = 0
n_sample = 0
with torch.no_grad():
    for image, label in test_loader:
        ## flatten each image to input_size values, same as during training
        image = image.reshape(-1, input_size).to(device)
        label = label.to(device)
        output = model(image)

        ## prediction
        ## max returns (value, index); the index of the largest score is the predicted class
        _, predicted = torch.max(output, 1)
        n_sample += label.size(0)
        n_correct += (predicted == label).sum().item()

acc = 100.0 * n_correct / n_sample
## report the number of images actually evaluated rather than a fixed count
print(f'Accuracy of the network on the {n_sample} test images: {acc} %')


Accuracy of the network on the 10000 test images: 98.08 %


## Activation Function

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F


## 
## The same activation functions are reachable in three ways.
## Each one is applied to a small sample so that the cell actually runs
## (the function forms need a tensor argument).
sample = torch.tensor([-2.0, 0.0, 2.0])

## 1) functions in the torch namespace
torch.relu(sample)
torch.sigmoid(sample)
torch.softmax(sample, dim=0)   ## softmax needs the dimension to normalise over
torch.tanh(sample)


## 2) nn modules: create the layer object first, then call it on a tensor
nn.ReLU()(sample)
nn.Softmax(dim=0)(sample)
nn.Sigmoid()(sample)
nn.Tanh()(sample)
nn.LeakyReLU()(sample)


## 3) functions in torch.nn.functional
F.tanh(sample)
F.softmax(sample, dim=0)
F.sigmoid(sample)
F.relu(sample)
F.leaky_relu(sample)