#Import Libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

#Define Neural Network

In [None]:
class DeepNN(nn.Module):
    """Five-layer fully connected classifier with sigmoid hidden activations.

    Layer widths are 784 -> 256 -> 128 -> 64 -> 32 -> 10. The four hidden
    layers use sigmoid; the last layer returns raw scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 10)

    def forward(self, x):
        """Compute class logits for ``x``.

        Args:
            x: tensor whose last dimension is 784.

        Returns:
            Tensor whose last dimension is 10, holding un-activated scores.
        """
        out = torch.sigmoid(self.fc1(x))
        out = torch.sigmoid(self.fc2(out))
        out = torch.sigmoid(self.fc3(out))
        out = torch.sigmoid(self.fc4(out))
        # No activation on the output layer: nn.CrossEntropyLoss expects raw
        # logits and applies log-softmax itself. Squashing the scores into
        # (0, 1) first would put a floor under the achievable loss.
        return self.fc5(out)



In [None]:
#Define Hyperparameters and generate dummy data

# Fix the RNG so the dummy data (and any model initialised afterwards) is
# the same on every "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

input_size = 784
output_size = 10
batch_size = 64

# x: one random feature vector per sample.
x = torch.randn(batch_size, input_size)
# y: one row of random scores per sample; the training cells take
# torch.argmax(y, dim=1) to obtain the integer class label.
y = torch.randn(batch_size, output_size)


In [None]:

# Fresh network for the baseline experiment.
deep_NN = DeepNN()

# Cross-entropy objective, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(deep_NN.parameters(), lr=0.01)

# Full-batch training: each epoch is one forward/backward pass over (x, y).
num_epochs = 1000
for epoch in range(num_epochs):
    outputs = deep_NN(x)
    if outputs is None:
        continue

    # The arg-max column of each row of y serves as that sample's class label.
    loss = criterion(outputs, y.argmax(dim=1))

    # Clear stale gradients, then back-propagate the current loss.
    optimizer.zero_grad()
    loss.backward()

    # Report the loss on every 100th epoch.
    if epoch % 100 == 99:
        print('Epoch [{}/{}], Loss: {:.3f}'.format(epoch + 1, num_epochs, loss.item()))

    optimizer.step()


Epoch [100/1000], Loss: 2.306
Epoch [200/1000], Loss: 2.286
Epoch [300/1000], Loss: 2.269
Epoch [400/1000], Loss: 2.255
Epoch [500/1000], Loss: 2.244
Epoch [600/1000], Loss: 2.236
Epoch [700/1000], Loss: 2.229
Epoch [800/1000], Loss: 2.223
Epoch [900/1000], Loss: 2.218
Epoch [1000/1000], Loss: 2.214


There are several approaches to mitigating the vanishing gradient problem. One of them is weight initialization: initialize the weights of the neural network with appropriate values so that the problem is reduced from the start. Common techniques are Xavier and He initialization.

In [None]:
def init_weights(m):
    """Re-initialise a linear layer with Xavier-uniform weights and zero bias.

    Intended for ``module.apply(init_weights)``: modules that are not
    ``nn.Linear`` are left untouched.

    Args:
        m: any ``nn.Module``; only ``nn.Linear`` instances are modified
            (in place).
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_uniform_(m.weight)
    # A layer built with bias=False has m.bias set to None; skip it rather
    # than crash.
    if m.bias is not None:
        nn.init.zeros_(m.bias)


In [None]:
# Run init_weights on every sub-module of the existing deep_NN, overwriting
# the parameters of its linear layers before training starts again.
deep_NN.apply(init_weights)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(deep_NN.parameters(), lr=0.01)

# Same full-batch schedule as the baseline run, for comparison.
num_epochs = 1000
for epoch in range(num_epochs):
    outputs = deep_NN(x)
    if outputs is None:
        continue

    # Class labels are the arg-max column of each row of y.
    loss = criterion(outputs, y.argmax(dim=1))

    optimizer.zero_grad()
    loss.backward()

    # Report the loss on every 100th epoch.
    if epoch % 100 == 99:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

    optimizer.step()


Epoch [100/1000], Loss: 2.2810
Epoch [200/1000], Loss: 2.2626
Epoch [300/1000], Loss: 2.2488
Epoch [400/1000], Loss: 2.2382
Epoch [500/1000], Loss: 2.2298
Epoch [600/1000], Loss: 2.2230
Epoch [700/1000], Loss: 2.2173
Epoch [800/1000], Loss: 2.2125
Epoch [900/1000], Loss: 2.2084
Epoch [1000/1000], Loss: 2.2049


Another methodology is Batch Normalization, where we normalize the input to each layer.

In [None]:
class DeepNN(nn.Module):
    """Sigmoid MLP with batch normalisation between the layers.

    Layer widths are 784 -> 256 -> 128 -> 64 -> 32 -> 10. Each hidden
    activation is passed through ``BatchNorm1d`` before it reaches the next
    linear layer. The last layer returns raw scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.fc3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.fc4 = nn.Linear(64, 32)
        self.bn4 = nn.BatchNorm1d(32)
        self.fc5 = nn.Linear(32, 10)

    def forward(self, x):
        """Compute class logits for ``x``.

        Args:
            x: tensor whose last dimension is 784.

        Returns:
            Tensor whose last dimension is 10, holding un-activated scores.
        """
        out = self.bn1(torch.sigmoid(self.fc1(x)))
        out = self.bn2(torch.sigmoid(self.fc2(out)))
        out = self.bn3(torch.sigmoid(self.fc3(out)))
        out = self.bn4(torch.sigmoid(self.fc4(out)))
        # No activation on the output layer: nn.CrossEntropyLoss expects raw
        # logits and applies log-softmax itself.
        return self.fc5(out)



In [None]:

# Fresh batch-normalised network.
deep_NN = DeepNN()

# Cross-entropy objective, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(deep_NN.parameters(), lr=0.01)

# Full-batch training: each epoch is one forward/backward pass over (x, y).
num_epochs = 1000
for epoch in range(num_epochs):
    outputs = deep_NN(x)
    if outputs is None:
        continue

    # Class labels are the arg-max column of each row of y.
    loss = criterion(outputs, y.argmax(dim=1))

    optimizer.zero_grad()
    loss.backward()

    # Report the loss on every 100th epoch.
    if epoch % 100 == 99:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

    optimizer.step()


Epoch [100/1000], Loss: 1.8967
Epoch [200/1000], Loss: 1.8420
Epoch [300/1000], Loss: 1.8162
Epoch [400/1000], Loss: 1.7984
Epoch [500/1000], Loss: 1.7842
Epoch [600/1000], Loss: 1.7720
Epoch [700/1000], Loss: 1.7611
Epoch [800/1000], Loss: 1.7511
Epoch [900/1000], Loss: 1.7418
Epoch [1000/1000], Loss: 1.7329


Using ReLU as our activation function rather than sigmoid avoids the saturation that leads to vanishing gradients.

In [None]:
class DeepNN(nn.Module):
    """Five-layer fully connected classifier with ReLU hidden activations.

    Layer widths are 784 -> 256 -> 128 -> 64 -> 32 -> 10. The four hidden
    layers use ReLU; the last layer returns raw scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 10)

    def forward(self, x):
        """Compute class logits for ``x``.

        Args:
            x: tensor whose last dimension is 784.

        Returns:
            Tensor whose last dimension is 10, holding un-activated scores.
        """
        out = torch.relu(self.fc1(x))
        out = torch.relu(self.fc2(out))
        out = torch.relu(self.fc3(out))
        out = torch.relu(self.fc4(out))
        # No ReLU on the output layer: clamping negative logits to zero
        # discards information (and gradient) that nn.CrossEntropyLoss needs.
        return self.fc5(out)



In [None]:

# Fresh ReLU network.
deep_NN = DeepNN()

# Cross-entropy objective, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(deep_NN.parameters(), lr=0.01)

# Full-batch training: each epoch is one forward/backward pass over (x, y).
num_epochs = 1000
for epoch in range(num_epochs):
    outputs = deep_NN(x)
    if outputs is None:
        continue

    # Class labels are the arg-max column of each row of y.
    loss = criterion(outputs, y.argmax(dim=1))

    optimizer.zero_grad()
    loss.backward()

    # Report the loss on every 100th epoch.
    if epoch % 100 == 99:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

    optimizer.step()


Epoch [100/1000], Loss: 2.2460
Epoch [200/1000], Loss: 2.1930
Epoch [300/1000], Loss: 2.1227
Epoch [400/1000], Loss: 1.9790
Epoch [500/1000], Loss: 1.7709
Epoch [600/1000], Loss: 1.6776
Epoch [700/1000], Loss: 1.6253
Epoch [800/1000], Loss: 1.5610
Epoch [900/1000], Loss: 1.4665
Epoch [1000/1000], Loss: 1.3859


#Exploding Gradient 

In [None]:
class DeepNN(nn.Module):
    """ReLU MLP whose weights are deliberately initialised far too large.

    Layer widths are 784 -> 256 -> 512 -> 64 -> 32 -> 10. Every weight
    matrix is drawn from a normal distribution with mean 0 and standard
    deviation 10. The last layer returns raw scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 10)

        # Initialize large weights for each layer (biases keep their default
        # initialisation).
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            nn.init.normal_(layer.weight, mean=0, std=10)

    def forward(self, x):
        """Compute class logits for ``x``.

        Args:
            x: tensor whose last dimension is 784.

        Returns:
            Tensor whose last dimension is 10, holding un-activated scores.
        """
        out = torch.relu(self.fc1(x))
        out = torch.relu(self.fc2(out))
        out = torch.relu(self.fc3(out))
        out = torch.relu(self.fc4(out))
        # No ReLU on the output layer: nn.CrossEntropyLoss expects raw logits.
        return self.fc5(out)



In [None]:

# Fresh network with the oversized weight initialisation.
deep_NN = DeepNN()

# Cross-entropy objective, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(deep_NN.parameters(), lr=0.01)

# Full-batch training: each epoch is one forward/backward pass over (x, y).
num_epochs = 1000
for epoch in range(num_epochs):
    outputs = deep_NN(x)
    if outputs is None:
        continue

    # Class labels are the arg-max column of each row of y.
    loss = criterion(outputs, y.argmax(dim=1))

    optimizer.zero_grad()
    loss.backward()

    # Report the loss on every 100th epoch.
    if epoch % 100 == 99:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

    optimizer.step()


Epoch [100/1000], Loss: nan
Epoch [200/1000], Loss: nan
Epoch [300/1000], Loss: nan
Epoch [400/1000], Loss: nan
Epoch [500/1000], Loss: nan
Epoch [600/1000], Loss: nan
Epoch [700/1000], Loss: nan
Epoch [800/1000], Loss: nan
Epoch [900/1000], Loss: nan
Epoch [1000/1000], Loss: nan


In [None]:
class DeepNN(nn.Module):
    """Five-layer fully connected classifier with sigmoid hidden activations.

    Layer widths are 784 -> 256 -> 512 -> 64 -> 32 -> 10, with default
    PyTorch initialisation. The last layer returns raw scores (logits).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 512)
        self.fc3 = nn.Linear(512, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, 10)

    def forward(self, x):
        """Compute class logits for ``x``.

        Args:
            x: tensor whose last dimension is 784.

        Returns:
            Tensor whose last dimension is 10, holding un-activated scores.
        """
        out = torch.sigmoid(self.fc1(x))
        out = torch.sigmoid(self.fc2(out))
        out = torch.sigmoid(self.fc3(out))
        out = torch.sigmoid(self.fc4(out))
        # No activation on the output layer: nn.CrossEntropyLoss expects raw
        # logits and applies log-softmax itself.
        return self.fc5(out)

In [None]:
# Fresh network for the gradient-clipping experiment.
deep = DeepNN()

# Cross-entropy objective, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(deep.parameters(), lr=0.01)

# Upper bound passed to clip_grad_norm_ on every step.
clip_value = 1

# Full-batch training: each epoch is one forward/backward pass over (x, y).
num_epochs = 1000
for epoch in range(num_epochs):
    outputs = deep(x)
    if outputs is None:
        continue

    # Class labels are the arg-max column of each row of y.
    loss = criterion(outputs, y.argmax(dim=1))

    optimizer.zero_grad()
    loss.backward()
    # Clip between backward() and step() so the update uses the clipped
    # gradients.
    nn.utils.clip_grad_norm_(deep.parameters(), clip_value)
    optimizer.step()

    # Report the loss on every 100th epoch.
    if epoch % 100 == 99:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

Epoch [100/1000], Loss: 2.2872
Epoch [200/1000], Loss: 2.2693
Epoch [300/1000], Loss: 2.2552
Epoch [400/1000], Loss: 2.2442
Epoch [500/1000], Loss: 2.2356
Epoch [600/1000], Loss: 2.2286
Epoch [700/1000], Loss: 2.2229
Epoch [800/1000], Loss: 2.2181
Epoch [900/1000], Loss: 2.2140
Epoch [1000/1000], Loss: 2.2105


#RESNET(Residual Network)



#Define a Plain CNN

#IMPORT LIBRARIES

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import FashionMNIST
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

#Define Simple CNN



- Number of Convolutional Layers: 4
- Number of Convolutional Filters: 64, 128, 256, 512
- Filter/Kernel Size: 7x7, 3x3, 3x3, 3x3
- Padding: Same padding (pad with zeros to maintain input size) for all convolutional layers
- Stride: 2 for all four convolutional layers (as implemented in the code below)
- Activation Function: Sigmoid for all convolutional layers
- Number of Fully Connected Layers: 1
- Number of Hidden Units in Fully Connected Layer: 512
- Output Layer: Fully Connected Layer with number of units equal to the number of classes in the dataset (e.g., 10 for FashionMNIST)
- Activation Function in Output Layer: None


In [None]:
# Define the PlainNet model
class PlainNet(nn.Module):
    """Plain CNN classifier without skip connections.

    Four stages, each a bias-free stride-2 convolution followed by a sigmoid
    and a 3x3 stride-2 max-pool. Channel widths grow 64 -> 128 -> 256 -> 512
    (kernel 7 for the first convolution, 3 for the others). The result is
    average-pooled to 1x1 and mapped by one linear layer to ``num_classes``
    scores, with no output activation.
    """

    def __init__(self, image_channels, num_classes):
        super(PlainNet, self).__init__()
        widths = (image_channels, 64, 128, 256, 512)
        kernels = (7, 3, 3, 3)
        # Registers conv1..conv4; padding of kernel // 2 gives 3 for the 7x7
        # kernel and 1 for the 3x3 kernels.
        for idx, kernel in enumerate(kernels, start=1):
            setattr(
                self,
                f"conv{idx}",
                nn.Conv2d(
                    widths[idx - 1],
                    widths[idx],
                    kernel_size=kernel,
                    stride=2,
                    padding=kernel // 2,
                    bias=False,
                ),
            )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    @staticmethod
    def _activate_and_pool(features):
        """Apply the sigmoid and the 3x3 stride-2 max-pool shared by all stages."""
        return nn.functional.max_pool2d(
            torch.sigmoid(features), kernel_size=3, stride=2, padding=1
        )

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of scores for image batch ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self._activate_and_pool(conv(x))

        pooled = self.avgpool(x)
        # Collapse the 1x1 spatial dimensions so the linear layer sees (batch, 512).
        flat = pooled.reshape(pooled.shape[0], -1)
        return self.fc(flat)

#LOAD FashionMNIST dataset

In [None]:
# Preprocessing applied to every image: convert to a tensor, then apply
# Normalize with mean 0.5 and std 0.5 on the single channel.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Train and test splits of FashionMNIST, rooted at the current directory
# (download=True is passed for both).
train_dataset, test_dataset = (
    FashionMNIST(root='.', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

# Mini-batch loaders: the training split is shuffled, the test split is not.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 12075941.37it/s]


Extracting ./FashionMNIST/raw/train-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 199810.00it/s]


Extracting ./FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3704038.64it/s]


Extracting ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 11844364.78it/s]

Extracting ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw






#Train the model

In [None]:
# PlainNet model
image_channels = 1  # Gray scale
num_classes = 10
model = PlainNet(image_channels, num_classes)

# Train on the GPU when one is available, exactly as the ResNet training cell
# does, so both networks are trained under the same conditions.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# loss function and optimizer (created after the model has been moved so the
# optimizer tracks the parameters on the target device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Train the model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Each batch must live on the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Print progress every 100 mini-batches
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item()}')


Epoch [1/10], Step [100/938], Loss: 2.300379991531372
Epoch [1/10], Step [200/938], Loss: 2.3279833793640137
Epoch [1/10], Step [300/938], Loss: 2.3913798332214355
Epoch [1/10], Step [400/938], Loss: 2.330376625061035
Epoch [1/10], Step [500/938], Loss: 2.3539109230041504
Epoch [1/10], Step [600/938], Loss: 2.3022844791412354
Epoch [1/10], Step [700/938], Loss: 2.2668755054473877
Epoch [1/10], Step [800/938], Loss: 2.288836717605591
Epoch [1/10], Step [900/938], Loss: 2.3341193199157715
Epoch [2/10], Step [100/938], Loss: 2.328723430633545
Epoch [2/10], Step [200/938], Loss: 2.3523526191711426
Epoch [2/10], Step [300/938], Loss: 2.3069543838500977
Epoch [2/10], Step [400/938], Loss: 2.280301809310913
Epoch [2/10], Step [500/938], Loss: 2.319952964782715
Epoch [2/10], Step [600/938], Loss: 2.3494131565093994
Epoch [2/10], Step [700/938], Loss: 2.302398443222046
Epoch [2/10], Step [800/938], Loss: 2.3101301193237305
Epoch [2/10], Step [900/938], Loss: 2.2949931621551514
Epoch [3/10], Ste

In [None]:
# Evaluate on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Put the module in evaluation mode and count correct top-1 predictions over
# the whole test loader, without tracking gradients.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on the test images: {:.2f} %'.format(100 * correct / total))


Test Accuracy of the model on the test images: 10.00 %


#RESNET Architecture
#Residual Net:

- Number of Convolutional Layers: 4
- Number of Convolutional Filters: 64,64,128,256
- Filter/Kernel Size: 7x7, 3x3, 3x3, 3x3
- Padding: Same padding (pad with zeros to maintain input size)
- Activation Function: ReLU
- Number of Residual Blocks: 3
- Number of Fully Connected Layers: 1
- Number of Hidden Units in Fully Connected Layer: 512
- Output Layer: Fully Connected Layer with number of units equal to the number of classes in the dataset (e.g., 10 for FashionMNIST)
- Activation Function in Output Layer: None
#Residual Blocks:

- Number of Convolutional Layers in each Residual Block: 2
- Number of Convolutional Filters in each Residual Block: 64
- Filter/Kernel Size in each Convolutional Layer: 3x3
- Padding: Same padding (pad with zeros to maintain input size)
- Activation Function: ReLU

In [None]:
# Define the ResidualBlock module
class ReBlock(nn.Module):
    """Residual block: two bias-free 3x3 convolutions plus a shortcut.

    The first convolution applies ``stride`` and changes the channel count;
    the second keeps both. When the block changes the stride or the channel
    count, the shortcut is a 1x1 strided convolution followed by
    ``BatchNorm2d``; otherwise it is an empty ``nn.Sequential``, which passes
    the input through unchanged.
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super(ReBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
        )

        # A projection is only needed when the main path changes the tensor
        # shape.
        needs_projection = stride != 1 or in_channels != out_channels
        projection = (
            [
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            ]
            if needs_projection
            else []
        )
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + shortcut(x))``."""
        out = self.conv2(self.relu(self.conv1(x)))
        # Add the (possibly projected) input onto the main path in place.
        out += self.shortcut(x)
        return self.relu(out)

In [None]:
# Define the ResNet model
class ResNet(nn.Module):
    """Residual CNN classifier built from ``ReBlock`` stages.

    Stem: bias-free 7x7 stride-2 convolution to 64 channels, ReLU, then a 3x3
    stride-2 max-pool. Four stages of two ``ReBlock`` each follow, with 64,
    128, 256 and 512 channels; stages 2-4 are built with stride 2. The result
    is average-pooled to 1x1 and mapped by one linear layer to
    ``num_classes`` scores, with no output activation.
    """

    def __init__(self, image_channels, num_classes):
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(
            image_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, 64, 2)
        self.layer2 = self._make_layer(64, 128, 2, stride=2)
        self.layer3 = self._make_layer(128, 256, 2, stride=2)
        self.layer4 = self._make_layer(256, 512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, in_channels, out_channels, num_blocks, stride=1):
        """Stack ``num_blocks`` residual blocks into one ``nn.Sequential``.

        Only the first block receives ``in_channels`` and ``stride``; every
        later block maps ``out_channels`` to ``out_channels`` with stride 1.
        """
        blocks = [
            ReBlock(
                in_channels if idx == 0 else out_channels,
                out_channels,
                stride if idx == 0 else 1,
            )
            for idx in range(num_blocks)
        ]
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return a ``(batch, num_classes)`` tensor of scores for image batch ``x``."""
        x = self.maxpool(self.relu(self.conv1(x)))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        # Collapse the 1x1 spatial dimensions so the linear layer sees (batch, 512).
        return self.fc(x.reshape(x.shape[0], -1))

#Train Model

In [None]:
# Build the residual network for single-channel images and 10 classes.
image_channels = 1  # Grayscale
num_classes = 10
model = ResNet(image_channels, num_classes)

# Cross-entropy objective, optimised by SGD with momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Mini-batch training over the FashionMNIST training loader.
num_epochs = 5
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Each batch must live on the same device as the model.
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current batch loss every 100 steps.
        step = i + 1
        if step % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, step, len(train_loader), loss.item()))



Epoch [1/5], Step [100/938], Loss: 0.7295
Epoch [1/5], Step [200/938], Loss: 0.4479
Epoch [1/5], Step [300/938], Loss: 0.4427
Epoch [1/5], Step [400/938], Loss: 0.3131
Epoch [1/5], Step [500/938], Loss: 0.3766
Epoch [1/5], Step [600/938], Loss: 0.3161
Epoch [1/5], Step [700/938], Loss: 0.2819
Epoch [1/5], Step [800/938], Loss: 0.3777
Epoch [1/5], Step [900/938], Loss: 0.5203
Epoch [2/5], Step [100/938], Loss: 0.2652
Epoch [2/5], Step [200/938], Loss: 0.3453
Epoch [2/5], Step [300/938], Loss: 0.3226
Epoch [2/5], Step [400/938], Loss: 0.3404
Epoch [2/5], Step [500/938], Loss: 0.2602
Epoch [2/5], Step [600/938], Loss: 0.4311
Epoch [2/5], Step [700/938], Loss: 0.4720
Epoch [2/5], Step [800/938], Loss: 0.1982
Epoch [2/5], Step [900/938], Loss: 0.3398
Epoch [3/5], Step [100/938], Loss: 0.3248
Epoch [3/5], Step [200/938], Loss: 0.3955
Epoch [3/5], Step [300/938], Loss: 0.2315
Epoch [3/5], Step [400/938], Loss: 0.3655
Epoch [3/5], Step [500/938], Loss: 0.3716
Epoch [3/5], Step [600/938], Loss:

In [None]:
# Put the module in evaluation mode and count correct top-1 predictions over
# the whole test loader, without tracking gradients. `device` is the one
# chosen in the training cell.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Test Accuracy of the model on the test images: {:.2f} %'.format(100 * correct / total))


Test Accuracy of the model on the test images: 90.15 %
