In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from torch.autograd import Variable

# Hyperparameters
# NOTE(review): in the visible cells `learning_rate`, `momentum` and
# `log_interval` are defined but not referenced (the optimizer uses a literal
# lr and train() logs every 100 batches) -- confirm whether they should be wired in.
n_epochs = 100
batch_size_train = 64
batch_size_test = 64
learning_rate = 0.01
momentum = 0.1
log_interval = 256
random_seed = 1
number_classes = 10

# Setting random seed for reproducibility (cuDNN is switched off as part of that).
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

# Pick the best available accelerator: CUDA first, then Apple MPS, else CPU.
# Hard-coding "cuda" only fails later, at the first `.to(device)`, on machines
# without an NVIDIA GPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    # Release cached allocator blocks left over from earlier runs in this kernel.
    torch.cuda.empty_cache()
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    # `torch.backends.mps` is absent on older PyTorch builds, hence the getattr guard.
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f'Using device: {device}')

Using device: mps


In [2]:
# Download and transform the MNIST dataset
# Each image is converted to a tensor and then normalised with a fixed
# mean/std pair (presumably the MNIST training-set statistics -- confirm).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = datasets.MNIST('../mnist_data', download=True, train=True, transform=transform)
test_dataset = datasets.MNIST('../mnist_data', download=True, train=False, transform=transform)

# Training batches are reshuffled every epoch; the batch size comes from the
# hyperparameter cell instead of a duplicated literal.
train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True)

# Evaluation keeps a fixed order.
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size_test,
    shuffle=False
)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net1(nn.Module):
    """Small two-stage CNN that maps single-channel images to 10 raw logits.

    Each stage is conv (3x3, padding 1) -> batch norm -> ReLU -> 2x2 max-pool;
    the second stage additionally applies channel dropout right after its
    convolution. The classifier head flattens to ``64 * 7 * 7`` features, so
    the spatial size after the two poolings must be 7x7 (e.g. 28x28 inputs).
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 1 -> 32 -> 64 channels, spatial size kept by padding.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv2_drop = nn.Dropout2d(p=0.25)

        # One batch-norm per convolution output.
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)

        # Shared 2x2 max-pool, used after both stages.
        self.pool = nn.MaxPool2d(2)

        # Classifier head: flattened features -> 128 hidden units -> 10 logits.
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return unnormalised class scores of shape ``(batch, 10)``."""
        # Stage 1: conv -> BN -> ReLU -> pool.
        stage1 = self.pool(F.relu(self.bn1(self.conv1(x))))
        # Stage 2: conv -> channel dropout -> BN -> ReLU -> pool.
        stage2 = self.pool(F.relu(self.bn2(self.conv2_drop(self.conv2(stage1)))))
        # Collapse channels and spatial dims into one feature vector per sample.
        flat = stage2.view(-1, 64 * 7 * 7)
        # Hidden layer; functional dropout is only active in training mode.
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        return self.fc2(hidden)

# Smoke-test the network: push one random single-channel 28x28 image through a
# freshly constructed Net1 and print the output shape, confirming that the
# hard-coded flatten size (64 * 7 * 7) matches what the conv/pool stack emits.
# NOTE(review): the model is not switched to eval() here, so this forward pass
# runs in the module's default mode; only the output shape is inspected.
if __name__ == "__main__":
    dummy_input = torch.randn(1, 1, 28, 28)
    net = Net1()
    output = net(dummy_input)
    print(f'Output size: {output.size()}')  # Should be [1, 10] for 10 classes

Output size: torch.Size([1, 10])


In [None]:
def BOLT_loss(logits, target):
    """Compute the BOLT loss from raw class scores.

    The logits are turned into probabilities with a softmax over ``dim=1`` and
    the class-0 column is dropped. For a sample with label ``t`` the loss is::

        sum(p_c for c > t) + (1 - p_t)    if t >= 1
        sum(p_c for c >= 1)               if t == 0

    and the per-sample values are averaged over the batch.

    The number of classes is taken from ``logits`` itself rather than being
    fixed at 10, so inputs with more than 10 classes are no longer truncated.

    Args:
        logits: Tensor of shape ``(batch, num_classes)`` with unnormalised scores.
        target: Integer tensor of shape ``(batch,)`` holding class indices.

    Returns:
        Scalar tensor: the summed loss divided by the batch size.
    """
    # Column j of `probs` holds the probability of class j + 1.
    probs = F.softmax(logits, dim=1)[:, 1:]
    batch_size, num_columns = probs.shape

    # Column indices broadcast against the labels so both are (batch, num_columns).
    column_idx = torch.arange(num_columns, device=target.device).expand(batch_size, num_columns)
    target_col = target.unsqueeze(1).expand_as(column_idx)

    # Probability mass on classes strictly above the label is penalised as-is ...
    above_label = (column_idx >= target_col).float()
    # ... and the label's own column (absent when the label is 0) contributes 1 - p.
    at_label = (column_idx == (target_col - 1)).float()

    per_entry = above_label * probs + at_label * (1 - probs)
    return per_entry.sum() / batch_size

In [None]:
def train(epoch, loader, model, optimizer):
    """Run one optimisation pass over every batch yielded by ``loader``.

    Each batch is moved to the module-level ``device``, scored with
    ``BOLT_loss`` scaled by ``1 / number_classes``, and used for a single
    optimizer step. A progress line is printed for every 100th batch.

    Args:
        epoch: Epoch number, used only in the progress message.
        loader: Iterable of ``(data, target)`` batches exposing ``.dataset``.
        model: Network to optimise; switched to training mode.
        optimizer: Optimizer bound to ``model``'s parameters.
    """
    model.train()
    for batch_idx, batch in enumerate(loader):
        data, labels = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        # Forward pass and class-count-scaled loss in one expression.
        be = BOLT_loss(model(data), labels) / number_classes
        be.backward()
        optimizer.step()

        # Only every 100th batch (starting with the first) is reported.
        if batch_idx % 100:
            continue
        print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(loader.dataset)} ({100. * batch_idx / len(loader):.0f}%)]\tLoss: {be.item():.6f}')

In [None]:
accuracies=[]
def test(loader, network1, device):
    network1.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, targets in loader:
            data = data.to(device)
            targets = targets.to(device)
            outputs = network1(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    accuracy = correct / total
    accuracies.append(accuracy)
    print(f'Accuracy of the network on the MNIST test images: {100 * accuracy:.2f}%')

In [None]:

# Build a fresh Net1 on the selected device and optimise it with Adam.
network1 = Net1().to(device)
# NOTE(review): `params` is not referenced again in the visible cells.
params = list(network1.parameters())
# NOTE(review): the step size is the literal 0.001, not the `learning_rate`
# constant (0.01) from the hyperparameter cell -- confirm which is intended.
optimizer = optim.Adam(network1.parameters(), lr=0.001)
# Epochs are numbered from 1; each one is a full training pass followed by an
# evaluation on the test loader (test() prints the accuracy and records it in
# `accuracies`).
for epoch in range(1, n_epochs + 1):
    train(epoch, train_loader, network1, optimizer)
    test(test_loader , network1, device)


Accuracy of the network on the MNIST test images: 86.59%
Accuracy of the network on the MNIST test images: 88.54%
Accuracy of the network on the MNIST test images: 88.31%
Accuracy of the network on the MNIST test images: 88.74%
Accuracy of the network on the MNIST test images: 98.17%
Accuracy of the network on the MNIST test images: 98.56%
Accuracy of the network on the MNIST test images: 98.57%
Accuracy of the network on the MNIST test images: 98.71%
Accuracy of the network on the MNIST test images: 98.68%
Accuracy of the network on the MNIST test images: 98.33%
Accuracy of the network on the MNIST test images: 98.70%
Accuracy of the network on the MNIST test images: 98.77%
Accuracy of the network on the MNIST test images: 98.90%
Accuracy of the network on the MNIST test images: 98.72%
Accuracy of the network on the MNIST test images: 98.69%
Accuracy of the network on the MNIST test images: 98.82%
Accuracy of the network on the MNIST test images: 99.01%
Accuracy of the network on the 