In [123]:
# Import PyTorch

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Import other packages

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import truncnorm

In [124]:
# Define the hyper-parameters of the model

input_size = 784
hidden_size = 600
num_classes = 1
num_epochs = 20
batch_size = 100
learning_rate = 0.01
momentum = 0.9

In [125]:
# Import the MNIST dataset as two separate datasets

train_dataset = torchvision.datasets.MNIST(root='data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           target_transform=lambda y: -1 if y<=4 else 1,  
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='data', 
                                          train=False,
                                          target_transform=lambda y: -1 if y<=4 else 1,
                                          transform=transforms.ToTensor())

# Create the data loader for training
train_dataset_loader = DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

# Create the data loader for validating

test_dataset_loader = DataLoader(dataset=test_dataset, 
                                          batch_size=1, 
                                          shuffle=False) 

In [126]:
# Defining a FC neural network

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        #self.fc2 = nn.Linear(hidden_size, hidden_size) 
        self.fc3 = nn.Linear(hidden_size, num_classes)  
    
    def forward(self, x):
        x = self.fc1(x)
        x = x.relu()
        #x = self.fc2(x)
        #x = x.relu()
        x = self.fc3(x)
        return x

model = NeuralNet(input_size, hidden_size, num_classes)

def parameter_init(model,sigma):
    for name, param in model.named_parameters():
        if name.endswith(".bias"):
            if 'fc1' in name:
                param.data.fill_(0.1)
            else:
                param.data.fill_(0)
        else:
            param.data.normal_(0,sigma)
    
parameter_init(model,0.04)

In [127]:
criterion = nn.SoftMarginLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 

# Training Epoch

def train_epoch():
    for i, (images, labels) in enumerate(train_dataset_loader):  
        # Move tensors to the configured device
        images = images.reshape(-1, 28*28)
        labels = labels
        
        # Forward pass
        outputs = model(images)
        loss = criterion(torch.reshape(outputs,(len(outputs),)), labels)
        
        # Backprpagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        

    return loss.item()

for epoch in range(num_epochs):
    loss=train_epoch()
    print ('Epoch {}, Loss: {:.3f}'.format(epoch+1,loss))

Epoch 1, Loss: 0.438
Epoch 2, Loss: 0.341
Epoch 3, Loss: 0.307
Epoch 4, Loss: 0.312
Epoch 5, Loss: 0.213
Epoch 6, Loss: 0.195
Epoch 7, Loss: 0.123
Epoch 8, Loss: 0.139
Epoch 9, Loss: 0.269
Epoch 10, Loss: 0.188
Epoch 11, Loss: 0.113
Epoch 12, Loss: 0.132
Epoch 13, Loss: 0.086
Epoch 14, Loss: 0.145
Epoch 15, Loss: 0.138
Epoch 16, Loss: 0.073
Epoch 17, Loss: 0.105
Epoch 18, Loss: 0.061
Epoch 19, Loss: 0.078
Epoch 20, Loss: 0.164


In [128]:
error=0
with torch.no_grad():
    for image, label in train_dataset_loader:
        image = image.reshape(-1, 28*28)
        outputs = torch.reshape(torch.sign(model(image)),(len(label),))
        error+=torch.sum(torch.abs(outputs-label))/len(train_dataset)
print(error)

tensor(0.0619)


In [129]:
error=0
with torch.no_grad():
    for image, label in test_dataset_loader:
        image = image.reshape(-1, 28*28)
        outputs = torch.reshape(torch.sign(model(image)),(len(label),))
        error+=torch.sum(torch.abs(outputs-label))/len(train_dataset)
print(error)

tensor(0.0119)


In [130]:
no_parameters=0
for param in model.named_parameters():
    if 'weight' in param[0]:
        no_parameters+=param[1].size()[0]*param[1].size()[1]
    else:
        no_parameters+=len(param[1])
print(no_parameters)

471601
