In [200]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torch.optim import SGD

import numpy as np
import matplotlib.pyplot as plt

In [201]:
# --- Model architecture -------------------------------------------------
INPUT_SIZE = 784      # length of a flattened input image (images are reshaped to (-1, INPUT_SIZE))
HIDDEN_SIZE = 128     # width of every hidden Linear layer
NUM_LAYERS = 3        # input layer + (NUM_LAYERS - 1) hidden-to-hidden layers
NUM_CLASSES = 10      # size of the network output

# --- Optimisation -------------------------------------------------------
NUM_EPOCHS = 50
BATCH_SIZE = 50
LEARNING_RATE = 0.1

# --- Data ---------------------------------------------------------------
NUM_SAMPLES = 2000        # size of the sample pool taken from the dataset
NUM_TRAIN_SAMPLES = 500   # number of pool elements used for training

# --- Reproducibility ----------------------------------------------------
# The notebook draws a random training subset (NumPy), shuffles batches and
# initialises weights (torch). Seeding both global generators here, before any
# of that happens, makes Restart & Run All produce the same numbers every time.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [202]:
# MNIST training split, converted to tensors; downloaded into ../data if missing.
dataset = torchvision.datasets.MNIST(
    root='../data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Sample pool: the first NUM_SAMPLES items of the dataset.
sample_index = list(range(NUM_SAMPLES))
sample_dataset = Subset(dataset, sample_index)

# Training set: NUM_TRAIN_SAMPLES distinct positions drawn at random from the
# pool. The indices are relative to `sample_dataset`, not to `dataset`.
train_index = list(
    np.random.choice(range(NUM_SAMPLES), size=NUM_TRAIN_SAMPLES, replace=False)
)
train_dataset = Subset(sample_dataset, train_index)

# Data loaders: shuffled mini-batches for training, and the whole pool visited
# one item at a time in fixed order for evaluation.
TRAIN_LOADER = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
SAMPLE_LOADER = DataLoader(sample_dataset, batch_size=1, shuffle=False)

In [203]:
class NeuralNet(nn.Module):
    """Fully connected ReLU network that returns one raw score per class.

    Layers, in order of application:
      * ``input_layer``:  ``input_size -> hidden_size``
      * ``hidden``:       ``num_layers - 1`` layers of ``hidden_size -> hidden_size``
      * ``output_layer``: ``hidden_size -> num_classes``

    ReLU is applied after every layer except ``output_layer``.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        # `hidden` is registered first (still empty) so that the module and
        # parameter iteration order is hidden -> input_layer -> output_layer.
        self.hidden = nn.ModuleList()
        self.input_layer = nn.Linear(input_size, hidden_size)
        self.hidden.extend(
            [nn.Linear(hidden_size, hidden_size) for _ in range(1, num_layers)]
        )
        self.output_layer = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map inputs whose last dimension is ``input_size`` to class scores."""
        activation = torch.relu(self.input_layer(x))
        for hidden_layer in self.hidden:
            activation = torch.relu(hidden_layer(activation))
        return self.output_layer(activation)
    
def ReLU_glorot_init(model):
    """Re-initialise ``model`` in place: zero biases, Xavier-normal everything else.

    Args:
        model: an ``nn.Module`` whose parameters are overwritten.

    A parameter is treated as a bias when the last component of its dotted
    name is ``"bias"``. Every other parameter is passed to
    ``nn.init.xavier_normal_`` with its default gain, which requires the
    tensor to have at least two dimensions.
    """
    for name, param in model.named_parameters():
        # Compare the leaf name rather than using `endswith(".bias")`: a bias
        # owned directly by the root module is named just "bias" (no dot), and
        # would otherwise be sent to xavier_normal_, which rejects 1-D tensors.
        leaf_name = name.rpartition(".")[2]
        if leaf_name == "bias":
            nn.init.zeros_(param)
        else:
            nn.init.xavier_normal_(param)

In [204]:
def get_bound(sample_loader, train_loader, num_samples, num_train, deltas):
    """Train a fresh ``NeuralNet`` and evaluate an error bound for each delta.

    A new network is built from the notebook-level constants, initialised with
    ``ReLU_glorot_init`` and trained with SGD on ``train_loader`` for
    ``NUM_EPOCHS`` epochs using cross-entropy loss.

    Args:
        sample_loader: loader over the full sample pool (images, labels).
        train_loader: loader over the training subset (images, labels).
        num_samples: number of items yielded in total by ``sample_loader``.
        num_train: number of items yielded in total by ``train_loader``.
        deltas: iterable of delta values at which to evaluate the bound.

    Returns:
        ``(train_error, true_error, bounds, p_Delta)`` where
        ``train_error`` is the mean cross-entropy over ``train_loader``,
        ``true_error`` is the mean cross-entropy over ``sample_loader``,
        ``bounds`` holds ``train_error + sqrt(C**2 * log_component / 2)`` for
        each delta (``C`` being the largest per-sample loss on the pool), and
        ``p_Delta`` is the value plugged into ``log_component``.
    """
    model = NeuralNet(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, NUM_CLASSES)
    ReLU_glorot_init(model)

    criterion = nn.CrossEntropyLoss()
    # Unreduced variant, so evaluation on the pool does not depend on the
    # batch size of `sample_loader`.
    per_sample_criterion = nn.CrossEntropyLoss(reduction="none")
    optimizer = SGD(model.parameters(), lr=LEARNING_RATE)

    # --- Training ---------------------------------------------------------
    for _ in range(NUM_EPOCHS):
        for images, labels in train_loader:
            images = images.reshape(-1, INPUT_SIZE)
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # --- Mean loss on the training subset ---------------------------------
    train_error = 0
    with torch.no_grad():
        for image, label in train_loader:
            image = image.reshape(-1, INPUT_SIZE)
            output = model(image)
            loss = criterion(output, label)
            # `loss` is a batch mean; weight it by the batch size before summing.
            train_error += loss.item() * len(image)
    train_error = train_error / num_train

    # --- Loss statistics and correctness on the whole sample pool ---------
    true_error = 0
    classifications = []
    C = 0  # largest single-sample loss seen on the pool
    with torch.no_grad():
        for image, label in sample_loader:
            image = image.reshape(-1, INPUT_SIZE)
            output = model(image)
            losses = per_sample_criterion(output, label)
            C = max(C, losses.max().item())
            true_error += losses.sum().item()
            predictions = output.max(1)[1]
            classifications += (predictions == label).tolist()
    true_error = true_error / num_samples

    # NOTE(review): the original cell read `p_Delta` without ever assigning it
    # (NameError on a fresh kernel) and never used `classifications`. It is
    # reconstructed here as the fraction of correctly classified pool samples;
    # confirm this matches the intended definition of p_Delta.
    p_Delta = sum(classifications) / num_samples

    # --- Bound for each delta ---------------------------------------------
    bounds = []
    for delta in deltas:
        root_delta = delta ** (1 / num_train)
        log_component = np.log(
            (1 - p_Delta + np.sqrt((1 - p_Delta) ** 2 + 4 * root_delta * p_Delta))
            / (2 * root_delta)
        )
        bounds.append(train_error + np.sqrt(C ** 2 * log_component / 2))

    return train_error, true_error, bounds, p_Delta

In [205]:
# Delta values at which the bound is evaluated.
DELTAS = [0.01, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2, 0.25]

# The training-set size constant is NUM_TRAIN_SAMPLES; the previously used
# name NUM_TRAIN is not defined anywhere in the notebook.
tr_er, ts_er, bs, p = get_bound(
    SAMPLE_LOADER, TRAIN_LOADER, NUM_SAMPLES, NUM_TRAIN_SAMPLES, DELTAS
)
print(tr_er, ts_er, bs, p)

0.005451408843509853 0.4496843063832596 [0.8416651014148162, 0.7538554055048358, 0.6798802854636343, 0.6325769331416754, 0.5967247462587598, 0.5673428996717469, 0.5421437415333938, 0.5198762424884137, 0.4997773025382459, 0.46422944873351846] 0.888
