In [68]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torch.optim import SGD

import numpy as np
import matplotlib.pyplot as plt

In [69]:
INPUT_SIZE = 784
HIDDEN_SIZE = 128
NUM_LAYERS = 3
NUM_CLASSES = 10
NUM_EPOCHS = 50
BATCH_SIZE = 50
LEARNING_RATE = 0.1

NUM_SAMPLES = 5000
NUM_TRAIN_SAMPLES = 250

In [70]:
# MNIST dataset 
dataset = torchvision.datasets.MNIST(root='../data', train=True, transform=transforms.ToTensor(), download=True)

sample_index=list(range(NUM_SAMPLES))
train_index=list(np.random.choice(range(NUM_SAMPLES),size=NUM_TRAIN_SAMPLES,replace=False))

sample_dataset=Subset(dataset, sample_index)
train_dataset=Subset(sample_dataset, train_index)

# Data loader
TRAIN_LOADER = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)

SAMPLE_LOADER = DataLoader(dataset=sample_dataset,batch_size=1,shuffle=False)

In [71]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(NeuralNet, self).__init__()
        self.hidden = nn.ModuleList()
        self.input_layer = nn.Linear(input_size, hidden_size)
        for k in range(num_layers-1):
            self.hidden.append(nn.Linear(hidden_size, hidden_size))
        self.output_layer = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        x=self.input_layer(x)
        x=x.relu()
        for layer in self.hidden:
            x=layer(x)
            x=x.relu()
        x=self.output_layer(x)
        return x
    
def ReLU_glorot_init(model):
    for name, param in model.named_parameters():
        
        if name.endswith(".bias"):
            param.data.fill_(0)
        else:
            nn.init.xavier_normal_(param)

In [72]:
model = NeuralNet(INPUT_SIZE,HIDDEN_SIZE, NUM_LAYERS, NUM_CLASSES)
ReLU_glorot_init(model)
        
criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    for images, labels in TRAIN_LOADER:
        images = images.reshape(-1, INPUT_SIZE)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Epoch {}: Loss {:.3f}'.format(epoch+1,loss.item()))

Epoch 1: Loss 2.242
Epoch 2: Loss 2.028
Epoch 3: Loss 1.901
Epoch 4: Loss 1.469
Epoch 5: Loss 1.279
Epoch 6: Loss 0.995
Epoch 7: Loss 0.811
Epoch 8: Loss 0.652
Epoch 9: Loss 0.662
Epoch 10: Loss 0.489
Epoch 11: Loss 0.376
Epoch 12: Loss 0.327
Epoch 13: Loss 0.500
Epoch 14: Loss 0.302
Epoch 15: Loss 0.140
Epoch 16: Loss 0.152
Epoch 17: Loss 0.184
Epoch 18: Loss 0.162
Epoch 19: Loss 0.115
Epoch 20: Loss 0.092
Epoch 21: Loss 0.054
Epoch 22: Loss 0.080
Epoch 23: Loss 0.064
Epoch 24: Loss 0.042
Epoch 25: Loss 0.048
Epoch 26: Loss 0.048
Epoch 27: Loss 0.036
Epoch 28: Loss 0.029
Epoch 29: Loss 0.025
Epoch 30: Loss 0.036
Epoch 31: Loss 0.025
Epoch 32: Loss 0.022
Epoch 33: Loss 0.021
Epoch 34: Loss 0.014
Epoch 35: Loss 0.014
Epoch 36: Loss 0.022
Epoch 37: Loss 0.019
Epoch 38: Loss 0.022
Epoch 39: Loss 0.019
Epoch 40: Loss 0.018
Epoch 41: Loss 0.011
Epoch 42: Loss 0.010
Epoch 43: Loss 0.010
Epoch 44: Loss 0.014
Epoch 45: Loss 0.016
Epoch 46: Loss 0.008
Epoch 47: Loss 0.010
Epoch 48: Loss 0.010
E

In [73]:
classified_ind=[]
misclassified_ind=[]
C=0
with torch.no_grad():
    for k, (image, label) in enumerate(SAMPLE_LOADER):
        image = image.reshape(-1, INPUT_SIZE)
        output = model(image)
        predictions=output.max(1)[1]
        if predictions==label:
            classified_ind.append(k)
        else:
            misclassified_ind.append(k)

NUM_DELTA = len(classified_ind)

train_index_2 = list(np.random.choice(misclassified_ind,size=NUM_TRAIN_SAMPLES,replace=False))

train_dataset_2 = Subset(sample_dataset, train_index_2)
Delta = Subset(sample_dataset, classified_ind)

TRAIN_LOADER_2 = DataLoader(dataset=train_dataset_2, batch_size=BATCH_SIZE, shuffle=True)
Delta_LOADER = DataLoader(dataset=Delta, batch_size=1, shuffle=False)

print(NUM_DELTA/NUM_SAMPLES)

0.842


In [74]:
true_error=0
true_acc=0
with torch.no_grad():
    for image, label in SAMPLE_LOADER:
        image = image.reshape(-1, INPUT_SIZE)
        output = model(image)
        loss = criterion(output, label)
        if C<loss.item():
            C=loss.item()
        true_error+=loss.item()
        true_acc+=sum(output.max(1)[1]==label)
true_error=true_error/NUM_SAMPLES
true_acc=true_acc/NUM_SAMPLES

Delta_error=0
Delta_acc=0
with torch.no_grad():
    for image, label in Delta_LOADER:
        image = image.reshape(-1, INPUT_SIZE)
        output = model(image)
        loss = criterion(output, label)
        Delta_error+=loss.item()
        Delta_acc+=sum(output.max(1)[1]==label)
Delta_error=Delta_error/NUM_DELTA
Delta_acc=Delta_acc/NUM_DELTA

train_error=0
train_acc=0
with torch.no_grad():
    for image, label in TRAIN_LOADER_2:
        image = image.reshape(-1, INPUT_SIZE)
        output = model(image)
        loss = criterion(output, label)
        train_error+=loss.item()*len(image)
        train_acc+=sum(output.max(1)[1]==label)
train_error=train_error/NUM_TRAIN_SAMPLES
train_acc=train_acc/NUM_TRAIN_SAMPLES

print(true_error,Delta_error,train_error)
print(true_acc, Delta_acc, train_acc)

0.5776669936597731 0.08785731341526255 3.274926853179932
tensor(0.8420) tensor(1.) tensor(0.)


In [75]:
criterion = nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    for images, labels in TRAIN_LOADER_2:
        images = images.reshape(-1, INPUT_SIZE)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    for images, labels in TRAIN_LOADER:
        images = images.reshape(-1, INPUT_SIZE)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Epoch {}: Loss {:.3f}'.format(epoch+1,loss.item()))

Epoch 1: Loss 0.582
Epoch 2: Loss 0.539
Epoch 3: Loss 0.292
Epoch 4: Loss 0.134
Epoch 5: Loss 0.221
Epoch 6: Loss 0.131
Epoch 7: Loss 0.134
Epoch 8: Loss 0.137
Epoch 9: Loss 0.087
Epoch 10: Loss 0.115
Epoch 11: Loss 0.072
Epoch 12: Loss 0.054
Epoch 13: Loss 0.051
Epoch 14: Loss 0.025
Epoch 15: Loss 0.044
Epoch 16: Loss 0.029
Epoch 17: Loss 0.026
Epoch 18: Loss 0.020
Epoch 19: Loss 0.014
Epoch 20: Loss 0.015
Epoch 21: Loss 0.015
Epoch 22: Loss 0.008
Epoch 23: Loss 0.014
Epoch 24: Loss 0.011
Epoch 25: Loss 0.009
Epoch 26: Loss 0.009
Epoch 27: Loss 0.006
Epoch 28: Loss 0.007
Epoch 29: Loss 0.005
Epoch 30: Loss 0.008
Epoch 31: Loss 0.004
Epoch 32: Loss 0.007
Epoch 33: Loss 0.008
Epoch 34: Loss 0.005
Epoch 35: Loss 0.004
Epoch 36: Loss 0.004
Epoch 37: Loss 0.003
Epoch 38: Loss 0.007
Epoch 39: Loss 0.004
Epoch 40: Loss 0.006
Epoch 41: Loss 0.005
Epoch 42: Loss 0.004
Epoch 43: Loss 0.005
Epoch 44: Loss 0.004
Epoch 45: Loss 0.004
Epoch 46: Loss 0.003
Epoch 47: Loss 0.003
Epoch 48: Loss 0.002
E

In [76]:
true_error=0
true_acc=0
with torch.no_grad():
    for image, label in SAMPLE_LOADER:
        image = image.reshape(-1, INPUT_SIZE)
        output = model(image)
        loss = criterion(output, label)
        if C<loss.item():
            C=loss.item()
        true_error+=loss.item()
        true_acc+=sum(output.max(1)[1]==label)
true_error=true_error/NUM_SAMPLES
true_acc=true_acc/NUM_SAMPLES

Delta_error=0
Delta_acc=0
with torch.no_grad():
    for image, label in Delta_LOADER:
        image = image.reshape(-1, INPUT_SIZE)
        output = model(image)
        loss = criterion(output, label)
        Delta_error+=loss.item()
        Delta_acc+=sum(output.max(1)[1]==label)
Delta_error=Delta_error/NUM_DELTA
Delta_acc=Delta_acc/NUM_DELTA

train_error=0
train_acc=0
with torch.no_grad():
    for image, label in TRAIN_LOADER_2:
        image = image.reshape(-1, INPUT_SIZE)
        output = model(image)
        loss = criterion(output, label)
        train_error+=loss.item()*len(image)
        train_acc+=sum(output.max(1)[1]==label)
train_error=train_error/NUM_TRAIN_SAMPLES
train_acc=train_acc/NUM_TRAIN_SAMPLES

print(true_error,Delta_error,train_error)
print(true_acc, Delta_acc, train_acc)

0.3098644938329626 0.14609433335384236 0.00859554260969162
tensor(0.9178) tensor(0.9501) tensor(1.)
