Class for custom Imbalanced MNIST dataset

In [None]:
import torch
import torchvision
import torchvision.datasets as datasets
from torch.utils.data import Dataset
import torchvision.transforms as tvt
from PIL import Image

class BinaryImbalancedMNIST(Dataset):
  """Binary (digit 9 vs. digit 4) subset of MNIST with a configurable class imbalance.

  Digit 9 is mapped to label 1.0 and digit 4 to label 0.0. A budget of 5000
  images is split between the two classes according to ``proportion``; if a
  class has fewer samples than requested, all available samples are used.

  Args:
    proportion: fraction of the 5000-image budget assigned to digit 9.
    val_prop: fraction of ``num_val_images`` taken from digit 9.
    num_val_images: number of images held out (from the per-class budget) as a
      validation set. Forced to 0 when ``mode`` is not ``"train"``.
    mode: ``"train"`` uses the MNIST training split and also builds
      ``val_images`` / ``val_labels``; any other value uses the test split.

  Attributes set by the constructor:
    images, labels: lists of raw uint8 image tensors and scalar float labels.
    val_images, val_labels: in train mode, a stacked tensor of transformed
      validation images and the matching label tensor; otherwise empty lists.
  """

  def __init__(self,proportion,val_prop=0.5,num_val_images = 10, mode="train"):
    super().__init__()
    num_images = 5000
    classes = [9,4]

    self.images = []
    self.labels = []
    self.val_images = []
    self.val_labels = []

    is_train = (mode == "train")
    self.mnist = datasets.MNIST('data',train=is_train,download=True,transform=None)
    if not is_train:
      # no validation split is carved out of the test data
      num_val_images = 0

    #standard recommended transform for MNIST by Pytorch
    self.transform = tvt.Compose([tvt.Resize((32,32)),
                                  tvt.ToTensor(),
                                  tvt.Normalize((0.1307,),(0.3081,))
                                  ])

    # int() truncates toward zero, which equals floor for these non-negative
    # counts and avoids depending on numpy being imported before this class.
    num_images_class0 = int(num_images*proportion)
    num_images_class1 = num_images - num_images_class0
    num_images_per_class = [num_images_class0,num_images_class1]

    num_val_images_class0 = int(num_val_images*val_prop)
    num_val_images_class1 = num_val_images - num_val_images_class0
    num_val_images_per_class = [num_val_images_class0,num_val_images_class1]

    # `.data` / `.targets` replace the deprecated train_data/test_data aliases
    data = self.mnist.data
    label = self.mnist.targets

    for idx, cls in enumerate(classes):
      # all samples of this digit, in dataset order
      class_mask = (label == cls)
      class_imgs = data[class_mask]
      class_lbls = label[class_mask]

      n_total = num_images_per_class[idx]
      # the last `num_val_images_per_class[idx]` of the budget are kept for validation
      n_train = n_total - num_val_images_per_class[idx]

      self.images.extend(class_imgs[0:n_train])
      # set label as 1 for class = 9 and 0 for class = 4
      self.labels.extend((class_lbls[0:n_train] == classes[0]).float())

      #if training dataset, also create the validation data. Skip for testing
      if is_train:
        for img in class_imgs[n_train:n_total]:
          img_tmp = Image.fromarray(img.numpy(), mode='L')
          img_tmp = self.transform(img_tmp)
          self.val_images.append(img_tmp.unsqueeze(0))
        self.val_labels.append((class_lbls[n_train:n_total] == classes[0]).float())
    if is_train:
      self.val_images = torch.cat(self.val_images, dim=0)
      self.val_labels = torch.cat(self.val_labels, dim=0)

  def __len__(self):
    """Return the number of (non-validation) images in the dataset."""
    return len(self.images)

  def __getitem__(self,idx):
    """Return the transformed image (float64 tensor) and its scalar float label."""
    image, label = self.images[idx],self.labels[idx]
    image = Image.fromarray(image.numpy(), mode='L')
    if self.transform is not None:
      image = self.transform(image).to(dtype=torch.float64)
    # labels are already tensors; clone instead of torch.tensor(tensor), which warns
    label = label.detach().clone()
    return image, label


Example - imbalanced training data vs balanced validation data

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision
from torch.autograd import Variable
import itertools
# Example: training set where 95% of the image budget goes to digit 9,
# served in shuffled mini-batches of 10.
train_data = BinaryImbalancedMNIST(mode="train", proportion=0.95)
train_data_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=10,
    shuffle=True,
    num_workers=0,
)

In [None]:
# Show the tensor shape and the labels of the first mini-batch only.
for image, label in train_data_loader:
    print(image.size())
    print(label)
    break

Validation Data - Balanced

In [None]:
# The validation split is built by the dataset constructor in train mode;
# with the default val_prop=0.5 it is balanced between the two digits.
train_data = BinaryImbalancedMNIST(mode="train", proportion=0.95)
val_data, val_label = train_data.val_images, train_data.val_labels
print(val_data.size())
print(val_label)

MetaModule class to enable meta-learning. Reference - https://github.com/danieltan07/learning-to-reweight-examples/blob/master/model.py

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torchvision
from torch.autograd import Variable
import itertools

# Much of the MetaModule is adapted from the work done by Adrien Ecoffet. This
# MetaModule is needed to compute higher-order derivatives for meta-learning; see https://discuss.pytorch.org/t/higher-order-derivatives-meta-learning/93051/8, https://discuss.pytorch.org/t/second-order-derivatives-in-meta-learning/76656/2,
# https://discuss.pytorch.org/t/higher-order-derivatives-meta-learning/93051, https://discuss.pytorch.org/t/cannot-calculate-second-order-gradients-even-though-create-graph-true/78711/3

def to_var(x, requires_grad=True):
    """Wrap ``x`` in a ``Variable``, moving it to CUDA first when CUDA is available.

    Args:
        x: tensor to wrap.
        requires_grad: value passed through to ``Variable``.

    Returns:
        The wrapped tensor (on the GPU if one is available, otherwise unchanged device).
    """
    tensor = x.cuda() if torch.cuda.is_available() else x
    return Variable(tensor, requires_grad=requires_grad)

class MetaModule(nn.Module):
    """``nn.Module`` whose trainable tensors can be replaced by non-leaf tensors.

    Subclasses expose their trainable tensors through ``named_leaves()``.
    ``update_params`` / ``set_param`` then overwrite those attributes with the
    result of a differentiable update step, so gradients can flow through the
    update itself (needed for second-order / meta-learning gradients).
    """
    # adopted from: Adrien Ecoffet https://github.com/AdrienLE
    def params(self):
        """Yield every tensor reported by ``named_params`` for this module."""
        for _, param in self.named_params(self):
            yield param

    def named_leaves(self):
        """Return ``(name, tensor)`` pairs owned directly by this module (none by default)."""
        return []

    def named_submodules(self):
        return []

    def named_params(self, curr_module=None, memo=None, prefix=''):
        """Recursively yield ``(dotted_name, tensor)`` for ``curr_module`` and its children.

        Args:
            curr_module: module to walk; defaults to ``self`` when omitted.
            memo: set of tensors already yielded, used to skip shared tensors.
            prefix: dotted path of ``curr_module`` relative to the root.
        """
        # Calling named_params() with no argument used to dereference None.
        if curr_module is None:
            curr_module = self
        if memo is None:
            memo = set()

        if hasattr(curr_module, 'named_leaves'):
            for name, p in curr_module.named_leaves():
                if p is not None and p not in memo:
                    memo.add(p)
                    yield prefix + ('.' if prefix else '') + name, p
        else:
            # plain nn.Module: fall back to its registered parameters
            for name, p in curr_module._parameters.items():
                if p is not None and p not in memo:
                    memo.add(p)
                    yield prefix + ('.' if prefix else '') + name, p

        for mname, module in curr_module.named_children():
            submodule_prefix = prefix + ('.' if prefix else '') + mname
            for name, p in self.named_params(module, memo, submodule_prefix):
                yield name, p

    def update_params(self, lr_inner, first_order=False, source_params=None, detach=False):
        """Replace each tensor with ``tensor - lr_inner * grad``.

        Args:
            lr_inner: step size of the update.
            first_order: if True, gradients are detached before use so the
                update is not differentiated through.
            source_params: optional iterable of gradients, paired in order with
                ``named_params``; when omitted each tensor's ``.grad`` is used.
            detach: only used when ``source_params`` is None; if True the
                tensors are detached in place instead of being updated.
        """
        if source_params is not None:
            for (name_t, param_t), grad in zip(self.named_params(self), source_params):
                if first_order:
                    grad = to_var(grad.detach().data)
                tmp = param_t - lr_inner * grad
                self.set_param(self, name_t, tmp)
        else:
            for name, param in self.named_params(self):
                if not detach:
                    grad = param.grad
                    if first_order:
                        grad = to_var(grad.detach().data)
                    tmp = param - lr_inner * grad
                    self.set_param(self, name, tmp)
                else:
                    param = param.detach_()
                    self.set_param(self, name, param)

    def set_param(self,curr_mod, name, param):
        """Assign ``param`` to the attribute addressed by dotted ``name`` under ``curr_mod``."""
        if '.' in name:
            n = name.split('.')
            module_name = n[0]
            rest = '.'.join(n[1:])
            # descend into the child whose name matches the first path component
            for child_name, mod in curr_mod.named_children():
                if module_name == child_name:
                    self.set_param(mod, rest, param)
                    break
        else:
            setattr(curr_mod, name, param)

    def detach_params(self):
        """Replace every tensor with a detached copy of itself."""
        for name, param in self.named_params(self):
            self.set_param(self, name, param.detach())

    def copy(self, other, same_var=False):
        """Copy the tensors of ``other`` into this module.

        Args:
            other: module to copy from (walked with ``named_params``).
            same_var: if True the very same tensor objects are shared;
                otherwise each tensor's data is cloned into a fresh variable.
        """
        for name, param in other.named_params(other):
            if not same_var:
                param = to_var(param.data.clone(), requires_grad=True)
            # set_param needs the root module as its first argument
            self.set_param(self, name, param)


class MetaLinear(MetaModule):
    """Linear layer whose weight and bias are buffers that can be overwritten by MetaModule updates.

    Accepts the same constructor arguments as ``nn.Linear``; a throwaway
    ``nn.Linear`` is built only to obtain initial values.
    """
    def __init__(self, *args, **kwargs):
        super().__init__()
        ignore = nn.Linear(*args, **kwargs)

        self.register_buffer('weight', to_var(ignore.weight.data, requires_grad=True))

        # nn.Linear(..., bias=False) has no bias tensor; mirror MetaConv2d and
        # register an empty slot instead of dereferencing None.
        if ignore.bias is not None:
            self.register_buffer('bias', to_var(ignore.bias.data, requires_grad=True))
        else:
            self.register_buffer('bias', None)

    def forward(self, x):
        """Apply the affine map using the current weight/bias tensors."""
        return F.linear(x, self.weight, self.bias)

    def named_leaves(self):
        """Return the (name, tensor) pairs updated by MetaModule; bias may be None."""
        return [('weight', self.weight), ('bias', self.bias)]
    
class MetaConv2d(MetaModule):
    """2-D convolution whose weight and bias are buffers that MetaModule updates can overwrite.

    Takes the same constructor arguments as ``nn.Conv2d``; a temporary
    ``nn.Conv2d`` supplies the initial values and convolution settings.
    """
    def __init__(self, *args, **kwargs):
        super().__init__()
        template = nn.Conv2d(*args, **kwargs)

        # copy the convolution settings needed by F.conv2d
        for setting in ('stride', 'padding', 'dilation', 'groups'):
            setattr(self, setting, getattr(template, setting))

        self.register_buffer('weight', to_var(template.weight.data, requires_grad=True))

        bias = None
        if template.bias is not None:
            bias = to_var(template.bias.data, requires_grad=True)
        self.register_buffer('bias', bias)

    def forward(self, x):
        """Convolve ``x`` with the current weight/bias tensors."""
        return F.conv2d(
            x,
            self.weight,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
        )

    def named_leaves(self):
        """Return the (name, tensor) pairs updated by MetaModule; bias may be None."""
        return [('weight', self.weight), ('bias', self.bias)]

class MetaLeNet5(MetaModule):
    """LeNet-5 style network built from Meta layers.

    Three 5x5 convolutions (1->6->16->120 channels, with ReLU and 2x2 max
    pooling after the first two) followed by two linear layers (120->84->n_out).

    Args:
        n_out: number of output units.
    """
    def __init__(self, n_out):
        super(MetaLeNet5, self).__init__()

        self.cnn = nn.Sequential(
            MetaConv2d(1, 6, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2,stride=2),

            MetaConv2d(6, 16, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2,stride=2),

            MetaConv2d(16, 120, kernel_size=5),
            nn.ReLU(inplace=True),
        )

        self.fc_layers = nn.Sequential(
            MetaLinear(120, 84),
            nn.ReLU(inplace=True),
            MetaLinear(84, n_out),
        )

    def forward(self, x):
        """Return the network output; a trailing dimension of size 1 is removed."""
        x = self.cnn(x)
        x = x.view(-1, 120)
        # Squeeze only the output dimension: a bare squeeze() would also drop
        # the batch dimension for a batch of one and break the loss shapes.
        return self.fc_layers(x).squeeze(-1)

Training Loop - baseline performance

In [None]:
import copy
import time
from tqdm import tqdm
import torch.nn.functional as F

def run_training_baseline(net,device):
  """Train a copy of ``net`` with plain SGD and evaluate it every 100 iterations.

  Reads the module-level ``hyperparameters`` dict ('lr', 'num_iterations') and
  the module-level ``train_data_loader`` / ``test_data_loader``.

  Args:
    net: model to train; it is deep-copied, so the argument is left untouched.
    device: torch device to train on.

  Returns:
    (running_loss, test_error, test_accuracy, iteration_count): per evaluation
    point, the mean training loss over the last 100 iterations, the test error
    in percent, the test accuracy in percent, and the iteration index.
  """
  net = copy.deepcopy(net)
  net = net.to(device)
  optimizer = torch.optim.SGD(net.params(),lr=hyperparameters['lr'])
  print("\n\nStarting training loop...")
  start_time = time.perf_counter()
  elapsed_time = 0.0
  plot_step = 100
  print_step = 1000
  running_loss = []
  test_running_loss = []  # collected for inspection; not part of the return value
  test_accuracy = []
  test_error = []
  iteration_count = []
  train_running_loss = 0.0
  # defined up front so the progress print can never hit an unbound name
  avg_loss = float('nan')
  for i in tqdm(range(hyperparameters['num_iterations'])):
    net.train()
    # a fresh iterator each step: takes the first batch the loader yields
    images, labels = next(iter(train_data_loader))
    should_print = (i%print_step) == (print_step - 1)
    if should_print:
      elapsed_time = time.perf_counter() - start_time
      # iterate over the actual batch length so a short batch cannot raise IndexError
      print("\n\niter=%4d: elapsed_time=%5d secs] Ground Truth:     " % ((i+1), elapsed_time) +
                        ' '.join('%5s' % [labels[j].item()] for j in range(labels.size(0))))
    images = images.to(device).float()
    labels = labels.to(device)
    outputs = net(images)
    loss = F.binary_cross_entropy_with_logits(outputs,labels)
    if should_print:
      predicted = (torch.sigmoid(outputs)>0.5).int()
      print("\n\n[iter=%4d: elapsed_time=%5d secs] Predicted Labels:     " % ((i+1), elapsed_time) +
                         ' '.join('%5s' % [predicted[j].item()] for j in range(predicted.size(0))))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_running_loss += loss.item()
    if(i%plot_step) == (plot_step-1):
      avg_loss = train_running_loss/float(plot_step)
      running_loss.append(avg_loss)
      iteration_count.append(i)
      train_running_loss = 0.0

      net.eval()
      with torch.no_grad():
        testing_loss = 0.0
        totalRight = 0
        total = 0
        num_batches = 0
        for images, label in test_data_loader:
          images = images.to(device).float()
          label = label.to(device)
          outputs = net(images)
          predicted = (torch.sigmoid(outputs)>0.5).float()
          # .item() keeps a Python float instead of accumulating tensors
          testing_loss += F.binary_cross_entropy_with_logits(outputs,label).item()
          totalRight += (predicted == label).sum().item()
          total += label.size(0)
          num_batches += 1
        test_running_loss.append(testing_loss/float(num_batches))
        test_accuracy.append(100 * totalRight / float(total))
        test_error.append(100*(total-totalRight)/float(total))
    if should_print:
      # printed after the averaging step so the value covers the window that just ended
      print("\n[iter:%4d] loss:%.2f"%(i+1, avg_loss))
  return running_loss, test_error, test_accuracy, iteration_count

__MAIN__ for running baseline model

In [None]:
import random
import os
# Baseline experiment: plain SGD training on a 99.5% / 0.5% imbalanced
# training set, evaluated on a balanced test set.
hyperparameters = {
        'lr' : 1e-3,
        'momentum' : 0.9,
        'batch_size' : 100,
        'num_iterations' : 8000
}

train_data = BinaryImbalancedMNIST(proportion=0.995, mode="train")
test_data = BinaryImbalancedMNIST(proportion=0.5, mode="test")
train_data_loader = torch.utils.data.DataLoader(dataset=train_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)
test_data_loader = torch.utils.data.DataLoader(dataset=test_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("device: ",device)

# Seed every RNG in use so runs are repeatable.
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.backends.cudnn.deterministic = True
# The flag is `benchmark`; the misspelt `benchmarks` only created an unused attribute.
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)
num_classes=100

model = MetaLeNet5(n_out=1)

running_loss, test_error, test_accuracy, iteration_count = run_training_baseline(model,device)

Plot baseline results - Accuracy vs Iteration & Error vs Iteration

In [None]:
import matplotlib.pyplot as plt
# Side-by-side panels: training loss (left) and test accuracy (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(13,5))
ax1, ax2 = axes.ravel()
panels = (
    (ax1, running_loss, 'Training Loss', "Losses", "Iteration"),
    (ax2, test_accuracy, "Test Accuracy", 'Accuracy', 'Iteration'),
)
for panel, series, curve_label, y_label, x_label in panels:
    panel.plot(iteration_count, series, label=curve_label)
    panel.set_ylabel(y_label)
    panel.set_xlabel(x_label)
    panel.legend()
plt.savefig("loss_accuracy_0_995.png")
plt.show()

# Separate figure for the test error curve.
plt.figure(figsize=(6,6))
plt.plot(iteration_count, test_error, label="Test error")
plt.ylabel('Error')
plt.xlabel('Iteration')
plt.legend()
plt.savefig("test_error_0_995.png")
plt.show()

final_accuracy, final_error = test_accuracy[-1], test_error[-1]
print("Final test accuracy: ", final_accuracy," test error: ", final_error)

LRE performance - MetaLeNet5 on imbalanced training data

In [None]:
import copy
import time
from tqdm import tqdm
import torch.nn.functional as F

def run_training_w_lre(net,device):
  """Train a copy of ``net`` with learned per-example weights (LRE).

  Each iteration performs three passes:
    1. forward the training batch through a throwaway copy of the model with
       zero-initialised example weights ``eps`` and take one differentiable
       SGD step on that copy;
    2. forward the validation set through the updated copy and differentiate
       the validation loss with respect to ``eps``;
    3. clamp the negated gradient at zero, normalise it to sum to one, and use
       it to weight the training loss of the real model.

  Reads the module-level ``hyperparameters`` dict ('lr', 'num_iterations'),
  ``train_data_loader``, ``test_data_loader``, ``val_images`` and ``val_labels``.

  Args:
    net: model to train; it is deep-copied, so the argument is left untouched.
    device: torch device to train on.

  Returns:
    (running_loss, test_error, test_accuracy, iteration_count): per evaluation
    point, the mean weighted training loss over the last 100 iterations, the
    test error in percent, the test accuracy in percent, and the iteration index.
  """
  net = copy.deepcopy(net)
  net = net.to(device)
  optimizer = torch.optim.SGD(net.params(),lr=hyperparameters['lr'])
  print("\n\nStarting training loop...")
  start_time = time.perf_counter()
  elapsed_time = 0.0
  plot_step = 100
  print_step = 1000
  running_loss = []
  test_running_loss = []  # collected for inspection; not part of the return value
  test_accuracy = []
  test_error = []
  iteration_count = []
  train_running_loss = 0.0
  # defined up front so the progress print can never hit an unbound name
  avg_loss = float('nan')
  for i in tqdm(range(hyperparameters['num_iterations'])):
    net.train()
    # a fresh iterator each step: takes the first batch the loader yields
    x_f, y_f = next(iter(train_data_loader))
    should_print = (i%print_step) == (print_step - 1)
    if should_print:
      elapsed_time = time.perf_counter() - start_time
      # iterate over the actual batch length so a short batch cannot raise IndexError
      print("\n\niter=%4d: elapsed_time=%5d secs] Ground Truth:     " % ((i+1), elapsed_time) +
                        ' '.join('%5s' % [y_f[j].item()] for j in range(y_f.size(0))))
    x_f = x_f.to(device).float()
    y_f = y_f.to(device)

    # Since this method requires 3 passes of the network, we create a dummy network
    # that is used for the initial 2 passes which gives us the gradient with respect
    # to model parameters and example weights. The last pass with the optimal weights
    # will be performed on the actual model we train.
    dummy_net = MetaLeNet5(n_out=1)
    dummy_net.load_state_dict(net.state_dict())
    dummy_net = dummy_net.to(device)

    # 1st Pass: training data pass to compute initial weighted loss
    y_f_pred = dummy_net(x_f)
    # reduction='none' keeps one loss per example (replaces deprecated reduce=False)
    loss = F.binary_cross_entropy_with_logits(y_f_pred,y_f,reduction='none')
    # initialize epsilon with 0s as initial weight
    eps = torch.zeros_like(loss, requires_grad=True)
    l_f = torch.sum(loss*eps)

    dummy_net.zero_grad()

    # perform parameter update using the MetaModule class; create_graph keeps
    # the update differentiable with respect to eps
    gradient = torch.autograd.grad(l_f,tuple(dummy_net.params()),create_graph=True)
    dummy_net.update_params(hyperparameters['lr'],source_params=gradient)

    # 2nd Pass: val data pass to compute gradient with the weights
    y_g_pred = dummy_net(val_images)
    l_g = F.binary_cross_entropy_with_logits(y_g_pred,val_labels)
    eps_gradient = torch.autograd.grad(l_g,eps)[0]

    # Negate the gradient and clamp at zero to avoid negative weights, then normalize
    w_hat = torch.clamp(-eps_gradient,min=0)
    w_total = torch.sum(w_hat)

    if (w_total!=0):
      weights = w_hat/w_total
    else:
      # every weight is zero; keep them as-is rather than dividing by zero
      weights = w_hat

    # 3rd Pass: training data pass through original network to compute weighted
    #           loss and perform parameter update

    y_f_pred_1 = net(x_f)
    loss = F.binary_cross_entropy_with_logits(y_f_pred_1,y_f,reduction='none')
    l_f = torch.sum(loss*weights)

    if should_print:
      predicted = (torch.sigmoid(y_f_pred_1)>0.5).int()
      print("\n\n[iter=%4d: elapsed_time=%5d secs] Predicted Labels:     " % ((i+1), elapsed_time) +
                         ' '.join('%5s' % [predicted[j].item()] for j in range(predicted.size(0))))

    optimizer.zero_grad()
    l_f.backward()
    optimizer.step()
    train_running_loss += l_f.item()
    if(i%plot_step) == (plot_step-1):
      avg_loss = train_running_loss/float(plot_step)
      running_loss.append(avg_loss)
      iteration_count.append(i)
      train_running_loss = 0.0

      net.eval()
      with torch.no_grad():
        testing_loss = 0.0
        totalRight = 0
        total = 0
        num_batches = 0
        for images, label in test_data_loader:
          images = images.to(device).float()
          label = label.to(device)
          outputs = net(images)
          predicted = (torch.sigmoid(outputs)>0.5).float()
          # .item() keeps a Python float instead of accumulating tensors
          testing_loss += F.binary_cross_entropy_with_logits(outputs,label).item()
          totalRight += (predicted == label).sum().item()
          total += label.size(0)
          num_batches += 1
        test_running_loss.append(testing_loss/float(num_batches))
        test_accuracy.append(100 * totalRight / float(total))
        test_error.append(100*(total-totalRight)/float(total))
    if should_print:
      # printed after the averaging step so the value covers the window that just ended
      print("\n[iter:%4d] loss:%.2f"%(i+1, avg_loss))
  return running_loss, test_error, test_accuracy, iteration_count

__MAIN__ for LRE with varying proportions

In [None]:
import random
import os
# LRE experiment: for each training-set imbalance, run several trials and
# collect the per-trial loss / accuracy / error curves.
hyperparameters = {
        'lr' : 1e-3,
        'momentum' : 0.9,
        'batch_size' : 100,
        'num_iterations' : 8000
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("device: ",device)

# Seed every RNG in use so runs are repeatable.
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.backends.cudnn.deterministic = True
# The flag is `benchmark`; the misspelt `benchmarks` only created an unused attribute.
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)
num_classes=100

test_data = BinaryImbalancedMNIST(proportion=0.5, mode="test")
test_data_loader = torch.utils.data.DataLoader(dataset=test_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)

proportions = [0.9, 0.95, 0.98, 0.99, 0.995]
# proportion -> list with one curve per trial
accuracy_accumulated = {}
loss_accumulated = {}
error_accumulated = {}
num_trials = 5

for prop in proportions:
  train_data = BinaryImbalancedMNIST(proportion=prop, mode="train")
  train_data_loader = torch.utils.data.DataLoader(dataset=train_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)
  # run_training_w_lre reads these two globals as its validation set
  val_images = (train_data_loader.dataset.val_images).to(device)
  val_labels = (train_data_loader.dataset.val_labels).to(device)

  for idx in range(num_trials):
    model = MetaLeNet5(n_out=1)
    running_loss, test_error, test_accuracy, iteration_count = run_training_w_lre(model,device)

    accuracy_accumulated.setdefault(prop, []).append(test_accuracy)
    loss_accumulated.setdefault(prop, []).append(running_loss)
    error_accumulated.setdefault(prop, []).append(test_error)


Plot results for varying proportions

In [None]:
import matplotlib.pyplot as plt

# Average the per-trial curves for every proportion, then plot mean training
# loss (left) and mean test accuracy (right), one curve per proportion.
mean_loss = {}
mean_accuracy = {}
mean_error = {}

for prop in proportions:
  # average over however many trials were run instead of hard-coding five
  mean_loss[prop] = np.mean(loss_accumulated[prop],axis=0)
  mean_accuracy[prop] = np.mean(accuracy_accumulated[prop],axis=0)
  mean_error[prop] = np.mean(error_accumulated[prop],axis=0)

fig, axes = plt.subplots(1, 2, figsize=(13,5))
ax1, ax2 = axes.ravel()
for prop in proportions:
  ax1.plot(iteration_count,mean_loss[prop], label=str(prop))
ax1.set_ylabel("Training Losses")
ax1.set_xlabel("Iteration")
ax1.legend()

for prop in proportions:
  # labels come from the key itself, so the 0.995 curve is no longer shown as "0.95"
  ax2.plot(iteration_count, mean_accuracy[prop], label=str(prop))
ax2.set_ylabel('Test Accuracy')
ax2.set_xlabel('Iteration')
ax2.legend()
plt.savefig("loss_accuracy.png",bbox_inches="tight")
plt.show()

In [None]:
# Final mean test error (last evaluation point) for each training proportion.
error = [mean_error[key][-1] for key in (0.9, 0.95, 0.98, 0.99, 0.995)]

plt.figure(figsize=(6,6))
plt.plot(
    proportions,
    error,
    linestyle="--",
    marker="*",
    label="LRE weighted",
)
plt.ylabel('Error %')
plt.xlabel('Proportions')
plt.legend()
plt.grid()
plt.savefig("error_w_proportions.png",bbox_inches="tight")
plt.show()

LRE with noisy validation data

In [None]:
import random
import os
# LRE experiment with a fixed 98% training imbalance and a validation set
# whose class balance (val_prop) is varied.
hyperparameters = {
        'lr' : 1e-3,
        'momentum' : 0.9,
        'batch_size' : 100,
        'num_iterations' : 8000
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("device: ",device)

# Seed every RNG in use so runs are repeatable.
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.backends.cudnn.deterministic = True
# The flag is `benchmark`; the misspelt `benchmarks` only created an unused attribute.
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)
num_classes=100

test_data = BinaryImbalancedMNIST(proportion=0.5, mode="test")
test_data_loader = torch.utils.data.DataLoader(dataset=test_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)

val_proportions = [0.5, 0.6, 0.8]
# val_prop -> list with one curve per trial
accuracy_accumulated = {}
loss_accumulated = {}
error_accumulated = {}
num_trials = 1

for val_prop in val_proportions:
  train_data = BinaryImbalancedMNIST(proportion=0.98, val_prop=val_prop, mode="train")
  train_data_loader = torch.utils.data.DataLoader(dataset=train_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)
  # run_training_w_lre reads these two globals as its validation set
  val_images = (train_data_loader.dataset.val_images).to(device)
  val_labels = (train_data_loader.dataset.val_labels).to(device)

  for idx in range(num_trials):
    model = MetaLeNet5(n_out=1)
    running_loss, test_error, test_accuracy, iteration_count = run_training_w_lre(model,device)

    accuracy_accumulated.setdefault(val_prop, []).append(test_accuracy)
    loss_accumulated.setdefault(val_prop, []).append(running_loss)
    error_accumulated.setdefault(val_prop, []).append(test_error)

In [None]:
# One test-accuracy curve per validation-set balance.
plt.figure(figsize=(10,6))
curves = (
    (0.5, 'Unbiased validation set'),
    (0.6, 'Partly biased validation set'),
    (0.8, 'Biased validation set'),
)
for balance, curve_label in curves:
    plt.plot(iteration_count, np.squeeze(accuracy_accumulated[balance]), label=curve_label)
plt.ylabel("Test Accuracy")
plt.xlabel("Iteration")
plt.legend()
plt.savefig("val_data_biased.png",bbox_inches="tight")
plt.show()

LRE with varying val_data size

In [None]:
import random
import os
# LRE experiment with a fixed 98% training imbalance and a balanced
# validation set whose size is varied.
hyperparameters = {
        'lr' : 1e-3,
        'momentum' : 0.9,
        'batch_size' : 100,
        'num_iterations' : 8000
}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print("device: ",device)

# Seed every RNG in use so runs are repeatable.
seed = 0
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
torch.backends.cudnn.deterministic = True
# The flag is `benchmark`; the misspelt `benchmarks` only created an unused attribute.
torch.backends.cudnn.benchmark = False
os.environ['PYTHONHASHSEED'] = str(seed)
num_classes=100

test_data = BinaryImbalancedMNIST(proportion=0.5, mode="test")
test_data_loader = torch.utils.data.DataLoader(dataset=test_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)


accuracy_accumulated = {}
loss_accumulated = {}
error_accumulated = {}
num_trials = 1
num_val_images = [5,10,20,30]

for num_val_data in num_val_images:
  train_data = BinaryImbalancedMNIST(proportion=0.98, val_prop=0.5, num_val_images = num_val_data, mode="train")
  train_data_loader = torch.utils.data.DataLoader(dataset=train_data,batch_size=hyperparameters['batch_size'],shuffle=True,num_workers=0)
  # run_training_w_lre reads these two globals as its validation set
  val_images = (train_data_loader.dataset.val_images).to(device)
  val_labels = (train_data_loader.dataset.val_labels).to(device)

  for idx in range(num_trials):
    model = MetaLeNet5(n_out=1)
    running_loss, test_error, test_accuracy, iteration_count = run_training_w_lre(model,device)

    # NOTE: each trial overwrites the previous entry, so only the last
    # trial per validation size is kept (num_trials is 1 here).
    accuracy_accumulated[num_val_data] = [test_accuracy]
    loss_accumulated[num_val_data] = [running_loss]
    error_accumulated[num_val_data] = [test_error]

In [None]:
# One test-accuracy curve per validation-set size.
plt.figure(figsize=(10,6))
for n_val in (5, 10, 20, 30):
    plt.plot(
        iteration_count,
        np.squeeze(accuracy_accumulated[n_val]),
        label='n_val = %d' % n_val,
    )
plt.ylabel("Test Accuracy")
plt.xlabel("Iteration")
plt.legend()
plt.savefig("n_val_variation.png",bbox_inches="tight")
plt.show()

LRE comparison with other techniques. Numbers referenced from https://arxiv.org/abs/1803.09050

In [None]:
import matplotlib.pyplot as plt
# Test error (%) of each technique at the training proportions in `prop`.
lre = [2,3,4,6.25,8.75]
# Named `random_err` rather than `random` so the imported `random` module
# (used by the seeding cells) is not overwritten when this cell runs.
random_err = [2,4,6,10,18]
proportion = [2,2.5,3.5,6,8]
resample = [2,3,4,6,10]
hardmine=[2,3,3.5,5,8]
prop = [0.90,0.95,0.98,0.99,0.995]

plt.figure(figsize=(10,6))
plt.plot(prop,lre, label='LRE')
plt.plot(prop,random_err,linestyle='--',label='Random')
plt.plot(prop,proportion,linestyle='-.',label='Proportion')
plt.plot(prop,resample,linestyle=':',label='Resample')
plt.plot(prop,hardmine,linestyle='--',label='Hard Mining')

plt.ylabel("Test Error %")
plt.xlabel("Proportions")
plt.legend()
plt.savefig("error_comparison",bbox_inches="tight")
plt.show()