In [5]:
from numpy import vstack
from pandas import read_csv
from torch.utils.data import Dataset
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch import Tensor
from torch import nn, optim
from torch.nn import Linear
from torch.nn import ReLU
from torch.nn import Sigmoid
from torch.nn import Module
from torchvision import datasets, transforms
from torch.nn import BCELoss
from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_
import torch
import copy

import numpy as np

In [6]:

transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])
trainset = datasets.MNIST('mnist_train', download=True, train=True, transform=transform)

valset = datasets.MNIST('mnist_val', download=True, train=False, transform=transform)

trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(valset, batch_size=64, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_train/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:04, 2351212.26it/s]                             


Extracting mnist_train/MNIST/raw/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_train/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 302390.61it/s]                           


Extracting mnist_train/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_train/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2559124.15it/s]                             


Extracting mnist_train/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_train/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 131313.44it/s]
  return torch.from_numpy(parsed).view(length, num_rows, num_cols)


Extracting mnist_train/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_val/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:04, 2343679.47it/s]                             


Extracting mnist_val/MNIST/raw/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_val/MNIST/raw/train-labels-idx1-ubyte.gz


32768it [00:00, 134335.80it/s]                          


Extracting mnist_val/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_val/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 2573288.92it/s]                             


Extracting mnist_val/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_val/MNIST/raw/t10k-labels-idx1-ubyte.gz


8192it [00:00, 167754.96it/s]


Extracting mnist_val/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [7]:
input_size= 784
hidden_sizes = [128, 64]

In [8]:
def MultiClassCrossEntropy(logits, labels, T):
	# Ld = -1/N * sum(N) sum(C) softmax(label) * log(softmax(logit))
	labels = Variable(labels.data, requires_grad=False).cuda()
	outputs = torch.log_softmax(logits/T, dim=1)   # compute the log of softmax values
	labels = torch.softmax(labels/T, dim=1)
	# print('outputs: ', outputs)
	# print('labels: ', labels.shape)
	outputs = torch.sum(outputs * labels, dim=1, keepdim=False)
	outputs = -torch.mean(outputs, dim=0, keepdim=False)
	# print('OUT: ', outputs)
	return Variable(outputs.data, requires_grad=True).cuda()

def kaiming_normal_init(m):
	if isinstance(m, nn.Conv2d):
		nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
	elif isinstance(m, nn.Linear):
		nn.init.kaiming_normal_(m.weight, nonlinearity='sigmoid')

In [65]:
class Model(nn.Module):
	def __init__(self):
		# Hyper Parameters
		self.init_lr = 0.01
		self.num_epochs = 10
		self.batch_size = 64
		self.momentum=0.9

		self.in_features = hidden_sizes[1]
		self.n_classes = 0
		self.n_known=0

		self.class_map=dict()

		# Network architecture
		super(Model, self).__init__()
		self.feature_extractor = nn.Sequential(Linear(input_size, hidden_sizes[0]),
                        ReLU(),
                        Linear(hidden_sizes[0], hidden_sizes[1]),
                        ReLU())

		self.fc= nn.Linear(hidden_sizes[1],self.n_classes)
		
		# self.feature_extractor = nn.DataParallel(self.feature_extractor) 
		# self.fc = nn.DataParallel(self.fc)

	def forward(self, x):
		x = self.feature_extractor(x)
		x = x.view(x.size(0), -1)
		x = self.fc(x)
		return x

	def increment_classes(self, new_classes):
		"""Add n classes in the final fc layer"""
		n = len(new_classes)
		print('new classes: ', n)
		
		weight = self.fc.weight.data

		
		new_out_features = self.n_known + n

		print('new out features: ', new_out_features)
		new_fc = nn.Linear(self.in_features, new_out_features, bias=False)
		self.fc = new_fc
		
		# kaiming_normal_init(self.fc.weight)
		self.fc.weight.data[:self.n_classes] = weight
		self.n_classes += n

	def classify(self, images):
		"""Classify images by softmax

		Args:
			x: input image batch
		Returns:
			preds: Tensor of size (batch_size,)
		"""
		_, preds = torch.max(torch.softmax(self.forward(images), dim=1), dim=1, keepdim=False)

		return preds

	def update(self, dataset):

		self.compute_means = True

		# Save a copy to compute distillation outputs
		prev_model = copy.deepcopy(self)

		classes = set()

		trainloader = torch.utils.data.DataLoader(dataset, batch_size=32,shuffle=True, num_workers=1)
		for data, label in trainloader:
			for item in label.cpu().detach().numpy():
				classes.add(item)

		print(classes)

		# return

		new_classes = [i for i in classes if i not in self.class_map.keys()]

		print("Classes: ", classes)
		print('Known: ', self.n_known)

		print(self.class_map)

		if len(new_classes) > 0:
			self.increment_classes(new_classes)

		

		print("Batch Size (for n_classes classes) : ", len(dataset))
		optimizer = optim.SGD(self.parameters(), lr=self.init_lr, momentum = self.momentum)

		for epoch in range(self.num_epochs):
			
			for i,(images, labels) in enumerate(trainloader):
				# print(labels.numpy())
				seen_labels = []
				images = Variable(torch.FloatTensor(images))
				seen_labels = torch.LongTensor([self.class_map[label] for label in labels.numpy()])
				labels = Variable(seen_labels)
				# indices = indices.cuda()

				optimizer.zero_grad()
				logits = self.forward(images)
				cls_loss = nn.CrossEntropyLoss()(logits, labels)
				if len(new_classes) > 0:
					dist_target = prev_model.forward(images)
					logits_dist = logits[:,:-(len(new_classes))]
					dist_loss = MultiClassCrossEntropy(logits_dist, dist_target, 2)
					loss = dist_loss+cls_loss
				else:
					loss = cls_loss

				loss.backward()
				optimizer.step()

					

In [66]:
num_task_classes=2
total_classes= 10
num_iters= total_classes//num_task_classes
all_classes = np.arange(total_classes)

transform = transforms.Compose([transforms.ToTensor(),
							transforms.Normalize((0.5,), (0.5,)),
							])
all_train_set = datasets.MNIST('mnist_train', download=True, train=True, transform=transform)
all_test_set = datasets.MNIST('mnist_val', download=True, train=False, transform=transform)

model = Model()

for s in range(0, num_iters, num_task_classes):
	# Load Datasets
	print('Iteration: ', s)
	#print('Algo running: ', args.algo)
	print("Loading training examples for classes", all_classes[s: s+num_task_classes])
	
	mask=torch.tensor((all_train_set.targets) == s)
	for i in range(1,num_task_classes):
		mask = mask | (torch.tensor(all_train_set.targets) == (s+i))
	
	# print(list(mask.numpy()))
	idx = [i for i in range(len(all_train_set)) if mask[i]]

	current_train_set = torch.utils.data.Subset(all_train_set, idx)
	print(current_train_set)
	# trainloader = torch.utils.data.DataLoader(current_train_set, batch_size=32,
    #                                         shuffle=True, num_workers=1)
	# for data, label in trainloader:
	# 	print(data.size())
	# 	print(label)
	# 	break
	# break

	mask=torch.tensor(all_test_set.targets) == s
	for i in range(1,num_task_classes):
		mask = mask | (torch.tensor(all_test_set.targets) == (s+i))

	# print(all_test_set)
	current_test_set = torch.utils.data.Subset(all_test_set, mask.nonzero().view(-1))

	# print(current_test_set)


	# # Update representation via BackProp
	model.update(current_train_set)
	break
	# # model.eval()

	# model.n_known = model.n_classes
	# print ("model classes : %d, " % model.n_known)

	# total = 0.0
	# correct = 0.0
	# for images, labels in train_loader:
	# 	images = Variable(images)
	# 	preds = model.classify(images)
	# 	preds = [pred for pred in preds.cpu().numpy()]
	# 	total += labels.size(0)
	# 	correct += (preds == labels.numpy()).sum()

	# # Train Accuracy
	# print ('Train Accuracy : %.2f ,' % (100.0 * correct / total))

	# total = 0.0
	# correct = 0.0
	# for indices, images, labels in test_loader:
	# 	images = Variable(images).cuda()
	# 	preds = model.classify(images)
	# 	preds = preds.cpu().numpy()
	# 	total += labels.size(0)
	# 	correct += (preds == labels.numpy()).sum()

	# # Test Accuracy
	# print ('Test Accuracy : %.2f ,' % (100.0 * correct / total))

Iteration:  0
Loading training examples for classes [0 1]
<torch.utils.data.dataset.Subset object at 0x10f58b3d0>


  mask=torch.tensor((all_train_set.targets) == s)
  mask = mask | (torch.tensor(all_train_set.targets) == (s+i))
  mask=torch.tensor(all_test_set.targets) == s
  mask = mask | (torch.tensor(all_test_set.targets) == (s+i))


{0, 1}
Classes:  {0, 1}
Known:  0
{}
new classes:  2
new out features:  2
Batch Size (for n_classes classes) :  12665


KeyError: 1

In [None]:
# criterion = nn.NLLLoss()
# images, labels = next(iter(trainloader))


In [None]:
# print(images.shape)
# images=images.view(images.shape[0], -1)
# print(images.shape)


In [None]:
# logps = model(images) 
# loss = criterion(logps, labels) 

In [None]:
# print(logps.shape)
# loss.backward()

In [None]:
# optimizer = optim.SGD(model.parameters(), lr=0.01)
# epochs=2
# for e in range(epochs):
#     running_loss=0

#     for images, labels in trainloader:
#         # print(images.shape)
#         images= images.view(images.shape[0], -1)
#         optimizer.zero_grad()

#         output= model(images)
#         loss= criterion(output, labels)

#         loss.backward()

#         optimizer.step()

#         running_loss += loss.item()
        

In [None]:
# images, labels = next(iter(testloader))

# img = images[0].view(1, 784)
# with torch.no_grad():
#     logps = model(img)

# ps = torch.exp(logps)
# probab = list(ps.numpy()[0])
# print("Predicted Digit =", probab.index(max(probab)))
# # view_classify(img.view(1, 28, 28), ps)