In [1]:
import os, time
import numpy as np
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.optim as optim

import pickle
import torch
from arguments import get_args

In [2]:
from dataloaders import split_cifar100_rs_5 as dataloader

In [3]:
# Load the split dataset through the dataloader module with a fixed seed.
# NOTE(review): the "Task order" line in the output is presumably printed by
# dataloader.get itself — confirm in the dataloader module.
data, taskcla, inputsize = dataloader.get(seed=0)
# Echo the input size and the task list returned by the loader.
print('\nInput size =', inputsize, '\nTask info =', taskcla)
# Number of entries in the task list.
print(len(taskcla))

Task order = [19, 2, 20, 9, 11, 18, 7, 14, 5, 3, 6, 15, 10, 8, 17, 12, 4, 1, 16, 13]

Input size = [3, 32, 32] 
Task info = [(0, 5), (1, 5), (2, 5), (3, 5), (4, 5), (5, 5), (6, 5), (7, 5), (8, 5), (9, 5), (10, 5), (11, 5), (12, 5), (13, 5), (14, 5), (15, 5), (16, 5), (17, 5), (18, 5), (19, 5)]
20


In [4]:
# Report the container type of `data` and its top-level keys, one per line.
print(type(data), data.keys(), sep='\n')

<class 'dict'>
dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 'ncla'])


In [5]:
# Inspect which fields are stored under the entry for task index 0.
print(data[0].keys())

dict_keys(['name', 'ncla', 'train', 'test', 'valid'])


In [15]:
# Show the top-level 'ncla' next to task 0's own 'ncla' and its name.
# NOTE(review): 'ncla' presumably means "number of classes" (overall vs. per task) — confirm in the dataloader.
print(data['ncla'], data[0]['ncla'], data[0]['name'])

100 5 cifar100-13


In [16]:
# Print the name of every task entry, in task-index order.
for i in range(len(taskcla)):
    print(data[i]['name'])

cifar100-13
cifar100-19
cifar100-2
cifar100-20
cifar100-9
cifar100-11
cifar100-18
cifar100-7
cifar100-14
cifar100-5
cifar100-3
cifar100-6
cifar100-15
cifar100-10
cifar100-8
cifar100-17
cifar100-12
cifar100-4
cifar100-1
cifar100-16


In [7]:
# Inspect which fields make up task 0's 'train' split.
print(data[0]['train'].keys())

dict_keys(['x', 'y'])


In [8]:
# Types of the 'x' and 'y' entries in task 0's 'train' split, on one line.
print(*(type(data[0]['train'][key]) for key in ('x', 'y')))

<class 'torch.Tensor'> <class 'torch.Tensor'>


In [9]:
# Sizes of the 'x' and 'y' entries in task 0's 'train' split, one per line.
print(*(data[0]['train'][key].size() for key in ('x', 'y')), sep='\n')

torch.Size([2250, 3, 32, 32])
torch.Size([2250])


In [10]:
# Distinct values found in task 0's training 'y' tensor.
# Left as a bare expression so the notebook echoes the resulting array.
np.unique(data[0]['train']['y'].numpy())

array([0, 1, 2, 3, 4])

In [11]:
from networks import conv_net_coscl as network

In [12]:
# Instantiate the network from the conv_net_coscl module for this input size and task list.
# NOTE(review): use_TG presumably switches on a task-gating mechanism — confirm in the conv_net_coscl module.
net = network.Net(inputsize, taskcla, use_TG=True)

In [13]:
def count_param(M):
    """Print a parameter-count report for ``M``.

    One line is printed per direct child of ``M`` (as yielded by
    ``M.named_children()``), followed by a summary line for ``M`` as a whole.
    Each line shows trainable / total parameter counts in millions, rounded
    to two decimals.

    Args:
        M: object exposing ``named_children()`` and ``parameters()``
            (e.g. a ``torch.nn.Module``).

    Returns:
        None. The report is written to stdout.
    """
    # Per-child breakdown, numbered in the order named_children() yields them.
    for i, child_entry in enumerate(M.named_children()):
        name, child = child_entry
        trainable_num, total_num = get_parameter_number(child)
        print(f"\t>> {i}: {name} \t {trainable_num/1e6:.2f}M / {total_num/1e6:.2f}M")

    # Summary line for the whole model.
    trainable_num, total_num = get_parameter_number(M)
    print(f">> # of params in total: {trainable_num/1e6:.2f}M / {total_num/1e6:.2f}M")


def get_parameter_number(net):
    """Count the scalar parameters of ``net``.

    Args:
        net: object whose ``parameters()`` yields tensors providing
            ``numel()`` and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Returns:
        A ``(trainable_num, total_num)`` tuple: the number of elements in
        parameters with ``requires_grad`` set, and the number of elements
        in all parameters.
    """
    trainable_num = 0
    total_num = 0
    for param in net.parameters():
        n_elements = param.numel()
        total_num += n_elements
        # Only parameters that still receive gradients count as trainable.
        if param.requires_grad:
            trainable_num += n_elements
    return trainable_num, total_num

In [14]:
# Print the per-child and total parameter counts, then the module's own repr.
count_param(net)
print(net)

	>> 0: last 	 0.03M / 0.03M
	>> 1: net1 	 0.02M / 0.02M
	>> 2: fc1 	 0.13M / 0.13M
	>> 3: efc1 	 0.01M / 0.01M
	>> 4: net2 	 0.02M / 0.02M
	>> 5: fc2 	 0.13M / 0.13M
	>> 6: efc2 	 0.01M / 0.01M
	>> 7: net3 	 0.02M / 0.02M
	>> 8: fc3 	 0.13M / 0.13M
	>> 9: efc3 	 0.01M / 0.01M
	>> 10: net4 	 0.02M / 0.02M
	>> 11: fc4 	 0.13M / 0.13M
	>> 12: efc4 	 0.01M / 0.01M
	>> 13: net5 	 0.02M / 0.02M
	>> 14: fc5 	 0.13M / 0.13M
	>> 15: efc5 	 0.01M / 0.01M
	>> 16: drop1 	 0.00M / 0.00M
	>> 17: drop2 	 0.00M / 0.00M
	>> 18: MaxPool 	 0.00M / 0.00M
	>> 19: relu 	 0.00M / 0.00M
	>> 20: sig_gate 	 0.00M / 0.00M
>> # of params in total: 0.80M / 0.80M
Net(
  (last): ModuleList(
    (0): Linear(in_features=256, out_features=5, bias=True)
    (1): Linear(in_features=256, out_features=5, bias=True)
    (2): Linear(in_features=256, out_features=5, bias=True)
    (3): Linear(in_features=256, out_features=5, bias=True)
    (4): Linear(in_features=256, out_features=5, bias=True)
    (5): Linear(in_features=256

In [19]:
# Index tensor holding the single task id 1 (int64).
# Built directly as a tensor: torch.autograd.Variable is a deprecated wrapper
# that simply returns a Tensor, so it adds nothing here.
task = torch.tensor([1], dtype=torch.long)
# Ask the network for the mask(s) associated with that task id.
mask = net.mask(task)

In [21]:
# Sizes of the task-id tensor and of the first entry of `mask`, one per line.
print(task.size(), mask[0].size(), sep='\n')

torch.Size([1])
torch.Size([1, 256])


In [22]:
# Scratch embedding table: 10 rows, each a 200-dimensional vector.
emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=200)

In [25]:
# Look up row 0 (the first of the 10 rows `emb` was created with) and echo the result's size.
# A plain int64 index tensor is enough; the deprecated torch.autograd.Variable wrapper is dropped.
emb(torch.tensor([0], dtype=torch.long)).size()

torch.Size([1, 200])

In [26]:
# Look up row 9 (the last valid index for the 10 rows `emb` was created with) and echo the result's size.
# A plain int64 index tensor is enough; the deprecated torch.autograd.Variable wrapper is dropped.
emb(torch.tensor([9], dtype=torch.long)).size()

torch.Size([1, 200])