In [1]:
import numpy as np
import os
from retinanet import model
from retinanet import coco_eval
from retinanet.dataloader import CocoDataset_inOrder,rehearsal_DataSet, collater, Resizer, AspectRatioBasedSampler, Augmenter, Normalizer
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import collections
import torch
# NOTE(review): absolute, machine-specific path; the dataset and checkpoint
# locations used below are all derived from it.
root_path = '/home/deeplab307/Documents/Anaconda/Shiang/CL/'
# Passed as the `method` argument of readCheckpoint/get_checkpoint_path, where it
# becomes a sub-directory name under <root_path>/model/.
method = 'w_distillation'
# Passed to CocoDataset_inOrder and used as a checkpoint sub-directory name.
# presumably "15 base classes, then 1 new class per round" — confirm in the dataloader
data_split = '15+1'
# Passed to CocoDataset_inOrder as start_round.
start_round = 2
# Batch size handed to AspectRatioBasedSampler.
batch_size = 1

# In the cells visible here this is only referenced by a commented-out
# readCheckpoint call.
checkpoint_epoch = 50

def checkDir(path):
    """Ensure that the directory ``path`` exists, creating it if necessary.

    Missing parent directories are created as well, and an already existing
    directory is left untouched (no error is raised).

    Args:
        path: Directory path to create.

    Raises:
        OSError: If ``path`` exists but is not a directory, or the directory
            cannot be created (e.g. insufficient permissions).
    """
    # makedirs(exist_ok=True) replaces the isdir()+mkdir() pair: it also creates
    # missing parents and does not fail if the directory appears between the
    # existence check and the creation.
    os.makedirs(path, exist_ok=True)
def get_checkpoint_path(method, now_round, epoch, data_split="None"):
    """Return the checkpoint file path for a method / round / split / epoch.

    The path has the form
    ``<root_path>/model/<method>/round<now_round>/<data_split>/voc_retinanet_<epoch>_checkpoint.pt``
    where ``root_path`` is the module-level setting.

    Side effect: the directory that contains the checkpoint is created
    (including missing parents) if it does not exist yet. The checkpoint file
    itself is neither created nor checked.

    Args:
        method: Sub-directory name under ``<root_path>/model``.
        now_round: Round number, formatted into ``round<now_round>``.
        epoch: Epoch number, formatted into the file name.
        data_split: Sub-directory name under the round directory. The default
            is the literal string ``"None"`` (kept for backward compatibility),
            which yields a directory named ``None``.

    Returns:
        The checkpoint path as a string.
    """
    # root_path is only read here, so no `global` declaration is needed.
    checkpoint_dir = os.path.join(root_path, 'model', method, 'round{}'.format(now_round), data_split)
    # One recursive call covers both directory levels and also works when
    # <root_path>/model/<method> does not exist yet.
    os.makedirs(checkpoint_dir, exist_ok=True)
    return os.path.join(checkpoint_dir, 'voc_retinanet_{}_checkpoint.pt'.format(epoch))


def readCheckpoint(method, now_round, epoch, data_split, retinanet, optimizer = None, scheduler = None):
    """Load a saved checkpoint into a model and, optionally, optimizer/scheduler.

    The checkpoint file is located with ``get_checkpoint_path`` (which creates
    the containing directory as a side effect) and is expected to be a dict
    with the key ``'model_state_dict'``, plus ``'optimizer_state_dict'`` /
    ``'scheduler_state_dict'`` when the corresponding object is passed.

    Args:
        method: Forwarded to ``get_checkpoint_path``.
        now_round: Round of the checkpoint to load.
        epoch: Epoch of the checkpoint to load.
        data_split: Forwarded to ``get_checkpoint_path``.
        retinanet: Model whose ``load_state_dict`` receives the saved weights.
        optimizer: Optional optimizer to restore; skipped when ``None``.
        scheduler: Optional LR scheduler to restore; skipped when ``None``.

    Returns:
        None. The passed objects are updated in place.
    """
    print('readcheckpoint at Round{} Epoch{}'.format(now_round, epoch))
    prev_checkpoint = torch.load(get_checkpoint_path(method, now_round, epoch, data_split))
    retinanet.load_state_dict(prev_checkpoint['model_state_dict'])
    # Identity comparison: `!= None` would dispatch to the object's __ne__.
    if optimizer is not None:
        optimizer.load_state_dict(prev_checkpoint['optimizer_state_dict'])
    if scheduler is not None:
        scheduler.load_state_dict(prev_checkpoint['scheduler_state_dict'])
    


# coco_path = '/home/deeplab307/Documents/Anaconda/Shiang/CL/DataSet/VOC2012'



# dataset_train = CocoDataset_inOrder(coco_path, set_name='TrainVoc2012', dataset = 'voc',
#                                     transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]),
#                                    data_split=data_split, start_round=start_round)

# # dataset_val = CocoDataset_inOrder(os.path.join(root_path, 'DataSet', 'VOC2012'), set_name="ValVoc2012", dataset = 'voc', 
# #                 transform=transforms.Compose([Normalizer(), Resizer()]), 
# #                 start_round=1, data_split = "20")

# Training set read from <root_path>/DataSet/VOC2012. The transform only
# normalizes and resizes (no Augmenter in this pipeline).
dataset_train = CocoDataset_inOrder(os.path.join(root_path, 'DataSet', 'VOC2012'), set_name='TrainVoc2012', dataset = 'voc',
                                    transform=transforms.Compose([Normalizer(), Resizer()]),
                                   data_split=data_split, start_round=start_round)
# ResNet-50 based model sized to the number of classes reported by the dataset,
# then moved to the GPU.
retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
retinanet.cuda()

# Batch sampler + loader. In the cells visible here the loops iterate over
# `dataset_train` directly rather than over `dataloader_train`.
sampler = AspectRatioBasedSampler(dataset_train, batch_size = batch_size, drop_last=False)
dataloader_train = DataLoader(dataset_train, num_workers=2, collate_fn=collater, batch_sampler=sampler)

# Adam over all model parameters, with a plateau-based LR scheduler (patience 3).
optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
# loss_hist = collections.deque(maxlen=500)

#readCheckpoint(method, start_round, checkpoint_epoch,data_split, retinanet)

loading annotations into memory...
Done (t=0.09s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
{'id': [[1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 15, 16, 17, 18, 19], [6], [20], [14], [12], [13]], 'name': [['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person'], ['train'], ['sheep'], ['sofa'], ['pottedplant'], ['tvmonitor']]}
dataloader class_num = 16


In [2]:
# Restore the round-2 / epoch-20 checkpoint into `retinanet` and `optimizer`
# (no scheduler is passed, so scheduler state is not restored).
readCheckpoint(method, 2, 20,data_split, retinanet, optimizer)

readcheckpoint at Round2 Epoch20


In [3]:
# Build a 15-class network and load the round-1 / epoch-50 checkpoint into it.
prev_model = model.resnet50(num_classes=15, pretrained=True)
readCheckpoint(method, 1, 50,data_split, prev_model)
# Hand the previous-round model to the current one.
# presumably it serves as the distillation teacher — confirm in retinanet.model
retinanet.init_prev_model(prev_model)

readcheckpoint at Round1 Epoch50


# Compute the loss over all data (計算所有data loss)

In [13]:
import time
import pickle

# Accumulate, over every sample of the training set, the gradient of the total
# loss with respect to the selected parameters (names without "prev_model" or
# "bn" and with requires_grad set). The result `grads` is one flat tensor; a
# later cell slices it back per parameter in the same named_parameters() order,
# so the selection and order used here must stay in sync with that cell.
dataset = dataset_train

retinanet.train()
retinanet.distill_loss = True
retinanet.freeze_resnet(True)
retinanet.freeze_bn()

# None marks "nothing accumulated yet". Comparing an accumulated tensor with
# `[]` (as done before) is an ill-defined tensor/list comparison.
grads = None
for idx, data in enumerate(dataset):
    start = time.time()
    optimizer.zero_grad()
    with torch.cuda.device(0):
        if torch.cuda.is_available():
            # Single-sample batch: image moved channel-first, annotations unsqueezed.
            losses = retinanet([data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0), data['annot'].cuda().unsqueeze(dim=0)])
            if retinanet.distill_loss:
                classification_loss, regression_loss, dist_class_loss, dist_reg_loss, dist_feat_loss = losses
            else:
                classification_loss, regression_loss = losses

        else:

            print('not have gpu')
            break

        classification_loss = classification_loss.mean()
        regression_loss = regression_loss.mean()

        loss = classification_loss + regression_loss
        if retinanet.distill_loss:
            loss += dist_class_loss + dist_reg_loss + dist_feat_loss

        # Samples with an exactly-zero loss contribute nothing and are skipped.
        if bool(loss == 0):
            continue

        loss.backward()

        per_param = []
        for name, p in retinanet.named_parameters():
            if "prev_model" not in name and "bn" not in name and p.requires_grad:
                if p.grad is None:
                    # zero_grad() may leave .grad as None for parameters that did
                    # not take part in this sample's loss; use zeros so the flat
                    # layout stays identical for every sample.
                    per_param.append(torch.zeros(p.numel(), dtype=p.dtype, device=p.device))
                else:
                    per_param.append(p.grad.reshape(-1))

        flat_grad = torch.cat(per_param)
        if grads is None:
            grads = flat_grad
        else:
            grads += flat_grad

        end = time.time()

        if not retinanet.distill_loss:
            print('Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Spend Time:{:1.2f}s'.format(idx, float(classification_loss), float(regression_loss), end - start))
        else:
            print('Iteration: {} | Class loss: {:1.4f} | Reg loss: {:1.4f} | dis_class_loss: {:1.4f} | dis_reg_loss: {:1.4f} | dis_feat_loss: {:1.4f} | Spend Time:{:1.2f}s'.format(idx, float(classification_loss), float(regression_loss), float(dist_class_loss), float(dist_reg_loss), float(dist_feat_loss),end - start))
        # Drop the references to this sample's loss tensors before the next forward pass.
        del classification_loss
        del regression_loss
        if retinanet.distill_loss:
            del dist_class_loss, dist_reg_loss, dist_feat_loss
#     except Exception as e:
#         print(e)
#         continue

unfreeze resnet
Iteration: 0 | Class loss: 0.4215 | Reg loss: 0.5193 | dis_class_loss: 0.0061 | dis_reg_loss: 0.0097 | dis_feat_loss: 0.0224 | Spend Time:0.15s
Iteration: 1 | Class loss: 0.6358 | Reg loss: 0.7972 | dis_class_loss: 0.0065 | dis_reg_loss: 0.0109 | dis_feat_loss: 0.0226 | Spend Time:0.12s
Iteration: 2 | Class loss: 0.9089 | Reg loss: 0.5037 | dis_class_loss: 0.0000 | dis_reg_loss: 0.0000 | dis_feat_loss: 0.0110 | Spend Time:0.11s
Iteration: 3 | Class loss: 0.4843 | Reg loss: 0.5211 | dis_class_loss: 0.0254 | dis_reg_loss: 0.0409 | dis_feat_loss: 0.0252 | Spend Time:0.12s
Iteration: 4 | Class loss: 0.2465 | Reg loss: 0.1521 | dis_class_loss: 0.0484 | dis_reg_loss: 0.0223 | dis_feat_loss: 0.0215 | Spend Time:0.10s
Iteration: 5 | Class loss: 0.2452 | Reg loss: 0.1942 | dis_class_loss: 0.0782 | dis_reg_loss: 0.0117 | dis_feat_loss: 0.0227 | Spend Time:0.11s
Iteration: 6 | Class loss: 0.4927 | Reg loss: 0.5612 | dis_class_loss: 0.0501 | dis_reg_loss: 0.1007 | dis_feat_loss: 0.

Iteration: 57 | Class loss: 0.6048 | Reg loss: 0.6589 | dis_class_loss: 0.0074 | dis_reg_loss: 0.0097 | dis_feat_loss: 0.0300 | Spend Time:0.11s
Iteration: 58 | Class loss: 0.4967 | Reg loss: 0.4226 | dis_class_loss: 0.0053 | dis_reg_loss: 0.0254 | dis_feat_loss: 0.0248 | Spend Time:0.10s
Iteration: 59 | Class loss: 0.3279 | Reg loss: 0.0688 | dis_class_loss: 0.0519 | dis_reg_loss: 0.0300 | dis_feat_loss: 0.0270 | Spend Time:0.10s
Iteration: 60 | Class loss: 0.2835 | Reg loss: 0.2257 | dis_class_loss: 0.0052 | dis_reg_loss: 0.0078 | dis_feat_loss: 0.0143 | Spend Time:0.11s
Iteration: 61 | Class loss: 0.3895 | Reg loss: 0.1780 | dis_class_loss: 0.0121 | dis_reg_loss: 0.0196 | dis_feat_loss: 0.0282 | Spend Time:0.11s
Iteration: 62 | Class loss: 0.4815 | Reg loss: 0.4620 | dis_class_loss: 0.0215 | dis_reg_loss: 0.0917 | dis_feat_loss: 0.0085 | Spend Time:0.12s
Iteration: 63 | Class loss: 0.4284 | Reg loss: 0.7234 | dis_class_loss: 0.0252 | dis_reg_loss: 0.0320 | dis_feat_loss: 0.0224 | Sp

Iteration: 114 | Class loss: 0.5148 | Reg loss: 0.4843 | dis_class_loss: 0.0044 | dis_reg_loss: 0.0088 | dis_feat_loss: 0.0218 | Spend Time:0.11s
Iteration: 115 | Class loss: 0.5639 | Reg loss: 0.6149 | dis_class_loss: 0.0397 | dis_reg_loss: 0.0453 | dis_feat_loss: 0.0140 | Spend Time:0.11s
Iteration: 116 | Class loss: 0.7690 | Reg loss: 0.7509 | dis_class_loss: 0.0041 | dis_reg_loss: 0.0149 | dis_feat_loss: 0.0226 | Spend Time:0.11s
Iteration: 117 | Class loss: 0.2375 | Reg loss: 0.1629 | dis_class_loss: 0.0089 | dis_reg_loss: 0.1967 | dis_feat_loss: 0.0124 | Spend Time:0.11s
Iteration: 118 | Class loss: 0.0573 | Reg loss: 0.0000 | dis_class_loss: 0.0072 | dis_reg_loss: 0.0191 | dis_feat_loss: 0.0182 | Spend Time:0.11s
Iteration: 119 | Class loss: 0.4122 | Reg loss: 0.2019 | dis_class_loss: 0.0070 | dis_reg_loss: 0.0074 | dis_feat_loss: 0.0124 | Spend Time:0.11s
Iteration: 120 | Class loss: 0.8177 | Reg loss: 0.3658 | dis_class_loss: 0.0121 | dis_reg_loss: 0.0182 | dis_feat_loss: 0.02

Iteration: 171 | Class loss: 0.2950 | Reg loss: 0.7023 | dis_class_loss: 0.0881 | dis_reg_loss: 0.0384 | dis_feat_loss: 0.0256 | Spend Time:0.11s
Iteration: 172 | Class loss: 0.6364 | Reg loss: 0.4095 | dis_class_loss: 0.0000 | dis_reg_loss: 0.0000 | dis_feat_loss: 0.0112 | Spend Time:0.11s
Iteration: 173 | Class loss: 0.6238 | Reg loss: 0.3924 | dis_class_loss: 0.0016 | dis_reg_loss: 0.1175 | dis_feat_loss: 0.0108 | Spend Time:0.12s
Iteration: 174 | Class loss: 0.4715 | Reg loss: 0.4051 | dis_class_loss: 0.0183 | dis_reg_loss: 0.2062 | dis_feat_loss: 0.0094 | Spend Time:0.11s
Iteration: 175 | Class loss: 0.3910 | Reg loss: 0.2935 | dis_class_loss: 0.0230 | dis_reg_loss: 0.0202 | dis_feat_loss: 0.0208 | Spend Time:0.11s
Iteration: 176 | Class loss: 0.2277 | Reg loss: 0.2204 | dis_class_loss: 0.0005 | dis_reg_loss: 0.0375 | dis_feat_loss: 0.0140 | Spend Time:0.11s
Iteration: 177 | Class loss: 0.2908 | Reg loss: 0.2340 | dis_class_loss: 0.0042 | dis_reg_loss: 0.0132 | dis_feat_loss: 0.01

Iteration: 228 | Class loss: 0.3460 | Reg loss: 0.3186 | dis_class_loss: 0.0061 | dis_reg_loss: 0.0053 | dis_feat_loss: 0.0186 | Spend Time:0.11s
Iteration: 229 | Class loss: 0.6460 | Reg loss: 0.3703 | dis_class_loss: 0.0205 | dis_reg_loss: 0.0067 | dis_feat_loss: 0.0292 | Spend Time:0.11s
Iteration: 230 | Class loss: 0.1034 | Reg loss: 0.1823 | dis_class_loss: 0.0132 | dis_reg_loss: 0.0436 | dis_feat_loss: 0.0273 | Spend Time:0.10s
Iteration: 231 | Class loss: 0.9467 | Reg loss: 0.5863 | dis_class_loss: 0.0069 | dis_reg_loss: 0.0210 | dis_feat_loss: 0.0202 | Spend Time:0.11s
Iteration: 232 | Class loss: 1.3456 | Reg loss: 0.7967 | dis_class_loss: 0.0064 | dis_reg_loss: 0.0080 | dis_feat_loss: 0.0216 | Spend Time:0.11s
Iteration: 233 | Class loss: 0.9130 | Reg loss: 0.6077 | dis_class_loss: 0.0158 | dis_reg_loss: 0.0139 | dis_feat_loss: 0.0194 | Spend Time:0.11s
Iteration: 234 | Class loss: 0.3251 | Reg loss: 0.2112 | dis_class_loss: 0.0559 | dis_reg_loss: 0.0968 | dis_feat_loss: 0.02

In [10]:
# Turn the accumulated gradient sum into a per-sample average.
# NOTE(review): not idempotent — re-running this cell divides `grads` again.
# The divisor is len(dataset) even though the accumulation loop skips samples
# whose loss is exactly 0.
grads = grads / len(dataset)

In [23]:
from collections import defaultdict  # kept: other cells may rely on this name being imported

# Split the flat gradient vector back into one CPU tensor per parameter. The
# filter and iteration order must match the loop that built `grads`.
# A plain dict is used: defaultdict() without a factory behaves like a dict and
# only suggests a default that does not exist.
all_grads = {}
index = 0
for name, p in retinanet.named_parameters():
    if "prev_model" not in name and "bn" not in name and p.requires_grad:
        n_param = p.numel()
        all_grads[name] = grads[index:index+n_param].view_as(p).cpu()
        index += n_param

# Every element of the flat vector must have been consumed; otherwise the
# parameter selection changed between accumulation and slicing.
assert index == grads.numel(), 'flat gradient has {} elements but {} were assigned'.format(grads.numel(), index)

In [26]:
# Maximum entry of the gradient tensor stored under 'conv1.weight'.
all_grads['conv1.weight'].max()

tensor(5.6232)

In [None]:
# Build one sample by hand from dataset index 74: raw image and annotations,
# then the dataset's transform pipeline.
# NOTE(review): 74 is an unexplained index; presumably a sample picked for debugging.
img = dataset_train.load_image(74)
ann = dataset_train.load_annotations(74)
data = {'img': img, 'annot': ann}
data = dataset_train.transform(data)

In [None]:
# Shape of the annotation tensor after adding a leading batch dimension.
# (A stray backtick inside `unsqueeze` made this cell a syntax error.)
data['annot'].cuda().unsqueeze(dim=0).shape

In [None]:
# Inspect the `classes` attribute of the dataset's cocoHelper.
dataset_train.cocoHelper.classes

In [None]:
# Switch the model to its per-category loss output and run it on the hand-built
# sample as a batch of one. The first returned value is kept as
# `classification_loss`; the second is discarded.
# assumes data['img'] is (H, W, C) before permute(2, 0, 1) — TODO confirm
retinanet.each_cat_loss = True
classification_loss, _ = retinanet([data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0), data['annot'].cuda().unsqueeze(dim=0)])

In [None]:
# Display the per-category classification loss returned by the forward pass.
classification_loss

# Compute the loss separately for each category (分別為每個類別計算loss)

In [None]:
import time
import pickle
import numpy as np

# For every training sample, record the mean classification loss per category.
# `losses[key][img_id]` holds that mean for category index `key` and image id
# `img_id`; samples that raise are collected in `fail_id` instead of aborting
# the whole pass.
retinanet.train()
retinanet.freeze_bn()

dataset = dataset_train

fail_id = []
losses = [collections.defaultdict() for _ in dataset.seen_class_id]

retinanet.each_cat_loss = True
for idx, data in enumerate(dataset):

    try:
        with torch.cuda.device(0):
            if torch.cuda.is_available():
                # Only loss values are read and backward() is never called, so
                # skip building the autograd graph.
                with torch.no_grad():
                    classification_loss, _ = retinanet([data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0), data['annot'].cuda().unsqueeze(dim=0)])
            else:
                print('not have gpu')
                break

            img_id = dataset.image_ids[idx]

            for key in classification_loss.keys():
                losses[key][img_id] = float(np.mean(classification_loss[key]))

            print(idx)

    except Exception as e:
        # Best effort: report the error, remember the sample index, keep going.
        print(e)
        fail_id.append(idx)
        continue

print('fail_id:',fail_id)

# Store the result next to the round-1 / epoch-50 checkpoint. dirname() replaces
# the manual split('/')/join and does not depend on the path separator.
losses_dir = os.path.dirname(get_checkpoint_path(method, 1, 50,data_split))
with open(os.path.join(losses_dir, 'losses_each_cat_new.pickle'), 'wb') as f:
    pickle.dump(losses, f)
#print(losses)

In [None]:
# Keys of the most recent per-category loss result.
classification_loss.keys()

In [None]:
from collections import defaultdict

In [None]:
# Load per-category losses stored next to the round-1 / epoch-50 checkpoint.
# NOTE(review): this reads 'losses_each_cat.pickle', while the cell that computes
# the losses writes 'losses_each_cat_new.pickle' — confirm which file is intended.
# pickle.load executes code embedded in the file; only load files you created.
losses_dir = os.path.dirname(get_checkpoint_path(method, 1, 50,data_split))
with open(os.path.join(losses_dir, 'losses_each_cat.pickle'), 'rb') as f:
    losses = pickle.load(f)

In [None]:
# Print the number of entries in each element of `losses`.
for i in losses:
    print(len(i))

In [None]:
# Display the whole `losses` object (output can be very large).
losses

In [None]:
# Path of 'losses.pickle' inside the round-1 / epoch-50 checkpoint directory.
# NOTE(review): data_split is not passed here, so get_checkpoint_path falls back
# to its default "None" and creates/uses a directory literally named "None" —
# confirm whether `data_split` was intended.
os.path.join("/".join(get_checkpoint_path(method, 1, 50).split('/')[:-1]), 'losses.pickle')

In [None]:
# Keep element 2 of every value in `losses`.
# NOTE(review): requires `losses` to be a mapping; the per-category losses built
# and loaded elsewhere in this notebook are a list, which has no .values().
# Presumably written for a differently shaped 'losses.pickle' — confirm.
losses = [v[2] for v in losses.values()]

In [None]:
# NOTE(review): readCheckpoint as defined in this notebook also requires
# `data_split` and `retinanet` and returns nothing, so this 3-argument call wrapped
# in np.mean does not match it; it appears to target an earlier version — confirm.
method = 'w_distillation'

# range(50, 51, 10) yields the single epoch 50.
for i in range(50,51,10):
    print(np.mean(readCheckpoint(method, 1, i)))



In [None]:
# NOTE(review): this rebinds the global `method` used by other cells, and the
# 3-argument readCheckpoint call does not match the signature defined in this
# notebook (which also returns nothing) — confirm before running.
method = 'incremental'

# Epochs 10, 20, 30 of round 1.
for i in range(10,31,10):
    print(np.mean(readCheckpoint(method, 1, i)))

In [None]:
# NOTE(review): this rebinds the global `method` used by other cells, and the
# 3-argument readCheckpoint call does not match the signature defined in this
# notebook (which also returns nothing) — confirm before running.
method = 'incremental'

# Epochs 10, 20, 30, 40, 50 of round 0.
for i in range(10,51,10):
    print(np.mean(readCheckpoint(method, 0, i)))



In [None]:
# Closing parenthesis restored (the cell was a syntax error).
# NOTE(review): readCheckpoint as defined in this notebook also requires
# `data_split` and `retinanet` and returns nothing, so this call still does not
# match it; it appears to target an earlier version — confirm.
np.mean(readCheckpoint(method, 0, i))

In [None]:
# Cut the first four consecutive 9-channel slices out of the classification
# head's output weights.
# presumably one slice per coarse category, with 9 anchors each — confirm
head_weight = retinanet.classificationModel.output.weight.data
parts = []

for i in range(4):
    first = i * 9
    parts.append(head_weight[first:first + 9, :, :, :])

In [None]:
import torch.nn as nn
# Fresh 3x3 convolution with 256 input channels and 9 * 20 = 180 output channels.
output = nn.Conv2d(256, 9 * 20, kernel_size=3, padding=1)

In [None]:
# Fill the 180 output channels of the new convolution in 9-channel slices:
# channels 0-8 come from parts[3]; each following group repeats one source
# slice for every class in it. Only the weights are written, the bias is left
# as initialised.
new_weight = output.weight.data
new_weight[0:9, :, :, :] = parts[3]

# (first channel, number of classes, source slice)
group_layout = (
    (9, 7, parts[0]),    #vehicle(7)
    (72, 6, parts[1]),   #furniture(6)
    (126, 6, parts[2]),  #animals(6)
)
for group_start, n_classes, source in group_layout:
    for i in range(n_classes):
        first = group_start + i * 9
        new_weight[first:first + 9, :, :, :] = source

In [None]:
# Compare two neighbouring 9-channel slices of the weights element-wise.
# .any() is True as soon as a single element matches (not only when all match).
(output.weight.data[72:81,:,:,:] == output.weight.data[81:90,:,:,:]).any()

In [None]:
import copy
# Keep an independent copy of the model before it is changed. This rebinds
# `prev_model`, replacing the checkpoint-loaded model created in an earlier cell.
prev_model = copy.deepcopy(retinanet)
# presumably adds one output class to the model — confirm in retinanet.model
retinanet.increase_class(1)

# Move both models to the GPU.
retinanet.cuda()
prev_model.cuda()

In [None]:
import torch
# All-ones input of shape (2, 256, 30, 30) on the GPU, fed to the classification
# heads' output layers in the next cell.
test = torch.ones(2,256,30,30).cuda()

In [None]:
# Apply only the final output layer and its activation of each model's
# classification head to the same test input.
prev_out = prev_model.classificationModel.output_act(prev_model.classificationModel.output(test))
cur_out = retinanet.classificationModel.output_act(retinanet.classificationModel.output(test))

In [None]:

def change_shape1(out, num_classes):
    """Flatten a head output, reading the channel axis as (anchor, class).

    Args:
        out: Tensor of shape (batch, 9 * num_classes, H, W).
        num_classes: Number of classes encoded per anchor.

    Returns:
        Tensor of shape (batch, H * W * 9, num_classes). Row
        ``(h * W + w) * 9 + a``, column ``k`` holds
        ``out[b, a * num_classes + k, h, w]``.
    """
    # Channels-last, so the channel axis can be split in place.
    out1 = out.permute(0, 2, 3, 1)
    batch_size, height, width, _ = out1.shape
    out1 = out1.view(batch_size, height, width, 9, num_classes)

    # Use the real batch size: the previously hard-coded 2 silently produced a
    # wrongly shaped result for any other batch size.
    return out1.contiguous().view(batch_size, -1, num_classes)
def change_shape2(out, num_classes):
    """Flatten a head output, reading the channel axis as (class, anchor).

    Args:
        out: Tensor of shape (batch, num_classes * 9, H, W).
        num_classes: Number of classes; each class owns 9 consecutive channels.

    Returns:
        Tensor of shape (batch, H * W * 9, num_classes). Row
        ``(h * W + w) * 9 + a``, column ``k`` holds
        ``out[b, k * 9 + a, h, w]``.
    """
    # Channels-last, then split the channel axis into (class, anchor).
    out1 = out.permute(0, 2, 3, 1)
    batch_size, height, width, _ = out1.shape
    out1 = out1.view(batch_size, height, width, num_classes, 9)

    # Swap to (anchor, class) so the class axis ends up last.
    out1 = out1.permute(0, 1, 2, 4, 3)

    # Use the real batch size: the previously hard-coded 2 silently produced a
    # wrongly shaped result for any other batch size.
    return out1.contiguous().view(batch_size, -1, num_classes)
# Flatten both head outputs with the (anchor, class) reading: 19 classes for the
# previous model's output, 20 for the current one.
prev_out_new1 = change_shape1(prev_out, 19)
cur_out_new1 = change_shape1(cur_out, 20)

# prev_out_new2 = change_shape2(prev_out, 19)
# cur_out_new2 = change_shape2(cur_out, 20)
# (prev_out == cur_out[:,:171,:,:]).any()