In [2]:
%load_ext autoreload
%autoreload 2

import scod
import torch
import torch.nn as nn
import numpy as np
from tqdm import trange
import datetime
import random
import pickle


from torchvision import transforms
from torch.utils.data import Subset
from torch.utils.data import DataLoader

import seaborn as sns
from matplotlib import pyplot as plt

from scripts.train_functions import create_dataloaders, main, set_up_model, load_ckp
from scripts.benchmark_functions import alg_flags, scod_flagger, ds_scod_flagger, create_scod_model, create_benchmark_seq_batches, get_Lt_J
from scripts.utils import eval_scod, init_scod


1. Take a mixed batch (x% earth, y% lens flare, z% test)\
Flag OOD f% using scod k-highest/ds-scod

2. Continual training: \
    a. Baseline method: mix new with old training\
    b. Our method: use EWC to prevent forgetting\
    Train for N epochs on mixture

3. Plot loss evolution on \
    a. Mixture\
    b. Just validation set (in distribution)\
    c. Earth background (OOD 1)\
    d. Lens flare (OOD 2)


In [3]:
dataset_path="datasets/exoromper/"

batch_size = 100
dataset_name = "exoromper"


In [4]:
dataloaders, dataset_sizes = create_dataloaders(dataset_path, batch_size, dataset_name=dataset_name)

print(dataloaders)

{'all_train': <torch.utils.data.dataloader.DataLoader object at 0x7f4d39684990>, 'all_val': <torch.utils.data.dataloader.DataLoader object at 0x7f4d07fc70d0>, 'all_test': <torch.utils.data.dataloader.DataLoader object at 0x7f4d39684a10>, 'space': <torch.utils.data.dataloader.DataLoader object at 0x7f4d39684dd0>, 'earth': <torch.utils.data.dataloader.DataLoader object at 0x7f4d39713950>, 'lens_flare': <torch.utils.data.dataloader.DataLoader object at 0x7f4d39713210>}


# Add SCOD wrapper

In [5]:
load_model_path = "./best_model/ex_v5_best_model.pt"

unc_model = create_scod_model(load_model_path, dataset_name, batch_size, force_cpu=True)

force_cpu is True. device:  cpu


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


Weight space dimension: 2.563e+03
computing basis
using T = 16


  0%|          | 0/2 [00:00<?, ?it/s]

# Create mixture dataset using flagged inputs

In [10]:
ood_ratio = 0.99
mix_train_ratio = 0.01
num_batches = 2

ood_num = int(batch_size*ood_ratio)
mix_train_num = int(batch_size*mix_train_ratio)

batch_compositions = [  {'space':batch_size - ood_num,'earth':ood_num}]*num_batches
train_batch_compositions = [{'space':mix_train_num}]*num_batches

flag_limit = ood_num
algs_to_test = [lambda x: scod_flagger(x, unc_model, flag_limit=flag_limit, debug = False, dist_layer = scod.distributions.NormalMeanParamLayer(), force_cpu=True)]
algs_names = ['scod_flagged']

flagged_inputs, flagged_labels, flagged_fnames, flags = alg_flags(algs_to_test, dataset_path, batch_size, num_batches, batch_compositions)
train_inputs, train_labels, train_fnames = create_benchmark_seq_batches(dataset_path, mix_train_num, num_batches, train_batch_compositions)
for k in range(len(algs_to_test)):
    for i in range(num_batches):
        flagged_subset = [(ts, lbl) for (ts,lbl,flg) in zip(flagged_inputs[i], flagged_labels[i], flags[i][k]) if flg]
        train_subset = [(ts, lbl) for (ts,lbl) in zip(train_inputs[i], train_labels[i])]
        refine_subset = flagged_subset + train_subset
        random.shuffle(refine_subset)

        mix_name = "mixture_"+str(batch_compositions[i])+'_'+algs_names[k]+'_'+str(len(flagged_subset))+'_train_'+str(len(train_subset))+"_batch_"+str(i)
        
        dataloaders[mix_name] = DataLoader(refine_subset, batch_size=batch_size, shuffle=True, num_workers=8)
        dataset_sizes[mix_name]  = len(refine_subset)
        print("Added ", mix_name, " to list of dataloaders.")

Test seq  0  algorithm  0  flagged:  [True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]
Test seq  1  algorithm  0  flagged:  [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True,

# Resume training

In [12]:

for b in range(num_batches):
    initialized_model, exp_lr_scheduler, sgd_optimizer, criterion, device = set_up_model(dataset_name, force_cpu=True)
    load_model_path = "./best_model/ex_v5_best_model.pt"
    model, optimizer, start_epoch_idx, valid_loss = load_ckp(load_model_path, initialized_model, sgd_optimizer)
    print("start_epoch_idx = ", start_epoch_idx)
    print("valid_loss = {:.6f}".format(valid_loss))

    start_epochs = start_epoch_idx
    num_epochs = 15
    scheduler = exp_lr_scheduler

    phases = [('train',"mixture_{'space': 90, 'earth': 10}_scod_flagged_10_train_100_batch_"+str(b)), 
            ('val','space'), ('val','earth'), ('val','lens_flare')]

    # phases = [('train',"mixture_{'space': 1, 'earth': 99}_scod_flagged_99_train_1_batch_"+str(b)), 
    #         ('val','space'), ('val','earth'), ('val','lens_flare')]
            

    losses = {p[1]: np.zeros(num_epochs) for p in phases}

    # Resume training
    for epoch in range(start_epochs, start_epochs+num_epochs):
        print('Epoch {}/{}'.format(epoch+1, start_epochs+num_epochs))
        print('-' * 10)
        for phase in phases:
            if phase[0] == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()

            running_loss = 0.0

            # batch loop
            for d in dataloaders[phase[1]]:
                if len(d) == 2:
                    inputs, labels = d 
                elif len(d) == 3:
                    inputs, labels, fnames = d 
                else:
                    raise ValueError("Unrecognized. Dataloader entry has length {}".format(len(d)))

                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase[0] == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    if phase[0] == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
            epoch_loss = running_loss / dataset_sizes[phase[1]]
            print('{} {} Loss: {:.4f}'.format(phase[0], phase[1], epoch_loss))
            if phase[0] == 'train':
                scheduler.step()
            losses[phase[1]][epoch-start_epochs] = epoch_loss

    # Save train losses and valid losses
    losses_path = "./losses/"
    fname = "ours_batch_"+str(b)+"_"+datetime.datetime.now().strftime("%Y%m%d-%H%M%S") +'.npz' 
    with open(losses_path+fname, "wb") as fp:
        pickle.dump(losses, fp)



force_cpu is True. device:  cpu
start_epoch_idx =  34
valid_loss = 2.141914
Epoch 35/49
----------
train mixture_{'space': 90, 'earth': 10}_scod_flagged_10_train_100_batch_0 Loss: 4.5493
val space Loss: 3.5746
val earth Loss: 65.3320
val lens_flare Loss: 29.5189
Epoch 36/49
----------
train mixture_{'space': 90, 'earth': 10}_scod_flagged_10_train_100_batch_0 Loss: 4.5989
val space Loss: 5.5236
val earth Loss: 65.5361
val lens_flare Loss: 32.5425
Epoch 37/49
----------
train mixture_{'space': 90, 'earth': 10}_scod_flagged_10_train_100_batch_0 Loss: 4.4752
val space Loss: 6.6172
val earth Loss: 64.5890
val lens_flare Loss: 33.4607
Epoch 38/49
----------
train mixture_{'space': 90, 'earth': 10}_scod_flagged_10_train_100_batch_0 Loss: 4.7625
val space Loss: 7.1665
val earth Loss: 61.7315
val lens_flare Loss: 32.9585
Epoch 39/49
----------
train mixture_{'space': 90, 'earth': 10}_scod_flagged_10_train_100_batch_0 Loss: 4.5508
val space Loss: 8.3264
val earth Loss: 62.5490
val lens_flare Los

# Resume training in a new way

In [None]:
initialized_model, exp_lr_scheduler, sgd_optimizer, criterion, device = set_up_model(dataset_name)
load_model_path = "./best_model/ex_v5_best_model.pt"
model, optimizer, start_epoch_idx, valid_loss = load_ckp(load_model_path, initialized_model, sgd_optimizer)
print("start_epoch_idx = ", start_epoch_idx)
print("valid_loss = {:.6f}".format(valid_loss))

In [None]:
start_epochs = start_epoch_idx
num_epochs = 1
scheduler = exp_lr_scheduler

# Resume training on mixture
phases = [('train',"mixture_{'space': 90, 'earth': 10}_scod_flagged_10_train_100_batch_0"), 
            ('val','space'), ('val','earth'), ('val','lens_flare')]

losses = {p[1]: np.zeros(num_epochs) for p in phases}

dist_layer = scod.distributions.NormalMeanParamLayer()

# Resume training
for epoch in range(start_epochs, start_epochs+num_epochs):
    print('Epoch {}/{}'.format(epoch+1, start_epochs+num_epochs))
    print('-' * 10)

    # Create new unc model
    new_unc_model = init_scod(model, dataloaders["all_train"], dataset_name) 

    for phase in phases:
        if phase[0] == 'train':
            model.train()  # Set model to training mode
        else:
            model.eval()

        running_loss = 0.0

        # batch loop
        for d in dataloaders[phase[1]]:
            if len(d) == 2:
                inputs, labels = d 
            elif len(d) == 3:
                inputs, labels, fnames = d 
            else:
                raise ValueError("Unrecognized. Dataloader entry has length {}".format(len(d)))

            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase[0] == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                

                if phase[0] == 'train':
                    for i in inputs[:2]:
                        unc = eval_scod(i, unc_model, dist_layer)
                        unc = unc.item()
                        print("Uncertainty: ", unc)
                        LtJ = get_Lt_J(i.unsqueeze(0), unc_model, debug=False, dist_layer=dist_layer)
                        orig_trace = np.trace(LtJ)
                        print("LtJ shape is ", np.shape(LtJ))

                        new_unc = eval_scod(i, new_unc_model, dist_layer)
                        new_unc = new_unc.item()
                        print("New Uncertainty: ", new_unc)
                        new_LtJ = get_Lt_J(i.unsqueeze(0), new_unc_model, debug=False, dist_layer=dist_layer)
                        diff = np.trace(new_LtJ - LtJ)
                        print("Trace is ", diff/orig_trace)


                    loss.backward()
                    optimizer.step()
            running_loss += loss.item() * inputs.size(0)
        epoch_loss = running_loss / dataset_sizes[phase[1]]
        print('{} {} Loss: {:.4f}'.format(phase[0], phase[1], epoch_loss))
        if phase[0] == 'train':
            scheduler.step()
        losses[phase[1]][epoch-start_epochs] = epoch_loss

print(losses)