# Testing Original Code 

We found some discrepancies between the code and the description in the paper, so here we use a modified version of their main.py to print and keep track of exactly how the data is used.

In [5]:
import os
import torch
import yaml
from tqdm import tqdm
from datasets import get_dataloaders
from eval import eval_model
from methods import get_model
from models import get_net_optimizer_scheduler
from utils.density import GaussianDensityTorch
import warnings

In [3]:
def get_inputs_labels(data, device=None):
    """Move a batch to the target device and build one integer label per sample.

    A single tensor is treated as one group: every sample is labelled 0.
    A list/tuple of tensors is treated as several groups (what each group
    means - task, augmentation, ... - is decided by the dataloader): the
    tensors are concatenated along dim 0 and every sample of the i-th tensor
    is labelled ``i``.

    Args:
        data: A tensor, or a non-empty list/tuple of tensors that can be
            concatenated along dim 0. The tensors may have different sizes
            along dim 0.
        device: Device to move the data to. Defaults to the module-level
            ``args.device`` so existing one-argument calls keep working.

    Returns:
        tuple: ``(inputs, labels)`` where ``labels`` is a 1-D ``torch.long``
        tensor on ``device`` with one entry per row of ``inputs``.

    Raises:
        ValueError: If ``data`` is an empty list or tuple.
    """
    if device is None:
        # Fall back to the notebook-level configuration object.
        device = args.device

    if isinstance(data, (list, tuple)):
        if not data:
            raise ValueError("data must contain at least one tensor")
        groups = [x.to(device) for x in data]
        # Size each label run from its own tensor so labels always line up
        # with the concatenated inputs, even when group sizes differ.
        labels = torch.cat([
            torch.full((group.size(0),), idx, dtype=torch.long, device=device)
            for idx, group in enumerate(groups)
        ])
        inputs = torch.cat(groups, dim=0)
    else:
        inputs = data.to(device)
        # A lone tensor is a single group, so every sample gets label 0.
        labels = torch.zeros(inputs.size(0), dtype=torch.long, device=device)
    return inputs, labels


def get_args():
    """Build the argument object used when running this code in Jupyter.

    Replaces command-line parsing with fixed values so the notebook can call
    ``main`` directly.

    NOTE(review): ``main`` also reads ``args.dataset.n_tasks``, which is not
    set here; presumably those sections come from the YAML file named by
    ``config_file`` and still have to be merged in - confirm.

    Returns:
        arguments: An instance carrying ``config_file``, ``device``,
        ``data_dir``, ``mtd_dir``, ``save_checkpoint``, ``save_path``,
        ``noise_ratio`` and ``seed``.
    """

    class arguments():
        def __init__(self):
            self.config_file = './configs/cad.yaml'
            # Use the GPU when torch can see one, otherwise run on the CPU.
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
            self.data_dir = "../mvtec_anomaly_detection"
            self.mtd_dir = "../datasets/mtd_ano_mask"
            self.save_checkpoint = True
            self.save_path = "./checkpoints"
            self.noise_ratio = 0
            self.seed = 42

        def str2bool(self, v):
            """Return True when ``v`` (case-insensitive) is yes/true/t/1."""
            return v.lower() in ("yes", "true", "t", "1")

    # The original only defined the class and fell off the end of the
    # function, so callers received None instead of an argument object.
    return arguments()


def main(args):
    """Step through the tasks of the CAD pipeline to inspect how data is fed in.

    Builds the network, optimizer, scheduler, density estimator and method
    wrapper, then requests the dataloaders of each task in turn and records
    the per-task training-sample count. The training loop, periodic
    evaluation and checkpoint saving are currently commented out, so nothing
    is trained or written to disk.

    Args:
        args: Configuration object. This function reads ``args.device`` and
            ``args.dataset.n_tasks`` and forwards ``args`` to the project
            helpers.
    """
    # Network plus optimisation helpers, then the method-specific wrapper.
    net, optimizer, scheduler = get_net_optimizer_scheduler(args)
    density = GaussianDensityTorch()  # only consumed by the disabled code below
    net.to(args.device)
    model = get_model(args, net, optimizer, scheduler)

    # State handed to get_dataloaders and replaced by what it returns,
    # so it carries over from one task to the next (starts out empty).
    dataloaders_train, dataloaders_test = [], []
    learned_tasks, all_test_filenames = [], []

    # Per-task statistics; only the sample counts are filled in while the
    # training code stays disabled.
    task_wise_mean, task_wise_cov, task_wise_train_data_nums = [], [], []

    banner = '---' * 10
    for t in range(args.dataset.n_tasks):
        print(banner, f'Task:{t}', banner)

        # Fetch this task's loaders together with the updated running state.
        loader_state = get_dataloaders(
            args, t, dataloaders_train, dataloaders_test, learned_tasks, all_test_filenames
        )
        (train_dataloader, dataloaders_train, dataloaders_test, learned_tasks,
         data_train_nums, all_test_filenames, train_data, test_data) = loader_state
        task_wise_train_data_nums.append(data_train_nums)

        # --- Disabled while inspecting the data flow -------------------
        # Training loop for current task
        # net.train()
        # for epoch in tqdm(range(args.train.num_epochs)):
        #     one_epoch_embeds = []  # Stores embeddings from current epoch
        # 
        #     for batch_idx, (data) in enumerate(train_dataloader):
        #         inputs, labels = get_inputs_labels(data)
        #         print(labels)
        #         break
        #     break
        # break
        #     model(epoch, inputs, labels, one_epoch_embeds, t, extra_para=None)
        #
        # # Periodic evaluation during training
        # if args.train.test_epochs > 0 and (epoch + 1) % args.train.test_epochs == 0:
        #     net.eval()
        #     # Update density estimation with current embeddings
        #     density = model.training_epoch(
        #         density,
        #         one_epoch_embeds,
        #         task_wise_mean,
        #         task_wise_cov,
        #         task_wise_train_data_nums,
        #         t
        #     )
        #     # Evaluate model on all learned tasks
        #     eval_model(args, epoch, dataloaders_test, learned_tasks, net, density)
        #     net.train()

    # Save final model and density estimator (disabled)
    # if args.save_checkpoint:
    #     torch.save(net, f'{args.save_path}/net.pth')
    #     torch.save(density, f'{args.save_path}/density.pth')


# Restrict CUDA to device index 0 through the environment.
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
with warnings.catch_warnings():
    # Ignore every warning raised while the arguments are built and main runs;
    # the previous filters are restored when the block exits.
    warnings.simplefilter("ignore")
    # Bound at module level on purpose: get_inputs_labels reads this name.
    args = get_args()
    main(args)

NameError: name 'args' is not defined

In [6]:
# Parse the experiment configuration; the file is only needed while parsing,
# so the result is printed after it has been closed.
with open('configs/cad.yaml', 'r') as f:
    data = yaml.load(f, Loader=yaml.FullLoader)
print(data)

{'name': 'continual anomaly detection', 'dataset': {'name': 'seq-mvtec', 'image_size': 224, 'num_workers': 4, 'data_incre_setting': 'mul', 'n_classes_per_task': 3, 'n_tasks': 5, 'dataset_order': 1, 'strong_augmentation': False, 'random_aug': False}, 'model': {'name': 'vit', 'pretrained': True, 'method': 'dne', 'fix_head': True, 'with_dne': True, 'with_embeds': True, 'buffer_size': 200, 'n_feat': 304, 'fc_internal': 1024, 'n_coupling_blocks': 4, 'clamp': 3, 'n_scales': 3}, 'train': {'optimizer': {'name': 'adam', 'weight_decay': 3e-05, 'momentum': 0.9}, 'warmup_epochs': 10, 'warmup_lr': 0, 'base_lr': 0.0001, 'final_lr': 0, 'num_epochs': 50, 'batch_size': 32, 'test_epochs': 10, 'alpha': 0.4, 'beta': 0.5, 'num_classes': 2}, 'eval': {'eval_classifier': 'density', 'batch_size': 32, 'visualization': True}}


In [8]:
# List the top-level keys of the parsed configuration, one per line.
for key in data:
    print(key)

name
dataset
model
train
eval


In [10]:
# List the keys found under the 'dataset' entry of the configuration.
for key in data['dataset']:
    print(key)

name
image_size
num_workers
data_incre_setting
n_classes_per_task
n_tasks
dataset_order
strong_augmentation
random_aug
