# Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import math

from tqdm import tqdm
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from copy import deepcopy

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.base import clone

from scipy.spatial.distance import cdist
from scipy.stats import ks_2samp
from scipy.optimize import minimize
from scipy.stats import wasserstein_distance

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.1 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/conda/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/opt/conda/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/opt/conda/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/opt/conda/lib/python3.11/site-packages/tornado

In [3]:
X_train = np.load('data/x_train.npy')
y_train = np.load('data/y_train.npy')

X_test = np.load('data/x_test.npy')
y_test = np.load('data/y_test.npy')

X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

y = np.where(y == 6, 0, 1)

# Setup

In [4]:
def create_phi(normal_data, c):
    """
    Concept creation function for normal data.
    Uses k-Means clustering to partition normal data into c clusters.
    
    Args:
        normal_data (numpy array): The normal data points.
        c (int): Number of desired normal concepts.
    
    Returns:
        list of numpy arrays: List of normal clusters.
    """
    kmeans = KMeans(n_clusters=c, random_state=42)
    labels = kmeans.fit_predict(normal_data)
    
    normal_concepts = [normal_data[labels == i] for i in range(c)]
    print("Finished creating normal concepts")
    
    return normal_concepts


def create_gamma(anomaly_data, c):
    """
    Concept creation function for anomaly data.
    Uses k-Means clustering to partition anomaly data into c clusters.
    
    Args:
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired anomaly concepts.
    
    Returns:
        list of numpy arrays: List of anomaly clusters.
    """
    kmeans = KMeans(n_clusters=c, random_state=42)
    labels = kmeans.fit_predict(anomaly_data)
    
    anomaly_concepts = [anomaly_data[labels == i] for i in range(c)]
    print("Finished creating anomaly concepts")
    
    return anomaly_concepts
    
def match_lambda(anomaly_concepts, normal_concepts):
    """
    Matches each normal concept with the closest anomaly concept.
    Uses Euclidean distance to determine the best match.
    
    Args:
        anomaly_concepts (list of numpy arrays): List of anomaly clusters.
        normal_concepts (list of numpy arrays): List of normal clusters.
    
    Returns:
        list of tuples: Pairs of (normal_concept, matched_anomaly_concept)
    """
    pairs = []
    remaining_anomalies = anomaly_concepts.copy()

    for normal_concept in normal_concepts:
        normal_centroid = np.mean(normal_concept, axis=0)
        anomaly_centroids = [np.mean(ac, axis=0) for ac in remaining_anomalies]

        distances = cdist([normal_centroid], anomaly_centroids, metric='euclidean')[0]
        closest_idx = np.argmin(distances)

        pairs.append((normal_concept, remaining_anomalies[closest_idx]))
        remaining_anomalies.pop(closest_idx)

    print("Finished matching concept pairs")
    
    return pairs

def lifelong_roc_auc(R):
    """
    Computes the Lifelong ROC-AUC metric.
    
    Args:
        R (numpy array): NxN matrix of ROC-AUC scores, where R[i, j] is the model's 
                         performance on concept j after learning concept i.
    
    Returns:
        float: Lifelong ROC-AUC score.
    """
    N = R.shape[0]
    lower_triangular_sum = np.sum(np.tril(R))
    normalization_factor = (N * (N + 1)) / 2

    return lower_triangular_sum / normalization_factor

def BWT(R):
    """
    Computes the Backward Transfer (BWT) score.
    
    Args:
        R (numpy array): NxN results matrix.
    
    Returns:
        float: BWT score.
    """
    N = R.shape[0]
    backward_transfer = 0
    count = 0

    for i in range(1, N):
        for j in range(i):
            backward_transfer += (R[i, j] - R[j, j])
            count += 1

    return backward_transfer / count if count > 0 else 0

def FWT(R):
    """
    Computes the Forward Transfer (FWT) score.
    
    Args:
        R (numpy array): NxN results matrix.
    
    Returns:
        float: FWT score.
    """
    N = R.shape[0]
    forward_transfer = 0
    count = 0

    for i in range(N):
        for j in range(i + 1, N): 
            forward_transfer += R[i, j]
            count += 1

    return forward_transfer / count if count > 0 else 0 

def kolmogorov_smirnov_test(X_old, X_new, alpha=0.05):
    """Detect concept drift using KS-test on feature distributions."""
    
    p_values = [ks_2samp(X_old[:, i], X_new[:, i]).pvalue for i in range(X_old.shape[1])]
    return np.any(np.array(p_values) < alpha)

def histogram_binning(X, bins=25):
    """Convert sample distributions into histograms."""
    
    return np.array([np.histogram(X[:, i], bins=bins, density=True)[0] for i in range(X.shape[1])]).T

def kl_divergence(P, Q):
    """Compute KL divergence between two distributions."""
    
    P, Q = np.clip(P, 1e-10, None), np.clip(Q, 1e-10, None)  # Avoid log(0)
    return np.sum(P * np.log(P / Q))

def strategic_sample_selection(X_old, X_new, top_k=100, learning_rate=0.01, num_iterations=100):
    """
    Selects representative new samples by minimizing KL divergence.
    
    Args:
        X_old (numpy.ndarray): Old memory buffer samples.
        X_new (numpy.ndarray): Incoming new samples.
        top_k (int): Number of samples to retain.
        learning_rate (float): Step size for optimization.
        num_iterations (int): Number of optimization steps.

    Returns:
        numpy.ndarray: Selected representative new samples.
    """
    
    H_old, H_new = histogram_binning(X_old), histogram_binning(X_new)
    m_n = np.random.rand(H_new.shape[0])  

    def loss_function(m_n):
        """Computes KL divergence loss for selected samples."""
        weighted_H_new = H_new * m_n[:, np.newaxis]  
        combined_H = (H_old + weighted_H_new) / 2 
        return kl_divergence(H_new, combined_H) 

    progress_bar = tqdm(total=num_iterations, desc="Optimizing Sample Selection", position=0, leave=True)

    def callback(xk):
        progress_bar.update(1)  

    result = minimize(loss_function, m_n, method="L-BFGS-B", bounds=[(0, 1)] * len(m_n), 
                      options={"maxiter": num_iterations, "ftol": 1e-10}, callback=callback)

    progress_bar.close()

    selected_indices = np.argsort(result.x)[-top_k:]

    return X_new[selected_indices] 


def update_memory_buffer(X_old, X_new_selected, memory_size=3000):
    """Updates memory buffer using strategic forgetting."""
    updated_buffer = np.vstack((X_old, X_new_selected))  

    if updated_buffer.shape[0] > memory_size:
        updated_buffer = updated_buffer[-memory_size:]

    return updated_buffer

class HierarchicalMemory:
    def __init__(self, memory_limit=5000, pyramid_factor=2, centroids_per_concept=10):
        self.memory_limit = memory_limit
        self.pyramid_factor = pyramid_factor
        self.centroids_per_concept = centroids_per_concept
        self.memory = {}  # level: [concept1, concept2, ...]

    def add_concept(self, data, level=1):
        if level not in self.memory:
            self.memory[level] = []
        self.memory[level].append(np.array(data))
        self._summarize_memory()

    def _pyramidal_allocation(self):
        levels = sorted(self.memory.keys())
        weights = np.array([1 / (self.pyramid_factor ** (lvl - 1)) for lvl in levels])
        total_weight = weights.sum()
        allocations = (weights / total_weight) * self.memory_limit
        return {lvl: int(alloc) for lvl, alloc in zip(levels, allocations)}

    def _summarize_concept(self, concept, n_samples):
        if len(concept) <= n_samples:
            return concept
        kmeans = KMeans(n_clusters=min(self.centroids_per_concept, len(concept)), random_state=42).fit(concept)
        centroids = kmeans.cluster_centers_
        distances = np.linalg.norm(concept[:, None] - centroids, axis=2)
        closest_indices = np.argmin(distances, axis=0)
        summarized = concept[closest_indices]
        return summarized

    def _summarize_memory(self):
        allocations = self._pyramidal_allocation()
        for level, concepts in self.memory.items():
            summarized_level = []
            alloc_per_concept = max(1, allocations[level] // len(concepts))
            for concept in concepts:
                summarized = self._summarize_concept(concept, alloc_per_concept)
                summarized_level.append(summarized)
            self.memory[level] = summarized_level

    def get_all_memory(self):
        all_data = []
        for level_concepts in self.memory.values():
            for concept in level_concepts:
                all_data.append(concept)
        return np.vstack(all_data) if all_data else np.empty((0,))

def scenario_design(normal_data, anomaly_data, c):
    """
    Implements Algorithm 1 to create a lifelong learning scenario.
    
    Args:
        normal_data (numpy array): The normal data points.
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired concepts.
    
    Returns:
        list of tuples: List of (normal_concept, anomaly_concept) pairs forming the scenario.
    """
    normal_concepts = create_phi(normal_data, c)
    anomaly_concepts = create_gamma(anomaly_data, c)
    
    scenario = match_lambda(anomaly_concepts, normal_concepts)
    
    return scenario

def evaluation_protocol(T, E, Y, model, strategy="naive", replay_buffer_size=5000, memory_size=5000, alpha=0.05):
    """
    Implements Algorithm 2: Lifelong Learning Evaluation Protocol with multiple strategies.
    
    Args:
        T (list): Sequence of N training sets.
        E (list): Sequence of N testing sets.
        Y (list): Sequence of true labels for test sets.
        model (sklearn.base.BaseEstimator): A scikit-learn-like model instance that supports `fit` and `decision_function`.
        strategy (str): Strategy for training.
        replay_buffer_size (int): Maximum size of replay buffer if applicable
        memory_size (int): Maximum memory size if applicable
        alpha (float): KS-test threshold for drift detection.

    Returns:
        numpy array: NxN results matrix R where R[i, j] is ROC-AUC of model on E[j] after learning T[i].
    """
    N = len(T)
    R = np.zeros((N, N))  

    if strategy in ["cumulative"]:
        cumulative_data = []
    
    if strategy in ["replay"]:
        replay_buffer = []

    if strategy == "SSF":
        memory_buffer = None 

    if strategy == "hierarchical":
        h_memory = HierarchicalMemory(memory_limit=memory_size, pyramid_factor=2, centroids_per_concept=10)

    for i, Ti in tqdm(enumerate(T), desc=f"Evaluating using {strategy} strategy"):
        current_model = deepcopy(model)

        # -- NAIVE --
        if strategy == "naive":
            current_model.fit(Ti)

        # -- CUMULATIVE --
        elif strategy == "cumulative":
            cumulative_data.extend(Ti.tolist())
            current_model.fit(np.array(cumulative_data)) 

        # -- REPLAY -- 
        elif strategy == "replay":
            if replay_buffer:
                combined_data = np.vstack((np.array(replay_buffer), Ti))
            else:
                combined_data = Ti

            current_model.fit(combined_data)
            replay_buffer.extend(Ti.tolist())

            if len(replay_buffer) > replay_buffer_size:
                replay_buffer = replay_buffer[-replay_buffer_size:]
        
        # -- SSF -- 
        elif strategy == "SSF":
            if memory_buffer is None:
                memory_buffer = Ti[:memory_size]  
            else:
                drift_detected = kolmogorov_smirnov_test(memory_buffer, Ti, alpha)
                if drift_detected:
                    X_new_selected = strategic_sample_selection(memory_buffer, Ti, top_k=1000)
                    memory_buffer = update_memory_buffer(memory_buffer, X_new_selected, memory_size=memory_size)
            memory_buffer = np.unique(memory_buffer, axis=0)
            current_model.fit(memory_buffer)

        # -- HIERARCHICAL --
        elif strategy == "hierarchical":

            memory_data = h_memory.get_all_memory()
            if memory_data.size == 0:
                drift_level = 1
            else:
                drift_distances = [
                    wasserstein_distance(Ti[:, d], memory_data[:, d])
                    for d in range(Ti.shape[1])
                ]
                drift_score = np.mean(drift_distances)
                print(f"drift: {drift_score}")
                
                if drift_score < 0.05:
                    drift_level = 1
                elif drift_score < 0.1:
                    drift_level = 2
                elif drift_score < 0.2:
                    drift_level = 3
                else:
                    drift_level = 4
        
            h_memory.add_concept(Ti, level=drift_level)
            summarized_memory = h_memory.get_all_memory()
            current_model.fit(summarized_memory)

        # -- Evaluation --
        for j, ((Ej_normal, Ej_anomaly), (y_normal, y_anomaly)) in enumerate(zip(E, Y)):
            test_data = np.vstack((Ej_normal, Ej_anomaly))
            test_labels = np.hstack((y_normal, y_anomaly))  
        
            scores = -current_model.decision_function(test_data)  
            R[i, j] = roc_auc_score(test_labels, scores)

    return R


# Experiments

In [5]:
num_concepts = 5

X_normal = X[y == 0]  
X_anomaly = X[y == 1]

normal_concepts = create_phi(X_normal, num_concepts)
anomaly_concepts = create_gamma(X_anomaly, num_concepts)

concept_pairs = match_lambda(anomaly_concepts, normal_concepts)

T = []  
E = [] 
Y = []

for normal, anomaly in concept_pairs:

    normal_train, normal_test = train_test_split(normal, test_size=0.3, random_state=42)
    anomaly_train, anomaly_test = train_test_split(anomaly, test_size=0.3, random_state=42)  

    T.append(normal_train)
    E.append((normal_test, anomaly_test))

    y_normal_test = np.zeros(len(normal_test))
    y_anomaly_test = np.ones(len(anomaly_test))
    
    Y.append((y_normal_test, y_anomaly_test))

Finished creating normal concepts
Finished creating anomaly concepts
Finished matching concept pairs


# Eval

## LOF

In [6]:
R_hm = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  1.63it/s]

drift: 0.2009141688186157


Evaluating using hierarchical strategy: 2it [00:01,  1.59it/s]

drift: 0.6825707131791178


Evaluating using hierarchical strategy: 3it [00:01,  1.50it/s]

drift: 0.5369153546908596


Evaluating using hierarchical strategy: 4it [00:02,  1.45it/s]

drift: 0.5778725606766417


Evaluating using hierarchical strategy: 5it [00:03,  1.48it/s]

Lifelong ROC-AUC: 0.6058086718178083, BWT: -0.05192413793713711, FWT: 0.8173483540003452





In [26]:
R_ssf = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 273.05it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 256.08it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 276.87it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 326.53it/s]

Evaluating using SSF strategy: 5it [00:05,  1.14s/it]

Lifelong ROC-AUC: 0.7544561432893502, BWT: -0.0039135441339650965, FWT: 0.45301001764958054





In [6]:
R_naive = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="naive")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:51, 10.36s/it]

Lifelong ROC-AUC: 0.6207107852264637, BWT: -0.283093404759226, FWT: 0.377989401800772





In [7]:
R_cumulative = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [02:10, 26.08s/it]

Lifelong ROC-AUC: 0.9238354062988797, BWT: -0.004499087364359666, FWT: 0.17379610432039533





In [8]:
R_replay = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:53, 10.79s/it]

Lifelong ROC-AUC: 0.7034898501276438, BWT: -0.19004147760711937, FWT: 0.24881712704471132





## IF

In [7]:
R_hm = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  3.46it/s]

drift: 0.2009141688186157


Evaluating using hierarchical strategy: 2it [00:00,  3.14it/s]

drift: 0.6825707131791178


Evaluating using hierarchical strategy: 3it [00:00,  2.95it/s]

drift: 0.5369153546908596


Evaluating using hierarchical strategy: 4it [00:01,  2.95it/s]

drift: 0.5778725606766417


Evaluating using hierarchical strategy: 5it [00:01,  3.02it/s]

Lifelong ROC-AUC: 0.7977446218811072, BWT: 0.053902109492241966, FWT: 0.6446608787208653





In [27]:
R_ssf = evaluation_protocol(T, E, Y,  IsolationForest(n_estimators=100), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 281.14it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 253.28it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 237.31it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 244.61it/s]

Evaluating using SSF strategy: 5it [00:05,  1.06s/it]

Lifelong ROC-AUC: 0.6559285630296237, BWT: 0.06868127809581366, FWT: 0.804018261100736





In [7]:
R_naive = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="naive")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:04,  1.09it/s]

Lifelong ROC-AUC: 0.6771475708237422, BWT: -0.22528601715677166, FWT: 0.6957773121657185





In [9]:
R_cumulative = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:07,  1.55s/it]

Lifelong ROC-AUC: 0.753897896406868, BWT: -0.0066406078135951676, FWT: 0.7719174809547474





In [10]:
R_replay = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:05,  1.05s/it]

Lifelong ROC-AUC: 0.6977725521692333, BWT: -0.16392284276343677, FWT: 0.7159532915927771





## SGDOCSVM

In [8]:
R_hm = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 2it [00:00,  8.02it/s]

drift: 0.2009141688186157
drift: 0.6825707131791178


Evaluating using hierarchical strategy: 4it [00:00,  5.20it/s]

drift: 0.5369153546908596


Evaluating using hierarchical strategy: 5it [00:00,  5.64it/s]

drift: 0.5778725606766417
Lifelong ROC-AUC: 0.835257953664189, BWT: -0.0019655490765778773, FWT: 0.9206040708906336





In [28]:
R_ssf = evaluation_protocol(T, E, Y,  SGDOneClassSVM(), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 288.13it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 307.55it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 306.85it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 271.76it/s]

Evaluating using SSF strategy: 5it [00:02,  2.14it/s]

Lifelong ROC-AUC: 0.790790866239933, BWT: -0.004091332504534584, FWT: 0.5467376505389003





In [16]:
R_naive = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="naive")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:00,  5.05it/s]

Lifelong ROC-AUC: 0.6589862939199218, BWT: -0.2399821951513669, FWT: 0.5658554538516768





In [17]:
R_cumulative = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:04,  1.03it/s]


Lifelong ROC-AUC: 0.597799790906325, BWT: -0.215598947769658, FWT: 0.6579349113663381


In [18]:
R_replay = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:01,  2.51it/s]

Lifelong ROC-AUC: 0.653901107789708, BWT: -0.23987138665874969, FWT: 0.5783463917929488





# SLAD

In [6]:
from deepod.models.tabular import SLAD

In [9]:
R_ssf = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 12 13 14 16 17 18 20 22 24 25 26 27 28 30 31
 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56 57 58
 59 60]
epoch  1, training loss: 0.713113, time: 1.2s
epoch 10, training loss: 0.658922, time: 1.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 276.47it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 2  3  4  5  6  8 10 11 12 13 14 16 17 18 19 20 21 22 23 26 27 28 30 31
 32 33 34 35 36 37 38 39 40 42 43 44 46 47 48 49 50 51 52 53 54 55 56 57
 58 59]
epoch  1, training loss: 0.689160, time: 0.2s
epoch 10, training loss: 0.635717, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   2%|▏         | 2/100 [00:00<00:00, 184.81it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8 10 11 12 13 14 16 17 18 19 20 21 22 23 25 26 27
 28 29 30 31 34 35 36 38 39 40 41 43 45 46 47 48 49 50 52 54 56 57 58 59
 60 61]
epoch  1, training loss: 0.712030, time: 1.3s
epoch 10, training loss: 0.662940, time: 1.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 303.10it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 12 13 14 15 16 17 18 19 20 21 22 23 25 26 27
 28 29 30 31 32 34 35 37 39 40 41 42 43 44 45 46 47 48 50 51 53 55 57 58
 59 60]
epoch  1, training loss: 0.694377, time: 0.2s
epoch 10, training loss: 0.636759, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 274.44it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 27
 28 30 31 32 33 34 36 37 38 40 41 44 45 46 47 49 51 52 53 55 56 57 58 59
 60 61]
epoch  1, training loss: 0.679085, time: 1.3s
epoch 10, training loss: 0.620422, time: 1.2s
Start Inference on the training data...


Evaluating using SSF strategy: 5it [10:17, 123.56s/it]

Lifelong ROC-AUC: 0.22979397963140194, BWT: 0.04452327661901747, FWT: 0.1780373793977931





In [10]:
R_naive = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="naive")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 12 13 14 16 17 18 20 22 24 25 26 27 28 30 31
 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56 57 58
 59 60]
epoch  1, training loss: 0.712536, time: 1.2s
epoch 10, training loss: 0.659050, time: 0.2s
Start Inference on the training data...


Evaluating using naive strategy: 1it [01:43, 103.43s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  5  6  9 10 11 12 13 15 16 18 19 20 21 22 23 24 25 26 27 28 29
 30 31 32 33 34 37 38 39 41 42 43 45 46 48 49 51 52 53 54 55 56 57 58 59
 60 61]
epoch  1, training loss: 0.731120, time: 0.2s
epoch 10, training loss: 0.658768, time: 1.0s
Start Inference on the training data...


Evaluating using naive strategy: 2it [03:35, 108.42s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6 10 11 13 14 15 16 17 18 19 20 21 22 25 26 27 29 30 31
 32 35 36 38 39 40 41 42 43 44 45 46 47 49 50 51 52 53 54 55 56 57 58 59
 60 61]
epoch  1, training loss: 0.732390, time: 0.9s
epoch 10, training loss: 0.639096, time: 0.9s
Start Inference on the training data...


Evaluating using naive strategy: 3it [06:21, 134.70s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 2  3  4  5  6  7  8  9 10 12 14 15 16 17 18 19 20 21 22 23 24 26 29 30
 31 32 34 35 36 37 38 40 41 42 43 44 45 46 47 48 49 50 51 53 54 56 57 59
 60 61]
epoch  1, training loss: 0.725860, time: 1.3s
epoch 10, training loss: 0.669007, time: 0.3s
Start Inference on the training data...


Evaluating using naive strategy: 4it [09:04, 146.09s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  4  5  6  7  8  9 10 11 12 13 14 15 17 18 21 22 23 24 25 27 28 30
 31 33 34 35 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 57
 60 61]
epoch  1, training loss: 0.738106, time: 0.4s
epoch 10, training loss: 0.671427, time: 0.1s
Start Inference on the training data...


Evaluating using naive strategy: 5it [11:50, 142.19s/it]

Lifelong ROC-AUC: 0.42501030198745793, BWT: 0.3790839248665491, FWT: 0.34203425505100765





In [11]:
R_replay = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 12 13 14 16 17 18 20 22 24 25 26 27 28 30 31
 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56 57 58
 59 60]
epoch  1, training loss: 0.712536, time: 1.2s
epoch 10, training loss: 0.659050, time: 0.2s
Start Inference on the training data...


Evaluating using replay strategy: 1it [01:24, 84.78s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  5  6  9 10 11 12 13 15 16 18 19 20 21 22 23 24 25 26 27 28 29
 30 31 32 33 34 37 38 39 41 42 43 45 46 48 49 51 52 53 54 55 56 57 58 59
 60 61]
epoch  1, training loss: 0.633241, time: 2.3s
epoch 10, training loss: 0.586830, time: 0.4s
Start Inference on the training data...


Evaluating using replay strategy: 2it [03:56, 124.41s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6 10 11 13 14 15 16 17 18 19 20 21 22 25 26 27 29 30 31
 32 35 36 38 39 40 41 42 43 44 45 46 47 49 50 51 52 53 54 55 56 57 58 59
 60 61]
epoch  1, training loss: 0.745461, time: 2.9s
epoch 10, training loss: 0.679813, time: 0.4s
Start Inference on the training data...


Evaluating using replay strategy: 3it [05:40, 115.07s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 2  3  4  5  6  7  8  9 10 12 14 15 16 17 18 19 20 21 22 23 24 26 29 30
 31 32 34 35 36 37 38 40 41 42 43 44 45 46 47 48 49 50 51 53 54 56 57 59
 60 61]
epoch  1, training loss: 0.734115, time: 1.5s
epoch 10, training loss: 0.652431, time: 1.1s
Start Inference on the training data...


Evaluating using replay strategy: 4it [08:46, 143.03s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  4  5  6  7  8  9 10 11 12 13 14 15 17 18 21 22 23 24 25 27 28 30
 31 33 34 35 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 57
 60 61]
epoch  1, training loss: 0.764536, time: 0.5s
epoch 10, training loss: 0.685621, time: 0.2s
Start Inference on the training data...


Evaluating using replay strategy: 5it [11:35, 139.16s/it]

Lifelong ROC-AUC: 0.375337019597195, BWT: 0.27761302291808937, FWT: 0.425066935526481





# ICL

In [12]:
from deepod.models.tabular import ICL

In [13]:
R_ssf = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 42/42 [00:00<00:00, 676.29it/s]

testing: 100%|██████████| 42/42 [00:00<00:00, 686.72it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 683.96it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 680.37it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 686.76it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 683.30it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 684.68it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 686.49it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 684.90it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 683.12it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 683.49it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 686.02it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 42/42 [00:00<00:00, 688.02it/s]

testing: 100%|██████████| 42/42 [00:00<00:00, 692.07it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 686.64it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 681.80it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 687.01it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 684.28it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 680.38it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 681.54it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 681.69it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 679.62it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 675.10it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 684.30it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 43/43 [00:00<00:00, 684.25it/s]

testing: 100%|██████████| 43/43 [00:00<00:00, 685.73it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 685.05it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 684.05it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 684.59it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 682.09it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 682.33it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 683.06it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 683.10it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 682.44it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 682.10it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▉        | 70/370 [00:00<00:00, 690.43it/s][A
testing:  38%|███▊      | 140/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 43/43 [00:00<00:00, 692.21it/s]

testing: 100%|██████████| 43/43 [00:00<00:00, 686.98it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 686.02it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 684.03it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 688.03it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 687.06it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 685.45it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 686.41it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 686.97it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 684.94it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 683.47it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 684.32it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 44/44 [00:00<00:00, 693.96it/s]

testing: 100%|██████████| 44/44 [00:00<00:00, 689.75it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 686.92it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 685.81it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  42%|████▏     | 70/168 [00:00<00:00, 691.06it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 689.18it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 684.41it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 682.50it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 682.85it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 681.66it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 680.20it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 682.95it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Lifelong ROC-AUC: 0.21140831558070158, BWT: 0.04032918546117241, FWT: 0.26611288741057415





In [14]:
R_naive = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="naive")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 42/42 [00:00<00:00, 692.34it/s]

testing: 100%|██████████| 42/42 [00:00<00:00, 688.24it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 687.06it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 680.63it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 684.72it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 683.26it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 681.58it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 680.51it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 681.01it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 679.76it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 679.70it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 686.99it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 36/36 [00:00<00:00, 689.31it/s]

testing: 100%|██████████| 36/36 [00:00<00:00, 694.12it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  42%|████▏     | 70/168 [00:00<00:00, 690.64it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 688.39it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 684.22it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 683.21it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 681.69it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 684.92it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 683.99it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 681.86it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 681.43it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 685.53it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/81 [00:00<?, ?it/s][A
testing: 100%|██████████| 81/81 [00:00<00:00, 685.17it/s][A

testing:   0%|          | 0/81 [00:00<?, ?it/s][A
testing: 100%|██████████| 81/81 [00:00<00:00, 684.02it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 684.81it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 683.75it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  42%|████▏     | 70/168 [00:00<00:00, 690.79it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 686.17it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 682.85it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 685.36it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 685.09it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 683.78it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 683.00it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 46/46 [00:00<00:00, 687.58it/s]

testing: 100%|██████████| 46/46 [00:00<00:00, 690.67it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 684.99it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 682.79it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 687.13it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 683.22it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 684.12it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 685.41it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 684.77it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 683.20it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 682.33it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 685.09it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 24/24 [00:00<00:00, 683.87it/s]

testing: 100%|██████████| 24/24 [00:00<00:00, 678.30it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 689.84it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 686.81it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  42%|████▏     | 70/168 [00:00<00:00, 691.90it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 687.98it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 682.28it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 682.23it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 683.43it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 682.15it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 681.50it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 688.43it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Lifelong ROC-AUC: 0.366368888036986, BWT: 0.3468663853036834, FWT: 0.27846962518715346





In [15]:
R_replay = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 42/42 [00:00<00:00, 685.78it/s]

testing: 100%|██████████| 42/42 [00:00<00:00, 687.98it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  40%|████      | 68/168 [00:00<00:00, 677.80it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 678.98it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 684.95it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 682.73it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  18%|█▊        | 68/370 [00:00<00:00, 679.72it/s][A
testing:  37%|███▋      | 136/370 [00:00<00:00, 679.09it/s][A
testing:  55%|█████▌    | 205/370 [00:00<00:00, 680.07it/s][A
testing:  74%|███████▍  | 274/370 [00:00<00:00, 679.07it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 677.61it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 685.14it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/78 [00:00<?, ?it/s][A
testing: 100%|██████████| 78/78 [00:00<00:00, 684.32it/s][A

testing:   0%|          | 0/78 [00:00<?, ?it/s][A
testing: 100%|██████████| 78/78 [00:00<00:00, 683.03it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 683.85it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 684.08it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 685.55it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 684.86it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 684.08it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 686.44it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 686.79it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 683.99it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 683.60it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/158 [00:00<?, ?it/s][A
testing:  44%|████▍     | 70/158 [00:00<00:00, 692.04it/s][A
testing: 100%|██████████| 158/158 [00:00<00:00, 686.56it/s][A

testing:   0%|          | 0/158 [00:00<?, ?it/s][A
testing:  44%|████▎     | 69/158 [00:00<00:00, 688.55it/s][A
testing: 100%|██████████| 158/158 [00:00<00:00, 687.05it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 687.47it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 682.97it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  42%|████▏     | 70/168 [00:00<00:00, 691.50it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 685.75it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  18%|█▊        | 68/370 [00:00<00:00, 679.42it/s][A
testing:  37%|███▋      | 137/370 [00:00<00:00, 683.55it/s][A
testing:  56%|█████▌    | 206/370 [00:00<00:00, 684.70it/s][A
testing:  74%|███████▍  | 275/370 [00:00<00:00,

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/124 [00:00<?, ?it/s][A
testing: 100%|██████████| 124/124 [00:00<00:00, 687.75it/s][A

testing:   0%|          | 0/124 [00:00<?, ?it/s][A
testing: 100%|██████████| 124/124 [00:00<00:00, 682.15it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 683.69it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 681.84it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 687.39it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 684.40it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 687.01it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 687.21it/s][A
testing:  56%|█████▌    | 207/370 [00:00<00:00, 684.58it/s][A
testing:  75%|███████▍  | 276/370 [00:00<00:00, 683.37it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 683.13it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=51, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(52, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/103 [00:00<?, ?it/s][A
testing: 100%|██████████| 103/103 [00:00<00:00, 684.71it/s][A

testing:   0%|          | 0/103 [00:00<?, ?it/s][A
testing: 100%|██████████| 103/103 [00:00<00:00, 685.83it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 683.19it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 680.19it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing:  41%|████      | 69/168 [00:00<00:00, 682.71it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 679.16it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  19%|█▊        | 69/370 [00:00<00:00, 680.54it/s][A
testing:  37%|███▋      | 138/370 [00:00<00:00, 679.24it/s][A
testing:  56%|█████▌    | 206/370 [00:00<00:00, 679.13it/s][A
testing:  74%|███████▍  | 274/370 [00:00<00:00, 678.95it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 678.11it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/

Lifelong ROC-AUC: 0.3699878547155736, BWT: 0.29572369486164496, FWT: 0.3692735205720691





# RCA

In [16]:
from deepod.models.tabular import RCA

In [17]:
R_ssf = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 28.15it/s][A
 60%|██████    | 6/10 [00:00<00:00, 28.02it/s][A
100%|██████████| 10/10 [00:00<00:00, 27.93it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.02it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.01it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.00it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.00it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.00it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.00it/s][A
 70%|███████   | 7/10 [00:01<00:00,  6.98it/s][A
 80%|████████  | 8/10 [00:01<00:00,  6.99it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  6.99it/s][A
100%|██████████| 10/10 [00:01<00:00,  6.99it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.17it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.16it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.15it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.16it/s][A
 50%|█████     | 5/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 28.14it/s][A
 60%|██████    | 6/10 [00:00<00:00, 28.07it/s][A
100%|██████████| 10/10 [00:00<00:00, 27.96it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.00it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.98it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.98it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.99it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.99it/s][A
 60%|██████    | 6/10 [00:00<00:00,  6.99it/s][A
 70%|███████   | 7/10 [00:01<00:00,  6.99it/s][A
 80%|████████  | 8/10 [00:01<00:00,  6.99it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  6.99it/s][A
100%|██████████| 10/10 [00:01<00:00,  6.98it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.19it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.18it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.17it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.18it/s][A
 50%|█████     | 5/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 27.75it/s][A
 60%|██████    | 6/10 [00:00<00:00, 27.50it/s][A
100%|██████████| 10/10 [00:00<00:00, 27.45it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.07it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.03it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.02it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.01it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.01it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.01it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.00it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.01it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.01it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.00it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.16it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.16it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.15it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.17it/s][A
 50%|█████     | 5/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 27.48it/s][A
 60%|██████    | 6/10 [00:00<00:00, 27.46it/s][A
100%|██████████| 10/10 [00:00<00:00, 27.39it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.08it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.06it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.06it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.06it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.05it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.05it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.05it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.04it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.04it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.04it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.18it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.19it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.17it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.18it/s][A
 50%|█████     | 5/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 27.28it/s][A
 60%|██████    | 6/10 [00:00<00:00, 27.11it/s][A
100%|██████████| 10/10 [00:00<00:00, 26.95it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.03it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.01it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.02it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.06it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.07it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.07it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.05it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.05it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.03it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.04it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.20it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.19it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.18it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.18it/s][A
 50%|█████     | 5/1

Lifelong ROC-AUC: 0.3150596094401886, BWT: 0.008468256555611004, FWT: 0.40685438222971354





In [18]:
R_naive = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="naive")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 28.19it/s][A
 60%|██████    | 6/10 [00:00<00:00, 28.08it/s][A
100%|██████████| 10/10 [00:00<00:00, 28.00it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.05it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.05it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.05it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.05it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.06it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.06it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.05it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.05it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.04it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.04it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.16it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.16it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.16it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.18it/s][A
 50%|█████     | 5/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 40%|████      | 4/10 [00:00<00:00, 32.78it/s][A
100%|██████████| 10/10 [00:00<00:00, 32.58it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.05it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.04it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.01it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.01it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.01it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.01it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.02it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.02it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.01it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.01it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.17it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.16it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.18it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.19it/s][A
 50%|█████     | 5/10 [00:01<00:01,  3.19it/s][A
 60%|██████    | 6/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.65it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.60it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.60it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.58it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.56it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.03it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.02it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.03it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.02it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.01it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.02it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.02it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.02it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.01it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.01it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.17it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.17it/s][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 26.06it/s][A
 60%|██████    | 6/10 [00:00<00:00, 25.91it/s][A
100%|██████████| 10/10 [00:00<00:00, 25.68it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.12it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.13it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.12it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.09it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.06it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.04it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.03it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.02it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.02it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.04it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.21it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.18it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.19it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.19it/s][A
 50%|█████     | 5/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 50%|█████     | 5/10 [00:00<00:00, 49.04it/s][A
100%|██████████| 10/10 [00:00<00:00, 48.77it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.08it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.08it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.10it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.11it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.11it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.11it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.10it/s][A
 80%|████████  | 8/10 [00:01<00:00,  6.59it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  6.72it/s][A
100%|██████████| 10/10 [00:01<00:00,  6.91it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.20it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.20it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.20it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.20it/s][A
 50%|█████     | 5/10 [00:01<00:01,  3.20it/s][A
 60%|██████    | 6/

Lifelong ROC-AUC: 0.4184634705864608, BWT: 0.2998736935141256, FWT: 0.4351211109995251





In [19]:
R_replay = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 28.54it/s][A
 60%|██████    | 6/10 [00:00<00:00, 28.45it/s][A
100%|██████████| 10/10 [00:00<00:00, 28.34it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.13it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.10it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.11it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.11it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.11it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.11it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.11it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.11it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.11it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.10it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.22it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.23it/s][A
 30%|███       | 3/10 [00:00<00:02,  3.22it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.21it/s][A
 50%|█████     | 5/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.83it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.98it/s][A
 60%|██████    | 6/10 [00:00<00:00, 15.04it/s][A
 80%|████████  | 8/10 [00:00<00:00, 15.07it/s][A
100%|██████████| 10/10 [00:00<00:00, 15.03it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.01it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.00it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.01it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.01it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.00it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.00it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.00it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.00it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.00it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.00it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.69it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.33it/s][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.55it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.48it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.48it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.47it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.48it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.47it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.48it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.48it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.49it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.48it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.10it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.06it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.04it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.03it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.04it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.04it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.03it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.03it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:00,  9.64it/s][A
 20%|██        | 2/10 [00:00<00:00,  9.60it/s][A
 30%|███       | 3/10 [00:00<00:00,  9.61it/s][A
 40%|████      | 4/10 [00:00<00:00,  9.62it/s][A
 50%|█████     | 5/10 [00:00<00:00,  9.63it/s][A
 60%|██████    | 6/10 [00:00<00:00,  9.63it/s][A
 70%|███████   | 7/10 [00:00<00:00,  9.64it/s][A
 80%|████████  | 8/10 [00:00<00:00,  9.62it/s][A
 90%|█████████ | 9/10 [00:00<00:00,  9.62it/s][A
100%|██████████| 10/10 [00:01<00:00,  9.61it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.14it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.13it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.14it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.12it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.12it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.12it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.10it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.08it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=61, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 11.56it/s][A
 40%|████      | 4/10 [00:00<00:00, 11.52it/s][A
 60%|██████    | 6/10 [00:00<00:00, 11.52it/s][A
 80%|████████  | 8/10 [00:00<00:00, 11.51it/s][A
100%|██████████| 10/10 [00:00<00:00, 11.49it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  7.03it/s][A
 20%|██        | 2/10 [00:00<00:01,  7.03it/s][A
 30%|███       | 3/10 [00:00<00:00,  7.04it/s][A
 40%|████      | 4/10 [00:00<00:00,  7.03it/s][A
 50%|█████     | 5/10 [00:00<00:00,  7.03it/s][A
 60%|██████    | 6/10 [00:00<00:00,  7.03it/s][A
 70%|███████   | 7/10 [00:00<00:00,  7.02it/s][A
 80%|████████  | 8/10 [00:01<00:00,  7.03it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  7.03it/s][A
100%|██████████| 10/10 [00:01<00:00,  7.03it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.21it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.19it/s][A
 30%|███       | 3/

Lifelong ROC-AUC: 0.38759839235676985, BWT: 0.2463374375288844, FWT: 0.3641182664095414





# RDP

In [20]:
from deepod.models.tabular import RDP

In [21]:
R_ssf = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000017, time: 0.5s
epoch 10, training loss: 0.000017, time: 0.5s
Start Inference on the training data...



testing: 100%|██████████| 42/42 [00:00<00:00, 1133.13it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1115.31it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  31%|███       | 114/370 [00:00<00:00, 1132.22it/s][A
testing:  62%|██████▏   | 228/370 [00:00<00:00, 1124.40it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1119.47it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|███       | 113/375 [00:00<00:00, 1120.07it/s][A
testing:  60%|██████    | 226/375 [00:00<00:00, 1121.82it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1119.63it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1111.58it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1123.36it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1117.06it/s][A
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 323.56it/s]


Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000026, time: 0.3s
epoch 10, training loss: 0.000018, time: 0.1s
Start Inference on the training data...



testing: 100%|██████████| 42/42 [00:00<00:00, 1124.95it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1115.64it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  31%|███       | 113/370 [00:00<00:00, 1122.06it/s][A
testing:  61%|██████    | 226/370 [00:00<00:00, 1121.92it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1119.64it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1119.33it/s][A
testing:  60%|██████    | 225/375 [00:00<00:00, 1121.42it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1120.76it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1111.33it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1125.01it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1121.87it/s][A
Optimizing Sample Selection:   3%|▎         | 3/100 [00:00<00:00, 263.06it/s]


Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000022, time: 0.4s
epoch 10, training loss: 0.000023, time: 0.1s
Start Inference on the training data...



testing: 100%|██████████| 43/43 [00:00<00:00, 993.51it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1012.41it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  28%|██▊       | 104/370 [00:00<00:00, 1030.88it/s][A
testing:  56%|█████▌    | 208/370 [00:00<00:00, 1013.37it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 998.95it/s] [A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  26%|██▌       | 98/375 [00:00<00:00, 979.49it/s][A
testing:  53%|█████▎    | 197/375 [00:00<00:00, 981.62it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 984.11it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 965.16it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  42%|████▏     | 100/238 [00:00<00:00, 997.74it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1005.48it/s][A
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 204.51it/s]


Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000018, time: 0.5s
epoch 10, training loss: 0.000024, time: 0.5s
Start Inference on the training data...



testing: 100%|██████████| 43/43 [00:00<00:00, 1128.31it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1124.53it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  31%|███       | 113/370 [00:00<00:00, 1123.56it/s][A
testing:  61%|██████▏   | 227/370 [00:00<00:00, 1130.03it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1126.28it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|███       | 113/375 [00:00<00:00, 1121.29it/s][A
testing:  60%|██████    | 226/375 [00:00<00:00, 1124.34it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1122.44it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1119.02it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1128.08it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1125.11it/s][A
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 315.91it/s]


Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000022, time: 0.5s
epoch 10, training loss: 0.000026, time: 0.5s
Start Inference on the training data...



testing: 100%|██████████| 44/44 [00:00<00:00, 1134.36it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1123.64it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  31%|███       | 113/370 [00:00<00:00, 1121.11it/s][A
testing:  61%|██████    | 226/370 [00:00<00:00, 1122.00it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1119.28it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1117.21it/s][A
testing:  60%|██████    | 226/375 [00:00<00:00, 1124.46it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1120.79it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1117.02it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  48%|████▊     | 114/238 [00:00<00:00, 1133.37it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1122.85it/s][A
Evaluating using SSF strategy: 5it [00:30,  6.09s/it]

Lifelong ROC-AUC: 0.22137051134872962, BWT: 0.04914431520735178, FWT: 0.31390066909618175





In [22]:
R_naive = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="naive")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000016, time: 0.5s
epoch 10, training loss: 0.000018, time: 0.5s
Start Inference on the training data...



testing: 100%|██████████| 42/42 [00:00<00:00, 1129.05it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1126.35it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  30%|███       | 112/370 [00:00<00:00, 1118.48it/s][A
testing:  61%|██████    | 225/370 [00:00<00:00, 1119.62it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1120.05it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|███       | 113/375 [00:00<00:00, 1120.18it/s][A
testing:  60%|██████    | 226/375 [00:00<00:00, 1123.55it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1121.42it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1111.03it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1124.47it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1119.22it/s][A
Evaluating using naive strategy: 1it [00:05,  5.99s/it]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000014, time: 0.4s
epoch 10, training loss: 0.000022, time: 0.1s
Start Inference on the training data...



testing: 100%|██████████| 36/36 [00:00<00:00, 1122.00it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1116.15it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  30%|███       | 112/370 [00:00<00:00, 1116.31it/s][A
testing:  61%|██████    | 224/370 [00:00<00:00, 1114.08it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1106.72it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1114.77it/s][A
testing:  60%|█████▉    | 224/375 [00:00<00:00, 1115.45it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1111.66it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1106.74it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1120.59it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1115.47it/s][A
Evaluating using naive strategy: 2it [00:08,  4.20s/it]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000009, time: 0.2s
epoch 10, training loss: 0.000030, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 81/81 [00:00<00:00, 1129.16it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1086.33it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  30%|███       | 112/370 [00:00<00:00, 1115.37it/s][A
testing:  61%|██████    | 224/370 [00:00<00:00, 1111.97it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1109.17it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1114.72it/s][A
testing:  60%|█████▉    | 224/375 [00:00<00:00, 1115.00it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1111.31it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1104.02it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 112/238 [00:00<00:00, 1116.56it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1108.78it/s][A
Evaluating using naive strategy: 3it [00:16,  5.60s/it]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000022, time: 0.1s
epoch 10, training loss: 0.000026, time: 0.1s
Start Inference on the training data...



testing: 100%|██████████| 46/46 [00:00<00:00, 1123.59it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1122.09it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  31%|███       | 113/370 [00:00<00:00, 1122.93it/s][A
testing:  61%|██████    | 226/370 [00:00<00:00, 1125.84it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1119.42it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1115.81it/s][A
testing:  60%|█████▉    | 224/375 [00:00<00:00, 1117.29it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1117.66it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1118.85it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1122.24it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1116.55it/s][A
Evaluating using naive strategy: 4it [00:18,  4.39s/it]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000041, time: 0.3s
epoch 10, training loss: 0.000014, time: 0.3s
Start Inference on the training data...



testing: 100%|██████████| 24/24 [00:00<00:00, 1104.42it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1122.34it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  30%|███       | 112/370 [00:00<00:00, 1118.52it/s][A
testing:  61%|██████    | 225/370 [00:00<00:00, 1121.36it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1122.71it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|███       | 113/375 [00:00<00:00, 1120.68it/s][A
testing:  60%|██████    | 226/375 [00:00<00:00, 1122.46it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1116.94it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1120.44it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 112/238 [00:00<00:00, 1119.42it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1114.80it/s][A
Evaluating using naive strategy: 5it [00:22,  4.56s/it]

Lifelong ROC-AUC: 0.39524095441962026, BWT: 0.3392051136682391, FWT: 0.38131470018108704





In [23]:
R_replay = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000016, time: 0.5s
epoch 10, training loss: 0.000018, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 42/42 [00:00<00:00, 1099.73it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1125.93it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  31%|███       | 113/370 [00:00<00:00, 1128.87it/s][A
testing:  61%|██████    | 226/370 [00:00<00:00, 1127.04it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1126.68it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|███       | 113/375 [00:00<00:00, 1124.49it/s][A
testing:  61%|██████    | 227/375 [00:00<00:00, 1129.32it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1125.56it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1112.12it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1128.89it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1123.85it/s][A
Evaluating using replay strategy: 1it [00:05,  5.43s/it]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000015, time: 0.2s
epoch 10, training loss: 0.000026, time: 0.9s
Start Inference on the training data...



testing: 100%|██████████| 78/78 [00:00<00:00, 1126.67it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1119.22it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  31%|███       | 113/370 [00:00<00:00, 1118.47it/s][A
testing:  61%|██████    | 226/370 [00:00<00:00, 1121.84it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1120.04it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1119.37it/s][A
testing:  60%|██████    | 225/375 [00:00<00:00, 1123.79it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1123.69it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1110.51it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1120.45it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1119.22it/s][A
Evaluating using replay strategy: 2it [00:13,  7.24s/it]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000015, time: 1.9s
epoch 10, training loss: 0.000045, time: 1.8s
Start Inference on the training data...



testing:   0%|          | 0/158 [00:00<?, ?it/s][A
testing: 100%|██████████| 158/158 [00:00<00:00, 1067.30it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1120.69it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  30%|███       | 112/370 [00:00<00:00, 1119.66it/s][A
testing:  61%|██████    | 224/370 [00:00<00:00, 1117.57it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1111.94it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1112.01it/s][A
testing:  60%|█████▉    | 224/375 [00:00<00:00, 1115.50it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1115.07it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1104.26it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 112/238 [00:00<00:00, 1119.90it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1115.13it/s][A
Evaluating using replay strategy: 3

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000015, time: 0.5s
epoch 10, training loss: 0.000042, time: 0.3s
Start Inference on the training data...



testing:   0%|          | 0/124 [00:00<?, ?it/s][A
testing: 100%|██████████| 124/124 [00:00<00:00, 1093.03it/s][A

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1110.57it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  30%|███       | 111/370 [00:00<00:00, 1107.54it/s][A
testing:  60%|██████    | 223/370 [00:00<00:00, 1111.21it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1109.07it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1114.68it/s][A
testing:  60%|██████    | 225/375 [00:00<00:00, 1123.50it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1114.06it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1113.00it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 112/238 [00:00<00:00, 1118.83it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1116.74it/s][A
Evaluating using replay strategy: 4

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=61, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000017, time: 1.3s
epoch 10, training loss: 0.000045, time: 1.2s
Start Inference on the training data...



testing: 100%|██████████| 103/103 [00:00<00:00, 1135.33it/s]

testing:   0%|          | 0/168 [00:00<?, ?it/s][A
testing: 100%|██████████| 168/168 [00:00<00:00, 1124.26it/s][A

testing:   0%|          | 0/370 [00:00<?, ?it/s][A
testing:  30%|███       | 112/370 [00:00<00:00, 1116.21it/s][A
testing:  61%|██████    | 225/370 [00:00<00:00, 1119.31it/s][A
testing: 100%|██████████| 370/370 [00:00<00:00, 1117.86it/s][A

testing:   0%|          | 0/375 [00:00<?, ?it/s][A
testing:  30%|██▉       | 112/375 [00:00<00:00, 1117.41it/s][A
testing:  60%|██████    | 226/375 [00:00<00:00, 1125.90it/s][A
testing: 100%|██████████| 375/375 [00:00<00:00, 1120.64it/s][A

testing: 100%|██████████| 61/61 [00:00<00:00, 1117.39it/s]

testing:   0%|          | 0/238 [00:00<?, ?it/s][A
testing:  47%|████▋     | 113/238 [00:00<00:00, 1124.05it/s][A
testing: 100%|██████████| 238/238 [00:00<00:00, 1120.21it/s][A
Evaluating using replay strategy: 5it [00:52, 10.49s/it]

Lifelong ROC-AUC: 0.39267806177273507, BWT: 0.2900278717009567, FWT: 0.4353395144027494



