# Imports

In [1]:
import numpy as np
#import pandas as pd
#import matplotlib.pyplot as plt
from tqdm import tqdm
import math

from tqdm import tqdm
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from copy import deepcopy

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.base import clone

from scipy.spatial.distance import cdist
from scipy.stats import ks_2samp
from scipy.optimize import minimize
from scipy.stats import wasserstein_distance

from sklearn.metrics import average_precision_score

from k_means_constrained import KMeansConstrained

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/conda/lib/python3.11/runpy.py", line 198, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/lib/python3.11/runpy.py", line 88, in _run_code
    exec(code, run_globals)
  File "/home/jovyan/Desktop/AdversarialAttacks/_SSL/mqtt/deepod2/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/jovyan/Desktop/AdversarialAttacks/_SSL/mqtt/deepod2/lib/python3.11/site-packages/traitlets/config/application.py", line 1075

In [4]:
X_train = np.load('data/x_train.npy')
y_train = np.load('data/y_train.npy')

X_test = np.load('data/x_test.npy')
y_test = np.load('data/y_test.npy')

X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

y = np.where(y == 4, 0, 1)

In [5]:
len(X_train), len(y_train), len(X_test), len(y_test)

(955571, 955571, 238893, 238893)

# Setup

In [6]:
def create_phi(normal_data, c):
    """
    Concept creation function for normal data.
    Uses k-Means clustering to partition normal data into c clusters.
    
    Args:
        normal_data (numpy array): The normal data points.
        c (int): Number of desired normal concepts.
    
    Returns:
        list of numpy arrays: List of normal clusters.
    """
    kmeans = KMeansConstrained(n_clusters=c, size_min=20, random_state=42)
    labels = kmeans.fit_predict(normal_data)
    
    normal_concepts = [normal_data[labels == i] for i in range(c)]
    print("Finished creating normal concepts")
    
    return normal_concepts


def create_gamma(anomaly_data, c):
    """
    Concept creation function for anomaly data.
    Uses k-Means clustering to partition anomaly data into c clusters.
    
    Args:
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired anomaly concepts.
    
    Returns:
        list of numpy arrays: List of anomaly clusters.
    """
    kmeans = KMeansConstrained(n_clusters=c, size_min=20, random_state=42)
    labels = kmeans.fit_predict(anomaly_data)
    
    anomaly_concepts = [anomaly_data[labels == i] for i in range(c)]
    print("Finished creating anomaly concepts")
    
    return anomaly_concepts
    
def match_lambda(anomaly_concepts, normal_concepts):
    """
    Matches each normal concept with the closest anomaly concept.
    Uses Euclidean distance to determine the best match.
    
    Args:
        anomaly_concepts (list of numpy arrays): List of anomaly clusters.
        normal_concepts (list of numpy arrays): List of normal clusters.
    
    Returns:
        list of tuples: Pairs of (normal_concept, matched_anomaly_concept)
    """
    pairs = []
    remaining_anomalies = anomaly_concepts.copy()

    for normal_concept in normal_concepts:
        normal_centroid = np.mean(normal_concept, axis=0)
        anomaly_centroids = [np.mean(ac, axis=0) for ac in remaining_anomalies]

        distances = cdist([normal_centroid], anomaly_centroids, metric='euclidean')[0]
        closest_idx = np.argmin(distances)

        pairs.append((normal_concept, remaining_anomalies[closest_idx]))
        remaining_anomalies.pop(closest_idx)

    print("Finished matching concept pairs")
    
    return pairs

def lifelong_roc_auc(R):
    """
    Computes the Lifelong ROC-AUC metric.
    
    Args:
        R (numpy array): NxN matrix of ROC-AUC scores, where R[i, j] is the model's 
                         performance on concept j after learning concept i.
    
    Returns:
        float: Lifelong ROC-AUC score.
    """
    N = R.shape[0]
    lower_triangular_sum = np.sum(np.tril(R))
    normalization_factor = (N * (N + 1)) / 2

    return lower_triangular_sum / normalization_factor

def BWT(R):
    """
    Computes the Backward Transfer (BWT) score.
    
    Args:
        R (numpy array): NxN results matrix.
    
    Returns:
        float: BWT score.
    """
    N = R.shape[0]
    backward_transfer = 0
    count = 0

    for i in range(1, N):
        for j in range(i):
            backward_transfer += (R[i, j] - R[j, j])
            count += 1

    return backward_transfer / count if count > 0 else 0

def FWT(R):
    """
    Computes the Forward Transfer (FWT) score.
    
    Args:
        R (numpy array): NxN results matrix.
    
    Returns:
        float: FWT score.
    """
    N = R.shape[0]
    forward_transfer = 0
    count = 0

    for i in range(N):
        for j in range(i + 1, N): 
            forward_transfer += R[i, j]
            count += 1

    return forward_transfer / count if count > 0 else 0 

def kolmogorov_smirnov_test(X_old, X_new, alpha=0.05):
    """Detect concept drift using KS-test on feature distributions."""
    
    p_values = [ks_2samp(X_old[:, i], X_new[:, i]).pvalue for i in range(X_old.shape[1])]
    return np.any(np.array(p_values) < alpha)

def histogram_binning(X, bins=25):
    """Convert sample distributions into histograms."""
    
    return np.array([np.histogram(X[:, i], bins=bins, density=True)[0] for i in range(X.shape[1])]).T

def kl_divergence(P, Q):
    """Compute KL divergence between two distributions."""
    
    P, Q = np.clip(P, 1e-10, None), np.clip(Q, 1e-10, None)  # Avoid log(0)
    return np.sum(P * np.log(P / Q))

def strategic_sample_selection(X_old, X_new, top_k=100, learning_rate=0.01, num_iterations=100):
    """
    Selects representative new samples by minimizing KL divergence.
    
    Args:
        X_old (numpy.ndarray): Old memory buffer samples.
        X_new (numpy.ndarray): Incoming new samples.
        top_k (int): Number of samples to retain.
        learning_rate (float): Step size for optimization.
        num_iterations (int): Number of optimization steps.

    Returns:
        numpy.ndarray: Selected representative new samples.
    """
    
    H_old, H_new = histogram_binning(X_old), histogram_binning(X_new)
    m_n = np.random.rand(H_new.shape[0])  

    def loss_function(m_n):
        """Computes KL divergence loss for selected samples."""
        weighted_H_new = H_new * m_n[:, np.newaxis]  
        combined_H = (H_old + weighted_H_new) / 2 
        return kl_divergence(H_new, combined_H) 

    progress_bar = tqdm(total=num_iterations, desc="Optimizing Sample Selection", position=0, leave=True)

    def callback(xk):
        progress_bar.update(1)  

    result = minimize(loss_function, m_n, method="L-BFGS-B", bounds=[(0, 1)] * len(m_n), 
                      options={"maxiter": num_iterations, "ftol": 1e-10}, callback=callback)

    progress_bar.close()

    selected_indices = np.argsort(result.x)[-top_k:]

    return X_new[selected_indices] 


def update_memory_buffer(X_old, X_new_selected, memory_size=3000):
    """Updates memory buffer using strategic forgetting."""
    updated_buffer = np.vstack((X_old, X_new_selected))  

    if updated_buffer.shape[0] > memory_size:
        updated_buffer = updated_buffer[-memory_size:]

    return updated_buffer

class HierarchicalMemory:
    def __init__(self, memory_limit=5000, pyramid_factor=2, centroids_per_concept=10):
        self.memory_limit = memory_limit
        self.pyramid_factor = pyramid_factor
        self.centroids_per_concept = centroids_per_concept
        self.memory = {}  # level: [concept1, concept2, ...]

    def add_concept(self, data, level=1):
        if level not in self.memory:
            self.memory[level] = []
        self.memory[level].append(np.array(data))
        self._summarize_memory()

    def _pyramidal_allocation(self):
        levels = sorted(self.memory.keys())
        weights = np.array([1 / (self.pyramid_factor ** (lvl - 1)) for lvl in levels])
        total_weight = weights.sum()
        allocations = (weights / total_weight) * self.memory_limit
        return {lvl: int(alloc) for lvl, alloc in zip(levels, allocations)}

    def _summarize_concept(self, concept, n_samples):
        if len(concept) <= n_samples:
            return concept
        kmeans = KMeans(n_clusters=min(self.centroids_per_concept, len(concept)), random_state=42).fit(concept)
        centroids = kmeans.cluster_centers_
        distances = np.linalg.norm(concept[:, None] - centroids, axis=2)
        closest_indices = np.argmin(distances, axis=0)
        summarized = concept[closest_indices]
        return summarized

    def _summarize_memory(self):
        allocations = self._pyramidal_allocation()
        for level, concepts in self.memory.items():
            summarized_level = []
            alloc_per_concept = max(1, allocations[level] // len(concepts))
            for concept in concepts:
                summarized = self._summarize_concept(concept, alloc_per_concept)
                summarized_level.append(summarized)
            self.memory[level] = summarized_level

    def get_all_memory(self):
        all_data = []
        for level_concepts in self.memory.values():
            for concept in level_concepts:
                all_data.append(concept)
        return np.vstack(all_data) if all_data else np.empty((0,))

def scenario_design(normal_data, anomaly_data, c):
    """
    Implements Algorithm 1 to create a lifelong learning scenario.
    
    Args:
        normal_data (numpy array): The normal data points.
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired concepts.
    
    Returns:
        list of tuples: List of (normal_concept, anomaly_concept) pairs forming the scenario.
    """
    normal_concepts = create_phi(normal_data, c)
    anomaly_concepts = create_gamma(anomaly_data, c)
    
    scenario = match_lambda(anomaly_concepts, normal_concepts)
    
    return scenario

def evaluation_protocol(T, E, Y, model, strategy="naive", replay_buffer_size=5000, memory_size=5000, alpha=0.05):
    """
    Implements Algorithm 2: Lifelong Learning Evaluation Protocol with multiple strategies.
    
    Args:
        T (list): Sequence of N training sets.
        E (list): Sequence of N testing sets.
        Y (list): Sequence of true labels for test sets.
        model (sklearn.base.BaseEstimator): A scikit-learn-like model instance that supports `fit` and `decision_function`.
        strategy (str): Strategy for training.
        replay_buffer_size (int): Maximum size of replay buffer if applicable
        memory_size (int): Maximum memory size if applicable
        alpha (float): KS-test threshold for drift detection.

    Returns:
        numpy array: NxN results matrix R where R[i, j] is ROC-AUC of model on E[j] after learning T[i].
    """
    N = len(T)
    R = np.zeros((N, N))  

    if strategy in ["cumulative"]:
        cumulative_data = []
    
    if strategy in ["replay"]:
        replay_buffer = []

    if strategy == "SSF":
        memory_buffer = None 

    if strategy == "hierarchical":
        h_memory = HierarchicalMemory(memory_limit=memory_size, pyramid_factor=2, centroids_per_concept=10)

    for i, Ti in tqdm(enumerate(T), desc=f"Evaluating using {strategy} strategy"):
        current_model = deepcopy(model)

        # -- NAIVE --
        if strategy == "naive":
            current_model.fit(Ti)

        # -- CUMULATIVE --
        elif strategy == "cumulative":
            cumulative_data.extend(Ti.tolist())
            current_model.fit(np.array(cumulative_data)) 

        # -- REPLAY -- 
        elif strategy == "replay":
            if replay_buffer:
                combined_data = np.vstack((np.array(replay_buffer), Ti))
            else:
                combined_data = Ti

            current_model.fit(combined_data)
            replay_buffer.extend(Ti.tolist())

            if len(replay_buffer) > replay_buffer_size:
                replay_buffer = replay_buffer[-replay_buffer_size:]
        
        # -- SSF -- 
        elif strategy == "SSF":
            if memory_buffer is None:
                memory_buffer = Ti[:memory_size]  
            else:
                drift_detected = kolmogorov_smirnov_test(memory_buffer, Ti, alpha)
                if drift_detected:
                    X_new_selected = strategic_sample_selection(memory_buffer, Ti, top_k=1000)
                    memory_buffer = update_memory_buffer(memory_buffer, X_new_selected, memory_size=memory_size)
            memory_buffer = np.unique(memory_buffer, axis=0)
            current_model.fit(memory_buffer)

        # -- HIERARCHICAL --
        elif strategy == "hierarchical":

            memory_data = h_memory.get_all_memory()
            if memory_data.size == 0:
                drift_level = 1
            else:
                drift_distances = [
                    wasserstein_distance(Ti[:, d], memory_data[:, d])
                    for d in range(Ti.shape[1])
                ]
                drift_score = np.mean(drift_distances)
                print(f"drift: {drift_score}")
                
                if drift_score < 0.05:
                    drift_level = 1
                elif drift_score < 0.1:
                    drift_level = 2
                elif drift_score < 0.2:
                    drift_level = 3
                else:
                    drift_level = 4
        
            h_memory.add_concept(Ti, level=drift_level)
            summarized_memory = h_memory.get_all_memory()
            current_model.fit(summarized_memory)

        # -- Evaluation --
        for j, ((Ej_normal, Ej_anomaly), (y_normal, y_anomaly)) in enumerate(zip(E, Y)):
            test_data = np.vstack((Ej_normal, Ej_anomaly))
            test_labels = np.hstack((y_normal, y_anomaly))  
        
            scores = -current_model.decision_function(test_data)  
            R[i, j] = average_precision_score(test_labels, scores)

    return R


# Experiments

In [7]:
num_concepts = 5

X_normal = X[y == 0]  
X_anomaly = X[y == 1]

normal_concepts = create_phi(X_normal, num_concepts)
anomaly_concepts = create_gamma(X_anomaly, num_concepts)

concept_pairs = match_lambda(anomaly_concepts, normal_concepts)

T = []  
E = [] 
Y = []

for normal, anomaly in concept_pairs:

    normal_train, normal_test = train_test_split(normal, test_size=0.3, random_state=42)
    anomaly_train, anomaly_test = train_test_split(anomaly, test_size=0.3, random_state=42)  

    print(f"normal_train: {len(normal_train)}, normal_test: {len(normal_test)}, anomaly_train: {len(anomaly_train)}, anomaly_test: {len(anomaly_test)}")

    T.append(normal_train)
    E.append((normal_test, anomaly_test))

    y_normal_test = np.zeros(len(normal_test))
    y_anomaly_test = np.ones(len(anomaly_test))
    
    Y.append((y_normal_test, y_anomaly_test))

Finished creating normal concepts
Finished creating anomaly concepts
Finished matching concept pairs
normal_train: 14, normal_test: 6, anomaly_train: 777967, anomaly_test: 333415
normal_train: 71, normal_test: 31, anomaly_train: 3091, anomaly_test: 1325
normal_train: 14, normal_test: 6, anomaly_train: 26499, anomaly_test: 11357
normal_train: 14, normal_test: 6, anomaly_train: 27995, anomaly_test: 11999
normal_train: 35, normal_test: 15, anomaly_train: 422, anomaly_test: 182


# Eval

## LOF

In [10]:
R_hm = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  3.69it/s]

drift: 9.033929752227811


Evaluating using hierarchical strategy: 2it [00:00,  3.51it/s]

drift: 17.485859619105664


Evaluating using hierarchical strategy: 3it [00:00,  3.59it/s]

drift: 11.26888823583639


Evaluating using hierarchical strategy: 4it [00:01,  3.56it/s]

drift: 6.863647354128857


Evaluating using hierarchical strategy: 5it [00:01,  3.56it/s]

Lifelong ROC-AUC: 0.520468044585926, BWT: -0.028904618664700465, FWT: 0.46200064652463074





In [26]:
R_ssf = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 273.05it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 256.08it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 276.87it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 326.53it/s]

Evaluating using SSF strategy: 5it [00:05,  1.14s/it]

Lifelong ROC-AUC: 0.7544561432893502, BWT: -0.0039135441339650965, FWT: 0.45301001764958054





In [29]:
R_naive = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:02,  1.70it/s]

Lifelong PR-AUC: 0.9869316956452233, BWT: -0.01950174037502963, FWT: 0.9839654653838258





In [7]:
R_cumulative = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [02:10, 26.08s/it]

Lifelong ROC-AUC: 0.9238354062988797, BWT: -0.004499087364359666, FWT: 0.17379610432039533





In [30]:
R_replay = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:02,  2.03it/s]

Lifelong PR-AUC: 0.9972366305757738, BWT: -0.0025869867197455764, FWT: 0.9891627119274304





## IF

In [11]:
R_hm = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  1.27it/s]

drift: 9.033929752227811


Evaluating using hierarchical strategy: 2it [00:01,  1.14it/s]

drift: 17.485859619105664


Evaluating using hierarchical strategy: 3it [00:02,  1.09it/s]

drift: 11.26888823583639


Evaluating using hierarchical strategy: 4it [00:03,  1.07it/s]

drift: 6.863647354128857


Evaluating using hierarchical strategy: 5it [00:04,  1.08it/s]

Lifelong ROC-AUC: 0.8105676014480313, BWT: -0.0024381561580483304, FWT: 0.32030028822198414





In [27]:
R_ssf = evaluation_protocol(T, E, Y,  IsolationForest(n_estimators=100), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 281.14it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 253.28it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 237.31it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 244.61it/s]

Evaluating using SSF strategy: 5it [00:05,  1.06s/it]

Lifelong ROC-AUC: 0.6559285630296237, BWT: 0.06868127809581366, FWT: 0.804018261100736





In [31]:
R_naive = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:04,  1.24it/s]

Lifelong PR-AUC: 0.9818301936295014, BWT: -0.02259953580254522, FWT: 0.9656734349848101





In [9]:
R_cumulative = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:07,  1.55s/it]

Lifelong ROC-AUC: 0.753897896406868, BWT: -0.0066406078135951676, FWT: 0.7719174809547474





In [32]:
R_replay = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:04,  1.13it/s]

Lifelong PR-AUC: 0.9993487268937558, BWT: -7.805230346656922e-05, FWT: 0.9751907969596898





## SGDOCSVM

In [12]:
R_hm = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 2it [00:00,  7.67it/s]

drift: 9.033929752227811
drift: 17.485859619105664


Evaluating using hierarchical strategy: 4it [00:00,  7.51it/s]

drift: 11.26888823583639
drift: 6.863647354128857


Evaluating using hierarchical strategy: 5it [00:00,  7.53it/s]

Lifelong ROC-AUC: 0.9851953661715797, BWT: -0.020546483927901583, FWT: 0.637531291800638





In [28]:
R_ssf = evaluation_protocol(T, E, Y,  SGDOneClassSVM(), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 288.13it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 307.55it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 306.85it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 271.76it/s]

Evaluating using SSF strategy: 5it [00:02,  2.14it/s]

Lifelong ROC-AUC: 0.790790866239933, BWT: -0.004091332504534584, FWT: 0.5467376505389003





In [33]:
R_naive = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:00,  8.49it/s]

Lifelong PR-AUC: 0.9972044916780316, BWT: -0.0030840371627078754, FWT: 0.9546643931975775





In [17]:
R_cumulative = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:04,  1.03it/s]


Lifelong ROC-AUC: 0.597799790906325, BWT: -0.215598947769658, FWT: 0.6579349113663381


In [34]:
R_replay = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:00,  8.32it/s]

Lifelong PR-AUC: 0.9960680539848313, BWT: -0.001328496084602504, FWT: 0.9565455314857584





# SLAD

In [8]:
from deepod.models.tabular import SLAD

In [8]:
R_hm = evaluation_protocol(T, E, Y, SLAD(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.942087, time: 0.5s
epoch 10, training loss: 0.547117, time: 0.1s
epoch 20, training loss: 0.546567, time: 0.1s
epoch 30, training loss: 0.558276, time: 0.1s
epoch 40, training loss: 0.546435, time: 0.1s
epoch 50, training loss: 0.546931, time: 0.1s
epoch 60, training loss: 0.546680, time: 0.1s
epoch 70, training loss: 0.546194, time: 0.1s
epoch 80, training loss: 0.546021, time: 0.1s
epoch 90, training loss: 0.545187, time: 0.1s
epoch100, training loss: 0.546762, time: 0.1s
Start Inference on the training data...


Evaluating using hierarchical strategy: 1it [02:19, 139.10s/it]

drift: 9.033929752227811
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.777894, time: 0.1s
epoch 10, training loss: 0.616681, time: 0.1s
epoch 20, training loss: 0.600655, time: 0.1s
epoch 30, training loss: 0.611302, time: 0.1s
epoch 40, training loss: 0.600753, time: 0.1s
epoch 50, training loss: 0.595786, time: 0.1s
epoch 60, training loss: 0.598656, time: 0.1s
epoch 70, training loss: 0.603434, time: 0.1s
epoch 80, training loss: 0.611243, time: 0.1s
epoch 90, training loss: 0.610948, time: 0.1s
epoch100, training loss: 0.599505, time: 0.1s
Start Inference on the training data...


Evaluating using hierarchical strategy: 2it [04:36, 138.29s/it]

drift: 17.485859619105664
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.839456, time: 0.2s
epoch 10, training loss: 0.586506, time: 0.1s
epoch 20, training loss: 0.577086, time: 0.1s
epoch 30, training loss: 0.573118, time: 0.1s
epoch 40, training loss: 0.577079, time: 0.1s
epoch 50, training loss: 0.574417, time: 0.1s
epoch 60, training loss: 0.574711, time: 0.2s
epoch 70, training loss: 0.573260, time: 0.1s
epoch 80, training loss: 0.570490, time: 0.1s
epoch 90, training loss: 0.565250, time: 0.1s
epoch100, training loss: 0.578763, time: 0.1s
Start Inference on the training data...


Evaluating using hierarchical strategy: 3it [06:54, 137.80s/it]

drift: 11.26888823583639
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.807866, time: 0.1s
epoch 10, training loss: 0.610720, time: 0.1s
epoch 20, training loss: 0.605486, time: 0.1s
epoch 30, training loss: 0.600085, time: 0.1s
epoch 40, training loss: 0.603552, time: 0.1s
epoch 50, training loss: 0.619184, time: 0.1s
epoch 60, training loss: 0.606468, time: 0.1s
epoch 70, training loss: 0.611199, time: 0.1s
epoch 80, training loss: 0.604636, time: 0.1s
epoch 90, training loss: 0.607801, time: 0.1s
epoch100, training loss: 0.598212, time: 0.1s
Start Inference on the training data...


Evaluating using hierarchical strategy: 4it [09:10, 137.33s/it]

drift: 6.863647354128857
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.879887, time: 0.1s
epoch 10, training loss: 0.619500, time: 0.1s
epoch 20, training loss: 0.611566, time: 0.1s
epoch 30, training loss: 0.610254, time: 0.1s
epoch 40, training loss: 0.610114, time: 0.1s
epoch 50, training loss: 0.602414, time: 0.7s
epoch 60, training loss: 0.608052, time: 0.1s
epoch 70, training loss: 0.599205, time: 0.1s
epoch 80, training loss: 0.607303, time: 0.1s
epoch 90, training loss: 0.606334, time: 0.1s
epoch100, training loss: 0.609260, time: 0.1s
Start Inference on the training data...


Evaluating using hierarchical strategy: 5it [11:29, 137.86s/it]

Lifelong ROC-AUC: 0.2581187663594956, BWT: 0.05378771477511142, FWT: 0.5442699839625316





In [42]:
R_ssf = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.705401, time: 0.0s
epoch 10, training loss: 0.583712, time: 0.0s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 326.53it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.887392, time: 0.0s
epoch 10, training loss: 0.671017, time: 0.0s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 310.39it/s]
Evaluating using SSF strategy: 2it [04:00, 120.27s/it]


IndexError: index 7 is out of bounds for axis 0 with size 6

In [36]:
R_naive = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]




epoch  1, training loss: 0.701310, time: 0.4s
epoch 10, training loss: 0.591755, time: 0.0s
Start Inference on the training data...


Evaluating using naive strategy: 1it [14:21, 861.48s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 1.064758, time: 0.3s
epoch 10, training loss: 0.540652, time: 0.4s
Start Inference on the training data...


Evaluating using naive strategy: 2it [24:24, 709.31s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 1.288056, time: 0.0s
epoch 10, training loss: 1.043205, time: 0.0s
Start Inference on the training data...




Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.792895, time: 0.0s
epoch 10, training loss: 0.578901, time: 0.0s
Start Inference on the training data...


Evaluating using naive strategy: 4it [29:14, 331.03s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.717511, time: 0.0s
epoch 10, training loss: 0.599024, time: 0.0s
Start Inference on the training data...


Evaluating using naive strategy: 5it [31:21, 376.22s/it]

Lifelong PR-AUC: 0.9797437209477171, BWT: 0.02687664847407458, FWT: 0.9157562997077079





In [37]:
R_replay = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")



Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.701310, time: 0.0s
epoch 10, training loss: 0.591755, time: 0.0s
Start Inference on the training data...


Evaluating using replay strategy: 1it [02:08, 128.78s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.978277, time: 0.1s
epoch 10, training loss: 0.601198, time: 0.1s
Start Inference on the training data...


Evaluating using replay strategy: 2it [04:14, 127.16s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.916418, time: 0.1s
epoch 10, training loss: 0.628715, time: 0.1s
Start Inference on the training data...


Evaluating using replay strategy: 3it [06:45, 137.76s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.917388, time: 0.1s
epoch 10, training loss: 0.581045, time: 0.1s
Start Inference on the training data...


Evaluating using replay strategy: 4it [08:49, 132.50s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 43, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
epoch  1, training loss: 0.822122, time: 0.2s
epoch 10, training loss: 0.605516, time: 0.1s
Start Inference on the training data...


Evaluating using replay strategy: 5it [10:55, 131.02s/it]

Lifelong PR-AUC: 0.9615231696812983, BWT: 0.00037781483281170705, FWT: 0.928321484759897





# ICL

In [9]:
from deepod.models.tabular import ICL

In [10]:
R_hm = evaluation_protocol(T, E, Y, ICL(epochs=100), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 2/2 [00:00<00:00, 558.12it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 604.41it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 591.08it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 642.62it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 639.04it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 644.35it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 641.96it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 644.62it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 641.10it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   1%|          | 65/5221 [00:00<00:08, 640.79it/s][A
testing:   2%|▏         | 130/5221 [00:00<00:07, 641.61it/s][A
testing:   4%|▎         | 195/5221 [00:00<00:07, 641.41it/s][A
testing:   5%|▍         | 260/5221 [00:00<00:07, 6

drift: 9.033929752227811
Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=1


testing: 100%|██████████| 3/3 [00:00<00:00, 604.31it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 610.67it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 614.61it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 644.96it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 643.41it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 645.75it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 645.13it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 647.91it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 645.22it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   1%|          | 65/5221 [00:00<00:08, 642.82it/s][A
testing:   2%|▏         | 130/5221 [00:00<00:07, 643.34it/s][A
testing:   4%|▎         | 195/5221 [00:00<00:07, 643.72it/s][A
testing:   5%|▍         | 260/5221 [00:00<00:07, 6

drift: 17.485859619105664
Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=


testing: 100%|██████████| 3/3 [00:00<00:00, 598.90it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 606.29it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 612.64it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 644.66it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 643.46it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 647.13it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 645.59it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 647.93it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 646.42it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   1%|          | 65/5221 [00:00<00:08, 643.35it/s][A
testing:   2%|▏         | 130/5221 [00:00<00:07, 643.91it/s][A
testing:   4%|▎         | 195/5221 [00:00<00:07, 643.83it/s][A
testing:   5%|▍         | 260/5221 [00:00<00:07, 6

drift: 11.26888823583639
Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=1


testing: 100%|██████████| 3/3 [00:00<00:00, 613.47it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 619.94it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 621.41it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 642.44it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 643.89it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 646.36it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 646.05it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 648.29it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 647.87it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   1%|          | 65/5221 [00:00<00:08, 640.68it/s][A
testing:   2%|▏         | 130/5221 [00:00<00:07, 643.90it/s][A
testing:   4%|▎         | 195/5221 [00:00<00:07, 644.94it/s][A
testing:   5%|▍         | 260/5221 [00:00<00:07, 6

drift: 6.863647354128857
Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=1


testing: 100%|██████████| 3/3 [00:00<00:00, 607.69it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 593.25it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 619.60it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 644.85it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 645.56it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 648.78it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 647.08it/s][A

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing:  34%|███▍      | 65/189 [00:00<00:00, 648.87it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 647.62it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   1%|          | 65/5221 [00:00<00:08, 642.46it/s][A
testing:   2%|▏         | 130/5221 [00:00<00:07, 644.86it/s][A
testing:   4%|▎         | 195/5221 [00:00<00:07, 645.37it/s][A
testing:   5%|▍         | 260/5221 [00:00<00:07, 6

Lifelong ROC-AUC: 0.007858929996564144, BWT: -0.0074766055384658334, FWT: 0.36056330981351264





In [None]:
R_ssf = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

In [39]:
R_naive = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 1/1 [00:00<00:00, 496.78it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 551.59it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 540.99it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 62/5210 [00:00<00:08, 615.67it/s][A
testing:   2%|▏         | 124/5210 [00:00<00:08, 614.23it/s][A
testing:   4%|▎         | 186/5210 [00:00<00:08, 615.91it/s][A
testing:   5%|▍         | 248/5210 [00:00<00:08, 614.05it/s][A
testing:   6%|▌         | 310/5210 [00:00<00:07, 613.00it/s][A
testing:   7%|▋         | 372/5210 [00:00<00:07, 612.15it/s][A
testing:   8%|▊         | 434/5210 [00:00<00:07, 613.21it/s][A
testing:  10%|▉         | 496/5210 [00:00<00:07, 613.98it/s][A
testing:  11%|█         | 558/5210 [00:00<00:07, 614.67it/s][A
testing:  12%|█▏        | 620/5210 [00:01<00:07, 615.52it/s][A
testing:  13%|█▎        | 682/5210 [00:01<00:07, 616.15it/s][A
testing:  14%|█▍        | 744/5210 [00:01<00:07, 616.57it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 2/2 [00:00<00:00, 566.42it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 586.57it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 580.57it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 62/5210 [00:00<00:08, 615.03it/s][A
testing:   2%|▏         | 124/5210 [00:00<00:08, 607.93it/s][A
testing:   4%|▎         | 186/5210 [00:00<00:08, 610.76it/s][A
testing:   5%|▍         | 248/5210 [00:00<00:08, 612.59it/s][A
testing:   6%|▌         | 310/5210 [00:00<00:07, 612.63it/s][A
testing:   7%|▋         | 372/5210 [00:00<00:07, 613.12it/s][A
testing:   8%|▊         | 434/5210 [00:00<00:07, 614.01it/s][A
testing:  10%|▉         | 496/5210 [00:00<00:07, 615.27it/s][A
testing:  11%|█         | 558/5210 [00:00<00:07, 614.84it/s][A
testing:  12%|█▏        | 620/5210 [00:01<00:07, 614.24it/s][A
testing:  13%|█▎        | 682/5210 [00:01<00:07, 614.01it/s][A
testing:  14%|█▍        | 744/5210 [00:01<00:07, 614.38it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 1/1 [00:00<00:00, 524.48it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 536.49it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 540.02it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 62/5210 [00:00<00:08, 618.59it/s][A
testing:   2%|▏         | 124/5210 [00:00<00:08, 618.91it/s][A
testing:   4%|▎         | 186/5210 [00:00<00:08, 616.94it/s][A
testing:   5%|▍         | 248/5210 [00:00<00:08, 616.47it/s][A
testing:   6%|▌         | 310/5210 [00:00<00:07, 617.72it/s][A
testing:   7%|▋         | 372/5210 [00:00<00:07, 616.22it/s][A
testing:   8%|▊         | 434/5210 [00:00<00:07, 615.02it/s][A
testing:  10%|▉         | 496/5210 [00:00<00:07, 616.21it/s][A
testing:  11%|█         | 558/5210 [00:00<00:07, 616.47it/s][A
testing:  12%|█▏        | 620/5210 [00:01<00:07, 617.45it/s][A
testing:  13%|█▎        | 682/5210 [00:01<00:07, 616.77it/s][A
testing:  14%|█▍        | 744/5210 [00:01<00:07, 616.84it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 1/1 [00:00<00:00, 547.34it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 565.35it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 570.73it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 62/5210 [00:00<00:08, 615.48it/s][A
testing:   2%|▏         | 124/5210 [00:00<00:08, 616.60it/s][A
testing:   4%|▎         | 187/5210 [00:00<00:08, 618.47it/s][A
testing:   5%|▍         | 249/5210 [00:00<00:08, 619.06it/s][A
testing:   6%|▌         | 311/5210 [00:00<00:07, 618.75it/s][A
testing:   7%|▋         | 373/5210 [00:00<00:07, 617.53it/s][A
testing:   8%|▊         | 435/5210 [00:00<00:07, 617.86it/s][A
testing:  10%|▉         | 497/5210 [00:00<00:07, 618.06it/s][A
testing:  11%|█         | 559/5210 [00:00<00:07, 618.48it/s][A
testing:  12%|█▏        | 621/5210 [00:01<00:07, 618.43it/s][A
testing:  13%|█▎        | 683/5210 [00:01<00:07, 618.17it/s][A
testing:  14%|█▍        | 745/5210 [00:01<00:07, 618.08it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 1/1 [00:00<00:00, 517.88it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 541.20it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 547.92it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 62/5210 [00:00<00:08, 618.28it/s][A
testing:   2%|▏         | 124/5210 [00:00<00:08, 618.38it/s][A
testing:   4%|▎         | 186/5210 [00:00<00:08, 618.81it/s][A
testing:   5%|▍         | 248/5210 [00:00<00:08, 617.77it/s][A
testing:   6%|▌         | 310/5210 [00:00<00:07, 615.95it/s][A
testing:   7%|▋         | 372/5210 [00:00<00:07, 615.65it/s][A
testing:   8%|▊         | 434/5210 [00:00<00:07, 615.92it/s][A
testing:  10%|▉         | 496/5210 [00:00<00:07, 616.21it/s][A
testing:  11%|█         | 558/5210 [00:00<00:07, 616.10it/s][A
testing:  12%|█▏        | 620/5210 [00:01<00:07, 615.84it/s][A
testing:  13%|█▎        | 682/5210 [00:01<00:07, 616.29it/s][A
testing:  14%|█▍        | 744/5210 [00:01<00:07, 616.15it/s][A
testing:

Lifelong PR-AUC: 0.9699575328362695, BWT: 0.006623652170684824, FWT: 0.9885452359877063





In [10]:
R_replay = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 1/1 [00:00<00:00, 503.34it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 525.80it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 531.87it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 63/5210 [00:00<00:08, 625.62it/s][A
testing:   2%|▏         | 127/5210 [00:00<00:08, 630.17it/s][A
testing:   4%|▎         | 191/5210 [00:00<00:07, 633.50it/s][A
testing:   5%|▍         | 255/5210 [00:00<00:07, 635.77it/s][A
testing:   6%|▌         | 320/5210 [00:00<00:07, 638.12it/s][A
testing:   7%|▋         | 385/5210 [00:00<00:07, 638.82it/s][A
testing:   9%|▊         | 449/5210 [00:00<00:07, 638.84it/s][A
testing:  10%|▉         | 513/5210 [00:00<00:07, 638.74it/s][A
testing:  11%|█         | 577/5210 [00:00<00:07, 638.45it/s][A
testing:  12%|█▏        | 641/5210 [00:01<00:07, 638.66it/s][A
testing:  14%|█▎        | 705/5210 [00:01<00:07, 638.56it/s][A
testing:  15%|█▍        | 769/5210 [00:01<00:06, 638.57it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 2/2 [00:00<00:00, 574.64it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 587.89it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 581.17it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 65/5210 [00:00<00:07, 646.58it/s][A
testing:   2%|▏         | 130/5210 [00:00<00:07, 647.23it/s][A
testing:   4%|▎         | 195/5210 [00:00<00:07, 647.65it/s][A
testing:   5%|▍         | 260/5210 [00:00<00:07, 646.45it/s][A
testing:   6%|▌         | 325/5210 [00:00<00:07, 644.77it/s][A
testing:   7%|▋         | 390/5210 [00:00<00:07, 645.13it/s][A
testing:   9%|▊         | 455/5210 [00:00<00:07, 644.89it/s][A
testing:  10%|▉         | 520/5210 [00:00<00:07, 645.69it/s][A
testing:  11%|█         | 585/5210 [00:00<00:07, 644.22it/s][A
testing:  12%|█▏        | 650/5210 [00:01<00:07, 644.29it/s][A
testing:  14%|█▎        | 715/5210 [00:01<00:06, 644.60it/s][A
testing:  15%|█▍        | 780/5210 [00:01<00:06, 645.66it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 2/2 [00:00<00:00, 532.20it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 542.50it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 562.65it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 65/5210 [00:00<00:07, 644.88it/s][A
testing:   2%|▏         | 130/5210 [00:00<00:07, 643.00it/s][A
testing:   4%|▎         | 195/5210 [00:00<00:07, 642.23it/s][A
testing:   5%|▍         | 260/5210 [00:00<00:07, 640.31it/s][A
testing:   6%|▌         | 325/5210 [00:00<00:07, 637.79it/s][A
testing:   7%|▋         | 390/5210 [00:00<00:07, 639.99it/s][A
testing:   9%|▊         | 455/5210 [00:00<00:07, 635.81it/s][A
testing:  10%|▉         | 519/5210 [00:00<00:07, 635.47it/s][A
testing:  11%|█         | 583/5210 [00:00<00:07, 635.28it/s][A
testing:  12%|█▏        | 648/5210 [00:01<00:07, 636.89it/s][A
testing:  14%|█▎        | 712/5210 [00:01<00:07, 637.41it/s][A
testing:  15%|█▍        | 777/5210 [00:01<00:06, 638.53it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 2/2 [00:00<00:00, 555.28it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 575.11it/s]

testing: 100%|██████████| 2/2 [00:00<00:00, 574.60it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|▏         | 66/5210 [00:00<00:07, 650.13it/s][A
testing:   3%|▎         | 132/5210 [00:00<00:07, 648.27it/s][A
testing:   4%|▍         | 197/5210 [00:00<00:07, 646.41it/s][A
testing:   5%|▌         | 262/5210 [00:00<00:07, 646.44it/s][A
testing:   6%|▋         | 327/5210 [00:00<00:07, 647.30it/s][A
testing:   8%|▊         | 392/5210 [00:00<00:07, 646.81it/s][A
testing:   9%|▉         | 457/5210 [00:00<00:07, 647.05it/s][A
testing:  10%|█         | 522/5210 [00:00<00:07, 646.95it/s][A
testing:  11%|█▏        | 587/5210 [00:00<00:07, 647.82it/s][A
testing:  13%|█▎        | 652/5210 [00:01<00:07, 647.48it/s][A
testing:  14%|█▍        | 717/5210 [00:01<00:06, 647.03it/s][A
testing:  15%|█▌        | 782/5210 [00:01<00:06, 646.95it/s][A
testing:

Start Training...
ensemble size: 3
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(34, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 3/3 [00:00<00:00, 636.53it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 606.11it/s]

testing: 100%|██████████| 3/3 [00:00<00:00, 606.17it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   1%|          | 65/5210 [00:00<00:07, 648.08it/s][A
testing:   2%|▏         | 130/5210 [00:00<00:07, 644.68it/s][A
testing:   4%|▎         | 195/5210 [00:00<00:07, 643.69it/s][A
testing:   5%|▍         | 260/5210 [00:00<00:07, 644.25it/s][A
testing:   6%|▌         | 325/5210 [00:00<00:07, 643.19it/s][A
testing:   7%|▋         | 390/5210 [00:00<00:07, 644.46it/s][A
testing:   9%|▊         | 455/5210 [00:00<00:07, 645.23it/s][A
testing:  10%|▉         | 520/5210 [00:00<00:07, 645.80it/s][A
testing:  11%|█         | 585/5210 [00:00<00:07, 645.90it/s][A
testing:  12%|█▏        | 650/5210 [00:01<00:07, 646.01it/s][A
testing:  14%|█▎        | 715/5210 [00:01<00:06, 646.77it/s][A
testing:  15%|█▍        | 780/5210 [00:01<00:06, 646.86it/s][A
testing:

Lifelong PR-AUC: 0.9615458481839805, BWT: 0.0003526953194274207, FWT: 0.9328542584355585





# RCA

In [11]:
from deepod.models.tabular import RCA

In [12]:
R_hm = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 550.22it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  5.90it/s][A
 20%|██        | 2/10 [00:00<00:01,  5.89it/s][A
 30%|███       | 3/10 [00:00<00:01,  5.89it/s][A
 40%|████      | 4/10 [00:00<00:01,  5.89it/s][A
 50%|█████     | 5/10 [00:00<00:00,  5.89it/s][A
 60%|██████    | 6/10 [00:01<00:00,  5.89it/s][A
 70%|███████   | 7/10 [00:01<00:00,  5.86it/s][A
 80%|████████  | 8/10 [00:01<00:00,  5.87it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  5.87it/s][A
100%|██████████| 10/10 [00:01<00:00,  5.88it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.74s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.73s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.74s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.74s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.74s/it][A
 60%|██████    | 6/10 [00:28<00:18,  4.74s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.75s/it][A
 80%|████████

drift: 9.033929752227811
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, o


100%|██████████| 10/10 [00:00<00:00, 364.69it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  5.91it/s][A
 20%|██        | 2/10 [00:00<00:01,  5.89it/s][A
 30%|███       | 3/10 [00:00<00:01,  5.88it/s][A
 40%|████      | 4/10 [00:00<00:01,  5.86it/s][A
 50%|█████     | 5/10 [00:00<00:00,  5.86it/s][A
 60%|██████    | 6/10 [00:01<00:00,  5.85it/s][A
 70%|███████   | 7/10 [00:01<00:00,  5.86it/s][A
 80%|████████  | 8/10 [00:01<00:00,  5.86it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  5.86it/s][A
100%|██████████| 10/10 [00:01<00:00,  5.86it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.72s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.72s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.73s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.74s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.75s/it][A
 60%|██████    | 6/10 [00:28<00:18,  4.74s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.75s/it][A
 80%|████████

drift: 17.485859619105664
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, 


100%|██████████| 10/10 [00:00<00:00, 365.25it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  5.89it/s][A
 20%|██        | 2/10 [00:00<00:01,  5.88it/s][A
 30%|███       | 3/10 [00:00<00:01,  5.88it/s][A
 40%|████      | 4/10 [00:00<00:01,  5.88it/s][A
 50%|█████     | 5/10 [00:00<00:00,  5.88it/s][A
 60%|██████    | 6/10 [00:01<00:00,  5.88it/s][A
 70%|███████   | 7/10 [00:01<00:00,  5.88it/s][A
 80%|████████  | 8/10 [00:01<00:00,  5.88it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  5.88it/s][A
100%|██████████| 10/10 [00:01<00:00,  5.88it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.73s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.73s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.74s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.74s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.74s/it][A
 60%|██████    | 6/10 [00:28<00:18,  4.74s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.77s/it][A
 80%|████████

drift: 11.26888823583639
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, o


100%|██████████| 10/10 [00:00<00:00, 365.95it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  5.87it/s][A
 20%|██        | 2/10 [00:00<00:01,  5.86it/s][A
 30%|███       | 3/10 [00:00<00:01,  5.86it/s][A
 40%|████      | 4/10 [00:00<00:01,  5.86it/s][A
 50%|█████     | 5/10 [00:00<00:00,  5.85it/s][A
 60%|██████    | 6/10 [00:01<00:00,  5.85it/s][A
 70%|███████   | 7/10 [00:01<00:00,  5.85it/s][A
 80%|████████  | 8/10 [00:01<00:00,  5.84it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  5.85it/s][A
100%|██████████| 10/10 [00:01<00:00,  5.85it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.71s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.72s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.73s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.74s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.75s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.78s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.77s/it][A
 80%|████████

drift: 6.863647354128857
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, o


100%|██████████| 10/10 [00:00<00:00, 360.06it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  5.90it/s][A
 20%|██        | 2/10 [00:00<00:01,  5.89it/s][A
 30%|███       | 3/10 [00:00<00:01,  5.89it/s][A
 40%|████      | 4/10 [00:00<00:01,  5.88it/s][A
 50%|█████     | 5/10 [00:00<00:00,  5.88it/s][A
 60%|██████    | 6/10 [00:01<00:00,  5.88it/s][A
 70%|███████   | 7/10 [00:01<00:00,  5.88it/s][A
 80%|████████  | 8/10 [00:01<00:00,  5.88it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  5.88it/s][A
100%|██████████| 10/10 [00:01<00:00,  5.88it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.71s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.72s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.74s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.74s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.78s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.77s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.76s/it][A
 80%|████████

Lifelong ROC-AUC: 0.9300602880705339, BWT: -7.23431765690119e-05, FWT: 0.598798912880806





In [None]:
R_ssf = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

In [12]:
R_naive = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1066.60it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:05<00:48,  5.35s/it][A
 20%|██        | 2/10 [00:10<00:39,  4.99s/it][A
 30%|███       | 3/10 [00:14<00:34,  4.88s/it][A
 40%|████      | 4/10 [00:19<00:28,  4.82s/it][A
 50%|█████     | 5/10 [00:24<00:24,  4.81s/it][A
 60%|██████    | 6/10 [00:29<00:19,  4.79s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.77s/it][A
 80%|████████  | 8/10 [00:38<00:09,  4.74s/it][A
 90%|█████████ | 9/10 [00:43<00:04,  4.73s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.80s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 50.34it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.29it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.27it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.26it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.25it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.24it/s][A
 60%|██████    | 6/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 560.42it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.67s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.69s/it][A
 30%|███       | 3/10 [00:14<00:32,  4.71s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.71s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.72s/it][A
 60%|██████    | 6/10 [00:28<00:18,  4.73s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.76s/it][A
 80%|████████  | 8/10 [00:37<00:09,  4.75s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.74s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.73s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 50.38it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.28it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.25it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.24it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.23it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.23it/s][A
 60%|██████    | 6/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1098.79it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:41,  4.66s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.70s/it][A
 30%|███       | 3/10 [00:14<00:32,  4.71s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.72s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.72s/it][A
 60%|██████    | 6/10 [00:28<00:18,  4.73s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.73s/it][A
 80%|████████  | 8/10 [00:37<00:09,  4.73s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.76s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.73s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 50.36it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.28it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.25it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.26it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.25it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.25it/s][A
 60%|██████    | 6/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1074.77it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.69s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.72s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.73s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.74s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.74s/it][A
 60%|██████    | 6/10 [00:28<00:18,  4.74s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.74s/it][A
 80%|████████  | 8/10 [00:37<00:09,  4.73s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.73s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.73s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 50%|█████     | 5/10 [00:00<00:00, 49.90it/s][A
100%|██████████| 10/10 [00:00<00:00, 49.44it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.21it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.20it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.20it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.19it/s][A
 50%|█████     | 5

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1009.02it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:43,  4.83s/it][A
 20%|██        | 2/10 [00:09<00:38,  4.79s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.77s/it][A
 40%|████      | 4/10 [00:19<00:28,  4.77s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.76s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.76s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.77s/it][A
 80%|████████  | 8/10 [00:38<00:09,  4.76s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.76s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.77s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 50.00it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.20it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.18it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.18it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.19it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.20it/s][A
 60%|██████    | 6/

Lifelong PR-AUC: 0.9858790768979343, BWT: 1.4620844117740894e-05, FWT: 0.9160177796181503





In [13]:
R_replay = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1075.46it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.73s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.74s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.79s/it][A
 40%|████      | 4/10 [00:19<00:28,  4.78s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.78s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.77s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.76s/it][A
 80%|████████  | 8/10 [00:38<00:09,  4.76s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.76s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.76s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 50%|█████     | 5/10 [00:00<00:00, 49.67it/s][A
100%|██████████| 10/10 [00:00<00:00, 49.36it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.25it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.21it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.20it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.19it/s][A
 50%|█████     | 5

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 530.85it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.73s/it][A
 20%|██        | 2/10 [00:09<00:38,  4.80s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.77s/it][A
 40%|████      | 4/10 [00:19<00:28,  4.76s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.76s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.75s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.75s/it][A
 80%|████████  | 8/10 [00:38<00:09,  4.75s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.75s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.76s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 50%|█████     | 5/10 [00:00<00:00, 49.93it/s][A
100%|██████████| 10/10 [00:00<00:00, 49.32it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.23it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.21it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.21it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.22it/s][A
 50%|█████     | 5/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 529.84it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.73s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.74s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.74s/it][A
 40%|████      | 4/10 [00:19<00:28,  4.79s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.77s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.76s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.76s/it][A
 80%|████████  | 8/10 [00:38<00:09,  4.75s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.75s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.75s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 49.87it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.22it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.19it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.18it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.16it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.17it/s][A
 60%|██████    | 6/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 514.83it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.72s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.74s/it][A
 30%|███       | 3/10 [00:14<00:33,  4.74s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.74s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.72s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.76s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.75s/it][A
 80%|████████  | 8/10 [00:37<00:09,  4.74s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.73s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.74s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 49.95it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.27it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.26it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.27it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.24it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.25it/s][A
 60%|██████    | 6/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=43, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 363.32it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:04<00:42,  4.68s/it][A
 20%|██        | 2/10 [00:09<00:37,  4.69s/it][A
 30%|███       | 3/10 [00:14<00:32,  4.71s/it][A
 40%|████      | 4/10 [00:18<00:28,  4.72s/it][A
 50%|█████     | 5/10 [00:23<00:23,  4.77s/it][A
 60%|██████    | 6/10 [00:28<00:19,  4.76s/it][A
 70%|███████   | 7/10 [00:33<00:14,  4.75s/it][A
 80%|████████  | 8/10 [00:37<00:09,  4.76s/it][A
 90%|█████████ | 9/10 [00:42<00:04,  4.75s/it][A
100%|██████████| 10/10 [00:47<00:00,  4.74s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 50.47it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.30it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.26it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.28it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.27it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.25it/s][A
 60%|██████    | 6/1

Lifelong PR-AUC: 0.9858643959540109, BWT: -2.683552163018632e-06, FWT: 0.9160174323186847





# RDP

In [14]:
from deepod.models.tabular import RDP

In [14]:
R_hm = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.004941, time: 0.0s
epoch 10, training loss: 0.008679, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 2/2 [00:00<00:00, 925.69it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 1056.08it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   2%|▏         | 106/5221 [00:00<00:04, 1052.75it/s][A
testing:   4%|▍         | 212/5221 [00:00<00:04, 1054.12it/s][A
testing:   6%|▌         | 318/5221 [00:00<00:04, 1054.63it/s][A
testing:   8%|▊         | 424/5221 [00:00<00:04, 1055.42it/s][A
testing:  10%|█         | 530/5221 [00:00<00:04, 1055.34it/s][A
testing:  12%|█▏        | 637/5221 [00:00<00:04, 1057.70it/s][A
testing:  14%|█▍        | 744/5221 [00:00<00:04, 1058.87it/s][A
testing:  16%|█▋        | 851/5221 [00:00<00:04, 1059.59it/s][A
testing:  18%|█▊        | 957/5221 [00:00<00:04, 1058.62it/s][A
testing:  20%|██        | 1064/5221 [00:01<00:03, 1059.35it/s][A
testing:  22%|██▏       | 1171/5221 [00:01<00:03, 1060.68it/s][A
testing:  24%|██▍       | 1278/5221 [00:01<00:03, 1061.

drift: 9.033929752227811
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.036421, time: 0.0s
epoch 10, training loss: 0.001843, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 3/3 [00:00<00:00, 975.65it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 1061.96it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5221 [00:00<00:04, 1061.81it/s][A
testing:   4%|▍         | 214/5221 [00:00<00:04, 1059.09it/s][A
testing:   6%|▌         | 321/5221 [00:00<00:04, 1059.72it/s][A
testing:   8%|▊         | 428/5221 [00:00<00:04, 1060.95it/s][A
testing:  10%|█         | 535/5221 [00:00<00:04, 1060.43it/s][A
testing:  12%|█▏        | 642/5221 [00:00<00:04, 1061.20it/s][A
testing:  14%|█▍        | 749/5221 [00:00<00:04, 1061.09it/s][A
testing:  16%|█▋        | 856/5221 [00:00<00:04, 1060.97it/s][A
testing:  18%|█▊        | 963/5221 [00:00<00:04, 1059.35it/s][A
testing:  20%|██        | 1070/5221 [00:01<00:03, 1059.46it/s][A
testing:  23%|██▎       | 1176/5221 [00:01<00:03, 1059.07it/s][A
testing:  25%|██▍       | 1282/5221 [00:01<00:03, 1059.

drift: 17.485859619105664
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.020872, time: 0.0s
epoch 10, training loss: 0.002741, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 3/3 [00:00<00:00, 973.31it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 1060.93it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   2%|▏         | 106/5221 [00:00<00:04, 1058.75it/s][A
testing:   4%|▍         | 213/5221 [00:00<00:04, 1060.57it/s][A
testing:   6%|▌         | 320/5221 [00:00<00:04, 1061.04it/s][A
testing:   8%|▊         | 427/5221 [00:00<00:04, 1061.38it/s][A
testing:  10%|█         | 534/5221 [00:00<00:04, 1062.05it/s][A
testing:  12%|█▏        | 641/5221 [00:00<00:04, 1061.79it/s][A
testing:  14%|█▍        | 748/5221 [00:00<00:04, 1063.03it/s][A
testing:  16%|█▋        | 855/5221 [00:00<00:04, 1060.69it/s][A
testing:  18%|█▊        | 962/5221 [00:00<00:04, 1060.48it/s][A
testing:  20%|██        | 1069/5221 [00:01<00:03, 1061.58it/s][A
testing:  23%|██▎       | 1176/5221 [00:01<00:03, 1062.35it/s][A
testing:  25%|██▍       | 1283/5221 [00:01<00:03, 1062.

drift: 11.26888823583639
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.031406, time: 0.0s
epoch 10, training loss: 0.004282, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 3/3 [00:00<00:00, 983.50it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 1060.65it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5221 [00:00<00:04, 1062.45it/s][A
testing:   4%|▍         | 214/5221 [00:00<00:04, 1058.82it/s][A
testing:   6%|▌         | 320/5221 [00:00<00:04, 1058.39it/s][A
testing:   8%|▊         | 426/5221 [00:00<00:04, 1057.53it/s][A
testing:  10%|█         | 532/5221 [00:00<00:04, 1058.15it/s][A
testing:  12%|█▏        | 638/5221 [00:00<00:04, 1058.15it/s][A
testing:  14%|█▍        | 744/5221 [00:00<00:04, 1058.71it/s][A
testing:  16%|█▋        | 850/5221 [00:00<00:04, 1059.11it/s][A
testing:  18%|█▊        | 956/5221 [00:00<00:04, 1059.15it/s][A
testing:  20%|██        | 1062/5221 [00:01<00:03, 1058.16it/s][A
testing:  22%|██▏       | 1169/5221 [00:01<00:03, 1059.12it/s][A
testing:  24%|██▍       | 1275/5221 [00:01<00:03, 1058.

drift: 6.863647354128857
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.009494, time: 0.0s
epoch 10, training loss: 0.002171, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 3/3 [00:00<00:00, 957.82it/s]

testing:   0%|          | 0/189 [00:00<?, ?it/s][A
testing: 100%|██████████| 189/189 [00:00<00:00, 1056.89it/s][A

testing:   0%|          | 0/5221 [00:00<?, ?it/s][A
testing:   2%|▏         | 106/5221 [00:00<00:04, 1057.26it/s][A
testing:   4%|▍         | 212/5221 [00:00<00:04, 1058.01it/s][A
testing:   6%|▌         | 319/5221 [00:00<00:04, 1059.32it/s][A
testing:   8%|▊         | 425/5221 [00:00<00:04, 1058.99it/s][A
testing:  10%|█         | 532/5221 [00:00<00:04, 1060.85it/s][A
testing:  12%|█▏        | 639/5221 [00:00<00:04, 1060.36it/s][A
testing:  14%|█▍        | 746/5221 [00:00<00:04, 1061.50it/s][A
testing:  16%|█▋        | 853/5221 [00:00<00:04, 1061.62it/s][A
testing:  18%|█▊        | 960/5221 [00:00<00:04, 1062.03it/s][A
testing:  20%|██        | 1067/5221 [00:01<00:03, 1062.42it/s][A
testing:  22%|██▏       | 1174/5221 [00:01<00:03, 1062.55it/s][A
testing:  25%|██▍       | 1281/5221 [00:01<00:03, 1062.

Lifelong ROC-AUC: 0.8714016611717592, BWT: 0.08574400357883394, FWT: 0.5969335576215077





In [None]:
R_ssf = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

In [15]:
R_naive = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000127, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 835.52it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1063.45it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1057.55it/s][A
testing:   6%|▌         | 320/5210 [00:00<00:04, 1058.61it/s][A
testing:   8%|▊         | 427/5210 [00:00<00:04, 1061.46it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1065.36it/s][A
testing:  12%|█▏        | 642/5210 [00:00<00:04, 1065.62it/s][A
testing:  14%|█▍        | 749/5210 [00:00<00:04, 1065.82it/s][A
testing:  16%|█▋        | 856/5210 [00:00<00:04, 1065.16it/s][A
testing:  18%|█▊        | 963/5210 [00:00<00:03, 1064.25it/s][A
testing:  21%|██        | 1071/5210 [00:01<00:03, 1066.27it/s][A
testing:  23%|██▎       | 1178/5210 [00:01<00:03, 1066.38it/s][A
testing:  25%|██▍       | 1285/5210 [00:01<00:03, 1066.73it/s][A
testing:  27%|██▋       | 1392/5210 [00:01<00:03, 1060.01it/s][A
testing:  29%|██▉       | 1499/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.005491, time: 0.0s
epoch 10, training loss: 0.008794, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 2/2 [00:00<00:00, 958.92it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 106/5210 [00:00<00:04, 1059.78it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1067.04it/s][A
testing:   6%|▌         | 321/5210 [00:00<00:04, 1066.76it/s][A
testing:   8%|▊         | 428/5210 [00:00<00:04, 1062.88it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1062.26it/s][A
testing:  12%|█▏        | 642/5210 [00:00<00:04, 1060.05it/s][A
testing:  14%|█▍        | 749/5210 [00:00<00:04, 1059.55it/s][A
testing:  16%|█▋        | 855/5210 [00:00<00:04, 1057.68it/s][A
testing:  18%|█▊        | 961/5210 [00:00<00:04, 1056.95it/s][A
testing:  20%|██        | 1067/5210 [00:01<00:03, 1057.41it/s][A
testing:  23%|██▎       | 1173/5210 [00:01<00:03, 1056.01it/s][A
testing:  25%|██▍       | 1279/5210 [00:01<00:03, 1055.60it/s][A
testing:  27%|██▋       | 1385/5210 [00:01<00:03, 1051.97it/s][A
testing:  29%|██▊       | 1491/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.044568, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 893.55it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1061.29it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1061.04it/s][A
testing:   6%|▌         | 321/5210 [00:00<00:04, 1063.88it/s][A
testing:   8%|▊         | 428/5210 [00:00<00:04, 1065.42it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1066.60it/s][A
testing:  12%|█▏        | 643/5210 [00:00<00:04, 1067.91it/s][A
testing:  14%|█▍        | 750/5210 [00:00<00:04, 1063.23it/s][A
testing:  16%|█▋        | 857/5210 [00:00<00:04, 1063.02it/s][A
testing:  19%|█▊        | 964/5210 [00:00<00:03, 1062.34it/s][A
testing:  21%|██        | 1071/5210 [00:01<00:03, 1061.18it/s][A
testing:  23%|██▎       | 1178/5210 [00:01<00:03, 1061.40it/s][A
testing:  25%|██▍       | 1285/5210 [00:01<00:03, 1063.45it/s][A
testing:  27%|██▋       | 1392/5210 [00:01<00:03, 1063.16it/s][A
testing:  29%|██▉       | 1499/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000643, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 648.77it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1062.94it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1061.25it/s][A
testing:   6%|▌         | 321/5210 [00:00<00:04, 1062.92it/s][A
testing:   8%|▊         | 428/5210 [00:00<00:04, 1064.93it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1065.74it/s][A
testing:  12%|█▏        | 642/5210 [00:00<00:04, 1065.67it/s][A
testing:  14%|█▍        | 749/5210 [00:00<00:04, 1066.56it/s][A
testing:  16%|█▋        | 857/5210 [00:00<00:04, 1067.68it/s][A
testing:  19%|█▊        | 964/5210 [00:00<00:03, 1064.99it/s][A
testing:  21%|██        | 1071/5210 [00:01<00:03, 1064.72it/s][A
testing:  23%|██▎       | 1178/5210 [00:01<00:03, 1065.89it/s][A
testing:  25%|██▍       | 1285/5210 [00:01<00:03, 1065.23it/s][A
testing:  27%|██▋       | 1392/5210 [00:01<00:03, 1065.53it/s][A
testing:  29%|██▉       | 1499/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000032, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 795.88it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1063.95it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1057.29it/s][A
testing:   6%|▌         | 321/5210 [00:00<00:04, 1061.10it/s][A
testing:   8%|▊         | 428/5210 [00:00<00:04, 1063.32it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1064.75it/s][A
testing:  12%|█▏        | 642/5210 [00:00<00:04, 1065.29it/s][A
testing:  14%|█▍        | 749/5210 [00:00<00:04, 1059.55it/s][A
testing:  16%|█▋        | 856/5210 [00:00<00:04, 1061.49it/s][A
testing:  18%|█▊        | 963/5210 [00:00<00:03, 1063.43it/s][A
testing:  21%|██        | 1070/5210 [00:01<00:03, 1062.84it/s][A
testing:  23%|██▎       | 1177/5210 [00:01<00:03, 1063.71it/s][A
testing:  25%|██▍       | 1284/5210 [00:01<00:03, 1063.04it/s][A
testing:  27%|██▋       | 1391/5210 [00:01<00:03, 1062.00it/s][A
testing:  29%|██▉       | 1498/5210 [00:

Lifelong PR-AUC: 0.9856491053888253, BWT: 0.0005282834578947959, FWT: 0.9160205394392686





In [16]:
R_replay = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000127, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 893.74it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1062.26it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1057.69it/s][A
testing:   6%|▌         | 320/5210 [00:00<00:04, 1054.07it/s][A
testing:   8%|▊         | 426/5210 [00:00<00:04, 1055.61it/s][A
testing:  10%|█         | 534/5210 [00:00<00:04, 1061.53it/s][A
testing:  12%|█▏        | 641/5210 [00:00<00:04, 1064.28it/s][A
testing:  14%|█▍        | 748/5210 [00:00<00:04, 1063.20it/s][A
testing:  16%|█▋        | 855/5210 [00:00<00:04, 1058.99it/s][A
testing:  18%|█▊        | 962/5210 [00:00<00:04, 1059.50it/s][A
testing:  21%|██        | 1069/5210 [00:01<00:03, 1060.79it/s][A
testing:  23%|██▎       | 1176/5210 [00:01<00:03, 1062.20it/s][A
testing:  25%|██▍       | 1283/5210 [00:01<00:03, 1064.24it/s][A
testing:  27%|██▋       | 1390/5210 [00:01<00:03, 1062.31it/s][A
testing:  29%|██▊       | 1497/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.004896, time: 0.0s
epoch 10, training loss: 0.007682, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 2/2 [00:00<00:00, 935.92it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1062.93it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1063.01it/s][A
testing:   6%|▌         | 321/5210 [00:00<00:04, 1065.88it/s][A
testing:   8%|▊         | 428/5210 [00:00<00:04, 1065.53it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1066.24it/s][A
testing:  12%|█▏        | 642/5210 [00:00<00:04, 1063.66it/s][A
testing:  14%|█▍        | 749/5210 [00:00<00:04, 1065.13it/s][A
testing:  16%|█▋        | 856/5210 [00:00<00:04, 1062.99it/s][A
testing:  18%|█▊        | 963/5210 [00:00<00:03, 1062.32it/s][A
testing:  21%|██        | 1070/5210 [00:01<00:03, 1063.48it/s][A
testing:  23%|██▎       | 1177/5210 [00:01<00:03, 1061.61it/s][A
testing:  25%|██▍       | 1284/5210 [00:01<00:03, 1061.43it/s][A
testing:  27%|██▋       | 1391/5210 [00:01<00:03, 1062.67it/s][A
testing:  29%|██▉       | 1498/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.005696, time: 0.0s
epoch 10, training loss: 0.006116, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 2/2 [00:00<00:00, 958.37it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 106/5210 [00:00<00:04, 1058.53it/s][A
testing:   4%|▍         | 213/5210 [00:00<00:04, 1061.80it/s][A
testing:   6%|▌         | 320/5210 [00:00<00:04, 1061.90it/s][A
testing:   8%|▊         | 427/5210 [00:00<00:04, 1062.65it/s][A
testing:  10%|█         | 534/5210 [00:00<00:04, 1063.62it/s][A
testing:  12%|█▏        | 641/5210 [00:00<00:04, 1063.34it/s][A
testing:  14%|█▍        | 748/5210 [00:00<00:04, 1063.92it/s][A
testing:  16%|█▋        | 855/5210 [00:00<00:04, 1063.15it/s][A
testing:  18%|█▊        | 962/5210 [00:00<00:03, 1064.98it/s][A
testing:  21%|██        | 1069/5210 [00:01<00:03, 1062.05it/s][A
testing:  23%|██▎       | 1176/5210 [00:01<00:03, 1055.72it/s][A
testing:  25%|██▍       | 1283/5210 [00:01<00:03, 1058.30it/s][A
testing:  27%|██▋       | 1390/5210 [00:01<00:03, 1060.57it/s][A
testing:  29%|██▊       | 1497/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.004558, time: 0.0s
epoch 10, training loss: 0.005264, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 2/2 [00:00<00:00, 925.38it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1063.02it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1059.15it/s][A
testing:   6%|▌         | 321/5210 [00:00<00:04, 1061.27it/s][A
testing:   8%|▊         | 428/5210 [00:00<00:04, 1063.40it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1064.90it/s][A
testing:  12%|█▏        | 642/5210 [00:00<00:04, 1063.93it/s][A
testing:  14%|█▍        | 749/5210 [00:00<00:04, 1062.34it/s][A
testing:  16%|█▋        | 856/5210 [00:00<00:04, 1061.56it/s][A
testing:  18%|█▊        | 963/5210 [00:00<00:03, 1062.38it/s][A
testing:  21%|██        | 1070/5210 [00:01<00:03, 1062.68it/s][A
testing:  23%|██▎       | 1177/5210 [00:01<00:03, 1061.71it/s][A
testing:  25%|██▍       | 1284/5210 [00:01<00:03, 1062.70it/s][A
testing:  27%|██▋       | 1391/5210 [00:01<00:03, 1062.90it/s][A
testing:  29%|██▉       | 1498/5210 [00:

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=43, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.017191, time: 0.0s
epoch 10, training loss: 0.003446, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 3/3 [00:00<00:00, 1035.37it/s]

testing:   0%|          | 0/5210 [00:00<?, ?it/s][A
testing:   2%|▏         | 107/5210 [00:00<00:04, 1069.31it/s][A
testing:   4%|▍         | 214/5210 [00:00<00:04, 1058.76it/s][A
testing:   6%|▌         | 321/5210 [00:00<00:04, 1062.32it/s][A
testing:   8%|▊         | 428/5210 [00:00<00:04, 1061.04it/s][A
testing:  10%|█         | 535/5210 [00:00<00:04, 1061.27it/s][A
testing:  12%|█▏        | 642/5210 [00:00<00:04, 1058.94it/s][A
testing:  14%|█▍        | 748/5210 [00:00<00:04, 1059.23it/s][A
testing:  16%|█▋        | 854/5210 [00:00<00:04, 1058.37it/s][A
testing:  18%|█▊        | 960/5210 [00:00<00:04, 1056.43it/s][A
testing:  20%|██        | 1066/5210 [00:01<00:03, 1055.06it/s][A
testing:  22%|██▏       | 1172/5210 [00:01<00:03, 1054.85it/s][A
testing:  25%|██▍       | 1279/5210 [00:01<00:03, 1057.10it/s][A
testing:  27%|██▋       | 1385/5210 [00:01<00:03, 1054.97it/s][A
testing:  29%|██▊       | 1491/5210 [00

Lifelong PR-AUC: 0.985353005392868, BWT: 3.835378325880434e-05, FWT: 0.9160214245775121



