# Imports

In [30]:
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
from tqdm import tqdm
import math

from tqdm import tqdm
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from copy import deepcopy

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.base import clone

from scipy.spatial.distance import cdist
from scipy.stats import ks_2samp
from scipy.optimize import minimize
from scipy.stats import wasserstein_distance

from sklearn.metrics import average_precision_score


In [31]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [32]:
# Load the pre-split feature/label arrays from disk.
X_train, y_train = np.load('data/x_train.npy'), np.load('data/y_train.npy')
X_test, y_test = np.load('data/x_test.npy'), np.load('data/y_test.npy')

# Merge both splits; the scenario code re-splits the data per concept later.
X = np.concatenate((X_train, X_test), axis=0)
y = np.concatenate((y_train, y_test), axis=0)

# Binarise the labels: original class 11 becomes 0, every other class becomes 1.
y = np.where(y != 11, 1, 0)

# Setup

In [33]:
def create_phi(normal_data, c):
    """
    Build the normal concepts by clustering the normal data with k-Means.

    Args:
        normal_data (numpy array): The normal data points, one row per sample.
        c (int): Number of desired normal concepts (k-Means clusters).

    Returns:
        list of numpy arrays: c arrays, the i-th holding the rows assigned
        to cluster label i.
    """
    clusterer = KMeans(n_clusters=c, random_state=42)
    assignments = clusterer.fit_predict(normal_data)

    # Group the rows by their cluster label, in label order 0..c-1.
    normal_concepts = []
    for cluster_id in range(c):
        normal_concepts.append(normal_data[assignments == cluster_id])

    print("Finished creating normal concepts")
    return normal_concepts


def create_gamma(anomaly_data, c):
    """
    Build the anomaly concepts by clustering the anomaly data with k-Means.

    Args:
        anomaly_data (numpy array): The anomaly data points, one row per sample.
        c (int): Number of desired anomaly concepts (k-Means clusters).

    Returns:
        list of numpy arrays: c arrays, the i-th holding the rows assigned
        to cluster label i.
    """
    cluster_of_row = KMeans(n_clusters=c, random_state=42).fit_predict(anomaly_data)

    # One boolean mask per cluster label selects that concept's rows.
    anomaly_concepts = []
    for label in range(c):
        in_cluster = cluster_of_row == label
        anomaly_concepts.append(anomaly_data[in_cluster])

    print("Finished creating anomaly concepts")
    return anomaly_concepts
    
def match_lambda(anomaly_concepts, normal_concepts):
    """
    Greedily pairs each normal concept with its closest unused anomaly concept.

    Normal concepts are processed in the given order. Each one is paired with
    the remaining anomaly concept whose centroid has the smallest Euclidean
    distance to the normal centroid, and that anomaly concept is then removed
    from the pool, so no anomaly concept is used twice. The result therefore
    depends on the order of `normal_concepts`.

    Args:
        anomaly_concepts (sequence of numpy arrays): Anomaly clusters. Not modified.
        normal_concepts (sequence of numpy arrays): Normal clusters.

    Returns:
        list of tuples: Pairs of (normal_concept, matched_anomaly_concept),
        one per normal concept.

    Raises:
        ValueError: If there are fewer anomaly concepts than normal concepts,
            since every normal concept needs its own anomaly concept.
    """
    if len(anomaly_concepts) < len(normal_concepts):
        raise ValueError(
            f"Need at least as many anomaly concepts as normal concepts, got "
            f"{len(anomaly_concepts)} anomaly and {len(normal_concepts)} normal"
        )

    # Work on copies so the caller's sequence is left untouched.
    remaining_anomalies = list(anomaly_concepts)
    # Centroids are computed once and kept index-aligned with remaining_anomalies.
    remaining_centroids = [np.mean(ac, axis=0) for ac in remaining_anomalies]

    pairs = []
    for normal_concept in normal_concepts:
        normal_centroid = np.mean(normal_concept, axis=0)

        distances = cdist([normal_centroid], remaining_centroids, metric='euclidean')[0]
        closest_idx = int(np.argmin(distances))

        pairs.append((normal_concept, remaining_anomalies.pop(closest_idx)))
        remaining_centroids.pop(closest_idx)

    print("Finished matching concept pairs")

    return pairs

def lifelong_roc_auc(R):
    """
    Aggregate a results matrix into a single lifelong score.

    The score is the sum of the entries on and below the main diagonal of R
    divided by N * (N + 1) / 2, the number of such entries in an NxN matrix.

    Args:
        R (numpy array): NxN results matrix, where R[i, j] is the model's
                         performance on concept j after learning concept i.

    Returns:
        float: Lifelong score.
    """
    n_concepts = R.shape[0]
    n_lower_entries = (n_concepts * (n_concepts + 1)) / 2

    # np.tril zeroes everything above the diagonal, so the sum covers j <= i only.
    return np.tril(R).sum() / n_lower_entries

def BWT(R):
    """
    Computes the Backward Transfer (BWT) score.

    Averages R[i, j] - R[j, j] over all pairs with j < i, i.e. how the score
    on concept j after learning a later concept i differs from the score
    right after learning concept j itself.

    Args:
        R (numpy array): NxN results matrix.

    Returns:
        float: BWT score (0 when there is no pair with j < i).
    """
    n_concepts = R.shape[0]
    deltas = [
        R[later, earlier] - R[earlier, earlier]
        for later in range(1, n_concepts)
        for earlier in range(later)
    ]

    if not deltas:
        return 0
    return sum(deltas) / len(deltas)

def FWT(R):
    """
    Computes the Forward Transfer (FWT) score.

    Averages the entries strictly above the main diagonal, i.e. R[i, j] for
    every pair with j > i (score on concept j after learning only up to i).

    Args:
        R (numpy array): NxN results matrix.

    Returns:
        float: FWT score (0 when there is no entry above the diagonal).
    """
    n_concepts = R.shape[0]
    upper_entries = [
        R[row, col]
        for row in range(n_concepts)
        for col in range(row + 1, n_concepts)
    ]

    if not upper_entries:
        return 0
    return sum(upper_entries) / len(upper_entries)

def kolmogorov_smirnov_test(X_old, X_new, alpha=0.05):
    """
    Flag drift when any feature fails a two-sample KS test.

    Each column of X_old is compared with the same column of X_new; drift is
    reported as soon as one p-value is below alpha (no multiple-testing
    correction is applied).
    """
    n_features = X_old.shape[1]
    p_values = np.empty(n_features)
    for col in range(n_features):
        p_values[col] = ks_2samp(X_old[:, col], X_new[:, col]).pvalue

    return np.any(p_values < alpha)

def histogram_binning(X, bins=25):
    """
    Build a density histogram for every feature column of X.

    Each column is binned independently over its own value range, so bin
    edges differ between columns and between calls. The result has one row
    per bin and one column per feature.
    """
    per_feature = []
    for col in range(X.shape[1]):
        densities, _edges = np.histogram(X[:, col], bins=bins, density=True)
        per_feature.append(densities)

    # Stacked as (n_features, bins); transpose to (bins, n_features).
    return np.transpose(np.array(per_feature))

def kl_divergence(P, Q):
    """
    Return sum(P * log(P / Q)) after flooring both inputs at 1e-10.

    The floor keeps the logarithm and the division finite when a bin is
    empty. The inputs are not re-normalised.
    """
    floor = 1e-10  # Avoid log(0) and division by zero
    p_safe = np.clip(P, floor, None)
    q_safe = np.clip(Q, floor, None)

    log_ratio = np.log(p_safe / q_safe)
    return np.sum(p_safe * log_ratio)

def strategic_sample_selection(X_old, X_new, top_k=100, learning_rate=0.01, num_iterations=100):
    """
    Selects rows of X_new by optimising a weight vector against a KL loss.

    Both inputs are turned into per-feature histograms. A weight vector with
    one entry per row of the new histogram is initialised at random and
    optimised with L-BFGS-B (bounded to [0, 1]) to minimise the KL divergence
    between the new histogram and the average of the old histogram and the
    weighted new histogram. The indices of the `top_k` largest weights are
    used to index X_new.

    NOTE(review): with `histogram_binning` as defined alongside this function
    the histograms have one row per *bin*, so the weight vector has one entry
    per bin rather than per sample. The selected indices are therefore always
    smaller than the number of bins and at most that many rows are returned,
    whatever `top_k` is. Confirm whether per-sample weights were intended.

    Args:
        X_old (numpy.ndarray): Old memory buffer samples.
        X_new (numpy.ndarray): Incoming new samples.
        top_k (int): Number of highest-weight indices to keep.
        learning_rate (float): Currently unused; kept for interface compatibility.
        num_iterations (int): Maximum number of optimiser iterations.

    Returns:
        numpy.ndarray: The rows of X_new at the selected indices.
    """

    H_old, H_new = histogram_binning(X_old), histogram_binning(X_new)
    # Random start: the result is not reproducible unless NumPy's global seed is set.
    m_n = np.random.rand(H_new.shape[0])

    def loss_function(m_n):
        """Computes KL divergence loss for the current weights."""
        weighted_H_new = H_new * m_n[:, np.newaxis]
        combined_H = (H_old + weighted_H_new) / 2
        return kl_divergence(H_new, combined_H)

    # The context manager closes the progress bar even if the optimiser raises.
    with tqdm(total=num_iterations, desc="Optimizing Sample Selection", position=0, leave=True) as progress_bar:

        def callback(xk):
            # Called by the optimiser once per completed iteration.
            progress_bar.update(1)

        result = minimize(loss_function, m_n, method="L-BFGS-B", bounds=[(0, 1)] * len(m_n),
                          options={"maxiter": num_iterations, "ftol": 1e-10}, callback=callback)

    # argsort is ascending, so the tail holds the largest weights.
    selected_indices = np.argsort(result.x)[-top_k:]

    return X_new[selected_indices]


def update_memory_buffer(X_old, X_new_selected, memory_size=3000):
    """
    Appends the selected new samples and trims the buffer to `memory_size` rows.

    Forgetting is first-in-first-out: when the stacked buffer is too large,
    rows are dropped from the front so the most recently appended rows survive.

    Args:
        X_old (numpy.ndarray): Current buffer, one row per sample.
        X_new_selected (numpy.ndarray): Rows to append (same number of columns).
        memory_size (int): Maximum number of rows to keep (>= 0).

    Returns:
        numpy.ndarray: The updated buffer with at most `memory_size` rows.

    Raises:
        ValueError: If `memory_size` is negative.
    """
    if memory_size < 0:
        raise ValueError(f"memory_size must be non-negative, got {memory_size}")

    updated_buffer = np.vstack((X_old, X_new_selected))

    # Slice by an explicit start offset: buffer[-memory_size:] would return
    # the whole buffer when memory_size is 0.
    overflow = updated_buffer.shape[0] - memory_size
    if overflow > 0:
        updated_buffer = updated_buffer[overflow:]

    return updated_buffer

class HierarchicalMemory:
    """
    Level-organised sample memory with a pyramidal budget.

    Concepts (2-D sample arrays) are stored under an integer level. The total
    budget `memory_limit` is divided between the levels present, level `lvl`
    getting a weight of 1 / pyramid_factor ** (lvl - 1), and each level's share
    is divided evenly between its concepts. A concept larger than its share is
    replaced by the samples closest to its k-Means centroids.
    """

    def __init__(self, memory_limit=5000, pyramid_factor=2, centroids_per_concept=10):
        """
        Args:
            memory_limit (int): Total sample budget shared by all levels.
            pyramid_factor (number): Base of the per-level weight decay.
            centroids_per_concept (int): Maximum number of representatives
                kept for a concept that has to be summarised.
        """
        self.memory_limit = memory_limit
        self.pyramid_factor = pyramid_factor
        self.centroids_per_concept = centroids_per_concept
        self.memory = {}  # level: [concept1, concept2, ...]

    def add_concept(self, data, level=1):
        """Store a copy of `data` under `level`, then re-summarise the whole memory."""
        self.memory.setdefault(level, []).append(np.array(data))
        self._summarize_memory()

    def _pyramidal_allocation(self):
        """Return {level: sample budget} for the levels currently in memory."""
        levels = sorted(self.memory)
        weights = np.array([1 / (self.pyramid_factor ** (lvl - 1)) for lvl in levels])
        allocations = (weights / weights.sum()) * self.memory_limit
        # int() truncates, so the budgets may sum to slightly less than memory_limit.
        return {lvl: int(alloc) for lvl, alloc in zip(levels, allocations)}

    def _summarize_concept(self, concept, n_samples):
        """
        Shrink `concept` to representative samples when it exceeds `n_samples`.

        A concept that already fits is returned unchanged. Otherwise k-Means is
        run with min(centroids_per_concept, n_samples) clusters and, for every
        centroid, the closest actual sample is kept (the same sample can be
        picked for more than one centroid). `n_samples` is expected to be >= 1.
        """
        if len(concept) <= n_samples:
            return concept
        # Never keep more representatives than this concept's budget allows.
        n_clusters = min(self.centroids_per_concept, n_samples)
        kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(concept)
        # One centroid at a time keeps the temporary at (n, d) instead of (n, k, d).
        closest_indices = [
            int(np.argmin(np.linalg.norm(concept - centroid, axis=1)))
            for centroid in kmeans.cluster_centers_
        ]
        return concept[closest_indices]

    def _summarize_memory(self):
        """Re-apply the per-concept budget to every stored concept."""
        allocations = self._pyramidal_allocation()
        for level, concepts in self.memory.items():
            # Every concept gets at least one slot, even if the level's share is tiny.
            alloc_per_concept = max(1, allocations[level] // len(concepts))
            # Only values of existing keys are replaced, which is safe while iterating.
            self.memory[level] = [
                self._summarize_concept(concept, alloc_per_concept)
                for concept in concepts
            ]

    def get_all_memory(self):
        """Return all stored samples stacked row-wise, or an empty 1-D array if none."""
        all_data = [
            concept
            for level_concepts in self.memory.values()
            for concept in level_concepts
        ]
        return np.vstack(all_data) if all_data else np.empty((0,))

def scenario_design(normal_data, anomaly_data, c):
    """
    Implements Algorithm 1 to create a lifelong learning scenario.

    The normal and the anomaly data are each partitioned into c concepts,
    and every normal concept is then matched with one anomaly concept.

    Args:
        normal_data (numpy array): The normal data points.
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired concepts.

    Returns:
        list of tuples: List of (normal_concept, anomaly_concept) pairs forming the scenario.
    """
    phi = create_phi(normal_data, c)
    gamma = create_gamma(anomaly_data, c)

    return match_lambda(gamma, phi)

def evaluation_protocol(T, E, Y, model, strategy="naive", replay_buffer_size=5000, memory_size=5000, alpha=0.05):
    """
    Implements Algorithm 2: Lifelong Learning Evaluation Protocol with multiple strategies.

    For every training set T[i] a fresh deep copy of `model` is fitted on the
    data chosen by `strategy` and then scored on every test set E[j].

    Strategies:
        "naive":        fit on T[i] only.
        "cumulative":   fit on T[0..i] concatenated.
        "replay":       fit on the replay buffer plus T[i]; the buffer keeps
                        the most recent `replay_buffer_size` samples.
        "SSF":          fit on a memory buffer that is extended with selected
                        new samples whenever the KS test reports drift.
        "hierarchical": fit on the summarised content of a HierarchicalMemory;
                        the level of each concept is derived from its mean
                        per-feature Wasserstein distance to the stored memory.

    Args:
        T (list): Sequence of N training sets.
        E (list): Sequence of N testing sets, each a (normal, anomaly) pair.
        Y (list): Sequence of true labels for test sets, each a (normal, anomaly) pair.
        model (sklearn.base.BaseEstimator): A scikit-learn-like model instance that supports `fit` and `decision_function`.
        strategy (str): Strategy for training (see above).
        replay_buffer_size (int): Maximum size of the replay buffer ("replay" only).
        memory_size (int): Maximum memory size ("SSF" and "hierarchical" only).
        alpha (float): KS-test threshold for drift detection ("SSF" only).

    Returns:
        numpy array: NxN results matrix R where R[i, j] is the average precision
        (PR-AUC) of the model on E[j] after learning T[i].

    Raises:
        ValueError: If `strategy` is not one of the supported names.
    """
    supported_strategies = ("naive", "cumulative", "replay", "SSF", "hierarchical")
    if strategy not in supported_strategies:
        # Without this check an unknown name would skip training and score an unfitted model.
        raise ValueError(f"Unknown strategy {strategy!r}; expected one of {supported_strategies}")

    N = len(T)
    R = np.zeros((N, N))

    # State carried across concepts; each strategy only touches its own.
    cumulative_data = []
    replay_buffer = []
    memory_buffer = None
    h_memory = None
    if strategy == "hierarchical":
        h_memory = HierarchicalMemory(memory_limit=memory_size, pyramid_factor=2, centroids_per_concept=10)

    total_inference_time = 0.0
    total_test_samples = 0

    for i, Ti in tqdm(enumerate(T), total=N, desc=f"Evaluating using {strategy} strategy"):
        # Always start from the unfitted template: nothing is carried over in the model itself.
        current_model = deepcopy(model)

        # -- NAIVE --
        if strategy == "naive":
            current_model.fit(Ti)

        # -- CUMULATIVE --
        elif strategy == "cumulative":
            cumulative_data.extend(Ti.tolist())
            current_model.fit(np.array(cumulative_data))

        # -- REPLAY --
        elif strategy == "replay":
            if replay_buffer:
                combined_data = np.vstack((np.array(replay_buffer), Ti))
            else:
                combined_data = Ti

            current_model.fit(combined_data)
            replay_buffer.extend(Ti.tolist())

            # Drop the oldest samples; an explicit count also handles a size of 0,
            # where buffer[-0:] would keep everything.
            overflow = len(replay_buffer) - replay_buffer_size
            if overflow > 0:
                del replay_buffer[:overflow]

        # -- SSF --
        elif strategy == "SSF":
            if memory_buffer is None:
                memory_buffer = Ti[:memory_size]
            else:
                drift_detected = kolmogorov_smirnov_test(memory_buffer, Ti, alpha)
                if drift_detected:
                    X_new_selected = strategic_sample_selection(memory_buffer, Ti, top_k=1000)
                    memory_buffer = update_memory_buffer(memory_buffer, X_new_selected, memory_size=memory_size)
            # NOTE(review): np.unique(axis=0) also sorts the rows, so the buffer
            # no longer keeps insertion order for the next trim - confirm intended.
            memory_buffer = np.unique(memory_buffer, axis=0)
            current_model.fit(memory_buffer)

        # -- HIERARCHICAL --
        else:
            memory_data = h_memory.get_all_memory()
            if memory_data.size == 0:
                drift_level = 1
            else:
                drift_distances = [
                    wasserstein_distance(Ti[:, d], memory_data[:, d])
                    for d in range(Ti.shape[1])
                ]
                drift_score = np.mean(drift_distances)
                print(f"drift: {drift_score}")

                # Larger drift puts the concept on a higher level.
                if drift_score < 0.05:
                    drift_level = 1
                elif drift_score < 0.1:
                    drift_level = 2
                elif drift_score < 0.2:
                    drift_level = 3
                else:
                    drift_level = 4

            h_memory.add_concept(Ti, level=drift_level)
            summarized_memory = h_memory.get_all_memory()
            current_model.fit(summarized_memory)

        # -- Evaluation --
        for j, ((Ej_normal, Ej_anomaly), (y_normal, y_anomaly)) in enumerate(zip(E, Y)):
            test_data = np.vstack((Ej_normal, Ej_anomaly))
            test_labels = np.hstack((y_normal, y_anomaly))

            start_time = time.perf_counter()
            # Negated so that larger scores go with the positive label passed by the caller.
            scores = -current_model.decision_function(test_data)
            total_inference_time += time.perf_counter() - start_time
            total_test_samples += len(test_data)

            R[i, j] = average_precision_score(test_labels, scores)

    # Per-sample average over all evaluations; NaN when nothing was evaluated.
    avg_inference_time = total_inference_time / total_test_samples if total_test_samples else float("nan")
    print(f"Average Inference Time per Evaluation: {avg_inference_time:.10f} seconds")

    return R


# Experiments

In [34]:
# Number of normal concepts and of anomaly concepts to create.
num_concepts = 5

# Split the samples by binary label: rows with y == 0 are treated as normal,
# rows with y == 1 as anomalies.
X_normal = X[y == 0]  
X_anomaly = X[y == 1]

# Cluster each group into concepts, then pair every normal concept with an
# anomaly concept.
normal_concepts = create_phi(X_normal, num_concepts)
anomaly_concepts = create_gamma(X_anomaly, num_concepts)

concept_pairs = match_lambda(anomaly_concepts, normal_concepts)

# T: training sets (normal samples only).
# E: (normal_test, anomaly_test) pairs, one per concept.
# Y: matching (zeros, ones) label pairs for E.
T = []  
E = [] 
Y = []

for normal, anomaly in concept_pairs:

    # 70/30 split with a fixed seed. anomaly_train is not used in this cell:
    # only normal samples are appended to T.
    normal_train, normal_test = train_test_split(normal, test_size=0.3, random_state=42)
    anomaly_train, anomaly_test = train_test_split(anomaly, test_size=0.3, random_state=42)  

    T.append(normal_train)
    E.append((normal_test, anomaly_test))

    # Label convention for scoring: 0 = normal, 1 = anomaly.
    y_normal_test = np.zeros(len(normal_test))
    y_anomaly_test = np.ones(len(anomaly_test))
    
    Y.append((y_normal_test, y_anomaly_test))

Finished creating normal concepts
Finished creating anomaly concepts
Finished matching concept pairs


# Eval

## LOF

In [19]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_hm = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  2.57it/s]

drift: 0.15575845828560167


Evaluating using hierarchical strategy: 2it [00:01,  1.17it/s]

drift: 0.10687482627406424


Evaluating using hierarchical strategy: 3it [00:02,  1.42it/s]

drift: 0.08550491766410032


Evaluating using hierarchical strategy: 4it [00:02,  1.31it/s]

drift: 0.14433198880791898


Evaluating using hierarchical strategy: 5it [00:03,  1.43it/s]

Lifelong ROC-AUC: 0.8010632635023135, BWT: -0.03189907514987032, FWT: 0.61397202689583





In [26]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_ssf = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


Evaluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 273.05it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 256.08it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 276.87it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 326.53it/s]

Evaluating using SSF strategy: 5it [00:05,  1.14s/it]

Lifelong ROC-AUC: 0.7544561432893502, BWT: -0.0039135441339650965, FWT: 0.45301001764958054





In [12]:
# LOF baseline: every concept is learned from its own training set only.
R_naive = evaluation_protocol(
    T, E, Y,
    model=LocalOutlierFactor(novelty=True, n_neighbors=20),
    strategy="naive",
)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:16,  3.27s/it]

Lifelong PR-AUC: 0.6259146979625797, BWT: -0.2298943824165936, FWT: 0.42150088732223623





In [7]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_cumulative = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="cumulative")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [02:10, 26.08s/it]

Lifelong ROC-AUC: 0.9238354062988797, BWT: -0.004499087364359666, FWT: 0.17379610432039533





In [19]:
# LOF with a bounded replay buffer of the most recent training samples.
R_replay = evaluation_protocol(
    T, E, Y,
    model=LocalOutlierFactor(novelty=True, n_neighbors=20),
    strategy="replay",
    replay_buffer_size=5000,
)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:19,  3.87s/it]

Average Inference Time per Evaluation: 0.0000153946 seconds
Lifelong PR-AUC: 0.7076565525940482, BWT: -0.1496000260952163, FWT: 0.3201269420302635





## IF

In [20]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_hm = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  1.35it/s]

drift: 0.15575845828560167


Evaluating using hierarchical strategy: 2it [00:02,  1.24s/it]

drift: 0.10687482627406424


Evaluating using hierarchical strategy: 3it [00:03,  1.11s/it]

drift: 0.08550491766410032


Evaluating using hierarchical strategy: 4it [00:04,  1.26s/it]

drift: 0.14433198880791898


Evaluating using hierarchical strategy: 5it [00:05,  1.16s/it]

Lifelong ROC-AUC: 0.6850161874595665, BWT: -0.034863433306860495, FWT: 0.6161049330175039





In [27]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_ssf = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


Evaluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 281.14it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 253.28it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 237.31it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 244.61it/s]

Evaluating using SSF strategy: 5it [00:05,  1.06s/it]

Lifelong ROC-AUC: 0.6559285630296237, BWT: 0.06868127809581366, FWT: 0.804018261100736





In [14]:
# Isolation Forest baseline: every concept is learned from its own training set only.
R_naive = evaluation_protocol(
    T, E, Y,
    model=IsolationForest(n_estimators=100),
    strategy="naive",
)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:03,  1.38it/s]

Lifelong PR-AUC: 0.6032600619774786, BWT: -0.1718058652463209, FWT: 0.6548393577005718





In [9]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_cumulative = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="cumulative")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:07,  1.55s/it]

Lifelong ROC-AUC: 0.753897896406868, BWT: -0.0066406078135951676, FWT: 0.7719174809547474





In [20]:
# Isolation Forest with a bounded replay buffer of the most recent training samples.
R_replay = evaluation_protocol(
    T, E, Y,
    model=IsolationForest(n_estimators=100),
    strategy="replay",
    replay_buffer_size=5000,
)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:04,  1.10it/s]

Average Inference Time per Evaluation: 0.0000027950 seconds
Lifelong PR-AUC: 0.623437461532319, BWT: -0.15620535333182275, FWT: 0.6756342504106363





## SGDOCSVM

In [21]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_hm = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  3.21it/s]

drift: 0.15575845828560167


Evaluating using hierarchical strategy: 3it [00:01,  1.91it/s]

drift: 0.10687482627406424
drift: 0.08550491766410032


Evaluating using hierarchical strategy: 5it [00:02,  1.83it/s]

drift: 0.14433198880791898
Lifelong ROC-AUC: 0.6133161578664278, BWT: -0.1228301210171924, FWT: 0.5027924783558533





In [28]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_ssf = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


Evaluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 288.13it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 307.55it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 306.85it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 271.76it/s]

Evaluating using SSF strategy: 5it [00:02,  2.14it/s]

Lifelong ROC-AUC: 0.790790866239933, BWT: -0.004091332504534584, FWT: 0.5467376505389003





In [16]:
# SGD one-class SVM baseline: every concept is learned from its own training set only.
R_naive = evaluation_protocol(
    T, E, Y,
    model=SGDOneClassSVM(),
    strategy="naive",
)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:00,  8.77it/s]

Lifelong PR-AUC: 0.6338207341822814, BWT: -0.15638596219766385, FWT: 0.5669742953319241





In [17]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_cumulative = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="cumulative")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:04,  1.03it/s]


Lifelong ROC-AUC: 0.597799790906325, BWT: -0.215598947769658, FWT: 0.6579349113663381


In [21]:
# SGD one-class SVM with a bounded replay buffer of the most recent training samples.
R_replay = evaluation_protocol(
    T, E, Y,
    model=SGDOneClassSVM(),
    strategy="replay",
    replay_buffer_size=5000,
)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:01,  2.77it/s]

Average Inference Time per Evaluation: 0.0000001217 seconds
Lifelong PR-AUC: 0.6296275394432779, BWT: -0.15245286514912806, FWT: 0.5503971510496488





## SLAD

In [22]:
from deepod.models.tabular import SLAD

In [8]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_hm = evaluation_protocol(T, E, Y, SLAD(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 11 12 13 14 16 17 18 19 20 22 23 24 25 26 27
 28 30 31 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55
 56 57]
epoch  1, training loss: 0.789064, time: 136.0s
epoch 10, training loss: 0.745528, time: 0.0s
epoch 20, training loss: 0.716392, time: 0.0s
epoch 30, training loss: 0.690674, time: 0.0s
epoch 40, training loss: 0.671540, time: 0.0s
epoch 50, training loss: 0.664055, time: 0.0s
epoch 60, training loss: 0.661995, time: 0.0s
epoch 70, training loss: 0.654417, time: 0.0s
epoch 80, training loss: 0.654082, time: 0.0s
epoch 90, training loss: 0.652842, time: 0.0s
epoch100, training loss: 0.648777, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 1it [05:54, 354.02s/it]

drift: 0.15575845828560167
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  9 10 11 12 13 15 16 18 19 20 21 22 23 24 25 26 27 28
 29 30 31 32 33 34 37 38 39 40 41 42 43 45 46 48 49 50 51 52 53 54 55 56
 57 58]
epoch  1, training loss: 0.821572, time: 0.0s
epoch 10, training loss: 0.729630, time: 0.0s
epoch 20, training loss: 0.705427, time: 0.0s
epoch 30, training loss: 0.697949, time: 0.0s
epoch 40, training loss: 0.695073, time: 0.0s
epoch 50, training loss: 0.693725, time: 0.0s
epoch 60, training loss: 0.693104, time: 0.0s
epoch 70, training loss: 0.692327, time: 0.0s
epoch 80, training loss: 0.692073, time: 0.0s
epoch 90, training loss: 0.692050, time: 0.0s
epoch100, training loss: 0.691381, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 2it [07:04, 187.13s/it]

drift: 0.10687482627406424
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6 10 11 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 29
 30 31 32 33 35 36 37 38 40 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
 57 58]
epoch  1, training loss: 0.763545, time: 0.0s
epoch 10, training loss: 0.663196, time: 0.0s
epoch 20, training loss: 0.637593, time: 0.0s
epoch 30, training loss: 0.629254, time: 0.0s
epoch 40, training loss: 0.626131, time: 0.0s
epoch 50, training loss: 0.623964, time: 0.0s
epoch 60, training loss: 0.622913, time: 0.0s
epoch 70, training loss: 0.622253, time: 0.0s
epoch 80, training loss: 0.621161, time: 0.0s
epoch 90, training loss: 0.621062, time: 0.0s
epoch100, training loss: 0.621025, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 3it [08:17, 134.97s/it]

drift: 0.08550491766410032
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  7  8  9 10 12 13 14 15 16 17 18 19 21 22 23 24 25 26 27
 29 31 32 33 34 35 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51 52 53 54
 56 58]
epoch  1, training loss: 0.792425, time: 0.0s
epoch 10, training loss: 0.687732, time: 0.1s
epoch 20, training loss: 0.678437, time: 0.0s
epoch 30, training loss: 0.675762, time: 0.0s
epoch 40, training loss: 0.674444, time: 0.0s
epoch 50, training loss: 0.673544, time: 0.0s
epoch 60, training loss: 0.674035, time: 0.0s
epoch 70, training loss: 0.673031, time: 0.0s
epoch 80, training loss: 0.673678, time: 0.0s
epoch 90, training loss: 0.673214, time: 0.0s
epoch100, training loss: 0.672836, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 4it [09:28, 109.66s/it]

drift: 0.14433198880791898
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  5  7  8  9 10 12 13 14 15 16 18 19 20 21 22 23 24 25 27 28 29
 30 31 33 34 35 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51 52 53 54 55
 57 58]
epoch  1, training loss: 0.771024, time: 0.0s
epoch 10, training loss: 0.658865, time: 0.0s
epoch 20, training loss: 0.647916, time: 0.0s
epoch 30, training loss: 0.643690, time: 0.0s
epoch 40, training loss: 0.642270, time: 0.0s
epoch 50, training loss: 0.641485, time: 0.0s
epoch 60, training loss: 0.641479, time: 0.0s
epoch 70, training loss: 0.641318, time: 0.0s
epoch 80, training loss: 0.640435, time: 0.0s
epoch 90, training loss: 0.639958, time: 0.0s
epoch100, training loss: 0.640519, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 5it [10:37, 127.52s/it]

Lifelong ROC-AUC: 0.16119022197491426, BWT: 0.055047207586719736, FWT: 0.22260804538321585





In [10]:
# evaluation_protocol fills R with average_precision_score values, so the
# aggregated score is labelled PR-AUC.
R_ssf = evaluation_protocol(T, E, Y, SLAD(), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 11 12 13 14 16 17 18 19 20 22 23 24 25 26 27
 28 30 31 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55
 56 57]
epoch  1, training loss: 0.689123, time: 0.9s
epoch 10, training loss: 0.610598, time: 0.9s
epoch 20, training loss: 0.610824, time: 0.9s
epoch 30, training loss: 0.610099, time: 0.9s
epoch 40, training loss: 0.609887, time: 0.9s
epoch 50, training loss: 0.609973, time: 0.9s
epoch 60, training loss: 0.609928, time: 0.2s
epoch 70, training loss: 0.610051, time: 0.2s
epoch 80, training loss: 0.610074, time: 0.2s
epoch 90, training loss: 0.610767, time: 0.2s
epoch100, training loss: 0.609855, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 242.42it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 2  3  4  5  6  8  9 10 11 12 13 14 16 17 18 19 20 21 22 23 24 26 27 28
 30 31 32 33 34 35 36 37 38 39 40 42 43 44 46 47 48 49 50 51 52 53 54 55
 56 57]
epoch  1, training loss: 0.683285, time: 0.9s
epoch 10, training loss: 0.606829, time: 0.9s
epoch 20, training loss: 0.606917, time: 0.2s
epoch 30, training loss: 0.606242, time: 0.2s
epoch 40, training loss: 0.605342, time: 0.2s
epoch 50, training loss: 0.606010, time: 0.2s
epoch 60, training loss: 0.605944, time: 0.2s
epoch 70, training loss: 0.605754, time: 0.2s
epoch 80, training loss: 0.605357, time: 0.2s
epoch 90, training loss: 0.605265, time: 0.3s
epoch100, training loss: 0.606159, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 249.32it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  4  5  7  9 10 11 12 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
 29 30 31 32 33 34 35 36 37 38 40 42 43 44 45 46 47 48 49 50 51 53 54 55
 56 58]
epoch  1, training loss: 0.693160, time: 0.9s
epoch 10, training loss: 0.618555, time: 0.9s
epoch 20, training loss: 0.617932, time: 0.9s
epoch 30, training loss: 0.618390, time: 0.2s
epoch 40, training loss: 0.618399, time: 0.2s
epoch 50, training loss: 0.617923, time: 0.2s
epoch 60, training loss: 0.618249, time: 0.2s
epoch 70, training loss: 0.617467, time: 0.2s
epoch 80, training loss: 0.617355, time: 0.2s
epoch 90, training loss: 0.616863, time: 0.2s
epoch100, training loss: 0.617548, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 302.18it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 12 13 14 15 16 17 18 19 21 22 23 25 26 27
 28 29 30 31 32 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 50 51 53 55
 56 58]
epoch  1, training loss: 0.713907, time: 0.9s
epoch 10, training loss: 0.641691, time: 0.3s
epoch 20, training loss: 0.639720, time: 0.2s
epoch 30, training loss: 0.639970, time: 0.9s
epoch 40, training loss: 0.639926, time: 0.9s
epoch 50, training loss: 0.640661, time: 0.9s
epoch 60, training loss: 0.640567, time: 0.2s
epoch 70, training loss: 0.640408, time: 0.2s
epoch 80, training loss: 0.640357, time: 0.9s
epoch 90, training loss: 0.639724, time: 0.2s
epoch100, training loss: 0.640057, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 288.45it/s]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  3  4  5  6  7  8  9 10 11 12 13 14 15 16 18 19 20 21 24 25 28 29 30
 31 32 33 34 35 36 37 38 39 40 41 43 44 45 46 47 48 49 50 51 52 53 54 55
 57 58]
epoch  1, training loss: 0.682585, time: 0.9s
epoch 10, training loss: 0.606542, time: 0.9s
epoch 20, training loss: 0.606012, time: 0.9s
epoch 30, training loss: 0.606196, time: 0.2s
epoch 40, training loss: 0.605747, time: 0.2s
epoch 50, training loss: 0.605817, time: 0.2s
epoch 60, training loss: 0.605292, time: 0.2s
epoch 70, training loss: 0.605695, time: 0.2s
epoch 80, training loss: 0.606173, time: 0.2s
epoch 90, training loss: 0.605267, time: 0.2s
epoch100, training loss: 0.605704, time: 0.2s
Start Inference on the training data...


Evaluating using SSF strategy: 5it [23:46, 285.36s/it]

Lifelong ROC-AUC: 0.15203513147863595, BWT: 0.00557263662112336, FWT: 0.3739874334612322





In [19]:
# Run evaluation_protocol on T, E, Y with a SLAD detector (epochs=10) under
# the "naive" strategy and keep whatever it returns in R_naive.
R_naive = evaluation_protocol(
    T,
    E,
    Y,
    SLAD(epochs=10),
    strategy="naive",
)
# Report three summary values derived from R_naive.
# NOTE(review): the label reads "PR-AUC" but the helper called is
# lifelong_roc_auc, and the ICL hierarchical/SSF cells print the same helper
# as "ROC-AUC" — confirm which metric is actually computed.
# BWT / FWT are presumably backward / forward transfer — verify against
# their definitions.
print(
    f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, "
    f"BWT: {BWT(R_naive)}, "
    f"FWT: {FWT(R_naive)}"
)

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 11 12 13 14 16 17 18 19 20 22 23 24 25 26 27
 28 30 31 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55
 56 57]
epoch  1, training loss: 0.651251, time: 131.3s
epoch 10, training loss: 0.623366, time: 0.7s
Start Inference on the training data...


Evaluating using naive strategy: 1it [05:44, 344.49s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  9 10 11 12 13 15 16 18 19 20 21 22 23 24 25 26 27 28
 29 30 31 32 33 34 37 38 39 40 41 42 43 45 46 48 49 50 51 52 53 54 55 56
 57 58]
epoch  1, training loss: 0.655849, time: 3.3s
epoch 10, training loss: 0.648188, time: 3.3s
Start Inference on the training data...


Evaluating using naive strategy: 2it [07:49, 215.65s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6 10 11 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 29
 30 31 32 33 35 36 37 38 40 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
 57 58]
epoch  1, training loss: 0.638845, time: 0.9s
epoch 10, training loss: 0.619092, time: 1.0s
Start Inference on the training data...


Evaluating using naive strategy: 3it [09:06, 152.28s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  7  8  9 10 12 13 14 15 16 17 18 19 21 22 23 24 25 26 27
 29 31 32 33 34 35 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51 52 53 54
 56 58]
epoch  1, training loss: 0.650727, time: 2.5s
epoch 10, training loss: 0.641749, time: 2.2s
Start Inference on the training data...


Evaluating using naive strategy: 4it [10:49, 132.67s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  5  7  8  9 10 12 13 14 15 16 18 19 20 21 22 23 24 25 27 28 29
 30 31 33 34 35 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51 52 53 54 55
 57 58]
epoch  1, training loss: 0.666014, time: 1.3s
epoch 10, training loss: 0.650179, time: 1.5s
Start Inference on the training data...


Evaluating using naive strategy: 5it [12:14, 146.90s/it]

Lifelong PR-AUC: 0.4402965545107119, BWT: 0.17382286318518067, FWT: 0.510222573327393





In [23]:
# Run evaluation_protocol on T, E, Y with a SLAD detector (epochs=10) under
# the "replay" strategy, passing replay_buffer_size=5000, and keep the
# result in R_replay.
R_replay = evaluation_protocol(
    T,
    E,
    Y,
    SLAD(epochs=10),
    strategy="replay",
    replay_buffer_size=5000,
)
# Report three summary values derived from R_replay.
# NOTE(review): the label reads "PR-AUC" but the helper called is
# lifelong_roc_auc, and the ICL hierarchical/SSF cells print the same helper
# as "ROC-AUC" — confirm which metric is actually computed.
print(
    f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, "
    f"BWT: {BWT(R_replay)}, "
    f"FWT: {FWT(R_replay)}"
)

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  9 10 11 12 13 14 16 17 18 19 20 22 23 24 25 26 27
 28 30 31 32 33 34 35 36 37 38 40 41 42 44 45 46 47 48 49 50 51 53 54 55
 56 57]
epoch  1, training loss: 0.651251, time: 0.9s
epoch 10, training loss: 0.623366, time: 0.7s
Start Inference on the training data...


Evaluating using replay strategy: 1it [01:27, 87.06s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6  9 10 11 12 13 15 16 18 19 20 21 22 23 24 25 26 27 28
 29 30 31 32 33 34 37 38 39 40 41 42 43 45 46 48 49 50 51 52 53 54 55 56
 57 58]
epoch  1, training loss: 0.632842, time: 3.4s
epoch 10, training loss: 0.624401, time: 3.4s
Start Inference on the training data...


Evaluating using replay strategy: 2it [03:49, 119.83s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  6 10 11 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 29
 30 31 32 33 35 36 37 38 40 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
 57 58]
epoch  1, training loss: 0.707000, time: 1.0s
epoch 10, training loss: 0.688656, time: 1.0s
Start Inference on the training data...


Evaluating using replay strategy: 3it [05:19, 106.16s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  4  5  7  8  9 10 12 13 14 15 16 17 18 19 21 22 23 24 25 26 27
 29 31 32 33 34 35 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51 52 53 54
 56 58]
epoch  1, training loss: 0.684525, time: 2.3s
epoch 10, training loss: 0.676974, time: 2.3s
Start Inference on the training data...


Evaluating using replay strategy: 4it [07:13, 109.03s/it]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 50, ensemble size: 20
len pool: [ 1  2  3  5  7  8  9 10 12 13 14 15 16 18 19 20 21 22 23 24 25 27 28 29
 30 31 33 34 35 36 37 38 39 40 41 42 43 45 46 47 48 49 50 51 52 53 54 55
 57 58]
epoch  1, training loss: 0.662475, time: 1.4s
epoch 10, training loss: 0.648362, time: 1.4s
Start Inference on the training data...


Evaluating using replay strategy: 5it [08:50, 106.16s/it]

Average Inference Time per Evaluation: 0.0003795277 seconds
Lifelong PR-AUC: 0.38988992723596877, BWT: 0.1048683185396457, FWT: 0.4867854268343099





# ICL

In [24]:
from deepod.models.tabular import ICL

In [10]:
# Run evaluation_protocol on T, E, Y with an ICL detector (epochs=100) under
# the "hierarchical" strategy, passing memory_size=5000 and alpha=0.05, and
# keep the result in R_hm.
# (What memory_size and alpha control is decided inside evaluation_protocol,
# which is defined elsewhere in this notebook.)
R_hm = evaluation_protocol(
    T,
    E,
    Y,
    ICL(epochs=100),
    strategy="hierarchical",
    memory_size=5000,
    alpha=0.05,
)
# Report three summary values derived from R_hm.
# BWT / FWT are presumably backward / forward transfer — verify against
# their definitions.
print(
    f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, "
    f"BWT: {BWT(R_hm)}, "
    f"FWT: {FWT(R_hm)}"
)

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing: 100%|██████████| 1/1 [00:00<00:00, 499.92it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 554.29it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  12%|█▏        | 64/520 [00:00<00:00, 639.41it/s][A
testing:  25%|██▌       | 130/520 [00:00<00:00, 648.97it/s][A
testing:  38%|███▊      | 196/520 [00:00<00:00, 653.34it/s][A
testing:  50%|█████     | 262/520 [00:00<00:00, 652.42it/s][A
testing:  63%|██████▎   | 328/520 [00:00<00:00, 651.79it/s][A
testing:  76%|███████▌  | 394/520 [00:00<00:00, 652.80it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 649.78it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 658.88it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 658.20it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 658.28it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 657.70it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 656.30it/s][A
testing:  76%|███████▌  | 

drift: 0.15575845828560167
Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features


testing: 100%|██████████| 1/1 [00:00<00:00, 522.33it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 549.21it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  12%|█▎        | 65/520 [00:00<00:00, 649.64it/s][A
testing:  25%|██▌       | 130/520 [00:00<00:00, 649.81it/s][A
testing:  38%|███▊      | 196/520 [00:00<00:00, 652.06it/s][A
testing:  50%|█████     | 262/520 [00:00<00:00, 652.39it/s][A
testing:  63%|██████▎   | 328/520 [00:00<00:00, 652.44it/s][A
testing:  76%|███████▌  | 394/520 [00:00<00:00, 653.37it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 652.87it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 659.43it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 657.99it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 657.72it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 657.34it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 655.98it/s][A
testing:  76%|███████▌  | 

drift: 0.10687482627406424
Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features


testing: 100%|██████████| 1/1 [00:00<00:00, 511.44it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 533.97it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 652.66it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 652.29it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 652.62it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 652.54it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 653.09it/s][A
testing:  76%|███████▌  | 396/520 [00:00<00:00, 653.00it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 652.69it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 656.29it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 655.36it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 654.74it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 654.78it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 654.88it/s][A
testing:  76%|███████▌  | 

drift: 0.08550491766410032
Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features


testing: 100%|██████████| 1/1 [00:00<00:00, 502.43it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 538.35it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 652.79it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 653.83it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 654.69it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 654.58it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 654.48it/s][A
testing:  76%|███████▌  | 396/520 [00:00<00:00, 654.65it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 654.76it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 656.54it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 655.23it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 654.84it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 655.20it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 653.65it/s][A
testing:  76%|███████▌  | 

drift: 0.14433198880791898
Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features


testing: 100%|██████████| 1/1 [00:00<00:00, 537.46it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 548.71it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 656.94it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 655.97it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 656.42it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 656.11it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 655.61it/s][A
testing:  76%|███████▌  | 396/520 [00:00<00:00, 655.49it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 655.77it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 66/520 [00:00<00:00, 657.39it/s][A
testing:  25%|██▌       | 132/520 [00:00<00:00, 656.05it/s][A
testing:  38%|███▊      | 198/520 [00:00<00:00, 657.47it/s][A
testing:  51%|█████     | 264/520 [00:00<00:00, 657.34it/s][A
testing:  63%|██████▎   | 330/520 [00:00<00:00, 657.43it/s][A
testing:  76%|███████▌  | 

Lifelong ROC-AUC: 0.16753311302147786, BWT: 0.053168243625636824, FWT: 0.32290299924103544





In [15]:
# Run evaluation_protocol on T, E, Y with an ICL detector (epochs=100) under
# the "SSF" strategy, passing memory_size=5000 and alpha=0.05, and keep the
# result in R_ssf.
# (What memory_size and alpha control is decided inside evaluation_protocol,
# which is defined elsewhere in this notebook.)
R_ssf = evaluation_protocol(
    T,
    E,
    Y,
    ICL(epochs=100),
    strategy="SSF",
    memory_size=5000,
    alpha=0.05,
)
# Report three summary values derived from R_ssf.
# BWT / FWT are presumably backward / forward transfer — verify against
# their definitions.
print(
    f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, "
    f"BWT: {BWT(R_ssf)}, "
    f"FWT: {FWT(R_ssf)}"
)

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 673.86it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 678.66it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 68/520 [00:00<00:00, 678.98it/s][A
testing:  26%|██▌       | 136/520 [00:00<00:00, 675.17it/s][A
testing:  39%|███▉      | 204/520 [00:00<00:00, 674.87it/s][A
testing:  52%|█████▏    | 272/520 [00:00<00:00, 673.63it/s][A
testing:  65%|██████▌   | 340/520 [00:00<00:00, 674.64it/s][A
testing:  78%|███████▊  | 408/520 [00:00<00:00, 674.27it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 675.17it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 68/520 [00:00<00:00, 677.68it/s][A
testing:  26%|██▌       | 136/520 [00:00<00:00, 672.38it/s][A
testing:  39%|███▉      | 204/520 [00:00<00:00, 674.05it/s][A
testing:  52%|█████▏    | 272/520 [00:00

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 677.20it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 680.49it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 682.00it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 680.62it/s][A
testing:  40%|███▉      | 207/520 [00:00<00:00, 680.62it/s][A
testing:  53%|█████▎    | 276/520 [00:00<00:00, 680.22it/s][A
testing:  66%|██████▋   | 345/520 [00:00<00:00, 680.29it/s][A
testing:  80%|███████▉  | 414/520 [00:00<00:00, 677.97it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 678.82it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 681.54it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 679.97it/s][A
testing:  40%|███▉      | 206/520 [00:00<00:00, 678.52it/s][A
testing:  53%|█████▎    | 274/520 [00:00

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 681.60it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 682.66it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 682.75it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 681.19it/s][A
testing:  40%|███▉      | 207/520 [00:00<00:00, 681.07it/s][A
testing:  53%|█████▎    | 276/520 [00:00<00:00, 680.71it/s][A
testing:  66%|██████▋   | 345/520 [00:00<00:00, 679.29it/s][A
testing:  80%|███████▉  | 414/520 [00:00<00:00, 679.68it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 679.57it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 685.67it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 683.08it/s][A
testing:  40%|███▉      | 207/520 [00:00<00:00, 681.63it/s][A
testing:  53%|█████▎    | 276/520 [00:00

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 683.06it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 679.62it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 681.32it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 682.38it/s][A
testing:  40%|███▉      | 207/520 [00:00<00:00, 680.56it/s][A
testing:  53%|█████▎    | 276/520 [00:00<00:00, 679.98it/s][A
testing:  66%|██████▌   | 344/520 [00:00<00:00, 679.42it/s][A
testing:  79%|███████▉  | 412/520 [00:00<00:00, 679.57it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 679.83it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 685.24it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 684.38it/s][A
testing:  40%|███▉      | 207/520 [00:00<00:00, 683.48it/s][A
testing:  53%|█████▎    | 276/520 [00:00

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 680.86it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 680.44it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 683.04it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 681.48it/s][A
testing:  40%|███▉      | 207/520 [00:00<00:00, 680.61it/s][A
testing:  53%|█████▎    | 276/520 [00:00<00:00, 680.26it/s][A
testing:  66%|██████▋   | 345/520 [00:00<00:00, 678.40it/s][A
testing:  79%|███████▉  | 413/520 [00:00<00:00, 677.01it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 677.37it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 69/520 [00:00<00:00, 682.75it/s][A
testing:  27%|██▋       | 138/520 [00:00<00:00, 679.19it/s][A
testing:  40%|███▉      | 206/520 [00:00<00:00, 678.66it/s][A
testing:  53%|█████▎    | 274/520 [00:00

Lifelong ROC-AUC: 0.17998268086177635, BWT: 0.022856113012170224, FWT: 0.475937638473041





In [22]:
# Run evaluation_protocol on T, E, Y with an ICL detector (epochs=100) under
# the "naive" strategy.
# This rebinds R_naive, which the SLAD "naive" cell earlier in the notebook
# also assigns; the earlier result is no longer reachable under this name.
R_naive = evaluation_protocol(
    T,
    E,
    Y,
    ICL(epochs=100),
    strategy="naive",
)
# Report three summary values derived from R_naive.
# NOTE(review): the label reads "PR-AUC" but the helper called is
# lifelong_roc_auc, and the ICL hierarchical/SSF cells print the same helper
# as "ROC-AUC" — confirm which metric is actually computed.
print(
    f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, "
    f"BWT: {BWT(R_naive)}, "
    f"FWT: {FWT(R_naive)}"
)

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/312 [00:00<?, ?it/s][A
testing:  20%|██        | 63/312 [00:00<00:00, 626.59it/s][A
testing:  40%|████      | 126/312 [00:00<00:00, 626.61it/s][A
testing:  61%|██████    | 189/312 [00:00<00:00, 623.88it/s][A
testing: 100%|██████████| 312/312 [00:00<00:00, 625.25it/s][A

testing:   0%|          | 0/312 [00:00<?, ?it/s][A
testing:  21%|██        | 64/312 [00:00<00:00, 631.61it/s][A
testing:  41%|████      | 128/312 [00:00<00:00, 629.59it/s][A
testing:  62%|██████▏   | 192/312 [00:00<00:00, 630.23it/s][A
testing: 100%|██████████| 312/312 [00:00<00:00, 630.25it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  12%|█▏        | 63/520 [00:00<00:00, 628.99it/s][A
testing:  24%|██▍       | 126/520 [00:00<00:00, 629.02it/s][A
testing:  37%|███▋      | 190/520 [00:00<00:00, 630.18it/s][A
testing:  49%|████▉     | 254/520 [00:00<00:00, 631.23it/s][A
testing:  61%|██████    | 318/520 [00:00<00:00, 630.77it/s][A
testing:  73%|███████▎  |

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/1472 [00:00<?, ?it/s][A
testing:   4%|▍         | 63/1472 [00:00<00:02, 629.68it/s][A
testing:   9%|▊         | 126/1472 [00:00<00:02, 627.09it/s][A
testing:  13%|█▎        | 189/1472 [00:00<00:02, 627.26it/s][A
testing:  17%|█▋        | 253/1472 [00:00<00:01, 628.28it/s][A
testing:  21%|██▏       | 316/1472 [00:00<00:01, 628.00it/s][A
testing:  26%|██▌       | 379/1472 [00:00<00:01, 628.55it/s][A
testing:  30%|███       | 442/1472 [00:00<00:01, 628.59it/s][A
testing:  34%|███▍      | 505/1472 [00:00<00:01, 627.84it/s][A
testing:  39%|███▊      | 569/1472 [00:00<00:01, 628.82it/s][A
testing:  43%|████▎     | 632/1472 [00:01<00:01, 628.69it/s][A
testing:  47%|████▋     | 695/1472 [00:01<00:01, 628.78it/s][A
testing:  51%|█████▏    | 758/1472 [00:01<00:01, 627.97it/s][A
testing:  56%|█████▌    | 822/1472 [00:01<00:01, 628.95it/s][A
testing:  60%|██████    | 885/1472 [00:01<00:00, 629.13it/s][A
testing:  64%|██████▍   | 948/1472 [00:01<00:00, 62

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/392 [00:00<?, ?it/s][A
testing:  16%|█▌        | 63/392 [00:00<00:00, 627.29it/s][A
testing:  32%|███▏      | 126/392 [00:00<00:00, 626.16it/s][A
testing:  48%|████▊     | 189/392 [00:00<00:00, 627.89it/s][A
testing:  64%|██████▍   | 252/392 [00:00<00:00, 628.20it/s][A
testing:  80%|████████  | 315/392 [00:00<00:00, 626.42it/s][A
testing: 100%|██████████| 392/392 [00:00<00:00, 624.56it/s][A

testing:   0%|          | 0/392 [00:00<?, ?it/s][A
testing:  16%|█▋        | 64/392 [00:00<00:00, 631.35it/s][A
testing:  33%|███▎      | 128/392 [00:00<00:00, 632.05it/s][A
testing:  49%|████▉     | 192/392 [00:00<00:00, 631.36it/s][A
testing:  65%|██████▌   | 256/392 [00:00<00:00, 630.51it/s][A
testing:  82%|████████▏ | 320/392 [00:00<00:00, 630.93it/s][A
testing: 100%|██████████| 392/392 [00:00<00:00, 630.16it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  12%|█▏        | 63/520 [00:00<00:00, 626.82it/s][A
testing:  24%|██▍       |

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/968 [00:00<?, ?it/s][A
testing:   7%|▋         | 64/968 [00:00<00:01, 632.79it/s][A
testing:  13%|█▎        | 128/968 [00:00<00:01, 628.86it/s][A
testing:  20%|█▉        | 191/968 [00:00<00:01, 627.50it/s][A
testing:  26%|██▌       | 254/968 [00:00<00:01, 628.38it/s][A
testing:  33%|███▎      | 317/968 [00:00<00:01, 627.96it/s][A
testing:  39%|███▉      | 380/968 [00:00<00:00, 627.33it/s][A
testing:  46%|████▌     | 443/968 [00:00<00:00, 628.03it/s][A
testing:  52%|█████▏    | 506/968 [00:00<00:00, 628.08it/s][A
testing:  59%|█████▉    | 570/968 [00:00<00:00, 629.18it/s][A
testing:  65%|██████▌   | 633/968 [00:01<00:00, 629.01it/s][A
testing:  72%|███████▏  | 696/968 [00:01<00:00, 628.23it/s][A
testing:  78%|███████▊  | 759/968 [00:01<00:00, 628.34it/s][A
testing:  85%|████████▍ | 822/968 [00:01<00:00, 628.44it/s][A
testing:  91%|█████████▏| 885/968 [00:01<00:00, 628.86it/s][A
testing: 100%|██████████| 968/968 [00:01<00:00, 628.09it/s][A

te

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/590 [00:00<?, ?it/s][A
testing:  11%|█         | 64/590 [00:00<00:00, 633.60it/s][A
testing:  22%|██▏       | 128/590 [00:00<00:00, 631.58it/s][A
testing:  33%|███▎      | 192/590 [00:00<00:00, 631.09it/s][A
testing:  43%|████▎     | 256/590 [00:00<00:00, 631.47it/s][A
testing:  54%|█████▍    | 320/590 [00:00<00:00, 630.99it/s][A
testing:  65%|██████▌   | 384/590 [00:00<00:00, 630.10it/s][A
testing:  76%|███████▌  | 448/590 [00:00<00:00, 630.40it/s][A
testing:  87%|████████▋ | 512/590 [00:00<00:00, 631.18it/s][A
testing: 100%|██████████| 590/590 [00:00<00:00, 630.47it/s][A

testing:   0%|          | 0/590 [00:00<?, ?it/s][A
testing:  11%|█         | 64/590 [00:00<00:00, 632.60it/s][A
testing:  22%|██▏       | 128/590 [00:00<00:00, 631.41it/s][A
testing:  33%|███▎      | 192/590 [00:00<00:00, 631.47it/s][A
testing:  43%|████▎     | 256/590 [00:00<00:00, 632.50it/s][A
testing:  54%|█████▍    | 320/590 [00:00<00:00, 632.63it/s][A
testing:  65%|

Lifelong PR-AUC: 0.4759803647973265, BWT: 0.21407887749450372, FWT: 0.679633574763101





In [25]:
# Run the evaluation protocol with an ICL detector (epochs=100) under the
# "replay" strategy (replay_buffer_size=5000) and keep the result in R_replay.
# T, E, Y, evaluation_protocol and ICL are defined outside this cell.
# presumably T/E/Y are the train / evaluation / label splits — verify against
# the cell that builds them.
R_replay = evaluation_protocol(T, E, Y, ICL(epochs=100), strategy="replay", replay_buffer_size=5000)
# Summarise the run with the lifelong score plus backward/forward transfer.
# NOTE(review): the printed label says "PR-AUC" but the value comes from
# lifelong_roc_auc() — confirm which metric that helper actually computes.
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/312 [00:00<?, ?it/s][A
testing:  22%|██▏       | 68/312 [00:00<00:00, 671.19it/s][A
testing:  44%|████▎     | 136/312 [00:00<00:00, 667.22it/s][A
testing:  65%|██████▌   | 203/312 [00:00<00:00, 667.38it/s][A
testing: 100%|██████████| 312/312 [00:00<00:00, 666.51it/s][A

testing:   0%|          | 0/312 [00:00<?, ?it/s][A
testing:  22%|██▏       | 68/312 [00:00<00:00, 671.55it/s][A
testing:  44%|████▎     | 136/312 [00:00<00:00, 670.84it/s][A
testing:  65%|██████▌   | 204/312 [00:00<00:00, 670.38it/s][A
testing: 100%|██████████| 312/312 [00:00<00:00, 668.76it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        | 67/520 [00:00<00:00, 668.55it/s][A
testing:  26%|██▌       | 134/520 [00:00<00:00, 664.79it/s][A
testing:  39%|███▊      | 201/520 [00:00<00:00, 665.42it/s][A
testing:  52%|█████▏    | 268/520 [00:00<00:00, 666.12it/s][A
testing:  64%|██████▍   | 335/520 [00:00<00:00, 666.89it/s][A
testing:  77%|███████▋  |

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/1550 [00:00<?, ?it/s][A
testing:   4%|▍         | 67/1550 [00:00<00:02, 665.83it/s][A
testing:   9%|▊         | 134/1550 [00:00<00:02, 665.24it/s][A
testing:  13%|█▎        | 201/1550 [00:00<00:02, 664.63it/s][A
testing:  17%|█▋        | 268/1550 [00:00<00:01, 665.64it/s][A
testing:  22%|██▏       | 335/1550 [00:00<00:01, 664.88it/s][A
testing:  26%|██▌       | 402/1550 [00:00<00:01, 665.03it/s][A
testing:  30%|███       | 469/1550 [00:00<00:01, 664.14it/s][A
testing:  35%|███▍      | 536/1550 [00:00<00:01, 663.48it/s][A
testing:  39%|███▉      | 603/1550 [00:00<00:01, 661.38it/s][A
testing:  43%|████▎     | 670/1550 [00:01<00:01, 661.70it/s][A
testing:  48%|████▊     | 737/1550 [00:01<00:01, 661.94it/s][A
testing:  52%|█████▏    | 804/1550 [00:01<00:01, 660.79it/s][A
testing:  56%|█████▌    | 871/1550 [00:01<00:01, 662.09it/s][A
testing:  61%|██████    | 938/1550 [00:01<00:00, 662.52it/s][A
testing:  65%|██████▍   | 1005/1550 [00:01<00:00, 6

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/470 [00:00<?, ?it/s][A
testing:  14%|█▍        | 67/470 [00:00<00:00, 667.61it/s][A
testing:  29%|██▊       | 134/470 [00:00<00:00, 668.05it/s][A
testing:  43%|████▎     | 201/470 [00:00<00:00, 668.52it/s][A
testing:  57%|█████▋    | 268/470 [00:00<00:00, 668.71it/s][A
testing:  71%|███████▏  | 335/470 [00:00<00:00, 667.50it/s][A
testing:  86%|████████▌ | 402/470 [00:00<00:00, 668.33it/s][A
testing: 100%|██████████| 470/470 [00:00<00:00, 666.93it/s][A

testing:   0%|          | 0/470 [00:00<?, ?it/s][A
testing:  14%|█▍        | 67/470 [00:00<00:00, 668.54it/s][A
testing:  29%|██▊       | 134/470 [00:00<00:00, 668.30it/s][A
testing:  43%|████▎     | 202/470 [00:00<00:00, 670.09it/s][A
testing:  57%|█████▋    | 270/470 [00:00<00:00, 670.75it/s][A
testing:  72%|███████▏  | 338/470 [00:00<00:00, 668.75it/s][A
testing: 100%|██████████| 470/470 [00:00<00:00, 669.24it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  13%|█▎        

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/1046 [00:00<?, ?it/s][A
testing:   7%|▋         | 68/1046 [00:00<00:01, 673.39it/s][A
testing:  13%|█▎        | 136/1046 [00:00<00:01, 671.81it/s][A
testing:  20%|█▉        | 204/1046 [00:00<00:01, 671.41it/s][A
testing:  26%|██▌       | 272/1046 [00:00<00:01, 671.18it/s][A
testing:  33%|███▎      | 340/1046 [00:00<00:01, 671.67it/s][A
testing:  39%|███▉      | 408/1046 [00:00<00:00, 670.58it/s][A
testing:  46%|████▌     | 476/1046 [00:00<00:00, 669.52it/s][A
testing:  52%|█████▏    | 543/1046 [00:00<00:00, 669.18it/s][A
testing:  58%|█████▊    | 611/1046 [00:00<00:00, 669.48it/s][A
testing:  65%|██████▍   | 678/1046 [00:01<00:00, 668.68it/s][A
testing:  71%|███████▏  | 746/1046 [00:01<00:00, 669.35it/s][A
testing:  78%|███████▊  | 813/1046 [00:01<00:00, 669.38it/s][A
testing:  84%|████████▍ | 880/1046 [00:01<00:00, 669.23it/s][A
testing:  91%|█████████ | 947/1046 [00:01<00:00, 669.33it/s][A
testing: 100%|██████████| 1046/1046 [00:01<00:00, 6

Start Training...
ensemble size: 2
kernel size: 10
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=48, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(49, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=10, out_features=50, bias=


testing:   0%|          | 0/668 [00:00<?, ?it/s][A
testing:  10%|█         | 68/668 [00:00<00:00, 672.53it/s][A
testing:  20%|██        | 136/668 [00:00<00:00, 670.55it/s][A
testing:  31%|███       | 204/668 [00:00<00:00, 670.81it/s][A
testing:  41%|████      | 272/668 [00:00<00:00, 671.52it/s][A
testing:  51%|█████     | 340/668 [00:00<00:00, 671.15it/s][A
testing:  61%|██████    | 408/668 [00:00<00:00, 671.25it/s][A
testing:  71%|███████▏  | 476/668 [00:00<00:00, 671.21it/s][A
testing:  81%|████████▏ | 544/668 [00:00<00:00, 670.80it/s][A
testing: 100%|██████████| 668/668 [00:00<00:00, 670.82it/s][A

testing:   0%|          | 0/668 [00:00<?, ?it/s][A
testing:  10%|█         | 68/668 [00:00<00:00, 671.29it/s][A
testing:  20%|██        | 136/668 [00:00<00:00, 672.77it/s][A
testing:  31%|███       | 204/668 [00:00<00:00, 672.74it/s][A
testing:  41%|████      | 272/668 [00:00<00:00, 673.04it/s][A
testing:  51%|█████     | 340/668 [00:00<00:00, 672.23it/s][A
testing:  61%|

Average Inference Time per Evaluation: 0.0000480988 seconds
Lifelong PR-AUC: 0.39922401499880816, BWT: 0.11964614686756707, FWT: 0.5010199439302738





# RCA

In [26]:
from deepod.models.tabular import RCA

In [12]:
# Run the evaluation protocol with an RCA detector (epochs=10) under the
# "hierarchical" strategy (memory_size=5000, alpha=0.05) and keep the result
# in R_hm. T, E, Y and evaluation_protocol are defined outside this cell.
# presumably alpha is the drift-detection threshold/significance level (the
# run logs "drift: ..." values) — verify against evaluation_protocol.
R_hm = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="hierarchical", memory_size=5000, alpha=0.05)
# Summarise the run with the lifelong score plus backward/forward transfer.
# NOTE(review): the same lifelong_roc_auc() helper is labelled "PR-AUC" in the
# ICL cells of this notebook — confirm which label is correct.
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1118.33it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.21it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.19it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.19it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.20it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.21it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.22it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.22it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.21it/s][A
 90%|█████████ | 9/10 [00:04<00:00,  2.21it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.21it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.59it/s][A
 20%|██        | 2/10 [00:01<00:05,  1.58it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.58it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.58it/s][A
 50%|█████     | 5/10 [00:03<00:03,  1.58it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.58it/s][A
 70%|███████   | 7/10 [00:04<00:01,  1.58it/s][A
 80%|███████

drift: 0.15575845828560167
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100,


100%|██████████| 10/10 [00:00<00:00, 1088.92it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.24it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.24it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.23it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.24it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.24it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.24it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.24it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.24it/s][A
 90%|█████████ | 9/10 [00:04<00:00,  2.24it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.24it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.58it/s][A
 20%|██        | 2/10 [00:01<00:05,  1.57it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.58it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.58it/s][A
 50%|█████     | 5/10 [00:03<00:03,  1.59it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.59it/s][A
 70%|███████   | 7/10 [00:04<00:01,  1.59it/s][A
 80%|███████

drift: 0.10687482627406424
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100,


100%|██████████| 10/10 [00:00<00:00, 1057.17it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.24it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.24it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.24it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.24it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.24it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.24it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.24it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.24it/s][A
 90%|█████████ | 9/10 [00:04<00:00,  2.24it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.24it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.60it/s][A
 20%|██        | 2/10 [00:01<00:05,  1.58it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.58it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.59it/s][A
 50%|█████     | 5/10 [00:03<00:03,  1.59it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.59it/s][A
 70%|███████   | 7/10 [00:04<00:01,  1.59it/s][A
 80%|███████

drift: 0.08550491766410032
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100,


100%|██████████| 10/10 [00:00<00:00, 1034.23it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.24it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.23it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.23it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.23it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.23it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.23it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.23it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.23it/s][A
 90%|█████████ | 9/10 [00:04<00:00,  2.23it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.23it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.59it/s][A
 20%|██        | 2/10 [00:01<00:05,  1.58it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.58it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.58it/s][A
 50%|█████     | 5/10 [00:03<00:03,  1.58it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.58it/s][A
 70%|███████   | 7/10 [00:04<00:01,  1.58it/s][A
 80%|███████

drift: 0.14433198880791898
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100,


100%|██████████| 10/10 [00:00<00:00, 993.84it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.23it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.22it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.22it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.22it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.22it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.22it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.22it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.22it/s][A
 90%|█████████ | 9/10 [00:04<00:00,  2.22it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.22it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.60it/s][A
 20%|██        | 2/10 [00:01<00:05,  1.59it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.59it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.59it/s][A
 50%|█████     | 5/10 [00:03<00:03,  1.59it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.59it/s][A
 70%|███████   | 7/10 [00:04<00:01,  1.59it/s][A
 80%|████████

Lifelong ROC-AUC: 0.46783162610323026, BWT: 0.013601554641246794, FWT: 0.35486222628635444





In [20]:
# Run the evaluation protocol with an RCA detector (epochs=10) under the
# "SSF" strategy (memory_size=5000, alpha=0.05) and keep the result in R_ssf.
# T, E, Y and evaluation_protocol are defined outside this cell.
# NOTE(review): what memory_size and alpha control for the SSF strategy is not
# visible here — check evaluation_protocol before changing them.
R_ssf = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
# Summarise the run with the lifelong score plus backward/forward transfer.
# NOTE(review): the same lifelong_roc_auc() helper is labelled "PR-AUC" in the
# ICL cells of this notebook — confirm which label is correct.
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.84it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.76it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.76it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.75it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.74it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.05it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.17it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.21it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.23it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.25it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.25it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.26it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.26it/s][A
 90%|█████████ | 9/10 [00:04<00:00,  2.26it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.24it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:02<00:21,  2.39s/it][A
 20%|██        | 2/10 [00:03<00:10,  1.35s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.79it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.75it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.76it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.76it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.73it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:03,  2.26it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.26it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.25it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.26it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.26it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.26it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.26it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.27it/s][A
 90%|█████████ | 9/10 [00:03<00:00,  2.26it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.26it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.62it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.61it/s][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.93it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.92it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.90it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.92it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.92it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:03,  2.29it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.29it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.29it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.28it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.28it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.28it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.27it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.27it/s][A
 90%|█████████ | 9/10 [00:03<00:00,  2.27it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.28it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.62it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.61it/s][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.86it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.71it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.72it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.71it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.69it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:03,  2.27it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.26it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.26it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.25it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.25it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.26it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.26it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.26it/s][A
 90%|█████████ | 9/10 [00:03<00:00,  2.26it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.26it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.60it/s][A
 20%|██        | 2/10 [00:01<00:05,  1.59it/s][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 15.13it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.92it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.87it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.84it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.83it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:03,  2.27it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.28it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.28it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.28it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.28it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.27it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.27it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.26it/s][A
 90%|█████████ | 9/10 [00:03<00:00,  2.26it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.27it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:02<00:19,  2.18s/it][A
 20%|██        | 2/10 [00:02<00:10,  1.27s/it][A
 30%|███       | 3/

Lifelong ROC-AUC: 0.19839448098273754, BWT: 0.00014011255011197598, FWT: 0.2979311747222437





In [25]:
# Run evaluation_protocol on an RCA model (epochs=10) with strategy="naive"
# and keep the result in R_naive. T, E and Y are defined outside this cell.
R_naive = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="naive")
# NOTE(review): the label printed here is "Lifelong PR-AUC", but the value is
# produced by lifelong_roc_auc(); other cells in this notebook print the same
# call as "Lifelong ROC-AUC". Confirm which metric the helper actually
# computes and align the label (or the helper name) accordingly.
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.54it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.53it/s][A
 30%|███       | 3/10 [00:00<00:01,  3.53it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.38it/s][A
 50%|█████     | 5/10 [00:01<00:01,  3.42it/s][A
 60%|██████    | 6/10 [00:01<00:01,  3.45it/s][A
 70%|███████   | 7/10 [00:02<00:00,  3.46it/s][A
 80%|████████  | 8/10 [00:02<00:00,  3.43it/s][A
 90%|█████████ | 9/10 [00:02<00:00,  3.39it/s][A
100%|██████████| 10/10 [00:02<00:00,  3.44it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.09it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.03it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.06it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.07it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.91it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.96it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.97it/s][A
 80%|████████  | 8/10 [00:04<00:00,  2.01it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:01<00:12,  1.37s/it][A
 20%|██        | 2/10 [00:02<00:10,  1.37s/it][A
 30%|███       | 3/10 [00:04<00:09,  1.37s/it][A
 40%|████      | 4/10 [00:05<00:08,  1.37s/it][A
 50%|█████     | 5/10 [00:06<00:06,  1.37s/it][A
 60%|██████    | 6/10 [00:08<00:05,  1.37s/it][A
 70%|███████   | 7/10 [00:09<00:04,  1.37s/it][A
 80%|████████  | 8/10 [00:10<00:02,  1.37s/it][A
 90%|█████████ | 9/10 [00:12<00:01,  1.37s/it][A
100%|██████████| 10/10 [00:13<00:00,  1.37s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  1.99it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.05it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.07it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.03it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.05it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.02it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.05it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.06it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:03,  2.78it/s][A
 20%|██        | 2/10 [00:00<00:02,  2.77it/s][A
 30%|███       | 3/10 [00:01<00:02,  2.77it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.70it/s][A
 50%|█████     | 5/10 [00:01<00:01,  2.72it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.74it/s][A
 70%|███████   | 7/10 [00:02<00:01,  2.68it/s][A
 80%|████████  | 8/10 [00:02<00:00,  2.71it/s][A
 90%|█████████ | 9/10 [00:03<00:00,  2.73it/s][A
100%|██████████| 10/10 [00:03<00:00,  2.73it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.00it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.05it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.02it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.04it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.05it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.03it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.05it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.06it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:08,  1.06it/s][A
 20%|██        | 2/10 [00:01<00:07,  1.04it/s][A
 30%|███       | 3/10 [00:02<00:06,  1.07it/s][A
 40%|████      | 4/10 [00:03<00:05,  1.04it/s][A
 50%|█████     | 5/10 [00:04<00:04,  1.04it/s][A
 60%|██████    | 6/10 [00:05<00:03,  1.07it/s][A
 70%|███████   | 7/10 [00:06<00:02,  1.08it/s][A
 80%|████████  | 8/10 [00:07<00:01,  1.06it/s][A
 90%|█████████ | 9/10 [00:08<00:00,  1.05it/s][A
100%|██████████| 10/10 [00:09<00:00,  1.06it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  1.96it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.03it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.05it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.07it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.07it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.07it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.08it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.08it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  1.85it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.82it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.83it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.82it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.83it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.84it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.72it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.76it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.78it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.80it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.04it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.96it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.02it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.02it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.05it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.06it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.95it/s][A
 80%|████████  | 8/10 [00:03<00:01,  1.99it/s][A
 90%|████████

Lifelong PR-AUC: 0.458677884288386, BWT: 0.190154328996587, FWT: 0.4224282129189828





In [27]:
# Run evaluation_protocol on an RCA model (epochs=10) with strategy="replay"
# and replay_buffer_size=5000; the result is kept in R_replay. T, E and Y are
# defined outside this cell.
R_replay = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="replay", replay_buffer_size=5000)
# NOTE(review): the label printed here is "Lifelong PR-AUC", but the value is
# produced by lifelong_roc_auc(); other cells in this notebook print the same
# call as "Lifelong ROC-AUC". Confirm which metric the helper actually
# computes and align the label (or the helper name) accordingly.
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:02,  3.74it/s][A
 20%|██        | 2/10 [00:00<00:02,  3.72it/s][A
 30%|███       | 3/10 [00:00<00:01,  3.72it/s][A
 40%|████      | 4/10 [00:01<00:01,  3.72it/s][A
 50%|█████     | 5/10 [00:01<00:01,  3.72it/s][A
 60%|██████    | 6/10 [00:01<00:01,  3.72it/s][A
 70%|███████   | 7/10 [00:01<00:00,  3.71it/s][A
 80%|████████  | 8/10 [00:02<00:00,  3.71it/s][A
 90%|█████████ | 9/10 [00:02<00:00,  3.71it/s][A
100%|██████████| 10/10 [00:02<00:00,  3.71it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.25it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.25it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.24it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.24it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.23it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.22it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.23it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.23it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:01<00:12,  1.34s/it][A
 20%|██        | 2/10 [00:02<00:10,  1.35s/it][A
 30%|███       | 3/10 [00:04<00:09,  1.35s/it][A
 40%|████      | 4/10 [00:05<00:08,  1.35s/it][A
 50%|█████     | 5/10 [00:06<00:06,  1.35s/it][A
 60%|██████    | 6/10 [00:08<00:05,  1.35s/it][A
 70%|███████   | 7/10 [00:09<00:04,  1.35s/it][A
 80%|████████  | 8/10 [00:10<00:02,  1.35s/it][A
 90%|█████████ | 9/10 [00:12<00:01,  1.35s/it][A
100%|██████████| 10/10 [00:13<00:00,  1.35s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.24it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.24it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.24it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.24it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.24it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.23it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.23it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.23it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:03,  2.50it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.49it/s][A
 30%|███       | 3/10 [00:01<00:02,  2.49it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.49it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.49it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.48it/s][A
 70%|███████   | 7/10 [00:02<00:01,  2.48it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.48it/s][A
 90%|█████████ | 9/10 [00:03<00:00,  2.48it/s][A
100%|██████████| 10/10 [00:04<00:00,  2.48it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.24it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.22it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.23it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.23it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.23it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.23it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.23it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.23it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:08,  1.12it/s][A
 20%|██        | 2/10 [00:01<00:07,  1.11it/s][A
 30%|███       | 3/10 [00:02<00:06,  1.11it/s][A
 40%|████      | 4/10 [00:03<00:05,  1.11it/s][A
 50%|█████     | 5/10 [00:04<00:04,  1.11it/s][A
 60%|██████    | 6/10 [00:05<00:03,  1.11it/s][A
 70%|███████   | 7/10 [00:06<00:02,  1.11it/s][A
 80%|████████  | 8/10 [00:07<00:01,  1.11it/s][A
 90%|█████████ | 9/10 [00:08<00:00,  1.11it/s][A
100%|██████████| 10/10 [00:09<00:00,  1.11it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.24it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.24it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.24it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.24it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.23it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.24it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.24it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.24it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=58, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.75it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.74it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.73it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.73it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.73it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.73it/s][A
 70%|███████   | 7/10 [00:04<00:01,  1.73it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.73it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.73it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.73it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  2.24it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.23it/s][A
 30%|███       | 3/10 [00:01<00:03,  2.23it/s][A
 40%|████      | 4/10 [00:01<00:02,  2.23it/s][A
 50%|█████     | 5/10 [00:02<00:02,  2.23it/s][A
 60%|██████    | 6/10 [00:02<00:01,  2.23it/s][A
 70%|███████   | 7/10 [00:03<00:01,  2.23it/s][A
 80%|████████  | 8/10 [00:03<00:00,  2.23it/s][A
 90%|████████

Average Inference Time per Evaluation: 0.0001351466 seconds
Lifelong PR-AUC: 0.37285924558567873, BWT: 0.060578728324494666, FWT: 0.39602854143028715





# RDP

In [28]:
from deepod.models.tabular import RDP

In [14]:
# Run evaluation_protocol on an RDP model (epochs=10) with
# strategy="hierarchical", memory_size=5000 and alpha=0.05; the result is kept
# in R_hm. T, E and Y are defined outside this cell.
R_hm = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="hierarchical", memory_size=5000, alpha=0.05)
# Report the three summary values derived from R_hm: lifelong_roc_auc, BWT, FWT.
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.6s
epoch 10, training loss: 0.000006, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 785.60it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 109/520 [00:00<00:00, 1082.03it/s][A
testing:  42%|████▏     | 218/520 [00:00<00:00, 1084.56it/s][A
testing:  63%|██████▎   | 327/520 [00:00<00:00, 1085.73it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1085.37it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▍        | 109/731 [00:00<00:00, 1089.27it/s][A
testing:  30%|██▉       | 218/731 [00:00<00:00, 1073.58it/s][A
testing:  45%|████▍     | 327/731 [00:00<00:00, 1078.79it/s][A
testing:  60%|█████▉    | 437/731 [00:00<00:00, 1084.30it/s][A
testing:  75%|███████▍  | 546/731 [00:00<00:00, 1086.24it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1085.02it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  27%|██▋       | 110/401 [00:00<00:00, 1090.46it/s][A
testing:  55%|█████▍    | 220/401 [00:00<00:00, 1087.53it/s][A
testing: 100%|██

drift: 0.15575845828560167
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000010, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 777.44it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 109/520 [00:00<00:00, 1089.95it/s][A
testing:  42%|████▏     | 218/520 [00:00<00:00, 1087.07it/s][A
testing:  63%|██████▎   | 328/520 [00:00<00:00, 1089.17it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1088.93it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▌        | 110/731 [00:00<00:00, 1093.73it/s][A
testing:  30%|███       | 220/731 [00:00<00:00, 1091.39it/s][A
testing:  45%|████▌     | 330/731 [00:00<00:00, 1088.36it/s][A
testing:  60%|██████    | 440/731 [00:00<00:00, 1089.92it/s][A
testing:  75%|███████▌  | 550/731 [00:00<00:00, 1090.94it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1090.84it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  27%|██▋       | 110/401 [00:00<00:00, 1093.47it/s][A
testing:  55%|█████▍    | 220/401 [00:00<00:00, 1092.99it/s][A
testing: 100%|██

drift: 0.10687482627406424
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000006, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 777.88it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 109/520 [00:00<00:00, 1085.75it/s][A
testing:  42%|████▏     | 218/520 [00:00<00:00, 1079.70it/s][A
testing:  63%|██████▎   | 327/520 [00:00<00:00, 1083.86it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1086.19it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▍        | 109/731 [00:00<00:00, 1088.12it/s][A
testing:  30%|██▉       | 218/731 [00:00<00:00, 1087.17it/s][A
testing:  45%|████▍     | 327/731 [00:00<00:00, 1087.53it/s][A
testing:  60%|█████▉    | 437/731 [00:00<00:00, 1088.46it/s][A
testing:  75%|███████▍  | 547/731 [00:00<00:00, 1090.04it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1088.11it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  27%|██▋       | 109/401 [00:00<00:00, 1087.73it/s][A
testing:  54%|█████▍    | 218/401 [00:00<00:00, 1083.16it/s][A
testing: 100%|██

drift: 0.08550491766410032
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000010, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 748.58it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 109/520 [00:00<00:00, 1084.60it/s][A
testing:  42%|████▏     | 218/520 [00:00<00:00, 1084.53it/s][A
testing:  63%|██████▎   | 327/520 [00:00<00:00, 1084.44it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1084.68it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▍        | 109/731 [00:00<00:00, 1088.20it/s][A
testing:  30%|██▉       | 219/731 [00:00<00:00, 1089.18it/s][A
testing:  45%|████▍     | 328/731 [00:00<00:00, 1088.69it/s][A
testing:  60%|█████▉    | 438/731 [00:00<00:00, 1090.15it/s][A
testing:  75%|███████▍  | 548/731 [00:00<00:00, 1090.68it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1089.72it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  27%|██▋       | 110/401 [00:00<00:00, 1090.48it/s][A
testing:  55%|█████▍    | 220/401 [00:00<00:00, 1090.10it/s][A
testing: 100%|██

drift: 0.14433198880791898
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000009, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 746.85it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 109/520 [00:00<00:00, 1089.98it/s][A
testing:  42%|████▏     | 218/520 [00:00<00:00, 1089.32it/s][A
testing:  63%|██████▎   | 327/520 [00:00<00:00, 1087.56it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1087.08it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▍        | 109/731 [00:00<00:00, 1087.25it/s][A
testing:  30%|██▉       | 218/731 [00:00<00:00, 1088.08it/s][A
testing:  45%|████▍     | 327/731 [00:00<00:00, 1088.09it/s][A
testing:  60%|█████▉    | 437/731 [00:00<00:00, 1090.22it/s][A
testing:  75%|███████▍  | 547/731 [00:00<00:00, 1093.05it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1090.08it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  27%|██▋       | 109/401 [00:00<00:00, 1086.07it/s][A
testing:  55%|█████▍    | 219/401 [00:00<00:00, 1088.95it/s][A
testing: 100%|██

Lifelong ROC-AUC: 0.3522134732574979, BWT: 0.042147218832008115, FWT: 0.3412602382945243





In [24]:
# Run the evaluation protocol with the "SSF" strategy on a newly constructed
# RDP model (epochs=10, memory_size=5000, alpha=0.05), keep the result in
# R_ssf, then print the three summary values computed from it.
R_ssf = evaluation_protocol(
    T,
    E,
    Y,
    RDP(epochs=10),
    strategy="SSF",
    memory_size=5000,
    alpha=0.05,
)
print(
    "Lifelong ROC-AUC: {}, BWT: {}, FWT: {}".format(
        lifelong_roc_auc(R_ssf),
        BWT(R_ssf),
        FWT(R_ssf),
    )
)

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000004, time: 0.9s
epoch 10, training loss: 0.000013, time: 0.9s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1141.60it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  22%|██▏       | 114/520 [00:00<00:00, 1136.97it/s][A
testing:  44%|████▍     | 228/520 [00:00<00:00, 1129.73it/s][A
testing:  66%|██████▌   | 341/520 [00:00<00:00, 1124.13it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1124.20it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▌        | 113/731 [00:00<00:00, 1125.13it/s][A
testing:  31%|███       | 226/731 [00:00<00:00, 1117.66it/s][A
testing:  46%|████▋     | 339/731 [00:00<00:00, 1119.14it/s][A
testing:  62%|██████▏   | 452/731 [00:00<00:00, 1121.13it/s][A
testing:  77%|███████▋  | 565/731 [00:00<00:00, 1118.49it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1120.68it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  28%|██▊       | 113/401 [00:00<00:00, 1120.84it/s][A
testing:  57%|█████▋    | 227/401 [00:00<00:00, 1127.91it/s][A
testing: 100%

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000004, time: 1.0s
epoch 10, training loss: 0.000014, time: 1.0s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1138.51it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  22%|██▏       | 113/520 [00:00<00:00, 1126.69it/s][A
testing:  43%|████▎     | 226/520 [00:00<00:00, 1125.71it/s][A
testing:  65%|██████▌   | 340/520 [00:00<00:00, 1128.49it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1124.55it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  16%|█▌        | 114/731 [00:00<00:00, 1132.53it/s][A
testing:  31%|███       | 228/731 [00:00<00:00, 1123.67it/s][A
testing:  47%|████▋     | 341/731 [00:00<00:00, 1126.04it/s][A
testing:  62%|██████▏   | 455/731 [00:00<00:00, 1127.41it/s][A
testing:  78%|███████▊  | 568/731 [00:00<00:00, 1122.34it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1122.77it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  28%|██▊       | 113/401 [00:00<00:00, 1127.93it/s][A
testing:  56%|█████▋    | 226/401 [00:00<00:00, 1128.62it/s][A
testing: 100%

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000004, time: 0.9s
epoch 10, training loss: 0.000013, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1122.81it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  22%|██▏       | 112/520 [00:00<00:00, 1111.28it/s][A
testing:  43%|████▎     | 224/520 [00:00<00:00, 1111.09it/s][A
testing:  65%|██████▍   | 337/520 [00:00<00:00, 1116.14it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1118.56it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▌        | 112/731 [00:00<00:00, 1118.94it/s][A
testing:  31%|███       | 226/731 [00:00<00:00, 1126.17it/s][A
testing:  46%|████▋     | 339/731 [00:00<00:00, 1124.66it/s][A
testing:  62%|██████▏   | 453/731 [00:00<00:00, 1129.44it/s][A
testing:  77%|███████▋  | 566/731 [00:00<00:00, 1126.14it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1124.09it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  28%|██▊       | 112/401 [00:00<00:00, 1114.07it/s][A
testing:  56%|█████▌    | 225/401 [00:00<00:00, 1120.21it/s][A
testing: 100%

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000004, time: 0.2s
epoch 10, training loss: 0.000015, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1118.74it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 108/520 [00:00<00:00, 1074.48it/s][A
testing:  42%|████▏     | 220/520 [00:00<00:00, 1100.08it/s][A
testing:  64%|██████▍   | 333/520 [00:00<00:00, 1111.53it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1113.77it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▌        | 112/731 [00:00<00:00, 1111.40it/s][A
testing:  31%|███       | 225/731 [00:00<00:00, 1119.36it/s][A
testing:  46%|████▌     | 338/731 [00:00<00:00, 1121.78it/s][A
testing:  62%|██████▏   | 451/731 [00:00<00:00, 1124.37it/s][A
testing:  77%|███████▋  | 564/731 [00:00<00:00, 1126.02it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1124.80it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  28%|██▊       | 114/401 [00:00<00:00, 1138.67it/s][A
testing:  57%|█████▋    | 228/401 [00:00<00:00, 1137.15it/s][A
testing: 100%

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000004, time: 0.2s
epoch 10, training loss: 0.000014, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1141.62it/s]

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  22%|██▏       | 114/520 [00:00<00:00, 1135.36it/s][A
testing:  44%|████▍     | 228/520 [00:00<00:00, 1131.47it/s][A
testing:  66%|██████▌   | 342/520 [00:00<00:00, 1130.89it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1131.13it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  16%|█▌        | 114/731 [00:00<00:00, 1131.35it/s][A
testing:  31%|███▏      | 229/731 [00:00<00:00, 1138.67it/s][A
testing:  47%|████▋     | 343/731 [00:00<00:00, 1138.88it/s][A
testing:  63%|██████▎   | 457/731 [00:00<00:00, 1138.11it/s][A
testing:  78%|███████▊  | 571/731 [00:00<00:00, 1138.14it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1138.55it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  28%|██▊       | 113/401 [00:00<00:00, 1127.75it/s][A
testing:  57%|█████▋    | 227/401 [00:00<00:00, 1129.35it/s][A
testing: 100%

Lifelong ROC-AUC: 0.22051690821185324, BWT: -0.16105950839576202, FWT: 0.48067775760587406





In [28]:
# Run the evaluation protocol with the "naive" strategy on a newly constructed
# RDP model (epochs=10), keep the result in R_naive, then print the three
# summary values computed from it.
# NOTE(review): the printed label says "PR-AUC" but the value comes from
# lifelong_roc_auc() -- confirm which metric that helper actually computes
# and align the label or the helper name accordingly.
R_naive = evaluation_protocol(
    T,
    E,
    Y,
    RDP(epochs=10),
    strategy="naive",
)
print(
    "Lifelong PR-AUC: {}, BWT: {}, FWT: {}".format(
        lifelong_roc_auc(R_naive),
        BWT(R_naive),
        FWT(R_naive),
    )
)

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000008, time: 5.1s
epoch 10, training loss: 0.000014, time: 4.5s
Start Inference on the training data...



testing:   0%|          | 0/312 [00:00<?, ?it/s][A
testing:  33%|███▎      | 102/312 [00:00<00:00, 1012.94it/s][A
testing:  65%|██████▌   | 204/312 [00:00<00:00, 1013.88it/s][A
testing: 100%|██████████| 312/312 [00:00<00:00, 1012.42it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  20%|█▉        | 103/520 [00:00<00:00, 1022.57it/s][A
testing:  40%|███▉      | 206/520 [00:00<00:00, 979.99it/s] [A
testing:  59%|█████▊    | 305/520 [00:00<00:00, 711.88it/s][A
testing:  78%|███████▊  | 406/520 [00:00<00:00, 804.47it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 854.21it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  14%|█▍        | 102/731 [00:00<00:00, 1012.91it/s][A
testing:  28%|██▊       | 204/731 [00:00<00:00, 1013.50it/s][A
testing:  42%|████▏     | 306/731 [00:00<00:00, 1016.30it/s][A
testing:  56%|█████▌    | 408/731 [00:00<00:00, 1014.90it/s][A
testing:  70%|██████▉   | 510/731 [00:00<00:00, 1015.28it/s][A
testing:  84

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000011, time: 21.0s
epoch 10, training loss: 0.000013, time: 20.9s
Start Inference on the training data...



testing:   0%|          | 0/1472 [00:00<?, ?it/s][A
testing:   4%|▎         | 54/1472 [00:00<00:02, 512.16it/s][A
testing:  10%|▉         | 143/1472 [00:00<00:01, 726.75it/s][A
testing:  17%|█▋        | 245/1472 [00:00<00:01, 856.83it/s][A
testing:  24%|██▎       | 347/1472 [00:00<00:01, 919.71it/s][A
testing:  31%|███       | 449/1472 [00:00<00:01, 954.39it/s][A
testing:  37%|███▋      | 551/1472 [00:00<00:00, 975.84it/s][A
testing:  44%|████▍     | 654/1472 [00:00<00:00, 992.65it/s][A
testing:  51%|█████▏    | 757/1472 [00:00<00:00, 1001.93it/s][A
testing:  58%|█████▊    | 860/1472 [00:00<00:00, 1008.67it/s][A
testing:  65%|██████▌   | 963/1472 [00:01<00:00, 1012.55it/s][A
testing:  72%|███████▏  | 1066/1472 [00:01<00:00, 1016.12it/s][A
testing:  79%|███████▉  | 1169/1472 [00:01<00:00, 1018.63it/s][A
testing:  86%|████████▋ | 1271/1472 [00:01<00:00, 769.78it/s] [A
testing:  93%|█████████▎| 1363/1472 [00:01<00:00, 806.06it/s][A
testing: 100%|██████████| 1472/1472 [00:0

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000010, time: 5.5s
epoch 10, training loss: 0.000017, time: 5.8s
Start Inference on the training data...



testing:   0%|          | 0/392 [00:00<?, ?it/s][A
testing:  22%|██▏       | 86/392 [00:00<00:00, 853.80it/s][A
testing:  44%|████▍     | 172/392 [00:00<00:00, 834.14it/s][A
testing:  65%|██████▌   | 256/392 [00:00<00:00, 794.75it/s][A
testing: 100%|██████████| 392/392 [00:00<00:00, 868.13it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  20%|█▉        | 102/520 [00:00<00:00, 1014.69it/s][A
testing:  39%|███▉      | 204/520 [00:00<00:00, 1010.39it/s][A
testing:  59%|█████▉    | 306/520 [00:00<00:00, 1011.36it/s][A
testing:  79%|███████▊  | 409/520 [00:00<00:00, 1014.74it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1012.94it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  14%|█▍        | 103/731 [00:00<00:00, 1026.44it/s][A
testing:  28%|██▊       | 206/731 [00:00<00:00, 1021.65it/s][A
testing:  42%|████▏     | 309/731 [00:00<00:00, 1017.94it/s][A
testing:  56%|█████▌    | 411/731 [00:00<00:00, 867.41it/s] [A
testing:  69%|

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000011, time: 13.2s
epoch 10, training loss: 0.000013, time: 14.3s
Start Inference on the training data...



testing:   0%|          | 0/968 [00:00<?, ?it/s][A
testing:  11%|█         | 103/968 [00:00<00:00, 1024.68it/s][A
testing:  21%|██▏       | 206/968 [00:00<00:00, 1023.20it/s][A
testing:  32%|███▏      | 309/968 [00:00<00:00, 1023.24it/s][A
testing:  43%|████▎     | 412/968 [00:00<00:00, 1024.75it/s][A
testing:  53%|█████▎    | 515/968 [00:00<00:00, 1025.21it/s][A
testing:  64%|██████▍   | 618/968 [00:00<00:00, 1025.48it/s][A
testing:  74%|███████▍  | 721/968 [00:00<00:00, 973.75it/s] [A
testing:  85%|████████▌ | 824/968 [00:00<00:00, 988.43it/s][A
testing: 100%|██████████| 968/968 [00:00<00:00, 1006.65it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  20%|█▉        | 102/520 [00:00<00:00, 1018.41it/s][A
testing:  39%|███▉      | 205/520 [00:00<00:00, 1021.06it/s][A
testing:  59%|█████▉    | 308/520 [00:00<00:00, 1020.85it/s][A
testing:  79%|███████▉  | 411/520 [00:00<00:00, 1019.23it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1017.93it/s][A

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000009, time: 8.7s
epoch 10, training loss: 0.000012, time: 8.4s
Start Inference on the training data...



testing:   0%|          | 0/590 [00:00<?, ?it/s][A
testing:  17%|█▋        | 102/590 [00:00<00:00, 1015.88it/s][A
testing:  35%|███▍      | 204/590 [00:00<00:00, 1011.11it/s][A
testing:  52%|█████▏    | 306/590 [00:00<00:00, 1010.76it/s][A
testing:  69%|██████▉   | 408/590 [00:00<00:00, 1009.80it/s][A
testing: 100%|██████████| 590/590 [00:00<00:00, 1010.10it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  20%|█▉        | 102/520 [00:00<00:00, 1019.10it/s][A
testing:  39%|███▉      | 204/520 [00:00<00:00, 1018.27it/s][A
testing:  59%|█████▉    | 306/520 [00:00<00:00, 1017.98it/s][A
testing:  79%|███████▊  | 409/520 [00:00<00:00, 1019.20it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1017.26it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  10%|▉         | 72/731 [00:00<00:00, 717.29it/s][A
testing:  24%|██▍       | 174/731 [00:00<00:00, 894.74it/s][A
testing:  38%|███▊      | 277/731 [00:00<00:00, 954.29it/s][A
testing:  52%

Lifelong PR-AUC: 0.47709673629284577, BWT: 0.23318726648307506, FWT: 0.5024503301170068





In [29]:
# Run the evaluation protocol with the "replay" strategy on a newly
# constructed RDP model (epochs=10, replay_buffer_size=5000), keep the result
# in R_replay, then print the three summary values computed from it.
# NOTE(review): the printed label says "PR-AUC" but the value comes from
# lifelong_roc_auc() -- confirm which metric that helper actually computes
# and align the label or the helper name accordingly.
R_replay = evaluation_protocol(
    T,
    E,
    Y,
    RDP(epochs=10),
    strategy="replay",
    replay_buffer_size=5000,
)
print(
    "Lifelong PR-AUC: {}, BWT: {}, FWT: {}".format(
        lifelong_roc_auc(R_replay),
        BWT(R_replay),
        FWT(R_replay),
    )
)

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000008, time: 0.9s
epoch 10, training loss: 0.000014, time: 0.9s
Start Inference on the training data...



testing:   0%|          | 0/312 [00:00<?, ?it/s][A
testing:  35%|███▍      | 108/312 [00:00<00:00, 1079.02it/s][A
testing: 100%|██████████| 312/312 [00:00<00:00, 1074.37it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 108/520 [00:00<00:00, 1074.56it/s][A
testing:  42%|████▏     | 216/520 [00:00<00:00, 1076.62it/s][A
testing:  62%|██████▏   | 324/520 [00:00<00:00, 1075.81it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1078.52it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▍        | 108/731 [00:00<00:00, 1070.95it/s][A
testing:  30%|██▉       | 217/731 [00:00<00:00, 1076.86it/s][A
testing:  44%|████▍     | 325/731 [00:00<00:00, 1078.19it/s][A
testing:  59%|█████▉    | 433/731 [00:00<00:00, 1077.80it/s][A
testing:  74%|███████▍  | 542/731 [00:00<00:00, 1077.89it/s][A
testing: 100%|██████████| 731/731 [00:00<00:00, 1078.61it/s][A

testing:   0%|          | 0/401 [00:00<?, ?it/s][A
testing:  27%|██▋   

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000013, time: 4.6s
epoch 10, training loss: 0.000015, time: 4.6s
Start Inference on the training data...



testing:   0%|          | 0/1550 [00:00<?, ?it/s][A
testing:   7%|▋         | 109/1550 [00:00<00:01, 1084.10it/s][A
testing:  14%|█▍        | 218/1550 [00:00<00:01, 1080.12it/s][A
testing:  21%|██        | 327/1550 [00:00<00:01, 1079.30it/s][A
testing:  28%|██▊       | 436/1550 [00:00<00:01, 1081.38it/s][A
testing:  35%|███▌      | 545/1550 [00:00<00:00, 1081.95it/s][A
testing:  42%|████▏     | 654/1550 [00:00<00:00, 1081.28it/s][A
testing:  49%|████▉     | 763/1550 [00:00<00:00, 1080.26it/s][A
testing:  56%|█████▋    | 872/1550 [00:00<00:00, 1080.26it/s][A
testing:  63%|██████▎   | 981/1550 [00:00<00:00, 1080.01it/s][A
testing:  70%|███████   | 1090/1550 [00:01<00:00, 1079.28it/s][A
testing:  77%|███████▋  | 1198/1550 [00:01<00:00, 1079.00it/s][A
testing:  84%|████████▍ | 1306/1550 [00:01<00:00, 1078.93it/s][A
testing:  91%|█████████ | 1414/1550 [00:01<00:00, 1073.64it/s][A
testing: 100%|██████████| 1550/1550 [00:01<00:00, 1078.17it/s][A

testing:   0%|          | 0/52

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000012, time: 2.8s
epoch 10, training loss: 0.000017, time: 1.6s
Start Inference on the training data...



testing:   0%|          | 0/470 [00:00<?, ?it/s][A
testing:  23%|██▎       | 109/470 [00:00<00:00, 1083.51it/s][A
testing:  46%|████▋     | 218/470 [00:00<00:00, 1079.51it/s][A
testing:  69%|██████▉   | 326/470 [00:00<00:00, 1078.98it/s][A
testing: 100%|██████████| 470/470 [00:00<00:00, 1077.56it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 109/520 [00:00<00:00, 1083.61it/s][A
testing:  42%|████▏     | 218/520 [00:00<00:00, 1085.79it/s][A
testing:  63%|██████▎   | 327/520 [00:00<00:00, 1084.48it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1086.74it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▍        | 109/731 [00:00<00:00, 1080.26it/s][A
testing:  30%|██▉       | 218/731 [00:00<00:00, 1083.07it/s][A
testing:  45%|████▍     | 327/731 [00:00<00:00, 1081.93it/s][A
testing:  60%|█████▉    | 436/731 [00:00<00:00, 1084.17it/s][A
testing:  75%|███████▍  | 546/731 [00:00<00:00, 1087.18it/s][A
testing: 

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000011, time: 3.8s
epoch 10, training loss: 0.000013, time: 3.0s
Start Inference on the training data...



testing:   0%|          | 0/1046 [00:00<?, ?it/s][A
testing:  10%|█         | 109/1046 [00:00<00:00, 1084.78it/s][A
testing:  21%|██        | 218/1046 [00:00<00:01, 640.54it/s] [A
testing:  31%|███       | 326/1046 [00:00<00:00, 785.50it/s][A
testing:  42%|████▏     | 435/1046 [00:00<00:00, 881.15it/s][A
testing:  52%|█████▏    | 544/1046 [00:00<00:00, 944.95it/s][A
testing:  62%|██████▏   | 652/1046 [00:00<00:00, 986.56it/s][A
testing:  73%|███████▎  | 760/1046 [00:00<00:00, 1014.06it/s][A
testing:  83%|████████▎ | 869/1046 [00:00<00:00, 1034.26it/s][A
testing: 100%|██████████| 1046/1046 [00:01<00:00, 962.92it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 108/520 [00:00<00:00, 1078.72it/s][A
testing:  42%|████▏     | 217/520 [00:00<00:00, 1083.91it/s][A
testing:  63%|██████▎   | 326/520 [00:00<00:00, 1083.63it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1084.55it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
tes

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=58, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000013, time: 2.7s
epoch 10, training loss: 0.000018, time: 2.2s
Start Inference on the training data...



testing:   0%|          | 0/668 [00:00<?, ?it/s][A
testing:  16%|█▌        | 107/668 [00:00<00:00, 1065.19it/s][A
testing:  32%|███▏      | 215/668 [00:00<00:00, 1069.52it/s][A
testing:  48%|████▊     | 323/668 [00:00<00:00, 1070.36it/s][A
testing:  65%|██████▍   | 431/668 [00:00<00:00, 1068.60it/s][A
testing:  81%|████████  | 539/668 [00:00<00:00, 1070.14it/s][A
testing: 100%|██████████| 668/668 [00:00<00:00, 1068.59it/s][A

testing:   0%|          | 0/520 [00:00<?, ?it/s][A
testing:  21%|██        | 109/520 [00:00<00:00, 1081.25it/s][A
testing:  42%|████▏     | 218/520 [00:00<00:00, 1079.54it/s][A
testing:  63%|██████▎   | 326/520 [00:00<00:00, 1075.58it/s][A
testing: 100%|██████████| 520/520 [00:00<00:00, 1077.00it/s][A

testing:   0%|          | 0/731 [00:00<?, ?it/s][A
testing:  15%|█▍        | 107/731 [00:00<00:00, 1066.96it/s][A
testing:  29%|██▉       | 215/731 [00:00<00:00, 1070.38it/s][A
testing:  44%|████▍     | 323/731 [00:00<00:00, 1071.00it/s][A
testing: 

Average Inference Time per Evaluation: 0.0000151288 seconds
Lifelong PR-AUC: 0.3634034199662845, BWT: 0.06243627900950712, FWT: 0.3210049019414214



