# Imports

In [1]:
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
from tqdm import tqdm
import math

from tqdm import tqdm
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from copy import deepcopy

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.base import clone

from scipy.spatial.distance import cdist
from scipy.stats import ks_2samp
from scipy.optimize import minimize
from scipy.stats import wasserstein_distance

from sklearn.metrics import average_precision_score


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Load the pre-split feature/label arrays from disk.
X_train = np.load('data/x_train.npy')
y_train = np.load('data/y_train.npy')

X_test = np.load('data/x_test.npy')
y_test = np.load('data/y_test.npy')

# Merge the original train/test split into one pool; the concept-based split
# is rebuilt later from this pooled data.
X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

# Binarise the labels: original class 7 becomes 0, every other class becomes 1.
# (Later cells treat y == 0 as normal and y == 1 as anomalous.)
y = np.where(y == 7, 0, 1)

# Setup

In [4]:
def create_phi(normal_data, c):
    """
    Partition the normal data into ``c`` concepts with k-Means.

    Args:
        normal_data (numpy array): The normal data points.
        c (int): Number of normal concepts (clusters) to build.

    Returns:
        list of numpy arrays: ``c`` arrays; entry ``k`` holds the rows of
        ``normal_data`` that k-Means assigned to cluster ``k``.
    """
    # A fixed seed keeps the partition reproducible between runs.
    clusterer = KMeans(n_clusters=c, random_state=42)
    assignments = clusterer.fit_predict(normal_data)

    normal_concepts = []
    for cluster_id in range(c):
        normal_concepts.append(normal_data[assignments == cluster_id])

    print("Finished creating normal concepts")
    return normal_concepts


def create_gamma(anomaly_data, c):
    """
    Partition the anomaly data into ``c`` concepts with k-Means.

    Args:
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of anomaly concepts (clusters) to build.

    Returns:
        list of numpy arrays: ``c`` arrays; entry ``k`` holds the rows of
        ``anomaly_data`` that k-Means assigned to cluster ``k``.
    """
    # Same seed as the normal-concept clustering, for reproducibility.
    cluster_ids = KMeans(n_clusters=c, random_state=42).fit_predict(anomaly_data)

    # Boolean-mask the data once per cluster label.
    masks = (cluster_ids == k for k in range(c))
    anomaly_concepts = [anomaly_data[mask] for mask in masks]

    print("Finished creating anomaly concepts")
    return anomaly_concepts
    
def match_lambda(anomaly_concepts, normal_concepts):
    """
    Greedily pair each normal concept with its nearest unused anomaly concept.

    Normal concepts are processed in the given order. Each one takes the
    anomaly concept whose centroid is closest (Euclidean distance) to its own
    centroid, and that anomaly concept is then unavailable to later normal
    concepts. The input lists are not modified.

    Args:
        anomaly_concepts (list of numpy arrays): List of anomaly clusters.
        normal_concepts (list of numpy arrays): List of normal clusters.

    Returns:
        list of tuples: Pairs of (normal_concept, matched_anomaly_concept)
    """
    # Anomaly centroids never change during matching, so compute them once
    # and track which concepts are still free by index.
    centroids = [np.mean(concept, axis=0) for concept in anomaly_concepts]
    unused = list(range(len(anomaly_concepts)))
    pairs = []

    for normal_concept in normal_concepts:
        target = np.mean(normal_concept, axis=0)
        candidates = [centroids[k] for k in unused]

        gaps = cdist([target], candidates, metric='euclidean')[0]
        picked = unused.pop(np.argmin(gaps))

        pairs.append((normal_concept, anomaly_concepts[picked]))

    print("Finished matching concept pairs")

    return pairs

def lifelong_roc_auc(R):
    """
    Average the on-or-below-diagonal entries of a results matrix.

    Args:
        R (numpy array): NxN matrix of scores, where R[i, j] is the model's
                         performance on concept j after learning concept i.

    Returns:
        float: Sum of the lower triangle of R (diagonal included) divided by
        the number of such entries, N * (N + 1) / 2.
    """
    n_concepts = R.shape[0]

    # np.tril zeroes everything above the diagonal, so a plain sum only
    # counts concepts the model had already seen when it was evaluated.
    seen_total = np.tril(R).sum()
    n_seen_entries = (n_concepts * (n_concepts + 1)) / 2

    return seen_total / n_seen_entries

def BWT(R):
    """
    Computes the Backward Transfer (BWT) score.

    For every pair i > j this averages R[i, j] - R[j, j]: the score on
    concept j after learning a later concept i, minus the score on concept j
    right after learning it.

    Args:
        R (numpy array): NxN results matrix.

    Returns:
        float: BWT score (0 when there is no pair, i.e. N < 2).
    """
    size = R.shape[0]

    # Strictly-lower-triangle positions, in row-major order.
    later, earlier = np.tril_indices(size, k=-1)
    n_pairs = later.size
    if n_pairs == 0:
        return 0

    total = 0
    for i, j in zip(later, earlier):
        total += (R[i, j] - R[j, j])

    return total / n_pairs

def FWT(R):
    """
    Computes the Forward Transfer (FWT) score.

    This is the mean of the entries strictly above the diagonal, i.e. the
    scores R[i, j] with j > i (concept j evaluated after learning only up to
    concept i).

    Args:
        R (numpy array): NxN results matrix.

    Returns:
        float: FWT score (0 when there is no such entry, i.e. N < 2).
    """
    size = R.shape[0]

    # Strictly-upper-triangle positions, in row-major order.
    learned, upcoming = np.triu_indices(size, k=1)
    n_pairs = learned.size
    if n_pairs == 0:
        return 0

    running = 0
    for i, j in zip(learned, upcoming):
        running += R[i, j]

    return running / n_pairs

def kolmogorov_smirnov_test(X_old, X_new, alpha=0.05):
    """Flag concept drift with a per-feature two-sample KS test.

    Each column of X_old is compared with the same column of X_new. Drift is
    reported as soon as any single feature has a p-value below alpha; no
    multiple-testing correction is applied.
    """
    n_features = X_old.shape[1]
    p_values = np.empty(n_features)

    for col in range(n_features):
        p_values[col] = ks_2samp(X_old[:, col], X_new[:, col]).pvalue

    drifted = p_values < alpha
    return np.any(drifted)

def histogram_binning(X, bins=25):
    """Build one density histogram per feature column of X.

    Returns an array with one row per bin and one column per feature. Each
    feature is binned independently over its own value range.
    """
    per_feature = []
    for col in range(X.shape[1]):
        densities, _edges = np.histogram(X[:, col], bins=bins, density=True)
        per_feature.append(densities)

    # Stacked as (features, bins); transpose so bins run along axis 0.
    return np.array(per_feature).T

def kl_divergence(P, Q):
    """Return sum(P * log(P / Q)) after flooring both inputs at 1e-10.

    The inputs are used as given (they are not renormalised); the floor only
    prevents log(0) and division by zero.
    """
    floor = 1e-10
    p_safe = np.clip(P, floor, None)
    q_safe = np.clip(Q, floor, None)

    log_ratio = np.log(p_safe / q_safe)
    return np.sum(p_safe * log_ratio)

def strategic_sample_selection(X_old, X_new, top_k=100, learning_rate=0.01, num_iterations=100):
    """
    Selects new samples by optimising a weight vector against a KL-divergence loss.

    Both inputs are turned into per-feature density histograms, a random
    weight vector is optimised with L-BFGS-B under [0, 1] bounds, and the rows
    of ``X_new`` addressed by the ``top_k`` largest weights are returned.

    NOTE(review): the weight vector has one entry per histogram *bin*
    (``H_new.shape[0]``), not one per row of ``X_new``. The selected indices
    therefore lie in ``range(bins)`` and only ever address the first ``bins``
    rows of ``X_new``; at most ``bins`` rows are returned however large
    ``top_k`` is. Confirm whether per-sample weights were intended.

    NOTE(review): ``X_old`` and ``X_new`` are binned independently, so their
    histograms may use different bin edges — confirm this is acceptable.

    Args:
        X_old (numpy.ndarray): Old memory buffer samples.
        X_new (numpy.ndarray): Incoming new samples.
        top_k (int): Number of highest-weight indices to keep.
        learning_rate (float): Currently unused; kept for interface
            compatibility (L-BFGS-B chooses its own step size).
        num_iterations (int): Maximum number of optimizer iterations.

    Returns:
        numpy.ndarray: Selected rows of ``X_new``.
    """

    H_old, H_new = histogram_binning(X_old), histogram_binning(X_new)
    m_n = np.random.rand(H_new.shape[0])

    def loss_function(m_n):
        """Computes KL divergence loss for the current weights."""
        weighted_H_new = H_new * m_n[:, np.newaxis]
        combined_H = (H_old + weighted_H_new) / 2
        return kl_divergence(H_new, combined_H)

    # The context manager closes the bar even when the optimizer raises.
    with tqdm(total=num_iterations, desc="Optimizing Sample Selection", position=0, leave=True) as progress_bar:

        def callback(xk):
            progress_bar.update(1)

        result = minimize(loss_function, m_n, method="L-BFGS-B", bounds=[(0, 1)] * len(m_n),
                          options={"maxiter": num_iterations, "ftol": 1e-10}, callback=callback)

    order = np.argsort(result.x)
    # Slice from an explicit start: order[-0:] would return every index
    # instead of none when top_k == 0.
    start = max(len(order) - top_k, 0)
    selected_indices = order[start:]

    return X_new[selected_indices]


def update_memory_buffer(X_old, X_new_selected, memory_size=3000):
    """Append the selected samples and keep only the newest ``memory_size`` rows.

    The new rows are stacked below the old ones. When the result exceeds
    ``memory_size``, the oldest rows (top of the buffer) are dropped.

    Args:
        X_old (numpy.ndarray): Current buffer contents.
        X_new_selected (numpy.ndarray): Rows to append.
        memory_size (int): Maximum number of rows to retain.

    Returns:
        numpy.ndarray: The updated buffer with at most ``memory_size`` rows.
    """
    updated_buffer = np.vstack((X_old, X_new_selected))

    n_rows = updated_buffer.shape[0]
    if n_rows > memory_size:
        # Slice from an explicit start index: buffer[-0:] would keep every
        # row instead of none when memory_size == 0.
        updated_buffer = updated_buffer[n_rows - memory_size:]

    return updated_buffer

class HierarchicalMemory:
    """Level-structured sample memory with a fixed overall budget.

    Concepts (2-D sample arrays) are stored per integer level. The budget
    ``memory_limit`` is split across the occupied levels with weights
    ``1 / pyramid_factor ** (level - 1)``, so lower level numbers receive a
    larger share. A concept larger than its share is replaced by the samples
    closest to its k-Means centroids.
    """

    def __init__(self, memory_limit=5000, pyramid_factor=2, centroids_per_concept=10):
        self.memory_limit = memory_limit
        self.pyramid_factor = pyramid_factor
        self.centroids_per_concept = centroids_per_concept
        self.memory = {}  # level -> list of concept arrays

    def add_concept(self, data, level=1):
        """Store ``data`` as a new concept at ``level`` and re-summarize all levels."""
        self.memory.setdefault(level, []).append(np.array(data))
        self._summarize_memory()

    def _pyramidal_allocation(self):
        """Return ``{level: row budget}`` for every level currently in use."""
        levels = sorted(self.memory.keys())
        weights = np.array([1 / (self.pyramid_factor ** (lvl - 1)) for lvl in levels])
        total_weight = weights.sum()
        allocations = (weights / total_weight) * self.memory_limit
        # int() truncates, so the budgets can sum to slightly less than memory_limit.
        return {lvl: int(alloc) for lvl, alloc in zip(levels, allocations)}

    def _summarize_concept(self, concept, n_samples):
        """Return ``concept`` unchanged if it fits in ``n_samples`` rows, else a summary.

        The summary holds, for each k-Means centroid, the stored sample
        nearest to it. The same sample can be picked for two centroids.
        """
        if len(concept) <= n_samples:
            return concept
        # Cap the cluster count by the allocation as well: without the cap a
        # concept with a budget below centroids_per_concept would come back
        # with more rows than it was allotted. len(concept) > n_samples here,
        # so the cap also keeps n_clusters <= len(concept).
        n_clusters = max(1, min(self.centroids_per_concept, n_samples))
        kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(concept)
        centroids = kmeans.cluster_centers_
        # distances[s, k] = Euclidean distance from sample s to centroid k.
        distances = np.linalg.norm(concept[:, None] - centroids, axis=2)
        closest_indices = np.argmin(distances, axis=0)
        summarized = concept[closest_indices]
        return summarized

    def _summarize_memory(self):
        """Shrink every stored concept to its equal share of its level's budget."""
        allocations = self._pyramidal_allocation()
        # Only existing keys are reassigned, so the dict size is stable while iterating.
        for level, concepts in self.memory.items():
            # Every concept gets at least one row, even if the level's budget is tiny.
            alloc_per_concept = max(1, allocations[level] // len(concepts))
            self.memory[level] = [
                self._summarize_concept(concept, alloc_per_concept)
                for concept in concepts
            ]

    def get_all_memory(self):
        """Return all stored samples stacked row-wise, or an empty 1-D array if none."""
        all_data = [
            concept
            for level_concepts in self.memory.values()
            for concept in level_concepts
        ]
        return np.vstack(all_data) if all_data else np.empty((0,))

def scenario_design(normal_data, anomaly_data, c):
    """
    Build a lifelong-learning scenario (Algorithm 1).

    The normal and the anomaly data are each clustered into ``c`` concepts,
    and every normal concept is then paired with an anomaly concept.

    Args:
        normal_data (numpy array): The normal data points.
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired concepts.

    Returns:
        list of tuples: List of (normal_concept, anomaly_concept) pairs forming the scenario.
    """
    # Keyword arguments are evaluated left to right, so the normal concepts
    # are still created (and reported) before the anomaly concepts.
    return match_lambda(
        normal_concepts=create_phi(normal_data, c),
        anomaly_concepts=create_gamma(anomaly_data, c),
    )

def evaluation_protocol(T, E, Y, model, strategy="naive", replay_buffer_size=5000, memory_size=5000, alpha=0.05):
    """
    Implements Algorithm 2: Lifelong Learning Evaluation Protocol with multiple strategies.

    For every training set T[i], a fresh copy of ``model`` is fitted on data
    chosen by ``strategy`` and then scored on every test set E[j].

    Args:
        T (list): Sequence of N training sets.
        E (list): Sequence of N testing sets, each a (normal, anomaly) pair of arrays.
        Y (list): Sequence of N (normal_labels, anomaly_labels) pairs for the test sets.
        model: An unfitted detector supporting `fit` and `decision_function`.
            The scores are negated before evaluation, i.e. `decision_function`
            is expected to follow the scikit-learn outlier convention (higher
            means more normal).
            NOTE(review): a detector whose `decision_function` already returns
            anomaly scores (higher means more anomalous) would be ranked
            upside down here — confirm the convention for each model passed in.
        strategy (str): One of "naive", "cumulative", "replay", "SSF", "hierarchical".
        replay_buffer_size (int): Maximum size of replay buffer ("replay" only).
        memory_size (int): Maximum memory size ("SSF" and "hierarchical" only).
        alpha (float): KS-test threshold for drift detection ("SSF" only).

    Returns:
        numpy array: NxN results matrix R where R[i, j] is the average
        precision (PR-AUC) of the model on E[j] after learning T[i].

    Raises:
        ValueError: If ``strategy`` is unknown or T, E and Y differ in length.
    """
    known_strategies = ("naive", "cumulative", "replay", "SSF", "hierarchical")
    if strategy not in known_strategies:
        # Without this check an unknown strategy skips fitting altogether and
        # only fails later, inside decision_function, with an unrelated error.
        raise ValueError(f"Unknown strategy {strategy!r}; expected one of {known_strategies}")

    N = len(T)
    if len(E) != N or len(Y) != N:
        # R is NxN; shorter E/Y would silently leave zero columns in it.
        raise ValueError(f"T, E and Y must have the same length, got {N}, {len(E)} and {len(Y)}")

    R = np.zeros((N, N))

    if strategy == "cumulative":
        cumulative_data = []

    if strategy == "replay":
        replay_buffer = []

    if strategy == "SSF":
        memory_buffer = None

    if strategy == "hierarchical":
        h_memory = HierarchicalMemory(memory_limit=memory_size, pyramid_factor=2, centroids_per_concept=10)

    for i, Ti in tqdm(enumerate(T), desc=f"Evaluating using {strategy} strategy"):
        # Always start from the unfitted template; nothing is carried over
        # between concepts except the strategy's own data buffers.
        current_model = deepcopy(model)

        # -- NAIVE -- train on the current concept only.
        if strategy == "naive":
            current_model.fit(Ti)

        # -- CUMULATIVE -- train on everything seen so far.
        elif strategy == "cumulative":
            cumulative_data.extend(Ti.tolist())
            current_model.fit(np.array(cumulative_data))

        # -- REPLAY -- train on the buffer plus the current concept, then
        # push the current concept into the bounded FIFO buffer.
        elif strategy == "replay":
            if replay_buffer:
                combined_data = np.vstack((np.array(replay_buffer), Ti))
            else:
                combined_data = Ti

            current_model.fit(combined_data)
            replay_buffer.extend(Ti.tolist())

            if len(replay_buffer) > replay_buffer_size:
                replay_buffer = replay_buffer[-replay_buffer_size:]

        # -- SSF -- the buffer only changes when the KS test reports drift.
        elif strategy == "SSF":
            if memory_buffer is None:
                memory_buffer = Ti[:memory_size]
            else:
                drift_detected = kolmogorov_smirnov_test(memory_buffer, Ti, alpha)
                if drift_detected:
                    X_new_selected = strategic_sample_selection(memory_buffer, Ti, top_k=1000)
                    memory_buffer = update_memory_buffer(memory_buffer, X_new_selected, memory_size=memory_size)
            # Drop duplicate rows (np.unique also sorts the rows).
            memory_buffer = np.unique(memory_buffer, axis=0)
            current_model.fit(memory_buffer)

        # -- HIERARCHICAL -- choose the memory level from the drift score.
        elif strategy == "hierarchical":

            memory_data = h_memory.get_all_memory()
            if memory_data.size == 0:
                drift_level = 1
            else:
                # Mean per-feature Wasserstein distance between the new
                # concept and everything currently held in memory.
                drift_distances = [
                    wasserstein_distance(Ti[:, d], memory_data[:, d])
                    for d in range(Ti.shape[1])
                ]
                drift_score = np.mean(drift_distances)
                print(f"drift: {drift_score}")

                if drift_score < 0.05:
                    drift_level = 1
                elif drift_score < 0.1:
                    drift_level = 2
                elif drift_score < 0.2:
                    drift_level = 3
                else:
                    drift_level = 4

            h_memory.add_concept(Ti, level=drift_level)
            summarized_memory = h_memory.get_all_memory()
            current_model.fit(summarized_memory)

        # -- Evaluation -- score the freshly fitted model on every concept.
        for j, ((Ej_normal, Ej_anomaly), (y_normal, y_anomaly)) in enumerate(zip(E, Y)):
            test_data = np.vstack((Ej_normal, Ej_anomaly))
            test_labels = np.hstack((y_normal, y_anomaly))

            # Negate so that larger scores mean "more anomalous" (label 1).
            scores = -current_model.decision_function(test_data)
            R[i, j] = average_precision_score(test_labels, scores)

    return R


# Experiments

In [5]:
# Build the lifelong-learning scenario: cluster normal and anomalous data into
# concepts, pair them up, and split every pair into train and test parts.
# (These are the same three steps that scenario_design() wraps.)
num_concepts = 5

# Labels were binarised when the data was loaded: 0 = normal, 1 = anomaly.
X_normal = X[y == 0]  
X_anomaly = X[y == 1]

normal_concepts = create_phi(X_normal, num_concepts)
anomaly_concepts = create_gamma(X_anomaly, num_concepts)

concept_pairs = match_lambda(anomaly_concepts, normal_concepts)

# T[i]: training data of concept i (normal samples only).
# E[i]: (normal_test, anomaly_test) arrays of concept i.
# Y[i]: matching (zeros, ones) label arrays for E[i].
T = []  
E = [] 
Y = []

for normal, anomaly in concept_pairs:

    normal_train, normal_test = train_test_split(normal, test_size=0.3, random_state=42)
    # Only the test part of the anomalies is used below; anomaly_train is discarded.
    anomaly_train, anomaly_test = train_test_split(anomaly, test_size=0.3, random_state=42)  

    T.append(normal_train)
    E.append((normal_test, anomaly_test))

    y_normal_test = np.zeros(len(normal_test))
    y_anomaly_test = np.ones(len(anomaly_test))
    
    Y.append((y_normal_test, y_anomaly_test))

Finished creating normal concepts
Finished creating anomaly concepts
Finished matching concept pairs


# Eval

## LOF

In [8]:
# Hierarchical-memory strategy with a Local Outlier Factor detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_hm = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:01,  1.24s/it]

drift: 0.579317037267897


Evaluating using hierarchical strategy: 2it [00:01,  1.20it/s]

drift: 0.2703673128905297


Evaluating using hierarchical strategy: 3it [00:02,  1.31it/s]

drift: 0.42197186395107206


Evaluating using hierarchical strategy: 4it [00:03,  1.39it/s]

drift: 0.4691101451175654


Evaluating using hierarchical strategy: 5it [00:03,  1.33it/s]

Lifelong ROC-AUC: 0.6854783517193195, BWT: -0.16314537493495082, FWT: 0.8799652477466345





In [26]:
# SSF strategy with a Local Outlier Factor detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_ssf = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


Evaluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 273.05it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 256.08it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 276.87it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 326.53it/s]

Evaluating using SSF strategy: 5it [00:05,  1.14s/it]

Lifelong ROC-AUC: 0.7544561432893502, BWT: -0.0039135441339650965, FWT: 0.45301001764958054





In [6]:
# Naive strategy (fit on the current concept only) with a Local Outlier Factor detector.
R_naive = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:13,  2.78s/it]

Lifelong PR-AUC: 0.9476023853974299, BWT: -0.07859058798019172, FWT: 0.973512600919387





In [7]:
# Cumulative strategy (fit on all concepts seen so far) with a Local Outlier Factor detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_cumulative = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="cumulative")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [02:10, 26.08s/it]

Lifelong ROC-AUC: 0.9238354062988797, BWT: -0.004499087364359666, FWT: 0.17379610432039533





In [7]:
# Replay strategy (bounded FIFO buffer plus the current concept) with a Local Outlier Factor detector.
R_replay = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:11,  2.35s/it]

Lifelong PR-AUC: 0.9742997397831817, BWT: -0.03854455640156388, FWT: 0.9873064488251835





## IF

In [9]:
# Hierarchical-memory strategy with an Isolation Forest detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_hm = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:01,  1.83s/it]

drift: 0.579317037267897


Evaluating using hierarchical strategy: 2it [00:03,  1.98s/it]

drift: 0.2703673128905297


Evaluating using hierarchical strategy: 3it [00:05,  2.00s/it]

drift: 0.42197186395107206


Evaluating using hierarchical strategy: 4it [00:08,  2.04s/it]

drift: 0.4691101451175654


Evaluating using hierarchical strategy: 5it [00:10,  2.07s/it]

Lifelong ROC-AUC: 0.8654683649498129, BWT: 0.0951616468964753, FWT: 0.6855683452574001





In [27]:
# SSF strategy with an Isolation Forest detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_ssf = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


Evaluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 281.14it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 253.28it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 237.31it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 244.61it/s]

Evaluating using SSF strategy: 5it [00:05,  1.06s/it]

Lifelong ROC-AUC: 0.6559285630296237, BWT: 0.06868127809581366, FWT: 0.804018261100736





In [8]:
# Naive strategy (fit on the current concept only) with an Isolation Forest detector.
R_naive = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:11,  2.39s/it]

Lifelong PR-AUC: 0.978930536691485, BWT: -0.029086506965707647, FWT: 0.8247305854006719





In [9]:
# Cumulative strategy (fit on all concepts seen so far) with an Isolation Forest detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_cumulative = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="cumulative")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:07,  1.55s/it]

Lifelong ROC-AUC: 0.753897896406868, BWT: -0.0066406078135951676, FWT: 0.7719174809547474





In [9]:
# Replay strategy (bounded FIFO buffer plus the current concept) with an Isolation Forest detector.
R_replay = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:12,  2.44s/it]

Lifelong PR-AUC: 0.9915871069382679, BWT: -0.011070536055244495, FWT: 0.8578499751140404





## SGDOCSVM

In [10]:
# Hierarchical-memory strategy with an SGD one-class SVM detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_hm = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:00,  2.77it/s]

drift: 0.579317037267897


Evaluating using hierarchical strategy: 2it [00:00,  2.31it/s]

drift: 0.2703673128905297


Evaluating using hierarchical strategy: 3it [00:01,  2.46it/s]

drift: 0.42197186395107206


Evaluating using hierarchical strategy: 4it [00:01,  2.35it/s]

drift: 0.4691101451175654


Evaluating using hierarchical strategy: 5it [00:02,  2.38it/s]

Lifelong ROC-AUC: 0.9129554621219703, BWT: 0.0016059336227942755, FWT: 0.7984083349034685





In [28]:
# SSF strategy with an SGD one-class SVM detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_ssf = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


Evaluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 288.13it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 307.55it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 306.85it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 271.76it/s]

Evaluating using SSF strategy: 5it [00:02,  2.14it/s]

Lifelong ROC-AUC: 0.790790866239933, BWT: -0.004091332504534584, FWT: 0.5467376505389003





In [10]:
# Naive strategy (fit on the current concept only) with an SGD one-class SVM detector.
R_naive = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:01,  4.71it/s]

Lifelong PR-AUC: 0.955329278735878, BWT: -0.0655760846465653, FWT: 0.6731887475117183





In [17]:
# Cumulative strategy (fit on all concepts seen so far) with an SGD one-class SVM detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
R_cumulative = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="cumulative")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:04,  1.03it/s]


Lifelong ROC-AUC: 0.597799790906325, BWT: -0.215598947769658, FWT: 0.6579349113663381


In [11]:
# Replay strategy (bounded FIFO buffer plus the current concept) with an SGD one-class SVM detector.
R_replay = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:01,  4.15it/s]

Lifelong PR-AUC: 0.9972816657502965, BWT: 0.0005240982460674281, FWT: 0.7987464657206108





# SLAD

In [7]:
from deepod.models.tabular import SLAD

In [8]:
# Hierarchical-memory strategy with DeepOD's SLAD detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
# NOTE(review): evaluation_protocol negates decision_function (scikit-learn
# convention: higher = more normal). DeepOD detectors appear to return anomaly
# scores directly (higher = more anomalous) — confirm the sign before
# trusting these numbers.
R_hm = evaluation_protocol(T, E, Y, SLAD(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]


  c /= stddev[:, None]
  c /= stddev[None, :]


epoch  1, training loss: 0.591571, time: 0.4s
epoch 10, training loss: 0.489330, time: 0.0s
epoch 20, training loss: 0.445471, time: 0.0s
epoch 30, training loss: 0.431223, time: 0.0s
epoch 40, training loss: 0.416453, time: 0.0s
epoch 50, training loss: 0.416784, time: 0.0s
epoch 60, training loss: 0.419885, time: 0.0s
epoch 70, training loss: 0.415998, time: 0.0s
epoch 80, training loss: 0.409251, time: 0.0s
epoch 90, training loss: 0.413254, time: 0.0s
epoch100, training loss: 0.408513, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 1it [03:59, 239.13s/it]

drift: 0.579317037267897
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]


  c /= stddev[:, None]
  c /= stddev[None, :]


epoch  1, training loss: 0.635669, time: 0.0s
epoch 10, training loss: 0.511418, time: 0.0s
epoch 20, training loss: 0.482239, time: 0.0s
epoch 30, training loss: 0.476269, time: 0.0s
epoch 40, training loss: 0.473079, time: 0.0s
epoch 50, training loss: 0.471285, time: 0.0s
epoch 60, training loss: 0.470184, time: 0.0s
epoch 70, training loss: 0.470499, time: 0.0s
epoch 80, training loss: 0.470719, time: 0.0s
epoch 90, training loss: 0.468957, time: 0.0s
epoch100, training loss: 0.470001, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 2it [07:50, 234.67s/it]

drift: 0.2703673128905297
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]


  c /= stddev[:, None]
  c /= stddev[None, :]


epoch  1, training loss: 0.711739, time: 0.0s
epoch 10, training loss: 0.605024, time: 0.0s
epoch 20, training loss: 0.581380, time: 0.0s
epoch 30, training loss: 0.570861, time: 0.0s
epoch 40, training loss: 0.568834, time: 0.0s
epoch 50, training loss: 0.567407, time: 0.0s
epoch 60, training loss: 0.569013, time: 0.0s
epoch 70, training loss: 0.564466, time: 0.0s
epoch 80, training loss: 0.564674, time: 0.0s
epoch 90, training loss: 0.568014, time: 0.0s
epoch100, training loss: 0.565782, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 3it [11:38, 231.69s/it]

drift: 0.42197186395107206
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]


  c /= stddev[:, None]
  c /= stddev[None, :]


epoch  1, training loss: 0.675733, time: 0.0s
epoch 10, training loss: 0.557780, time: 0.0s
epoch 20, training loss: 0.546163, time: 0.0s
epoch 30, training loss: 0.540972, time: 0.0s
epoch 40, training loss: 0.542338, time: 0.0s
epoch 50, training loss: 0.541350, time: 0.0s
epoch 60, training loss: 0.544041, time: 0.0s
epoch 70, training loss: 0.539697, time: 0.0s
epoch 80, training loss: 0.539467, time: 0.0s
epoch 90, training loss: 0.539749, time: 0.0s
epoch100, training loss: 0.540637, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 4it [15:33, 232.92s/it]

drift: 0.4691101451175654
Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]


  c /= stddev[:, None]
  c /= stddev[None, :]


epoch  1, training loss: 0.708117, time: 0.0s
epoch 10, training loss: 0.590839, time: 0.2s
epoch 20, training loss: 0.580164, time: 0.0s
epoch 30, training loss: 0.578071, time: 0.0s
epoch 40, training loss: 0.577773, time: 0.0s
epoch 50, training loss: 0.576259, time: 0.0s
epoch 60, training loss: 0.575789, time: 0.0s
epoch 70, training loss: 0.576140, time: 0.0s
epoch 80, training loss: 0.577216, time: 0.0s
epoch 90, training loss: 0.573508, time: 0.0s
epoch100, training loss: 0.575931, time: 0.0s
Start Inference on the training data...


Evaluating using hierarchical strategy: 5it [19:28, 233.63s/it]

Lifelong ROC-AUC: 0.0038470990534672693, BWT: -0.0006702693392070578, FWT: 0.05892590521987697





In [7]:
# SSF strategy with DeepOD's SLAD detector (10 training epochs).
# R holds average-precision scores, so the summary is labelled PR-AUC.
# NOTE(review): evaluation_protocol negates decision_function (scikit-learn
# convention: higher = more normal). DeepOD detectors appear to return anomaly
# scores directly (higher = more anomalous) — confirm the sign before
# trusting these numbers.
R_ssf = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]


  c /= stddev[:, None]
  c /= stddev[None, :]


epoch  1, training loss: 0.687912, time: 5.0s
epoch 10, training loss: 0.645888, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 294.46it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.641161, time: 0.2s
epoch 10, training loss: 0.590166, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 326.15it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.693941, time: 0.2s
epoch 10, training loss: 0.646333, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 336.81it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.690720, time: 0.2s
epoch 10, training loss: 0.645765, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 357.97it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.640675, time: 0.2s
epoch 10, training loss: 0.591027, time: 0.2s
Start Inference on the training data...


Evaluating using SSF strategy: 5it [18:37, 223.46s/it]

Lifelong ROC-AUC: 0.10306787593838156, BWT: 0.15012962323322607, FWT: 0.07251415630594457





In [8]:
# Naive strategy (fit on the current concept only) with DeepOD's SLAD detector (10 training epochs).
# R holds average-precision scores, so the summary is labelled PR-AUC.
# NOTE(review): evaluation_protocol negates decision_function (scikit-learn
# convention: higher = more normal). DeepOD detectors appear to return anomaly
# scores directly (higher = more anomalous) — confirm the sign before
# trusting these numbers.
R_naive = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.660946, time: 0.2s
epoch 10, training loss: 0.626060, time: 0.2s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.684167, time: 0.2s
epoch 10, training loss: 0.632561, time: 0.2s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.673102, time: 0.3s
epoch 10, training loss: 0.628391, time: 0.2s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.639581, time: 0.2s
epoch 10, training loss: 0.607174, time: 0.2s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.715584, time: 0.2s
epoch 10, training loss: 0.668756, time: 0.2s
Start Inference on the training data...


Evaluating using naive strategy: 5it [18:05, 217.00s/it]

Lifelong ROC-AUC: 0.3090254885781049, BWT: 0.46285084302580703, FWT: 0.13138860698144264





In [9]:
# Replay strategy (bounded FIFO buffer plus the current concept) with DeepOD's SLAD detector.
# R holds average-precision scores, so the summary is labelled PR-AUC.
# NOTE(review): evaluation_protocol negates decision_function (scikit-learn
# convention: higher = more normal). DeepOD detectors appear to return anomaly
# scores directly (higher = more anomalous) — confirm the sign before
# trusting these numbers.
R_replay = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.660946, time: 0.2s
epoch 10, training loss: 0.626060, time: 0.2s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.618283, time: 0.4s
epoch 10, training loss: 0.575527, time: 0.4s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.587725, time: 0.4s
epoch 10, training loss: 0.535928, time: 0.4s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.593238, time: 0.4s
epoch 10, training loss: 0.557224, time: 0.4s
Start Inference on the training data...


  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 36, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36]
epoch  1, training loss: 0.615165, time: 0.4s
epoch 10, training loss: 0.571907, time: 0.4s
Start Inference on the training data...


Evaluating using replay strategy: 5it [18:33, 222.75s/it]

Lifelong ROC-AUC: 0.10642739273539757, BWT: 0.15895803857582042, FWT: 0.07114211921512488





# ICL

In [9]:
from deepod.models.tabular import ICL

In [10]:
# Run the evaluation protocol for the ICL detector with the "hierarchical"
# strategy and keep the returned result in R_hm for the metric helpers below.
# NOTE(review): this cell builds ICL with epochs=100, whereas the SSF, naive
# and replay ICL cells in this notebook use epochs=10 -- confirm the mismatch
# is intentional before comparing the reported metrics across strategies.
R_hm = evaluation_protocol(
    T,
    E,
    Y,
    ICL(epochs=100),
    strategy="hierarchical",
    memory_size=5000,
    alpha=0.05,
)
# Summarise the run with the three lifelong-learning metrics on one line.
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing: 100%|██████████| 1/1 [00:00<00:00, 525.08it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 576.70it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 585.80it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 65/644 [00:00<00:00, 649.25it/s][A
testing:  20%|██        | 131/644 [00:00<00:00, 651.16it/s][A
testing:  31%|███       | 197/644 [00:00<00:00, 651.93it/s][A
testing:  41%|████      | 263/644 [00:00<00:00, 652.68it/s][A
testing:  51%|█████     | 329/644 [00:00<00:00, 653.37it/s][A
testing:  61%|██████▏   | 395/644 [00:00<00:00, 653.72it/s][A
testing:  72%|███████▏  | 461/644 [00:00<00:00, 654.81it/s][A
testing:  82%|████████▏ | 527/644 [00:00<00:00, 655.35it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 653.98it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 654.60it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 653.56it/s][A
testing:  31%|███       | 198/64

drift: 0.579317037267897
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2,


testing: 100%|██████████| 1/1 [00:00<00:00, 572.44it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 574.01it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 586.78it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 651.54it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 653.28it/s][A
testing:  31%|███       | 198/644 [00:00<00:00, 655.01it/s][A
testing:  41%|████      | 264/644 [00:00<00:00, 655.10it/s][A
testing:  51%|█████     | 330/644 [00:00<00:00, 655.37it/s][A
testing:  61%|██████▏   | 396/644 [00:00<00:00, 655.64it/s][A
testing:  72%|███████▏  | 462/644 [00:00<00:00, 655.33it/s][A
testing:  82%|████████▏ | 528/644 [00:00<00:00, 655.67it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 655.31it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 657.44it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 654.86it/s][A
testing:  31%|███       | 198/64

drift: 0.2703673128905297
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2


testing: 100%|██████████| 1/1 [00:00<00:00, 519.16it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 529.65it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 534.51it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 65/644 [00:00<00:00, 649.97it/s][A
testing:  20%|██        | 131/644 [00:00<00:00, 650.56it/s][A
testing:  31%|███       | 197/644 [00:00<00:00, 651.65it/s][A
testing:  41%|████      | 263/644 [00:00<00:00, 652.85it/s][A
testing:  51%|█████     | 329/644 [00:00<00:00, 651.11it/s][A
testing:  61%|██████▏   | 395/644 [00:00<00:00, 651.33it/s][A
testing:  72%|███████▏  | 461/644 [00:00<00:00, 652.47it/s][A
testing:  82%|████████▏ | 527/644 [00:00<00:00, 653.64it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 652.57it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 654.38it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 654.65it/s][A
testing:  31%|███       | 198/64

drift: 0.42197186395107206
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=


testing: 100%|██████████| 1/1 [00:00<00:00, 508.28it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 530.52it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 520.06it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 65/644 [00:00<00:00, 649.74it/s][A
testing:  20%|██        | 131/644 [00:00<00:00, 650.47it/s][A
testing:  31%|███       | 197/644 [00:00<00:00, 651.97it/s][A
testing:  41%|████      | 263/644 [00:00<00:00, 652.57it/s][A
testing:  51%|█████     | 329/644 [00:00<00:00, 652.27it/s][A
testing:  61%|██████▏   | 395/644 [00:00<00:00, 652.95it/s][A
testing:  72%|███████▏  | 461/644 [00:00<00:00, 653.36it/s][A
testing:  82%|████████▏ | 527/644 [00:00<00:00, 653.41it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 652.76it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 654.13it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 655.02it/s][A
testing:  31%|███       | 198/64

drift: 0.4691101451175654
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2


testing: 100%|██████████| 1/1 [00:00<00:00, 498.43it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 518.52it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 520.64it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 653.41it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 653.05it/s][A
testing:  31%|███       | 198/644 [00:00<00:00, 653.51it/s][A
testing:  41%|████      | 264/644 [00:00<00:00, 653.91it/s][A
testing:  51%|█████     | 330/644 [00:00<00:00, 653.62it/s][A
testing:  61%|██████▏   | 396/644 [00:00<00:00, 653.44it/s][A
testing:  72%|███████▏  | 462/644 [00:00<00:00, 652.41it/s][A
testing:  82%|████████▏ | 528/644 [00:00<00:00, 651.94it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 652.69it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 655.49it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 654.63it/s][A
testing:  31%|███       | 198/64

Lifelong ROC-AUC: 0.07946647862972361, BWT: 0.03098455622652524, FWT: 0.15436658370294945





In [11]:
# Run the evaluation protocol for the ICL detector (10 epochs) with the "SSF"
# strategy, using memory_size=5000 and alpha=0.05; the result is kept in R_ssf.
R_ssf = evaluation_protocol(
    T,
    E,
    Y,
    ICL(epochs=10),
    strategy="SSF",
    memory_size=5000,
    alpha=0.05,
)
# Summarise the run with the three lifelong-learning metrics on one line.
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 664.15it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 667.69it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 670.96it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 668.28it/s][A
testing:  21%|██        | 135/644 [00:00<00:00, 669.26it/s][A
testing:  31%|███▏      | 202/644 [00:00<00:00, 669.57it/s][A
testing:  42%|████▏     | 269/644 [00:00<00:00, 669.47it/s][A
testing:  52%|█████▏    | 336/644 [00:00<00:00, 667.07it/s][A
testing:  63%|██████▎   | 403/644 [00:00<00:00, 665.33it/s][A
testing:  73%|███████▎  | 470/644 [00:00<00:00, 664.39it/s][A
testing:  83%|████████▎ | 537/644 [00:00<00:00, 664.36it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 665.86it/s][A

testing:   0%|          | 0/644 [00:00<?,

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 664.61it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 662.68it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 663.30it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 662.66it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 661.54it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 661.93it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 662.83it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 662.44it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 661.68it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 662.15it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 662.22it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 661.44it/s][A

testing:   0%|          | 0/644 [00:00<?,

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 660.82it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 639.87it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 661.40it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 662.17it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 661.06it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 661.23it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 661.21it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 661.72it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 663.06it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 662.99it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 664.30it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 662.45it/s][A

testing:   0%|          | 0/644 [00:00<?,

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 668.42it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 667.83it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 663.86it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 665.37it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 664.05it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 665.78it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 665.47it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 665.43it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 666.95it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 667.57it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 667.27it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 666.19it/s][A

testing:   0%|          | 0/644 [00:00<?,

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 661.35it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 661.20it/s][A

testing:   0%|          | 0/79 [00:00<?, ?it/s][A
testing: 100%|██████████| 79/79 [00:00<00:00, 665.18it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 663.15it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 652.57it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 657.26it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 660.18it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 661.28it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 662.26it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 663.07it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 664.02it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 661.27it/s][A

testing:   0%|          | 0/644 [00:00<?,

Lifelong ROC-AUC: 0.08869924947566094, BWT: 0.132206238457011, FWT: 0.042390205101964604





In [12]:
# Run the evaluation protocol for the ICL detector (10 epochs) with the
# "naive" strategy (no extra strategy arguments); the result is kept in R_naive.
R_naive = evaluation_protocol(
    T,
    E,
    Y,
    ICL(epochs=10),
    strategy="naive",
)
# Summarise the run with the three lifelong-learning metrics on one line.
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 664.26it/s][A

testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 658.54it/s][A

testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 669.63it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 665.42it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 663.97it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 655.27it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 658.94it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 660.59it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 662.35it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 662.98it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 663.58it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 660.32it/s][A

testing:   0%|          | 0/644 [00

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/114 [00:00<?, ?it/s][A
testing: 100%|██████████| 114/114 [00:00<00:00, 646.39it/s][A

testing:   0%|          | 0/114 [00:00<?, ?it/s][A
testing: 100%|██████████| 114/114 [00:00<00:00, 646.53it/s][A

testing:   0%|          | 0/114 [00:00<?, ?it/s][A
testing: 100%|██████████| 114/114 [00:00<00:00, 650.98it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 65/644 [00:00<00:00, 642.78it/s][A
testing:  20%|██        | 130/644 [00:00<00:00, 642.82it/s][A
testing:  30%|███       | 196/644 [00:00<00:00, 649.51it/s][A
testing:  41%|████      | 263/644 [00:00<00:00, 655.59it/s][A
testing:  51%|█████     | 330/644 [00:00<00:00, 659.31it/s][A
testing:  61%|██████▏   | 396/644 [00:00<00:00, 656.01it/s][A
testing:  72%|███████▏  | 463/644 [00:00<00:00, 658.28it/s][A
testing:  82%|████████▏ | 530/644 [00:00<00:00, 660.61it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 657.91it/s][A

testing:   0%|          | 0/644 [00

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 664.74it/s][A

testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 668.90it/s][A

testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 665.00it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 664.06it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 664.18it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 664.84it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 663.83it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 664.26it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 664.30it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 652.86it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 657.73it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 661.39it/s][A

testing:   0%|          | 0/644 [00

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/106 [00:00<?, ?it/s][A
testing: 100%|██████████| 106/106 [00:00<00:00, 644.72it/s][A

testing:   0%|          | 0/106 [00:00<?, ?it/s][A
testing: 100%|██████████| 106/106 [00:00<00:00, 663.64it/s][A

testing:   0%|          | 0/106 [00:00<?, ?it/s][A
testing: 100%|██████████| 106/106 [00:00<00:00, 665.96it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 663.11it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 663.91it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 662.94it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 663.91it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 652.62it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 656.50it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 657.74it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 659.44it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 660.30it/s][A

testing:   0%|          | 0/644 [00

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/116 [00:00<?, ?it/s][A
testing: 100%|██████████| 116/116 [00:00<00:00, 666.81it/s][A

testing:   0%|          | 0/116 [00:00<?, ?it/s][A
testing: 100%|██████████| 116/116 [00:00<00:00, 667.79it/s][A

testing:   0%|          | 0/116 [00:00<?, ?it/s][A
testing: 100%|██████████| 116/116 [00:00<00:00, 666.92it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 664.28it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 664.28it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 664.91it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 665.90it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 664.66it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 665.31it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 665.85it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 665.61it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 664.80it/s][A

testing:   0%|          | 0/644 [00

Lifelong ROC-AUC: 0.31813958431394085, BWT: 0.47604237970763624, FWT: 0.1896800757350226





In [13]:
# Run the evaluation protocol for the ICL detector (10 epochs) with the
# "replay" strategy and replay_buffer_size=5000; the result is kept in R_replay.
R_replay = evaluation_protocol(
    T,
    E,
    Y,
    ICL(epochs=10),
    strategy="replay",
    replay_buffer_size=5000,
)
# Summarise the run with the three lifelong-learning metrics on one line.
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 664.72it/s][A

testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 665.73it/s][A

testing:   0%|          | 0/107 [00:00<?, ?it/s][A
testing: 100%|██████████| 107/107 [00:00<00:00, 666.16it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 661.10it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 660.79it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 661.91it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 662.13it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 662.32it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 663.43it/s][A
testing:  73%|███████▎  | 469/644 [00:00<00:00, 664.41it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 664.64it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 663.12it/s][A

testing:   0%|          | 0/644 [00

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/192 [00:00<?, ?it/s][A
testing:  35%|███▍      | 67/192 [00:00<00:00, 663.17it/s][A
testing: 100%|██████████| 192/192 [00:00<00:00, 661.77it/s][A

testing:   0%|          | 0/192 [00:00<?, ?it/s][A
testing:  35%|███▍      | 67/192 [00:00<00:00, 666.94it/s][A
testing: 100%|██████████| 192/192 [00:00<00:00, 664.09it/s][A

testing:   0%|          | 0/192 [00:00<?, ?it/s][A
testing:  35%|███▍      | 67/192 [00:00<00:00, 663.92it/s][A
testing: 100%|██████████| 192/192 [00:00<00:00, 661.68it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 66/644 [00:00<00:00, 659.05it/s][A
testing:  20%|██        | 132/644 [00:00<00:00, 656.70it/s][A
testing:  31%|███       | 199/644 [00:00<00:00, 658.49it/s][A
testing:  41%|████▏     | 266/644 [00:00<00:00, 659.34it/s][A
testing:  52%|█████▏    | 333/644 [00:00<00:00, 660.40it/s][A
testing:  62%|██████▏   | 400/644 [00:00<00:00, 660.24it/s][A
testing:  73%|███████▎  | 467/644 [0

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing:  37%|███▋      | 68/185 [00:00<00:00, 670.40it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 665.39it/s][A

testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing:  36%|███▌      | 67/185 [00:00<00:00, 666.94it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 663.48it/s][A

testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing:  36%|███▌      | 67/185 [00:00<00:00, 667.24it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 666.51it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 666.15it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 664.85it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 666.18it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 663.99it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 664.96it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 665.40it/s][A
testing:  73%|███████▎  | 469/644 [0

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing:  36%|███▌      | 67/185 [00:00<00:00, 669.83it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 668.84it/s][A

testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing:  37%|███▋      | 68/185 [00:00<00:00, 671.45it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 669.50it/s][A

testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing:  36%|███▌      | 67/185 [00:00<00:00, 666.78it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 664.66it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 663.36it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 663.41it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 664.28it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 663.27it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 663.71it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 664.40it/s][A
testing:  73%|███████▎  | 469/644 [0

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=34, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(35, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/194 [00:00<?, ?it/s][A
testing:  35%|███▌      | 68/194 [00:00<00:00, 669.46it/s][A
testing: 100%|██████████| 194/194 [00:00<00:00, 666.09it/s][A

testing:   0%|          | 0/194 [00:00<?, ?it/s][A
testing:  35%|███▍      | 67/194 [00:00<00:00, 668.65it/s][A
testing: 100%|██████████| 194/194 [00:00<00:00, 666.27it/s][A

testing:   0%|          | 0/194 [00:00<?, ?it/s][A
testing:  35%|███▍      | 67/194 [00:00<00:00, 669.21it/s][A
testing: 100%|██████████| 194/194 [00:00<00:00, 666.62it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  10%|█         | 67/644 [00:00<00:00, 666.76it/s][A
testing:  21%|██        | 134/644 [00:00<00:00, 667.97it/s][A
testing:  31%|███       | 201/644 [00:00<00:00, 666.76it/s][A
testing:  42%|████▏     | 268/644 [00:00<00:00, 665.63it/s][A
testing:  52%|█████▏    | 335/644 [00:00<00:00, 663.52it/s][A
testing:  62%|██████▏   | 402/644 [00:00<00:00, 664.77it/s][A
testing:  73%|███████▎  | 469/644 [0

Lifelong ROC-AUC: 0.06834745891953305, BWT: 0.10172968122632724, FWT: 0.14286469749813824





# RCA

In [11]:
from deepod.models.tabular import RCA

In [12]:
# Run the evaluation protocol for an RCA detector (epochs=10) under the
# "hierarchical" strategy with memory_size=5000 and alpha=0.05, then print
# the summary metrics computed from the returned result.
# T, E and Y are defined in earlier notebook cells.
R_hm = evaluation_protocol(
    T,
    E,
    Y,
    RCA(epochs=10),
    strategy="hierarchical",
    memory_size=5000,
    alpha=0.05,
)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1110.02it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.80it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.80it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.80it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.80it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.79it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.80it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.80it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.79it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.79it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.79it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.32s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.33s/it][A
 30%|███       | 3/10 [00:19<00:44,  6.37s/it][A
 40%|████      | 4/10 [00:25<00:38,  6.34s/it][A
 50%|█████     | 5/10 [00:31<00:31,  6.32s/it][A
 60%|██████    | 6/10 [00:37<00:25,  6.31s/it][A
 70%|███████   | 7/10 [00:44<00:18,  6.31s/it][A
 80%|███████

drift: 0.579317037267897
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, o


100%|██████████| 10/10 [00:00<00:00, 1078.56it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.80it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.79it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.79it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.79it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.79it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.79it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.79it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.79it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.79it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.79it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.27s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.28s/it][A
 30%|███       | 3/10 [00:18<00:44,  6.34s/it][A
 40%|████      | 4/10 [00:25<00:37,  6.32s/it][A
 50%|█████     | 5/10 [00:31<00:31,  6.30s/it][A
 60%|██████    | 6/10 [00:37<00:25,  6.29s/it][A
 70%|███████   | 7/10 [00:44<00:18,  6.29s/it][A
 80%|███████

drift: 0.2703673128905297
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, 


100%|██████████| 10/10 [00:00<00:00, 1039.22it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.79it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.79it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.79it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.79it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.79it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.79it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.79it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.79it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.79it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.79it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.27s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.29s/it][A
 30%|███       | 3/10 [00:18<00:44,  6.31s/it][A
 40%|████      | 4/10 [00:25<00:37,  6.30s/it][A
 50%|█████     | 5/10 [00:31<00:31,  6.30s/it][A
 60%|██████    | 6/10 [00:37<00:25,  6.33s/it][A
 70%|███████   | 7/10 [00:44<00:18,  6.32s/it][A
 80%|███████

drift: 0.42197186395107206
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100,


100%|██████████| 10/10 [00:00<00:00, 1015.59it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.79it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.79it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.79it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.79it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.79it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.79it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.79it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.79it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.79it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.79it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.29s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.30s/it][A
 30%|███       | 3/10 [00:18<00:44,  6.30s/it][A
 40%|████      | 4/10 [00:25<00:37,  6.29s/it][A
 50%|█████     | 5/10 [00:31<00:31,  6.31s/it][A
 60%|██████    | 6/10 [00:37<00:25,  6.30s/it][A
 70%|███████   | 7/10 [00:44<00:18,  6.29s/it][A
 80%|███████

drift: 0.4691101451175654
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, 


100%|██████████| 10/10 [00:00<00:00, 983.63it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.79it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.79it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.67it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.72it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.74it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.76it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.78it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.78it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.76it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.29s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.29s/it][A
 30%|███       | 3/10 [00:18<00:44,  6.30s/it][A
 40%|████      | 4/10 [00:25<00:37,  6.30s/it][A
 50%|█████     | 5/10 [00:31<00:31,  6.30s/it][A
 60%|██████    | 6/10 [00:37<00:25,  6.34s/it][A
 70%|███████   | 7/10 [00:44<00:18,  6.32s/it][A
 80%|████████

Lifelong ROC-AUC: 0.7081630926934802, BWT: -0.008586760559425023, FWT: 0.38781532834559374





In [15]:
# Run the evaluation protocol for an RCA detector (epochs=10) under the
# "SSF" strategy with memory_size=5000 and alpha=0.05, then print the
# summary metrics computed from the returned result.
# T, E and Y are defined in earlier notebook cells.
R_ssf = evaluation_protocol(
    T,
    E,
    Y,
    RCA(epochs=10),
    strategy="SSF",
    memory_size=5000,
    alpha=0.05,
)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.79it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.73it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.72it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.70it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.68it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.80it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.79it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.79it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.79it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.79it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.79it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.79it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.79it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.79it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.79it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:57,  6.36s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.35s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.44it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.19it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.22it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.20it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.20it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.76it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.75it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.76it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.77it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.77it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.77it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.77it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.77it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:57,  6.34s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.35s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.26it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.16it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.08it/s][A
 80%|████████  | 8/10 [00:00<00:00, 13.99it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.07it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.78it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.78it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.78it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.78it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.78it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.78it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.78it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.78it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.78it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.28s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.28s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.49it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.41it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.41it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.40it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.37it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.77it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.77it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.78it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.77it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.75it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.74it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.75it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.76it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.76it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.76it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:57,  6.34s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.33s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 14.59it/s][A
 40%|████      | 4/10 [00:00<00:00, 14.53it/s][A
 60%|██████    | 6/10 [00:00<00:00, 14.56it/s][A
 80%|████████  | 8/10 [00:00<00:00, 14.59it/s][A
100%|██████████| 10/10 [00:00<00:00, 14.48it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.78it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.78it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.78it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.78it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.78it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.77it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.78it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.31s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.31s/it][A
 30%|███       | 3/

Lifelong ROC-AUC: 0.1649190346519576, BWT: 0.023446627474956922, FWT: 0.06733171937070788





In [16]:
# Run the evaluation protocol for an RCA detector (epochs=10) under the
# "naive" strategy (no extra strategy arguments are passed), then print the
# summary metrics computed from the returned result.
# T, E and Y are defined in earlier notebook cells.
R_naive = evaluation_protocol(
    T,
    E,
    Y,
    RCA(epochs=10),
    strategy="naive",
)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 10.78it/s][A
 40%|████      | 4/10 [00:00<00:00, 10.71it/s][A
 60%|██████    | 6/10 [00:00<00:00, 10.69it/s][A
 80%|████████  | 8/10 [00:00<00:00, 10.69it/s][A
100%|██████████| 10/10 [00:00<00:00, 10.69it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.79it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.79it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.79it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.79it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.78it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.78it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.78it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.78it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.78it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.78it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.33s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.32s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:00, 10.00it/s][A
 20%|██        | 2/10 [00:00<00:00,  9.94it/s][A
 30%|███       | 3/10 [00:00<00:00,  9.94it/s][A
 40%|████      | 4/10 [00:00<00:00,  9.89it/s][A
 60%|██████    | 6/10 [00:00<00:00,  9.95it/s][A
 80%|████████  | 8/10 [00:00<00:00, 10.01it/s][A
100%|██████████| 10/10 [00:01<00:00, 10.00it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:04,  1.80it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.80it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.79it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.79it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.78it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.78it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.78it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.77it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.78it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 10.85it/s][A
 40%|████      | 4/10 [00:00<00:00, 10.69it/s][A
 60%|██████    | 6/10 [00:00<00:00, 10.61it/s][A
 80%|████████  | 8/10 [00:00<00:00, 10.60it/s][A
100%|██████████| 10/10 [00:00<00:00, 10.60it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.77it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.77it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.77it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.77it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.77it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.77it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.77it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.32s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.32s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 10.68it/s][A
 40%|████      | 4/10 [00:00<00:00, 10.66it/s][A
 60%|██████    | 6/10 [00:00<00:00, 10.65it/s][A
 80%|████████  | 8/10 [00:00<00:00, 10.64it/s][A
100%|██████████| 10/10 [00:00<00:00, 10.64it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.77it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.77it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.77it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.77it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.77it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.77it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.77it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:56,  6.28s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.32s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:00,  9.94it/s][A
 20%|██        | 2/10 [00:00<00:00,  9.90it/s][A
 30%|███       | 3/10 [00:00<00:00,  9.88it/s][A
 40%|████      | 4/10 [00:00<00:00,  9.89it/s][A
 50%|█████     | 5/10 [00:00<00:00,  9.88it/s][A
 60%|██████    | 6/10 [00:00<00:00,  9.90it/s][A
 70%|███████   | 7/10 [00:00<00:00,  9.90it/s][A
 80%|████████  | 8/10 [00:00<00:00,  9.89it/s][A
 90%|█████████ | 9/10 [00:00<00:00,  9.90it/s][A
100%|██████████| 10/10 [00:01<00:00,  9.88it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.77it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.77it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.77it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.77it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.77it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|████████

Lifelong ROC-AUC: 0.3506331907143607, BWT: 0.516312534680504, FWT: 0.14119247468920826





In [17]:
# RCA with the "replay" strategy: same protocol as the other runs, with a
# replay buffer capped at 5000 (passed through as replay_buffer_size).
# NOTE(review): the unit of replay_buffer_size is presumably "samples";
# confirm in evaluation_protocol.
R_replay = evaluation_protocol(
    T,
    E,
    Y,
    RCA(epochs=10),  # fresh detector, 10 training epochs
    strategy="replay",
    replay_buffer_size=5000,
)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:00, 10.80it/s][A
 40%|████      | 4/10 [00:00<00:00, 10.71it/s][A
 60%|██████    | 6/10 [00:00<00:00, 10.67it/s][A
 80%|████████  | 8/10 [00:00<00:00, 10.64it/s][A
100%|██████████| 10/10 [00:00<00:00, 10.65it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.79it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.78it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.76it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.76it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.76it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.77it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|█████████ | 9/10 [00:05<00:00,  1.77it/s][A
100%|██████████| 10/10 [00:05<00:00,  1.77it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:06<00:57,  6.35s/it][A
 20%|██        | 2/10 [00:12<00:50,  6.34s/it][A
 30%|███       | 3/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.02it/s][A
 20%|██        | 2/10 [00:00<00:01,  5.97it/s][A
 30%|███       | 3/10 [00:00<00:01,  5.98it/s][A
 40%|████      | 4/10 [00:00<00:01,  5.97it/s][A
 50%|█████     | 5/10 [00:00<00:00,  5.97it/s][A
 60%|██████    | 6/10 [00:01<00:00,  5.97it/s][A
 70%|███████   | 7/10 [00:01<00:00,  5.97it/s][A
 80%|████████  | 8/10 [00:01<00:00,  5.97it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  5.97it/s][A
100%|██████████| 10/10 [00:01<00:00,  5.97it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.78it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.78it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.78it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.78it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.78it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.78it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.78it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.24it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.19it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.11it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.11it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.13it/s][A
 60%|██████    | 6/10 [00:00<00:00,  6.10it/s][A
 70%|███████   | 7/10 [00:01<00:00,  6.08it/s][A
 80%|████████  | 8/10 [00:01<00:00,  6.09it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  6.04it/s][A
100%|██████████| 10/10 [00:01<00:00,  6.07it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.73it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.75it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.76it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.77it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.77it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.77it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.78it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  6.11it/s][A
 20%|██        | 2/10 [00:00<00:01,  6.13it/s][A
 30%|███       | 3/10 [00:00<00:01,  6.13it/s][A
 40%|████      | 4/10 [00:00<00:00,  6.11it/s][A
 50%|█████     | 5/10 [00:00<00:00,  6.12it/s][A
 60%|██████    | 6/10 [00:00<00:00,  6.13it/s][A
 70%|███████   | 7/10 [00:01<00:00,  6.14it/s][A
 80%|████████  | 8/10 [00:01<00:00,  6.15it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  6.15it/s][A
100%|██████████| 10/10 [00:01<00:00,  6.13it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.78it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.78it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.78it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.79it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.78it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.78it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.78it/s][A
 90%|████████

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=36, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:01,  5.95it/s][A
 20%|██        | 2/10 [00:00<00:01,  5.91it/s][A
 30%|███       | 3/10 [00:00<00:01,  5.89it/s][A
 40%|████      | 4/10 [00:00<00:01,  5.89it/s][A
 50%|█████     | 5/10 [00:00<00:00,  5.89it/s][A
 60%|██████    | 6/10 [00:01<00:00,  5.88it/s][A
 70%|███████   | 7/10 [00:01<00:00,  5.87it/s][A
 80%|████████  | 8/10 [00:01<00:00,  5.87it/s][A
 90%|█████████ | 9/10 [00:01<00:00,  5.86it/s][A
100%|██████████| 10/10 [00:01<00:00,  5.87it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:00<00:05,  1.78it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.77it/s][A
 30%|███       | 3/10 [00:01<00:03,  1.77it/s][A
 40%|████      | 4/10 [00:02<00:03,  1.77it/s][A
 50%|█████     | 5/10 [00:02<00:02,  1.78it/s][A
 60%|██████    | 6/10 [00:03<00:02,  1.77it/s][A
 70%|███████   | 7/10 [00:03<00:01,  1.77it/s][A
 80%|████████  | 8/10 [00:04<00:01,  1.77it/s][A
 90%|████████

Lifelong ROC-AUC: 0.2013217304082249, BWT: 0.28779258562474086, FWT: 0.07851763107195732





# RDP

In [13]:
from deepod.models.tabular import RDP

In [14]:
# RDP with the "hierarchical" strategy: run the shared evaluation protocol
# with memory_size=5000 and alpha=0.05, then print the summary metrics.
# NOTE(review): alpha is presumably the threshold used by the drift check
# whose "drift: ..." values appear in this cell's output; confirm in
# evaluation_protocol.
# NOTE(review): if another section also assigns R_hm, this cell overwrites
# it -- check before comparing results across models.
R_hm = evaluation_protocol(
    T,
    E,
    Y,
    RDP(epochs=10),  # fresh detector, 10 training epochs
    strategy="hierarchical",
    memory_size=5000,
    alpha=0.05,
)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000059, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 798.46it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 108/644 [00:00<00:00, 1072.45it/s][A
testing:  34%|███▎      | 217/644 [00:00<00:00, 1078.08it/s][A
testing:  51%|█████     | 326/644 [00:00<00:00, 1080.21it/s][A
testing:  68%|██████▊   | 435/644 [00:00<00:00, 1079.05it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1080.87it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1083.16it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1080.64it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1080.88it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1081.76it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1082.77it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1082.65it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1083.83it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1083.94it/s]

drift: 0.579317037267897
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000039, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 782.67it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 108/644 [00:00<00:00, 1076.67it/s][A
testing:  34%|███▎      | 216/644 [00:00<00:00, 1070.73it/s][A
testing:  50%|█████     | 324/644 [00:00<00:00, 1074.72it/s][A
testing:  67%|██████▋   | 433/644 [00:00<00:00, 1076.80it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1078.29it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1082.73it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1082.73it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1082.64it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1083.85it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1085.46it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1085.39it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1085.27it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1084.90it/s]

drift: 0.2703673128905297
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000049, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 826.46it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1081.70it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1082.82it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1085.57it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1085.07it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1085.43it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1084.61it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1085.30it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1086.49it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1086.30it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1086.49it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1087.42it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1087.88it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1087.49it/s]

drift: 0.42197186395107206
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000055, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 819.52it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1088.80it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1088.40it/s][A
testing:  51%|█████     | 328/644 [00:00<00:00, 1089.07it/s][A
testing:  68%|██████▊   | 437/644 [00:00<00:00, 1088.56it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1088.25it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1086.90it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1087.55it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1087.38it/s][A
testing:   6%|▌         | 437/7245 [00:00<00:06, 1088.98it/s][A
testing:   8%|▊         | 547/7245 [00:00<00:06, 1090.06it/s][A
testing:   9%|▉         | 657/7245 [00:00<00:06, 1090.81it/s][A
testing:  11%|█         | 767/7245 [00:00<00:05, 1091.17it/s][A
testing:  12%|█▏        | 877/7245 [00:00<00:05, 1090.64it/s]

drift: 0.4691101451175654
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 0.000041, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 721.17it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1084.97it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1083.10it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1084.60it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1085.44it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1084.62it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1089.76it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1087.42it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1085.45it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1083.14it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1082.27it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1081.26it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1081.23it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1081.98it/s]

Lifelong ROC-AUC: 0.2812994822360661, BWT: 0.3341371323870483, FWT: 0.08501645310775398





In [19]:
# RDP with the "SSF" strategy: run the shared evaluation protocol with
# memory_size=5000 and alpha=0.05, then print the summary metrics.
# NOTE(review): what "SSF" abbreviates and how it uses memory_size / alpha
# is defined in evaluation_protocol, not here -- confirm there.
R_ssf = evaluation_protocol(
    T,
    E,
    Y,
    RDP(epochs=10),  # fresh detector, 10 training epochs
    strategy="SSF",
    memory_size=5000,
    alpha=0.05,
)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000011, time: 0.2s
epoch 10, training loss: 0.000003, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1083.06it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1082.82it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1084.17it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1084.30it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1085.32it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1084.34it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1082.54it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1083.56it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1081.74it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1082.06it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1083.13it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1082.72it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1083.62it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1083.83it

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000013, time: 0.2s
epoch 10, training loss: 0.000004, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1088.24it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1083.74it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1079.68it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1082.70it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1080.42it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1080.92it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1082.69it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1084.98it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1085.62it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1085.02it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1085.96it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1084.96it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1085.80it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1086.86it

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000011, time: 0.2s
epoch 10, training loss: 0.000004, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1092.93it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1084.21it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1084.23it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1084.25it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1084.10it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1084.55it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1089.14it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1084.59it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1083.09it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1083.33it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1083.62it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1083.40it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1083.20it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1079.42it

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000017, time: 0.2s
epoch 10, training loss: 0.000003, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1090.70it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1085.81it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1083.14it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1081.93it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1082.18it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1084.30it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1086.23it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1088.43it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1088.90it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1087.45it/s][A
testing:   8%|▊         | 546/7245 [00:00<00:06, 1088.46it/s][A
testing:   9%|▉         | 656/7245 [00:00<00:06, 1089.05it/s][A
testing:  11%|█         | 766/7245 [00:00<00:05, 1090.03it/s][A
testing:  12%|█▏        | 876/7245 [00:00<00:05, 1088.46it

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000006, time: 0.2s
epoch 10, training loss: 0.000004, time: 0.2s
Start Inference on the training data...



testing: 100%|██████████| 79/79 [00:00<00:00, 1058.61it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  16%|█▋        | 106/644 [00:00<00:00, 1054.55it/s][A
testing:  33%|███▎      | 212/644 [00:00<00:00, 1055.30it/s][A
testing:  49%|████▉     | 318/644 [00:00<00:00, 1053.07it/s][A
testing:  66%|██████▌   | 424/644 [00:00<00:00, 1052.53it/s][A
testing:  82%|████████▏ | 530/644 [00:00<00:00, 1053.03it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1050.56it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1080.18it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1066.34it/s][A
testing:   4%|▍         | 325/7245 [00:00<00:06, 1055.24it/s][A
testing:   6%|▌         | 433/7245 [00:00<00:06, 1061.41it/s][A
testing:   7%|▋         | 542/7245 [00:00<00:06, 1068.23it/s][A
testing:   9%|▉         | 650/7245 [00:00<00:06, 1071.99it/s][A
testing:  10%|█         | 759/7245 [00:00<00:06, 1075.44it/

Lifelong ROC-AUC: 0.10674468040538798, BWT: 0.15026856861789806, FWT: 0.07368081173471211





In [20]:
# Run the evaluation protocol with a fresh RDP model (epochs=10) under the
# "naive" strategy and keep the returned result in R_naive.
# NOTE(review): T, E and Y are defined in earlier cells; their exact
# contents are not visible here -- confirm against the setup cells.
R_naive = evaluation_protocol(
    T,
    E,
    Y,
    RDP(epochs=10),
    strategy="naive",
)

# Summarise the run with the three metrics computed from R_naive.
print(
    f"Lifelong ROC-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}"
)

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000008, time: 0.3s
epoch 10, training loss: 0.000003, time: 0.3s
Start Inference on the training data...



testing: 100%|██████████| 107/107 [00:00<00:00, 1098.10it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 110/644 [00:00<00:00, 1093.74it/s][A
testing:  34%|███▍      | 220/644 [00:00<00:00, 1095.93it/s][A
testing:  51%|█████     | 330/644 [00:00<00:00, 1094.69it/s][A
testing:  68%|██████▊   | 440/644 [00:00<00:00, 1094.89it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1092.23it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1084.33it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1083.20it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1082.97it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1082.44it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1082.48it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1082.64it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1084.09it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1085.24

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000002, time: 0.3s
epoch 10, training loss: 0.000004, time: 0.3s
Start Inference on the training data...



testing:   0%|          | 0/114 [00:00<?, ?it/s][A
testing: 100%|██████████| 114/114 [00:00<00:00, 1028.90it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  16%|█▋        | 105/644 [00:00<00:00, 1049.14it/s][A
testing:  33%|███▎      | 211/644 [00:00<00:00, 1051.82it/s][A
testing:  49%|████▉     | 318/644 [00:00<00:00, 1059.66it/s][A
testing:  66%|██████▋   | 427/644 [00:00<00:00, 1068.82it/s][A
testing:  83%|████████▎ | 536/644 [00:00<00:00, 1073.84it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1066.82it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   1%|▏         | 108/7245 [00:00<00:06, 1077.48it/s][A
testing:   3%|▎         | 217/7245 [00:00<00:06, 1080.21it/s][A
testing:   4%|▍         | 326/7245 [00:00<00:06, 1080.24it/s][A
testing:   6%|▌         | 435/7245 [00:00<00:06, 1082.88it/s][A
testing:   8%|▊         | 544/7245 [00:00<00:06, 1082.16it/s][A
testing:   9%|▉         | 653/7245 [00:00<00:06, 1080.92it/s][A
te

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000018, time: 0.3s
epoch 10, training loss: 0.000009, time: 0.3s
Start Inference on the training data...



testing: 100%|██████████| 107/107 [00:00<00:00, 1091.28it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1085.99it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1079.14it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1081.32it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1083.18it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1083.92it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1082.97it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1077.72it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1079.42it/s][A
testing:   6%|▌         | 435/7245 [00:00<00:06, 1076.80it/s][A
testing:   8%|▊         | 544/7245 [00:00<00:06, 1080.84it/s][A
testing:   9%|▉         | 653/7245 [00:00<00:06, 1080.96it/s][A
testing:  11%|█         | 762/7245 [00:00<00:05, 1082.75it/s][A
testing:  12%|█▏        | 871/7245 [00:00<00:05, 1085.00

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000033, time: 0.3s
epoch 10, training loss: 0.000002, time: 0.3s
Start Inference on the training data...



testing: 100%|██████████| 106/106 [00:00<00:00, 1093.32it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1084.35it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1084.98it/s][A
testing:  51%|█████     | 328/644 [00:00<00:00, 1087.39it/s][A
testing:  68%|██████▊   | 437/644 [00:00<00:00, 1088.01it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1088.18it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1086.56it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1087.33it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1086.84it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1086.08it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1084.83it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1085.61it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1086.57it/s][A
testing:  12%|█▏        | 872/7245 [00:00<00:05, 1087.53

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000003, time: 0.3s
epoch 10, training loss: 0.000006, time: 0.3s
Start Inference on the training data...



testing:   0%|          | 0/116 [00:00<?, ?it/s][A
testing: 100%|██████████| 116/116 [00:00<00:00, 1080.05it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 108/644 [00:00<00:00, 1077.99it/s][A
testing:  34%|███▎      | 217/644 [00:00<00:00, 1080.91it/s][A
testing:  51%|█████     | 326/644 [00:00<00:00, 1083.83it/s][A
testing:  68%|██████▊   | 435/644 [00:00<00:00, 1085.53it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1083.19it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1088.51it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1086.82it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1085.94it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1086.88it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1087.46it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1085.46it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1086.72it/s][A
t

Lifelong ROC-AUC: 0.37049857086067534, BWT: 0.5555126133726674, FWT: 0.15938244244744287





In [21]:
# Run the evaluation protocol with a fresh RDP model (epochs=10) under the
# "replay" strategy, passing a replay buffer size of 5000, and keep the
# returned result in R_replay.
# NOTE(review): T, E and Y are defined in earlier cells; their exact
# contents are not visible here -- confirm against the setup cells.
R_replay = evaluation_protocol(
    T,
    E,
    Y,
    RDP(epochs=10),
    strategy="replay",
    replay_buffer_size=5000,
)

# Summarise the run with the three metrics computed from R_replay.
print(
    f"Lifelong ROC-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}"
)

Evaluating using replay strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000008, time: 0.3s
epoch 10, training loss: 0.000003, time: 0.3s
Start Inference on the training data...



testing: 100%|██████████| 107/107 [00:00<00:00, 1091.72it/s]

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1083.11it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1083.76it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1083.17it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1084.23it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1080.11it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1084.64it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1084.36it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:08, 782.43it/s] [A
testing:   6%|▌         | 434/7245 [00:00<00:07, 873.97it/s][A
testing:   7%|▋         | 542/7245 [00:00<00:07, 938.42it/s][A
testing:   9%|▉         | 650/7245 [00:00<00:06, 980.89it/s][A
testing:  10%|█         | 758/7245 [00:00<00:06, 1008.41it/s][A
testing:  12%|█▏        | 866/7245 [00:00<00:06, 1029.26it/

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000004, time: 0.5s
epoch 10, training loss: 0.000005, time: 0.5s
Start Inference on the training data...



testing:   0%|          | 0/192 [00:00<?, ?it/s][A
testing: 100%|██████████| 192/192 [00:00<00:00, 1089.21it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 110/644 [00:00<00:00, 1090.98it/s][A
testing:  34%|███▍      | 220/644 [00:00<00:00, 1089.07it/s][A
testing:  51%|█████     | 329/644 [00:00<00:00, 1086.32it/s][A
testing:  68%|██████▊   | 438/644 [00:00<00:00, 1085.09it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1085.86it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1088.08it/s][A
testing:   3%|▎         | 218/7245 [00:00<00:06, 1088.30it/s][A
testing:   5%|▍         | 327/7245 [00:00<00:06, 1082.10it/s][A
testing:   6%|▌         | 436/7245 [00:00<00:06, 1082.30it/s][A
testing:   8%|▊         | 545/7245 [00:00<00:06, 1083.40it/s][A
testing:   9%|▉         | 654/7245 [00:00<00:06, 1084.84it/s][A
testing:  11%|█         | 763/7245 [00:00<00:05, 1085.25it/s][A
t

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000005, time: 0.5s
epoch 10, training loss: 0.000009, time: 0.5s
Start Inference on the training data...



testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 1085.90it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 109/644 [00:00<00:00, 1081.73it/s][A
testing:  34%|███▍      | 218/644 [00:00<00:00, 1082.59it/s][A
testing:  51%|█████     | 327/644 [00:00<00:00, 1081.57it/s][A
testing:  68%|██████▊   | 436/644 [00:00<00:00, 1084.57it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1080.43it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   1%|▏         | 108/7245 [00:00<00:06, 1077.69it/s][A
testing:   3%|▎         | 217/7245 [00:00<00:06, 1079.55it/s][A
testing:   4%|▍         | 326/7245 [00:00<00:06, 1080.50it/s][A
testing:   6%|▌         | 435/7245 [00:00<00:06, 1081.70it/s][A
testing:   8%|▊         | 544/7245 [00:00<00:06, 1083.90it/s][A
testing:   9%|▉         | 653/7245 [00:00<00:06, 1084.24it/s][A
testing:  11%|█         | 762/7245 [00:00<00:05, 1084.73it/s][A
t

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000010, time: 0.5s
epoch 10, training loss: 0.000004, time: 0.5s
Start Inference on the training data...



testing:   0%|          | 0/185 [00:00<?, ?it/s][A
testing: 100%|██████████| 185/185 [00:00<00:00, 1093.00it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 110/644 [00:00<00:00, 1092.24it/s][A
testing:  34%|███▍      | 220/644 [00:00<00:00, 1095.40it/s][A
testing:  51%|█████     | 330/644 [00:00<00:00, 1093.25it/s][A
testing:  68%|██████▊   | 440/644 [00:00<00:00, 1093.62it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1094.21it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   2%|▏         | 109/7245 [00:00<00:06, 1084.40it/s][A
testing:   3%|▎         | 219/7245 [00:00<00:06, 1090.65it/s][A
testing:   5%|▍         | 329/7245 [00:00<00:06, 1091.76it/s][A
testing:   6%|▌         | 439/7245 [00:00<00:06, 1089.91it/s][A
testing:   8%|▊         | 548/7245 [00:00<00:06, 1088.16it/s][A
testing:   9%|▉         | 657/7245 [00:00<00:06, 1086.12it/s][A
testing:  11%|█         | 766/7245 [00:00<00:05, 1085.66it/s][A
t

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=36, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000007, time: 0.5s
epoch 10, training loss: 0.000006, time: 0.5s
Start Inference on the training data...



testing:   0%|          | 0/194 [00:00<?, ?it/s][A
testing: 100%|██████████| 194/194 [00:00<00:00, 1072.74it/s][A

testing:   0%|          | 0/644 [00:00<?, ?it/s][A
testing:  17%|█▋        | 108/644 [00:00<00:00, 1077.00it/s][A
testing:  34%|███▎      | 216/644 [00:00<00:00, 1055.27it/s][A
testing:  50%|█████     | 323/644 [00:00<00:00, 1058.49it/s][A
testing:  67%|██████▋   | 432/644 [00:00<00:00, 1066.89it/s][A
testing: 100%|██████████| 644/644 [00:00<00:00, 1065.15it/s][A

testing:   0%|          | 0/7245 [00:00<?, ?it/s][A
testing:   1%|▏         | 107/7245 [00:00<00:06, 1068.34it/s][A
testing:   3%|▎         | 215/7245 [00:00<00:06, 1072.16it/s][A
testing:   4%|▍         | 323/7245 [00:00<00:06, 1074.72it/s][A
testing:   6%|▌         | 433/7245 [00:00<00:06, 1081.73it/s][A
testing:   7%|▋         | 542/7245 [00:00<00:06, 1080.75it/s][A
testing:   9%|▉         | 652/7245 [00:00<00:06, 1085.18it/s][A
testing:  11%|█         | 761/7245 [00:00<00:05, 1083.86it/s][A
t

Lifelong ROC-AUC: 0.12671932503281785, BWT: 0.18981846791017146, FWT: 0.07350462297981172



