# Imports

In [1]:
import numpy as np
#import pandas as pd
#import matplotlib.pyplot as plt
from tqdm import tqdm
import math

from tqdm import tqdm
import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from copy import deepcopy

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.base import clone

from scipy.spatial.distance import cdist
from scipy.stats import ks_2samp
from scipy.optimize import minimize
from scipy.stats import wasserstein_distance

from sklearn.metrics import average_precision_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
X_train = np.load('data/x_train.npy')
y_train = np.load('data/y_train.npy')

X_test = np.load('data/x_test.npy')
y_test = np.load('data/y_test.npy')

X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)

y = np.where(y == 3, 0, 1)

# Setup

In [4]:
def create_phi(normal_data, c):
    """
    Concept creation function for normal data.
    Uses k-Means clustering to partition normal data into c clusters.
    
    Args:
        normal_data (numpy array): The normal data points.
        c (int): Number of desired normal concepts.
    
    Returns:
        list of numpy arrays: List of normal clusters.
    """
    kmeans = KMeans(n_clusters=c, random_state=42)
    labels = kmeans.fit_predict(normal_data)
    
    normal_concepts = [normal_data[labels == i] for i in range(c)]
    print("Finished creating normal concepts")
    
    return normal_concepts


def create_gamma(anomaly_data, c):
    """
    Concept creation function for anomaly data.
    Uses k-Means clustering to partition anomaly data into c clusters.
    
    Args:
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired anomaly concepts.
    
    Returns:
        list of numpy arrays: List of anomaly clusters.
    """
    kmeans = KMeans(n_clusters=c, random_state=42)
    labels = kmeans.fit_predict(anomaly_data)
    
    anomaly_concepts = [anomaly_data[labels == i] for i in range(c)]
    print("Finished creating anomaly concepts")
    
    return anomaly_concepts
    
def match_lambda(anomaly_concepts, normal_concepts):
    """
    Matches each normal concept with the closest anomaly concept.
    Uses Euclidean distance to determine the best match.
    
    Args:
        anomaly_concepts (list of numpy arrays): List of anomaly clusters.
        normal_concepts (list of numpy arrays): List of normal clusters.
    
    Returns:
        list of tuples: Pairs of (normal_concept, matched_anomaly_concept)
    """
    pairs = []
    remaining_anomalies = anomaly_concepts.copy()

    for normal_concept in normal_concepts:
        normal_centroid = np.mean(normal_concept, axis=0)
        anomaly_centroids = [np.mean(ac, axis=0) for ac in remaining_anomalies]

        distances = cdist([normal_centroid], anomaly_centroids, metric='euclidean')[0]
        closest_idx = np.argmin(distances)

        pairs.append((normal_concept, remaining_anomalies[closest_idx]))
        remaining_anomalies.pop(closest_idx)

    print("Finished matching concept pairs")
    
    return pairs

def lifelong_roc_auc(R):
    """
    Computes the Lifelong ROC-AUC metric.
    
    Args:
        R (numpy array): NxN matrix of ROC-AUC scores, where R[i, j] is the model's 
                         performance on concept j after learning concept i.
    
    Returns:
        float: Lifelong ROC-AUC score.
    """
    N = R.shape[0]
    lower_triangular_sum = np.sum(np.tril(R))
    normalization_factor = (N * (N + 1)) / 2

    return lower_triangular_sum / normalization_factor

def BWT(R):
    """
    Computes the Backward Transfer (BWT) score.
    
    Args:
        R (numpy array): NxN results matrix.
    
    Returns:
        float: BWT score.
    """
    N = R.shape[0]
    backward_transfer = 0
    count = 0

    for i in range(1, N):
        for j in range(i):
            backward_transfer += (R[i, j] - R[j, j])
            count += 1

    return backward_transfer / count if count > 0 else 0

def FWT(R):
    """
    Computes the Forward Transfer (FWT) score.
    
    Args:
        R (numpy array): NxN results matrix.
    
    Returns:
        float: FWT score.
    """
    N = R.shape[0]
    forward_transfer = 0
    count = 0

    for i in range(N):
        for j in range(i + 1, N): 
            forward_transfer += R[i, j]
            count += 1

    return forward_transfer / count if count > 0 else 0 

def kolmogorov_smirnov_test(X_old, X_new, alpha=0.05):
    """Detect concept drift using KS-test on feature distributions."""
    
    p_values = [ks_2samp(X_old[:, i], X_new[:, i]).pvalue for i in range(X_old.shape[1])]
    return np.any(np.array(p_values) < alpha)

def histogram_binning(X, bins=25):
    """Convert sample distributions into histograms."""
    
    return np.array([np.histogram(X[:, i], bins=bins, density=True)[0] for i in range(X.shape[1])]).T

def kl_divergence(P, Q):
    """Compute KL divergence between two distributions."""
    
    P, Q = np.clip(P, 1e-10, None), np.clip(Q, 1e-10, None)  # Avoid log(0)
    return np.sum(P * np.log(P / Q))

def strategic_sample_selection(X_old, X_new, top_k=100, learning_rate=0.01, num_iterations=100):
    """
    Selects representative new samples by minimizing KL divergence.
    
    Args:
        X_old (numpy.ndarray): Old memory buffer samples.
        X_new (numpy.ndarray): Incoming new samples.
        top_k (int): Number of samples to retain.
        learning_rate (float): Step size for optimization.
        num_iterations (int): Number of optimization steps.

    Returns:
        numpy.ndarray: Selected representative new samples.
    """
    
    H_old, H_new = histogram_binning(X_old), histogram_binning(X_new)
    m_n = np.random.rand(H_new.shape[0])  

    def loss_function(m_n):
        """Computes KL divergence loss for selected samples."""
        weighted_H_new = H_new * m_n[:, np.newaxis]  
        combined_H = (H_old + weighted_H_new) / 2 
        return kl_divergence(H_new, combined_H) 

    progress_bar = tqdm(total=num_iterations, desc="Optimizing Sample Selection", position=0, leave=True)

    def callback(xk):
        progress_bar.update(1)  

    result = minimize(loss_function, m_n, method="L-BFGS-B", bounds=[(0, 1)] * len(m_n), 
                      options={"maxiter": num_iterations, "ftol": 1e-10}, callback=callback)

    progress_bar.close()

    selected_indices = np.argsort(result.x)[-top_k:]

    return X_new[selected_indices] 


def update_memory_buffer(X_old, X_new_selected, memory_size=3000):
    """Updates memory buffer using strategic forgetting."""
    updated_buffer = np.vstack((X_old, X_new_selected))  

    if updated_buffer.shape[0] > memory_size:
        updated_buffer = updated_buffer[-memory_size:]

    return updated_buffer

class HierarchicalMemory:
    def __init__(self, memory_limit=5000, pyramid_factor=2, centroids_per_concept=10):
        self.memory_limit = memory_limit
        self.pyramid_factor = pyramid_factor
        self.centroids_per_concept = centroids_per_concept
        self.memory = {}  # level: [concept1, concept2, ...]

    def add_concept(self, data, level=1):
        if level not in self.memory:
            self.memory[level] = []
        self.memory[level].append(np.array(data))
        self._summarize_memory()

    def _pyramidal_allocation(self):
        levels = sorted(self.memory.keys())
        weights = np.array([1 / (self.pyramid_factor ** (lvl - 1)) for lvl in levels])
        total_weight = weights.sum()
        allocations = (weights / total_weight) * self.memory_limit
        return {lvl: int(alloc) for lvl, alloc in zip(levels, allocations)}

    def _summarize_concept(self, concept, n_samples):
        if len(concept) <= n_samples:
            return concept
        kmeans = KMeans(n_clusters=min(self.centroids_per_concept, len(concept)), random_state=42).fit(concept)
        centroids = kmeans.cluster_centers_
        distances = np.linalg.norm(concept[:, None] - centroids, axis=2)
        closest_indices = np.argmin(distances, axis=0)
        summarized = concept[closest_indices]
        return summarized

    def _summarize_memory(self):
        allocations = self._pyramidal_allocation()
        for level, concepts in self.memory.items():
            summarized_level = []
            alloc_per_concept = max(1, allocations[level] // len(concepts))
            for concept in concepts:
                summarized = self._summarize_concept(concept, alloc_per_concept)
                summarized_level.append(summarized)
            self.memory[level] = summarized_level

    def get_all_memory(self):
        all_data = []
        for level_concepts in self.memory.values():
            for concept in level_concepts:
                all_data.append(concept)
        return np.vstack(all_data) if all_data else np.empty((0,))

def scenario_design(normal_data, anomaly_data, c):
    """
    Implements Algorithm 1 to create a lifelong learning scenario.
    
    Args:
        normal_data (numpy array): The normal data points.
        anomaly_data (numpy array): The anomaly data points.
        c (int): Number of desired concepts.
    
    Returns:
        list of tuples: List of (normal_concept, anomaly_concept) pairs forming the scenario.
    """
    normal_concepts = create_phi(normal_data, c)
    anomaly_concepts = create_gamma(anomaly_data, c)
    
    scenario = match_lambda(anomaly_concepts, normal_concepts)
    
    return scenario

def evaluation_protocol(T, E, Y, model, strategy="naive", replay_buffer_size=5000, memory_size=5000, alpha=0.05):
    """
    Implements Algorithm 2: Lifelong Learning Evaluation Protocol with multiple strategies.
    
    Args:
        T (list): Sequence of N training sets.
        E (list): Sequence of N testing sets.
        Y (list): Sequence of true labels for test sets.
        model (sklearn.base.BaseEstimator): A scikit-learn-like model instance that supports `fit` and `decision_function`.
        strategy (str): Strategy for training.
        replay_buffer_size (int): Maximum size of replay buffer if applicable
        memory_size (int): Maximum memory size if applicable
        alpha (float): KS-test threshold for drift detection.

    Returns:
        numpy array: NxN results matrix R where R[i, j] is ROC-AUC of model on E[j] after learning T[i].
    """
    N = len(T)
    R = np.zeros((N, N))  

    if strategy in ["cumulative"]:
        cumulative_data = []
    
    if strategy in ["replay"]:
        replay_buffer = []

    if strategy == "SSF":
        memory_buffer = None 

    if strategy == "hierarchical":
        h_memory = HierarchicalMemory(memory_limit=memory_size, pyramid_factor=2, centroids_per_concept=10)

    for i, Ti in tqdm(enumerate(T), desc=f"Evaluating using {strategy} strategy"):
        current_model = deepcopy(model)

        # -- NAIVE --
        if strategy == "naive":
            current_model.fit(Ti)

        # -- CUMULATIVE --
        elif strategy == "cumulative":
            cumulative_data.extend(Ti.tolist())
            current_model.fit(np.array(cumulative_data)) 

        # -- REPLAY -- 
        elif strategy == "replay":
            if replay_buffer:
                combined_data = np.vstack((np.array(replay_buffer), Ti))
            else:
                combined_data = Ti

            current_model.fit(combined_data)
            replay_buffer.extend(Ti.tolist())

            if len(replay_buffer) > replay_buffer_size:
                replay_buffer = replay_buffer[-replay_buffer_size:]
        
        # -- SSF -- 
        elif strategy == "SSF":
            if memory_buffer is None:
                memory_buffer = Ti[:memory_size]  
            else:
                drift_detected = kolmogorov_smirnov_test(memory_buffer, Ti, alpha)
                if drift_detected:
                    X_new_selected = strategic_sample_selection(memory_buffer, Ti, top_k=1000)
                    memory_buffer = update_memory_buffer(memory_buffer, X_new_selected, memory_size=memory_size)
            memory_buffer = np.unique(memory_buffer, axis=0)
            current_model.fit(memory_buffer)

        # -- HIERARCHICAL --
        elif strategy == "hierarchical":

            memory_data = h_memory.get_all_memory()
            if memory_data.size == 0:
                drift_level = 1
            else:
                drift_distances = [
                    wasserstein_distance(Ti[:, d], memory_data[:, d])
                    for d in range(Ti.shape[1])
                ]
                drift_score = np.mean(drift_distances)
                print(f"drift: {drift_score}")
                
                if drift_score < 0.05:
                    drift_level = 1
                elif drift_score < 0.1:
                    drift_level = 2
                elif drift_score < 0.2:
                    drift_level = 3
                else:
                    drift_level = 4
        
            h_memory.add_concept(Ti, level=drift_level)
            summarized_memory = h_memory.get_all_memory()
            current_model.fit(summarized_memory)

        # -- Evaluation --
        for j, ((Ej_normal, Ej_anomaly), (y_normal, y_anomaly)) in enumerate(zip(E, Y)):
            test_data = np.vstack((Ej_normal, Ej_anomaly))
            test_labels = np.hstack((y_normal, y_anomaly))  
        
            scores = -current_model.decision_function(test_data)  
            R[i, j] = roc_auc_score(test_labels, scores)

    return R


# Experiments

In [5]:
num_concepts = 5

X_normal = X[y == 0]  
X_anomaly = X[y == 1]

normal_concepts = create_phi(X_normal, num_concepts)
anomaly_concepts = create_gamma(X_anomaly, num_concepts)

concept_pairs = match_lambda(anomaly_concepts, normal_concepts)

T = []  
E = [] 
Y = []

for normal, anomaly in concept_pairs:

    normal_train, normal_test = train_test_split(normal, test_size=0.3, random_state=42)
    anomaly_train, anomaly_test = train_test_split(anomaly, test_size=0.3, random_state=42)  

    T.append(normal_train)
    E.append((normal_test, anomaly_test))

    y_normal_test = np.zeros(len(normal_test))
    y_anomaly_test = np.ones(len(anomaly_test))
    
    Y.append((y_normal_test, y_anomaly_test))

Finished creating normal concepts
Finished creating anomaly concepts
Finished matching concept pairs


# Eval

## LOF

In [10]:
R_hm = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:16, 16.68s/it]

drift: 398.8568904220632


Evaluating using hierarchical strategy: 2it [00:27, 12.95s/it]

drift: 198.15502839106085


Evaluating using hierarchical strategy: 3it [00:34, 10.52s/it]

drift: 377.90543069776226


Evaluating using hierarchical strategy: 4it [00:40,  8.61s/it]

drift: 297.27042816559435


Evaluating using hierarchical strategy: 5it [00:52, 10.46s/it]

Lifelong ROC-AUC: 0.8072902812925056, BWT: -0.025695915608955983, FWT: 0.8816047635322224





In [26]:
R_ssf = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 273.05it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 256.08it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 276.87it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 326.53it/s]

Evaluating using SSF strategy: 5it [00:05,  1.14s/it]

Lifelong ROC-AUC: 0.7544561432893502, BWT: -0.0039135441339650965, FWT: 0.45301001764958054





In [11]:
R_naive = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [11:18:46, ?it/s]


KeyboardInterrupt: 

In [7]:
R_cumulative = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [02:10, 26.08s/it]

Lifelong ROC-AUC: 0.9238354062988797, BWT: -0.004499087364359666, FWT: 0.17379610432039533





In [None]:
R_replay = evaluation_protocol(T, E, Y, LocalOutlierFactor(n_neighbors=20, novelty=True), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 0it [00:00, ?it/s]

## IF

In [11]:
R_hm = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:19, 20.00s/it]

drift: 398.8568904220632


Evaluating using hierarchical strategy: 2it [00:34, 16.55s/it]

drift: 198.15502839106085


Evaluating using hierarchical strategy: 3it [00:44, 13.88s/it]

drift: 377.90543069776226


Evaluating using hierarchical strategy: 4it [00:53, 11.84s/it]

drift: 297.27042816559435


Evaluating using hierarchical strategy: 5it [01:08, 13.70s/it]

Lifelong ROC-AUC: 0.8155088499758174, BWT: -0.013645100218763955, FWT: 0.8445315775385265





In [27]:
R_ssf = evaluation_protocol(T, E, Y,  IsolationForest(n_estimators=100), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 281.14it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 253.28it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 237.31it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 244.61it/s]

Evaluating using SSF strategy: 5it [00:05,  1.06s/it]

Lifelong ROC-AUC: 0.6559285630296237, BWT: 0.06868127809581366, FWT: 0.804018261100736





In [7]:
R_naive = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:52, 10.58s/it]

Lifelong PR-AUC: 0.7788659153911035, BWT: -0.24021295357589692, FWT: 0.7917760512897282





In [9]:
R_cumulative = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:07,  1.55s/it]

Lifelong ROC-AUC: 0.753897896406868, BWT: -0.0066406078135951676, FWT: 0.7719174809547474





In [8]:
R_replay = evaluation_protocol(T, E, Y, IsolationForest(n_estimators=100), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [01:07, 13.46s/it]

Lifelong PR-AUC: 0.7590506633375615, BWT: -0.27065328400264504, FWT: 0.8040770717615044





## SGDOCSVM

In [12]:
R_hm = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 1it [00:13, 13.54s/it]

drift: 398.8568904220632


Evaluating using hierarchical strategy: 2it [00:20,  9.93s/it]

drift: 198.15502839106085


Evaluating using hierarchical strategy: 3it [00:24,  7.18s/it]

drift: 377.90543069776226


Evaluating using hierarchical strategy: 4it [00:26,  4.94s/it]

drift: 297.27042816559435


Evaluating using hierarchical strategy: 5it [00:34,  6.82s/it]

Lifelong ROC-AUC: 0.711944549167925, BWT: -0.04644052916489031, FWT: 0.468725564548585





In [28]:
R_ssf = evaluation_protocol(T, E, Y,  SGDOneClassSVM(), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")


[Aluating using SSF strategy: 0it [00:00, ?it/s]
Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 288.13it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 307.55it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 306.85it/s]

Optimizing Sample Selection:   1%|          | 1/100 [00:00<00:00, 271.76it/s]

Evaluating using SSF strategy: 5it [00:02,  2.14it/s]

Lifelong ROC-AUC: 0.790790866239933, BWT: -0.004091332504534584, FWT: 0.5467376505389003





In [9]:
R_naive = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 5it [00:14,  2.89s/it]

Lifelong PR-AUC: 0.46159136314423416, BWT: -0.21579492490317728, FWT: 0.6273475196471586





In [17]:
R_cumulative = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="cumulative")
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_cumulative)}, BWT: {BWT(R_cumulative)}, FWT: {FWT(R_cumulative)}")

Evaluating using cumulative strategy: 5it [00:04,  1.03it/s]


Lifelong ROC-AUC: 0.597799790906325, BWT: -0.215598947769658, FWT: 0.6579349113663381


In [10]:
R_replay = evaluation_protocol(T, E, Y, SGDOneClassSVM(), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

Evaluating using replay strategy: 5it [00:28,  5.70s/it]

Lifelong PR-AUC: 0.4944899524729847, BWT: -0.21646644626189934, FWT: 0.5427823493460701





## SLAD

In [6]:
from deepod.models.tabular import SLAD

In [None]:
R_hm = evaluation_protocol(T, E, Y, SLAD(), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 33, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33]


  c /= stddev[:, None]
  c /= stddev[None, :]


epoch  1, training loss: 1.044842, time: 0.4s
epoch 10, training loss: 0.625887, time: 0.0s
epoch 20, training loss: 0.594825, time: 0.0s
epoch 30, training loss: 0.575092, time: 0.0s
epoch 40, training loss: 0.576783, time: 0.0s
epoch 50, training loss: 0.560773, time: 0.0s
epoch 60, training loss: 0.558940, time: 0.0s
epoch 70, training loss: 0.573671, time: 0.0s
epoch 80, training loss: 0.529655, time: 0.0s
epoch 90, training loss: 0.586903, time: 0.0s
epoch100, training loss: 0.551060, time: 0.0s
Start Inference on the training data...


In [7]:
R_ssf = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 33, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33]
epoch  1, training loss: 0.501681, time: 4.0s
epoch 10, training loss: 0.313463, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:   8%|▊         | 8/100 [00:00<00:00, 610.68it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 33, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33]
epoch  1, training loss: 0.825984, time: 0.2s
epoch 10, training loss: 0.414694, time: 0.2s
Start Inference on the training data...


Optimizing Sample Selection:  11%|█         | 11/100 [00:00<00:00, 714.18it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 33, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33]
epoch  1, training loss: 0.714377, time: 0.3s
epoch 10, training loss: 0.436546, time: 0.3s
Start Inference on the training data...


Optimizing Sample Selection:  11%|█         | 11/100 [00:00<00:00, 635.39it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 33, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33]
epoch  1, training loss: 0.667521, time: 0.3s
epoch 10, training loss: 0.431774, time: 0.3s
Start Inference on the training data...


Optimizing Sample Selection:  10%|█         | 10/100 [00:00<00:00, 516.40it/s]
  c /= stddev[:, None]
  c /= stddev[None, :]


Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 33, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33]
epoch  1, training loss: 0.658839, time: 0.3s
epoch 10, training loss: 0.510152, time: 0.3s
Start Inference on the training data...


Evaluating using SSF strategy: 5it [1:37:56, 1175.38s/it]

Lifelong ROC-AUC: 0.3103566662499718, BWT: 0.21562569546654134, FWT: 0.011228514161569971





In [7]:
R_naive = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
unified size: 128, subspace pool size: 33, ensemble size: 20
len pool: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33]


  c /= stddev[:, None]
  c /= stddev[None, :]
Evaluating using naive strategy: 0it [00:00, ?it/s]


RuntimeError: Numpy is not available

In [None]:
R_replay = evaluation_protocol(T, E, Y, SLAD(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

# ICL

In [6]:
from deepod.models.tabular import ICL

In [8]:
R_hm = evaluation_protocol(T, E, Y, ICL(epochs=100), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing: 100%|██████████| 1/1 [00:00<00:00, 10.37it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 459.30it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 560.66it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 1/30715 [00:00<3:32:02,  2.41it/s][A
testing:   0%|          | 66/30715 [00:00<03:02, 168.19it/s][A
testing:   0%|          | 130/30715 [00:00<01:43, 294.47it/s][A
testing:   1%|          | 195/30715 [00:00<01:17, 391.45it/s][A
testing:   1%|          | 259/30715 [00:00<01:06, 461.15it/s][A
testing:   1%|          | 324/30715 [00:00<00:59, 513.32it/s][A
testing:   1%|▏         | 389/30715 [00:01<00:55, 551.24it/s][A
testing:   1%|▏         | 454/30715 [00:01<00:52, 577.74it/s][A
testing:   2%|▏         | 519/30715 [00:01<00:50, 596.48it/s][A
testing:   2%|▏         | 584/30715 [00:01<00:49, 609.51it/s][A
testing:   2%|▏         | 649/30715 [00:01<00:48, 619.05it/s][A
testing:   2%|▏         | 714/30715 [00:01<00:47, 625.62it/s]

drift: 398.8568904220632
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2,


testing: 100%|██████████| 1/1 [00:00<00:00, 528.85it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 538.98it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 545.85it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 65/30715 [00:00<00:47, 643.06it/s][A
testing:   0%|          | 130/30715 [00:00<00:47, 641.80it/s][A
testing:   1%|          | 195/30715 [00:00<00:47, 641.96it/s][A
testing:   1%|          | 260/30715 [00:00<00:47, 641.90it/s][A
testing:   1%|          | 325/30715 [00:00<00:47, 641.51it/s][A
testing:   1%|▏         | 390/30715 [00:00<00:47, 641.94it/s][A
testing:   1%|▏         | 455/30715 [00:00<00:47, 641.62it/s][A
testing:   2%|▏         | 520/30715 [00:00<00:47, 641.94it/s][A
testing:   2%|▏         | 585/30715 [00:00<00:46, 641.82it/s][A
testing:   2%|▏         | 650/30715 [00:01<00:46, 642.18it/s][A
testing:   2%|▏         | 715/30715 [00:01<00:46, 642.30it/s][A
testing:   3%|▎         | 780/30715 [00:01<00:46, 641.60it/s

drift: 198.15502839106085
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2


testing: 100%|██████████| 1/1 [00:00<00:00, 513.94it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 529.85it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 529.58it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 65/30715 [00:00<00:47, 640.27it/s][A
testing:   0%|          | 130/30715 [00:00<00:47, 639.89it/s][A
testing:   1%|          | 194/30715 [00:00<00:47, 639.74it/s][A
testing:   1%|          | 258/30715 [00:00<00:47, 639.58it/s][A
testing:   1%|          | 322/30715 [00:00<00:47, 638.80it/s][A
testing:   1%|▏         | 387/30715 [00:00<00:47, 639.86it/s][A
testing:   1%|▏         | 451/30715 [00:00<00:47, 638.94it/s][A
testing:   2%|▏         | 516/30715 [00:00<00:47, 639.51it/s][A
testing:   2%|▏         | 580/30715 [00:00<00:47, 639.42it/s][A
testing:   2%|▏         | 645/30715 [00:01<00:46, 640.14it/s][A
testing:   2%|▏         | 710/30715 [00:01<00:46, 640.11it/s][A
testing:   3%|▎         | 775/30715 [00:01<00:46, 640.49it/s

drift: 377.90543069776226
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2


testing: 100%|██████████| 1/1 [00:00<00:00, 505.70it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 515.46it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 530.79it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 65/30715 [00:00<00:47, 641.28it/s][A
testing:   0%|          | 130/30715 [00:00<00:47, 640.38it/s][A
testing:   1%|          | 195/30715 [00:00<00:47, 640.92it/s][A
testing:   1%|          | 260/30715 [00:00<00:47, 640.30it/s][A
testing:   1%|          | 325/30715 [00:00<00:47, 640.93it/s][A
testing:   1%|▏         | 390/30715 [00:00<00:47, 641.26it/s][A
testing:   1%|▏         | 455/30715 [00:00<00:47, 641.25it/s][A
testing:   2%|▏         | 520/30715 [00:00<00:47, 641.21it/s][A
testing:   2%|▏         | 585/30715 [00:00<00:47, 640.56it/s][A
testing:   2%|▏         | 650/30715 [00:01<00:47, 639.14it/s][A
testing:   2%|▏         | 715/30715 [00:01<00:46, 639.62it/s][A
testing:   3%|▎         | 780/30715 [00:01<00:46, 640.02it/s

drift: 297.27042816559435
Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2


testing: 100%|██████████| 1/1 [00:00<00:00, 504.24it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 516.54it/s]

testing: 100%|██████████| 1/1 [00:00<00:00, 503.82it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 65/30715 [00:00<00:47, 641.21it/s][A
testing:   0%|          | 130/30715 [00:00<00:47, 641.22it/s][A
testing:   1%|          | 195/30715 [00:00<00:47, 642.07it/s][A
testing:   1%|          | 260/30715 [00:00<00:47, 642.46it/s][A
testing:   1%|          | 325/30715 [00:00<00:47, 641.43it/s][A
testing:   1%|▏         | 390/30715 [00:00<00:47, 641.33it/s][A
testing:   1%|▏         | 455/30715 [00:00<00:47, 641.40it/s][A
testing:   2%|▏         | 520/30715 [00:00<00:47, 641.17it/s][A
testing:   2%|▏         | 585/30715 [00:00<00:47, 641.02it/s][A
testing:   2%|▏         | 650/30715 [00:01<00:47, 637.71it/s][A
testing:   2%|▏         | 715/30715 [00:01<00:46, 638.93it/s][A
testing:   3%|▎         | 780/30715 [00:01<00:46, 640.25it/s

Lifelong ROC-AUC: 0.08254430546472566, BWT: 0.0603046965289877, FWT: 0.3023565450702955





In [7]:
R_ssf = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/12 [00:00<?, ?it/s][A
testing: 100%|██████████| 12/12 [00:00<00:00, 87.35it/s][A

testing: 100%|██████████| 12/12 [00:00<00:00, 673.09it/s]

testing: 100%|██████████| 12/12 [00:00<00:00, 575.87it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 68/30715 [00:00<00:45, 673.63it/s][A
testing:   0%|          | 136/30715 [00:00<00:46, 661.02it/s][A
testing:   1%|          | 203/30715 [00:00<00:45, 664.36it/s][A
testing:   1%|          | 270/30715 [00:00<00:45, 665.72it/s][A
testing:   1%|          | 337/30715 [00:00<00:46, 659.56it/s][A
testing:   1%|▏         | 404/30715 [00:00<00:45, 661.52it/s][A
testing:   2%|▏         | 471/30715 [00:00<00:46, 657.14it/s][A
testing:   2%|▏         | 539/30715 [00:00<00:45, 662.73it/s][A
testing:   2%|▏         | 606/30715 [00:00<00:45, 655.45it/s][A
testing:   2%|▏         | 673/30715 [00:01<00:45, 658.62it/s][A
testing:   2%|▏         | 739/30715 [00:01<00:45, 656.96it/s][A
te

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing: 100%|██████████| 12/12 [00:00<00:00, 672.69it/s]

testing: 100%|██████████| 12/12 [00:00<00:00, 675.65it/s]

testing: 100%|██████████| 12/12 [00:00<00:00, 673.03it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 68/30715 [00:00<00:45, 676.53it/s][A
testing:   0%|          | 136/30715 [00:00<00:45, 677.19it/s][A
testing:   1%|          | 204/30715 [00:00<00:45, 676.71it/s][A
testing:   1%|          | 272/30715 [00:00<00:44, 676.63it/s][A
testing:   1%|          | 340/30715 [00:00<00:44, 677.56it/s][A
testing:   1%|▏         | 408/30715 [00:00<00:44, 677.63it/s][A
testing:   2%|▏         | 476/30715 [00:00<00:44, 677.35it/s][A
testing:   2%|▏         | 544/30715 [00:00<00:44, 676.73it/s][A
testing:   2%|▏         | 612/30715 [00:00<00:44, 676.77it/s][A
testing:   2%|▏         | 680/30715 [00:01<00:44, 675.92it/s][A
testing:   2%|▏         | 748/30715 [00:01<00:44, 674.85it/s][A
testing:   3%|▎         | 816/30715 [00:01<00:44, 674.

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing: 100%|██████████| 13/13 [00:00<00:00, 670.73it/s]

testing: 100%|██████████| 13/13 [00:00<00:00, 666.36it/s]

testing: 100%|██████████| 13/13 [00:00<00:00, 674.01it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 68/30715 [00:00<00:45, 675.70it/s][A
testing:   0%|          | 136/30715 [00:00<00:45, 673.29it/s][A
testing:   1%|          | 204/30715 [00:00<00:45, 673.49it/s][A
testing:   1%|          | 272/30715 [00:00<00:45, 673.98it/s][A
testing:   1%|          | 340/30715 [00:00<00:45, 673.45it/s][A
testing:   1%|▏         | 408/30715 [00:00<00:44, 675.19it/s][A
testing:   2%|▏         | 476/30715 [00:00<00:44, 675.69it/s][A
testing:   2%|▏         | 544/30715 [00:00<00:44, 675.84it/s][A
testing:   2%|▏         | 612/30715 [00:00<00:44, 675.43it/s][A
testing:   2%|▏         | 680/30715 [00:01<00:44, 675.41it/s][A
testing:   2%|▏         | 748/30715 [00:01<00:44, 675.96it/s][A
testing:   3%|▎         | 816/30715 [00:01<00:44, 675.

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing: 100%|██████████| 13/13 [00:00<00:00, 657.26it/s]

testing: 100%|██████████| 13/13 [00:00<00:00, 672.81it/s]

testing: 100%|██████████| 13/13 [00:00<00:00, 667.04it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 68/30715 [00:00<00:45, 675.68it/s][A
testing:   0%|          | 136/30715 [00:00<00:45, 676.36it/s][A
testing:   1%|          | 204/30715 [00:00<00:45, 676.50it/s][A
testing:   1%|          | 272/30715 [00:00<00:45, 676.30it/s][A
testing:   1%|          | 340/30715 [00:00<00:44, 675.73it/s][A
testing:   1%|▏         | 408/30715 [00:00<00:44, 676.16it/s][A
testing:   2%|▏         | 476/30715 [00:00<00:44, 676.53it/s][A
testing:   2%|▏         | 544/30715 [00:00<00:44, 676.20it/s][A
testing:   2%|▏         | 612/30715 [00:00<00:44, 676.44it/s][A
testing:   2%|▏         | 680/30715 [00:01<00:44, 675.58it/s][A
testing:   2%|▏         | 748/30715 [00:01<00:44, 674.77it/s][A
testing:   3%|▎         | 816/30715 [00:01<00:44, 675.

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing: 100%|██████████| 13/13 [00:00<00:00, 669.80it/s]

testing: 100%|██████████| 13/13 [00:00<00:00, 654.27it/s]

testing: 100%|██████████| 13/13 [00:00<00:00, 672.62it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 68/30715 [00:00<00:45, 679.86it/s][A
testing:   0%|          | 137/30715 [00:00<00:44, 680.60it/s][A
testing:   1%|          | 206/30715 [00:00<00:44, 679.59it/s][A
testing:   1%|          | 274/30715 [00:00<00:44, 677.99it/s][A
testing:   1%|          | 342/30715 [00:00<00:44, 677.07it/s][A
testing:   1%|▏         | 410/30715 [00:00<00:44, 676.07it/s][A
testing:   2%|▏         | 478/30715 [00:00<00:44, 676.56it/s][A
testing:   2%|▏         | 546/30715 [00:00<00:44, 676.96it/s][A
testing:   2%|▏         | 614/30715 [00:00<00:44, 676.89it/s][A
testing:   2%|▏         | 682/30715 [00:01<00:44, 676.79it/s][A
testing:   2%|▏         | 750/30715 [00:01<00:44, 676.50it/s][A
testing:   3%|▎         | 818/30715 [00:01<00:44, 676.

Lifelong ROC-AUC: 0.07980195498392721, BWT: 0.015148636937932641, FWT: 0.08914322453508312





In [None]:
R_naive = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

Evaluating using naive strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 3
kernel size: 2
ICLNet(
  (enc_f_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=31, out_features=100, bias=False)
        (act_layer): Tanh()
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (bn_layer): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
    )
  )
  (enc_g_net): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=2, out_features=50, bias=Fa


testing:   0%|          | 0/70712 [00:00<?, ?it/s][A
testing:   0%|          | 1/70712 [00:00<6:27:21,  3.04it/s][A
testing:   0%|          | 63/70712 [00:00<06:12, 189.81it/s][A
testing:   0%|          | 125/70712 [00:00<03:40, 319.65it/s][A
testing:   0%|          | 187/70712 [00:00<02:52, 409.64it/s][A
testing:   0%|          | 249/70712 [00:00<02:29, 471.99it/s][A
testing:   0%|          | 312/70712 [00:00<02:16, 517.01it/s][A
testing:   1%|          | 374/70712 [00:00<02:08, 547.35it/s][A
testing:   1%|          | 434/70712 [00:01<02:05, 561.33it/s][A
testing:   1%|          | 495/70712 [00:01<02:02, 574.50it/s][A
testing:   1%|          | 555/70712 [00:01<02:01, 579.71it/s][A
testing:   1%|          | 615/70712 [00:01<02:00, 582.52it/s][A
testing:   1%|          | 675/70712 [00:01<02:00, 582.50it/s][A
testing:   1%|          | 735/70712 [00:01<02:00, 582.69it/s][A
testing:   1%|          | 794/70712 [00:01<02:00, 580.28it/s][A
testing:   1%|          | 853/70712 [

In [None]:
R_replay = evaluation_protocol(T, E, Y, ICL(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

# RCA

In [None]:
from deepod.models.tabular import RCA

In [10]:
R_hm = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


100%|██████████| 10/10 [00:00<00:00, 1098.04it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:28<04:15, 28.37s/it][A
 20%|██        | 2/10 [00:56<03:45, 28.25s/it][A
 30%|███       | 3/10 [01:24<03:17, 28.25s/it][A
 40%|████      | 4/10 [01:53<02:49, 28.28s/it][A
 50%|█████     | 5/10 [02:21<02:21, 28.27s/it][A
 60%|██████    | 6/10 [02:49<01:52, 28.23s/it][A
 70%|███████   | 7/10 [03:17<01:24, 28.25s/it][A
 80%|████████  | 8/10 [03:46<00:56, 28.24s/it][A
 90%|█████████ | 9/10 [04:14<00:28, 28.21s/it][A
100%|██████████| 10/10 [04:42<00:00, 28.24s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:09<01:22,  9.18s/it][A
 20%|██        | 2/10 [00:18<01:14,  9.28s/it][A
 30%|███       | 3/10 [00:27<01:04,  9.26s/it][A
 40%|████      | 4/10 [00:37<00:55,  9.25s/it][A
 50%|█████     | 5/10 [00:46<00:46,  9.23s/it][A
 60%|██████    | 6/10 [00:55<00:37,  9.26s/it][A
 70%|███████   | 7/10 [01:04<00:27,  9.25s/it][A
 80%|███████

drift: 398.8568904220632
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, o


100%|██████████| 10/10 [00:00<00:00, 1022.48it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:27<04:11, 28.00s/it][A
 20%|██        | 2/10 [00:56<03:45, 28.15s/it][A
 30%|███       | 3/10 [01:24<03:16, 28.13s/it][A
 40%|████      | 4/10 [01:52<02:49, 28.17s/it][A
 50%|█████     | 5/10 [02:20<02:21, 28.22s/it][A
 60%|██████    | 6/10 [02:49<01:52, 28.25s/it][A
 70%|███████   | 7/10 [03:17<01:24, 28.23s/it][A
 80%|████████  | 8/10 [03:45<00:56, 28.24s/it][A
 90%|█████████ | 9/10 [04:13<00:28, 28.24s/it][A
100%|██████████| 10/10 [04:41<00:00, 28.20s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:09<01:23,  9.30s/it][A
 20%|██        | 2/10 [00:18<01:13,  9.25s/it][A
 30%|███       | 3/10 [00:27<01:04,  9.24s/it][A
 40%|████      | 4/10 [00:36<00:55,  9.23s/it][A
 50%|█████     | 5/10 [00:46<00:46,  9.27s/it][A
 60%|██████    | 6/10 [00:55<00:37,  9.25s/it][A
 70%|███████   | 7/10 [01:04<00:27,  9.24s/it][A
 80%|███████

drift: 198.15502839106085
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, 


100%|██████████| 10/10 [00:00<00:00, 1001.05it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:27<04:11, 27.98s/it][A
 20%|██        | 2/10 [00:56<03:44, 28.04s/it][A
 30%|███       | 3/10 [01:24<03:16, 28.08s/it][A
 40%|████      | 4/10 [01:52<02:48, 28.00s/it][A
 50%|█████     | 5/10 [02:20<02:20, 28.01s/it][A
 60%|██████    | 6/10 [02:48<01:52, 28.02s/it][A
 70%|███████   | 7/10 [03:16<01:23, 27.98s/it][A
 80%|████████  | 8/10 [03:44<00:56, 28.01s/it][A
 90%|█████████ | 9/10 [04:12<00:28, 28.01s/it][A
100%|██████████| 10/10 [04:40<00:00, 28.02s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:09<01:21,  9.09s/it][A
 20%|██        | 2/10 [00:18<01:13,  9.13s/it][A
 30%|███       | 3/10 [00:27<01:03,  9.13s/it][A
 40%|████      | 4/10 [00:36<00:55,  9.18s/it][A
 50%|█████     | 5/10 [00:45<00:45,  9.17s/it][A
 60%|██████    | 6/10 [00:54<00:36,  9.15s/it][A
 70%|███████   | 7/10 [01:04<00:27,  9.15s/it][A
 80%|███████

drift: 377.90543069776226
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, 


100%|██████████| 10/10 [00:00<00:00, 981.19it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:27<04:10, 27.86s/it][A
 20%|██        | 2/10 [00:55<03:43, 27.98s/it][A
 30%|███       | 3/10 [01:23<03:16, 28.02s/it][A
 40%|████      | 4/10 [01:51<02:47, 27.96s/it][A
 50%|█████     | 5/10 [02:19<02:19, 27.99s/it][A
 60%|██████    | 6/10 [02:47<01:52, 28.01s/it][A
 70%|███████   | 7/10 [03:16<01:24, 28.05s/it][A
 80%|████████  | 8/10 [03:44<00:56, 28.05s/it][A
 90%|█████████ | 9/10 [04:12<00:28, 28.07s/it][A
100%|██████████| 10/10 [04:40<00:00, 28.03s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:09<01:22,  9.14s/it][A
 20%|██        | 2/10 [00:18<01:13,  9.15s/it][A
 30%|███       | 3/10 [00:27<01:04,  9.20s/it][A
 40%|████      | 4/10 [00:36<00:55,  9.19s/it][A
 50%|█████     | 5/10 [00:45<00:45,  9.17s/it][A
 60%|██████    | 6/10 [00:55<00:36,  9.18s/it][A
 70%|███████   | 7/10 [01:04<00:27,  9.17s/it][A
 80%|████████

drift: 297.27042816559435
Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, 


100%|██████████| 10/10 [00:00<00:00, 948.77it/s]

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:27<04:10, 27.88s/it][A
 20%|██        | 2/10 [00:56<03:44, 28.06s/it][A
 30%|███       | 3/10 [01:24<03:17, 28.22s/it][A
 40%|████      | 4/10 [01:52<02:49, 28.29s/it][A
 50%|█████     | 5/10 [02:21<02:21, 28.28s/it][A
 60%|██████    | 6/10 [02:49<01:53, 28.29s/it][A
 70%|███████   | 7/10 [03:17<01:24, 28.32s/it][A
 80%|████████  | 8/10 [03:46<00:56, 28.33s/it][A
 90%|█████████ | 9/10 [04:14<00:28, 28.30s/it][A
100%|██████████| 10/10 [04:42<00:00, 28.28s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:09<01:22,  9.19s/it][A
 20%|██        | 2/10 [00:18<01:13,  9.21s/it][A
 30%|███       | 3/10 [00:27<01:04,  9.28s/it][A
 40%|████      | 4/10 [00:37<00:55,  9.27s/it][A
 50%|█████     | 5/10 [00:46<00:46,  9.25s/it][A
 60%|██████    | 6/10 [00:55<00:36,  9.24s/it][A
 70%|███████   | 7/10 [01:04<00:27,  9.27s/it][A
 80%|████████

Lifelong ROC-AUC: 0.4234612656150892, BWT: 0.0027778110432063047, FWT: 0.13952288609745678





In [10]:
R_ssf = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 95.71it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [02:29<22:29, 149.99s/it][A
 20%|██        | 2/10 [02:57<10:21, 77.73s/it] [A
 30%|███       | 3/10 [03:24<06:22, 54.63s/it][A
 40%|████      | 4/10 [03:51<04:22, 43.73s/it][A
 50%|█████     | 5/10 [04:18<03:08, 37.74s/it][A
 60%|██████    | 6/10 [04:45<02:16, 34.14s/it][A
 70%|███████   | 7/10 [05:12<01:35, 31.83s/it][A
 80%|████████  | 8/10 [05:39<01:00, 30.31s/it][A
 90%|█████████ | 9/10 [06:06<00:29, 29.32s/it][A
100%|██████████| 10/10 [06:33<00:00, 39.40s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:08<01:19,  8.79s/it][A
 20%|██        | 2/10 [00:17<01:10,  8.83s/it][A
 30%|███       | 3/10 [00:26<01:01,  8.83s/it][A
 40%|████      | 4/10 [00:35<00:53,  8.88s/it][A
 50%|█████     | 5/10 [00:44<00:44,  8.87s/it][A
 60%|██████    | 6/10 [00:53<00:35,  8.86s/it][A
 70%|███████   | 

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 94.50it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:27<04:03, 27.05s/it][A
 20%|██        | 2/10 [00:54<03:37, 27.15s/it][A
 30%|███       | 3/10 [01:21<03:10, 27.18s/it][A
 40%|████      | 4/10 [01:48<02:42, 27.16s/it][A
 50%|█████     | 5/10 [02:15<02:15, 27.17s/it][A
 60%|██████    | 6/10 [02:42<01:48, 27.18s/it][A
 70%|███████   | 7/10 [03:10<01:21, 27.19s/it][A
 80%|████████  | 8/10 [03:37<00:54, 27.15s/it][A
 90%|█████████ | 9/10 [04:04<00:27, 27.16s/it][A
100%|██████████| 10/10 [04:31<00:00, 27.15s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:08<01:19,  8.80s/it][A
 20%|██        | 2/10 [00:17<01:10,  8.86s/it][A
 30%|███       | 3/10 [00:26<01:02,  8.91s/it][A
 40%|████      | 4/10 [00:35<00:53,  8.90s/it][A
 50%|█████     | 5/10 [00:44<00:44,  8.89s/it][A
 60%|██████    | 6/10 [00:53<00:35,  8.88s/it][A
 70%|███████   | 7/

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 88.24it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:26<04:02, 26.89s/it][A
 20%|██        | 2/10 [00:54<03:36, 27.04s/it][A
 30%|███       | 3/10 [01:21<03:09, 27.10s/it][A
 40%|████      | 4/10 [01:48<02:42, 27.10s/it][A
 50%|█████     | 5/10 [02:15<02:15, 27.08s/it][A
 60%|██████    | 6/10 [02:42<01:48, 27.10s/it][A
 70%|███████   | 7/10 [03:09<01:21, 27.13s/it][A
 80%|████████  | 8/10 [03:36<00:54, 27.14s/it][A
 90%|█████████ | 9/10 [04:03<00:27, 27.10s/it][A
100%|██████████| 10/10 [04:30<00:00, 27.10s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:08<01:19,  8.81s/it][A
 20%|██        | 2/10 [00:17<01:11,  8.91s/it][A
 30%|███       | 3/10 [00:26<01:02,  8.88s/it][A
 40%|████      | 4/10 [00:35<00:53,  8.87s/it][A
 50%|█████     | 5/10 [00:44<00:44,  8.87s/it][A
 60%|██████    | 6/10 [00:53<00:35,  8.86s/it][A
 70%|███████   | 7/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 87.73it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:27<04:03, 27.01s/it][A
 20%|██        | 2/10 [00:54<03:36, 27.08s/it][A
 30%|███       | 3/10 [01:21<03:09, 27.07s/it][A
 40%|████      | 4/10 [01:48<02:42, 27.11s/it][A
 50%|█████     | 5/10 [02:15<02:15, 27.12s/it][A
 60%|██████    | 6/10 [02:42<01:48, 27.09s/it][A
 70%|███████   | 7/10 [03:09<01:21, 27.11s/it][A
 80%|████████  | 8/10 [03:36<00:54, 27.12s/it][A
 90%|█████████ | 9/10 [04:03<00:27, 27.14s/it][A
100%|██████████| 10/10 [04:31<00:00, 27.11s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:08<01:20,  8.92s/it][A
 20%|██        | 2/10 [00:17<01:11,  8.90s/it][A
 30%|███       | 3/10 [00:26<01:02,  8.89s/it][A
 40%|████      | 4/10 [00:35<00:53,  8.89s/it][A
 50%|█████     | 5/10 [00:44<00:44,  8.93s/it][A
 60%|██████    | 6/10 [00:53<00:35,  8.91s/it][A
 70%|███████   | 7/1

Start Training...
ensemble size: 1
RCANet(
  (enc1): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (2): LinearBlock(
        (linear): Linear(in_features=50, out_features=128, bias=False)
        (act_layer): Identity()
      )
    )
  )
  (enc2): MLPnet(
    (network): Sequential(
      (0): LinearBlock(
        (linear): Linear(in_features=33, out_features=100, bias=False)
        (act_layer): LeakyReLU(negative_slope=0.01)
        (dropout_layer): Dropout(p=0.5, inplace=False)
      )
      (1): LinearBlock(
        (linear): Linear(in_features=100, out_features=50, bias=Fals


  0%|          | 0/10 [00:00<?, ?it/s][A
100%|██████████| 10/10 [00:00<00:00, 87.00it/s][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:27<04:03, 27.09s/it][A
 20%|██        | 2/10 [00:54<03:37, 27.13s/it][A
 30%|███       | 3/10 [01:21<03:09, 27.13s/it][A
 40%|████      | 4/10 [01:48<02:42, 27.16s/it][A
 50%|█████     | 5/10 [02:15<02:15, 27.18s/it][A
 60%|██████    | 6/10 [02:42<01:48, 27.18s/it][A
 70%|███████   | 7/10 [03:10<01:21, 27.14s/it][A
 80%|████████  | 8/10 [03:37<00:54, 27.17s/it][A
 90%|█████████ | 9/10 [04:04<00:27, 27.19s/it][A
100%|██████████| 10/10 [04:31<00:00, 27.17s/it][A

  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:08<01:19,  8.80s/it][A
 20%|██        | 2/10 [00:17<01:10,  8.85s/it][A
 30%|███       | 3/10 [00:26<01:01,  8.85s/it][A
 40%|████      | 4/10 [00:35<00:53,  8.89s/it][A
 50%|█████     | 5/10 [00:44<00:44,  8.88s/it][A
 60%|██████    | 6/10 [00:53<00:35,  8.87s/it][A
 70%|███████   | 7/1

Lifelong ROC-AUC: 0.3282630514484718, BWT: -0.020036791655979554, FWT: 0.20085286353009826





In [None]:
R_naive = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

In [None]:
R_replay = evaluation_protocol(T, E, Y, RCA(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")

# RDP

In [None]:
from deepod.models.tabular import RDP

In [12]:
R_hm = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="hierarchical", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_hm)}, BWT: {BWT(R_hm)}, FWT: {FWT(R_hm)}")

Evaluating using hierarchical strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.7s
epoch 10, training loss: 1.182584, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 741.31it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 103/30715 [00:00<00:29, 1025.34it/s][A
testing:   1%|          | 206/30715 [00:00<00:29, 1024.82it/s][A
testing:   1%|          | 310/30715 [00:00<00:29, 1029.32it/s][A
testing:   1%|▏         | 414/30715 [00:00<00:29, 1032.73it/s][A
testing:   2%|▏         | 519/30715 [00:00<00:29, 1035.58it/s][A
testing:   2%|▏         | 624/30715 [00:00<00:28, 1038.30it/s][A
testing:   2%|▏         | 728/30715 [00:00<00:28, 1035.68it/s][A
testing:   3%|▎         | 833/30715 [00:00<00:28, 1037.18it/s][A
testing:   3%|▎         | 938/30715 [00:00<00:28, 1039.16it/s][A
testing:   3%|▎         | 1042/30715 [00:01<00:28, 1039.01it/s][A
testing:   4%|▎         | 1147/30715 [00:01<00:28, 1040.47it/s][A
testing:   4%|▍         | 1252/30715 [00:01<00:28, 1042.12it/s][A
testing:   4%|▍         | 1357/30715 [00:01<00:28, 1044.23it/s][A
testing:   5%|▍         | 

drift: 398.8568904220632
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 62.041412, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 261.49it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 65/30715 [00:00<00:47, 639.56it/s][A
testing:   0%|          | 130/30715 [00:00<00:47, 638.62it/s][A
testing:   1%|          | 197/30715 [00:00<00:46, 651.88it/s][A
testing:   1%|          | 263/30715 [00:00<00:47, 645.88it/s][A
testing:   1%|          | 331/30715 [00:00<00:46, 654.91it/s][A
testing:   1%|▏         | 398/30715 [00:00<00:45, 659.84it/s][A
testing:   2%|▏         | 467/30715 [00:00<00:45, 668.68it/s][A
testing:   2%|▏         | 534/30715 [00:00<00:45, 662.91it/s][A
testing:   2%|▏         | 606/30715 [00:00<00:44, 679.93it/s][A
testing:   2%|▏         | 675/30715 [00:01<00:44, 679.44it/s][A
testing:   2%|▏         | 744/30715 [00:01<00:44, 681.14it/s][A
testing:   3%|▎         | 815/30715 [00:01<00:43, 689.07it/s][A
testing:   3%|▎         | 889/30715 [00:01<00:42, 703.28it/s][A
testing:   3%|▎         | 960/30715 [00:01<0

drift: 198.15502839106085
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 76.367249, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 728.43it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 103/30715 [00:00<00:29, 1027.76it/s][A
testing:   1%|          | 207/30715 [00:00<00:29, 1030.41it/s][A
testing:   1%|          | 312/30715 [00:00<00:29, 1035.68it/s][A
testing:   1%|▏         | 417/30715 [00:00<00:29, 1040.10it/s][A
testing:   2%|▏         | 522/30715 [00:00<00:29, 1040.87it/s][A
testing:   2%|▏         | 627/30715 [00:00<00:28, 1043.09it/s][A
testing:   2%|▏         | 732/30715 [00:00<00:28, 1037.22it/s][A
testing:   3%|▎         | 836/30715 [00:00<00:28, 1037.08it/s][A
testing:   3%|▎         | 941/30715 [00:00<00:28, 1038.44it/s][A
testing:   3%|▎         | 1045/30715 [00:01<00:28, 1029.93it/s][A
testing:   4%|▎         | 1150/30715 [00:01<00:28, 1033.98it/s][A
testing:   4%|▍         | 1255/30715 [00:01<00:28, 1038.18it/s][A
testing:   4%|▍         | 1360/30715 [00:01<00:28, 1040.71it/s][A
testing:   5%|▍         | 

drift: 377.90543069776226
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 104.846001, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 344.64it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 71/30715 [00:00<00:43, 703.85it/s][A
testing:   0%|          | 142/30715 [00:00<00:44, 686.23it/s][A
testing:   1%|          | 212/30715 [00:00<00:44, 690.66it/s][A
testing:   1%|          | 282/30715 [00:00<00:44, 680.12it/s][A
testing:   1%|          | 351/30715 [00:00<00:45, 667.96it/s][A
testing:   1%|▏         | 421/30715 [00:00<00:44, 678.34it/s][A
testing:   2%|▏         | 492/30715 [00:00<00:43, 687.82it/s][A
testing:   2%|▏         | 562/30715 [00:00<00:43, 688.96it/s][A
testing:   2%|▏         | 631/30715 [00:00<00:43, 683.88it/s][A
testing:   2%|▏         | 702/30715 [00:01<00:43, 691.32it/s][A
testing:   3%|▎         | 772/30715 [00:01<00:43, 682.34it/s][A
testing:   3%|▎         | 841/30715 [00:01<00:44, 675.83it/s][A
testing:   3%|▎         | 909/30715 [00:01<00:44, 667.57it/s][A
testing:   3%|▎         | 1004/30715 [00:01<

drift: 297.27042816559435
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.000000, time: 0.0s
epoch 10, training loss: 37.008541, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 1/1 [00:00<00:00, 737.40it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 104/30715 [00:00<00:29, 1037.34it/s][A
testing:   1%|          | 209/30715 [00:00<00:29, 1040.22it/s][A
testing:   1%|          | 314/30715 [00:00<00:29, 1043.57it/s][A
testing:   1%|▏         | 419/30715 [00:00<00:28, 1045.07it/s][A
testing:   2%|▏         | 524/30715 [00:00<00:28, 1046.65it/s][A
testing:   2%|▏         | 629/30715 [00:00<00:28, 1044.32it/s][A
testing:   2%|▏         | 734/30715 [00:00<00:28, 1045.47it/s][A
testing:   3%|▎         | 840/30715 [00:00<00:28, 1047.94it/s][A
testing:   3%|▎         | 946/30715 [00:00<00:28, 1050.01it/s][A
testing:   3%|▎         | 1052/30715 [00:01<00:28, 1048.92it/s][A
testing:   4%|▍         | 1158/30715 [00:01<00:28, 1049.77it/s][A
testing:   4%|▍         | 1264/30715 [00:01<00:28, 1051.10it/s][A
testing:   4%|▍         | 1370/30715 [00:01<00:27, 1052.44it/s][A
testing:   5%|▍         | 

Lifelong ROC-AUC: 0.2109363524529993, BWT: -0.05559681466074164, FWT: 0.16230645451674172





In [None]:
R_ssf = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="SSF", memory_size=5000, alpha=0.05)
print(f"Lifelong ROC-AUC: {lifelong_roc_auc(R_ssf)}, BWT: {BWT(R_ssf)}, FWT: {FWT(R_ssf)}")

Evaluating using SSF strategy: 0it [00:00, ?it/s]

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.074373, time: 0.0s
epoch 10, training loss: 0.000723, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 12/12 [00:00<00:00, 1071.80it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 109/30715 [00:00<00:28, 1085.81it/s][A
testing:   1%|          | 218/30715 [00:00<00:28, 1084.10it/s][A
testing:   1%|          | 327/30715 [00:00<00:28, 1083.12it/s][A
testing:   1%|▏         | 436/30715 [00:00<00:27, 1082.60it/s][A
testing:   2%|▏         | 545/30715 [00:00<00:27, 1080.41it/s][A
testing:   2%|▏         | 654/30715 [00:00<00:27, 1081.14it/s][A
testing:   2%|▏         | 763/30715 [00:00<00:27, 1081.80it/s][A
testing:   3%|▎         | 872/30715 [00:00<00:27, 1080.63it/s][A
testing:   3%|▎         | 981/30715 [00:00<00:27, 1080.36it/s][A
testing:   4%|▎         | 1090/30715 [00:01<00:27, 1081.61it/s][A
testing:   4%|▍         | 1199/30715 [00:01<00:27, 1081.43it/s][A
testing:   4%|▍         | 1309/30715 [00:01<00:27, 1084.13it/s][A
testing:   5%|▍         | 1418/30715 [00:01<00:26, 1085.78it/s][A
testing:   5%|▍        

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 7.727429, time: 0.0s
epoch 10, training loss: 0.317424, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 12/12 [00:00<00:00, 1066.03it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 109/30715 [00:00<00:28, 1080.31it/s][A
testing:   1%|          | 218/30715 [00:00<00:28, 1075.07it/s][A
testing:   1%|          | 326/30715 [00:00<00:28, 1074.43it/s][A
testing:   1%|▏         | 434/30715 [00:00<00:28, 1073.63it/s][A
testing:   2%|▏         | 542/30715 [00:00<00:28, 1073.82it/s][A
testing:   2%|▏         | 650/30715 [00:00<00:28, 1072.47it/s][A
testing:   2%|▏         | 758/30715 [00:00<00:27, 1072.72it/s][A
testing:   3%|▎         | 866/30715 [00:00<00:27, 1074.61it/s][A
testing:   3%|▎         | 974/30715 [00:00<00:27, 1076.25it/s][A
testing:   4%|▎         | 1082/30715 [00:01<00:27, 1076.98it/s][A
testing:   4%|▍         | 1190/30715 [00:01<00:27, 1077.14it/s][A
testing:   4%|▍         | 1298/30715 [00:01<00:27, 1077.88it/s][A
testing:   5%|▍         | 1407/30715 [00:01<00:27, 1079.83it/s][A
testing:   5%|▍        

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 9.356652, time: 0.0s
epoch 10, training loss: 1.400372, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 13/13 [00:00<00:00, 1076.42it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 108/30715 [00:00<00:28, 1076.83it/s][A
testing:   1%|          | 216/30715 [00:00<00:28, 1076.98it/s][A
testing:   1%|          | 324/30715 [00:00<00:28, 1076.44it/s][A
testing:   1%|▏         | 432/30715 [00:00<00:28, 1077.73it/s][A
testing:   2%|▏         | 540/30715 [00:00<00:28, 1077.49it/s][A
testing:   2%|▏         | 648/30715 [00:00<00:27, 1078.11it/s][A
testing:   2%|▏         | 757/30715 [00:00<00:27, 1081.09it/s][A
testing:   3%|▎         | 866/30715 [00:00<00:27, 1083.43it/s][A
testing:   3%|▎         | 975/30715 [00:00<00:27, 1082.37it/s][A
testing:   4%|▎         | 1084/30715 [00:01<00:27, 1082.61it/s][A
testing:   4%|▍         | 1193/30715 [00:01<00:27, 1082.57it/s][A
testing:   4%|▍         | 1302/30715 [00:01<00:27, 1080.06it/s][A
testing:   5%|▍         | 1411/30715 [00:01<00:27, 1079.09it/s][A
testing:   5%|▍        

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 11.480359, time: 0.0s
epoch 10, training loss: 1.992806, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 13/13 [00:00<00:00, 1051.35it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 108/30715 [00:00<00:28, 1075.16it/s][A
testing:   1%|          | 216/30715 [00:00<00:28, 1075.20it/s][A
testing:   1%|          | 324/30715 [00:00<00:28, 1076.13it/s][A
testing:   1%|▏         | 432/30715 [00:00<00:28, 1075.68it/s][A
testing:   2%|▏         | 540/30715 [00:00<00:28, 1075.36it/s][A
testing:   2%|▏         | 649/30715 [00:00<00:27, 1077.02it/s][A
testing:   2%|▏         | 757/30715 [00:00<00:27, 1077.45it/s][A
testing:   3%|▎         | 866/30715 [00:00<00:27, 1078.48it/s][A
testing:   3%|▎         | 974/30715 [00:00<00:27, 1078.40it/s][A
testing:   4%|▎         | 1082/30715 [00:01<00:27, 1077.31it/s][A
testing:   4%|▍         | 1190/30715 [00:01<00:27, 1075.97it/s][A
testing:   4%|▍         | 1298/30715 [00:01<00:27, 1074.65it/s][A
testing:   5%|▍         | 1406/30715 [00:01<00:27, 1075.79it/s][A
testing:   5%|▍        

Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=33, out_features=100, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): LeakyReLU(negative_slope=0.01)
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 13.060379, time: 0.0s
epoch 10, training loss: 1.052357, time: 0.0s
Start Inference on the training data...



testing: 100%|██████████| 13/13 [00:00<00:00, 1032.10it/s]

testing:   0%|          | 0/30715 [00:00<?, ?it/s][A
testing:   0%|          | 107/30715 [00:00<00:28, 1069.08it/s][A
testing:   1%|          | 214/30715 [00:00<00:28, 1066.79it/s][A
testing:   1%|          | 322/30715 [00:00<00:28, 1070.44it/s][A
testing:   1%|▏         | 430/30715 [00:00<00:28, 1068.44it/s][A
testing:   2%|▏         | 537/30715 [00:00<00:28, 1064.29it/s][A
testing:   2%|▏         | 645/30715 [00:00<00:28, 1068.21it/s][A
testing:   2%|▏         | 753/30715 [00:00<00:27, 1071.34it/s][A
testing:   3%|▎         | 861/30715 [00:00<00:27, 1073.83it/s][A
testing:   3%|▎         | 970/30715 [00:00<00:27, 1076.07it/s][A
testing:   4%|▎         | 1079/30715 [00:01<00:27, 1077.56it/s][A
testing:   4%|▍         | 1188/30715 [00:01<00:27, 1079.70it/s][A
testing:   4%|▍         | 1297/30715 [00:01<00:27, 1079.85it/s][A
testing:   5%|▍         | 1405/30715 [00:01<00:27, 1079.31it/s][A
testing:   5%|▍        

Lifelong ROC-AUC: 0.12294627241713126, BWT: 0.03032751156809607, FWT: 0.013198038793983985





In [None]:
R_naive = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="naive")
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_naive)}, BWT: {BWT(R_naive)}, FWT: {FWT(R_naive)}")

In [None]:
R_replay = evaluation_protocol(T, E, Y, RDP(epochs=10), strategy="replay", replay_buffer_size=5000)
print(f"Lifelong PR-AUC: {lifelong_roc_auc(R_replay)}, BWT: {BWT(R_replay)}, FWT: {FWT(R_replay)}")