<a href="https://colab.research.google.com/github/TamHoaVo/ML-Project/blob/main/NCF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import required libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import random
from sklearn.model_selection import train_test_split
from itertools import product
import pandas as pd
from scipy.stats import entropy
from IPython.display import display
import matplotlib.pyplot as plt
from collections import defaultdict
import copy
from scipy.spatial.distance import euclidean
from scipy.special import kl_div

# TESTING ONLY
#random.seed(42)
#np.random.seed(42)
#torch.manual_seed(42)

# Use (cuda) GPU if available, otherwise use CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Data Loading and Preprocessing
train_df = pd.read_csv('train1.csv')  # Training split
test_df = pd.read_csv('test1.csv')    # Test split
# Rename 'item_id' column to 'item' in the training data
train_df = train_df.rename(columns={'item_id': 'item'})
# Rename 'movieId' column to 'item' in the test data so both datasets have the same column name
test_df = test_df.rename(columns={'movieId': 'item'})
# Combine training and test data into one DataFrame
full_df = pd.concat([train_df, test_df], ignore_index=True)

# Map each distinct value of the 'tag' column (used here as the user key) to a dense integer index
user_mapping = {u: idx for idx, u in enumerate(full_df['tag'].unique())}
# Map each distinct item to a dense integer index
item_mapping = {i: idx for idx, i in enumerate(full_df['item'].unique())}

# Convert original user tags to numeric user IDs using the user_mapping
full_df['user_id'] = full_df['tag'].map(user_mapping)
# Convert original item names to numeric item IDs using the item_mapping
full_df['item_id'] = full_df['item'].map(item_mapping)
# Convert ratings into binary labels (1 if rating >= 4, else 0)
full_df['label'] = (full_df['targets'] >= 4).astype(int)

# Randomly pick up to 10,000 rows with a fixed seed. DataFrame.sample raises ValueError when
# n exceeds the number of rows (sampling without replacement), so n is capped at the data size.
sample_df = full_df.sample(n=min(10000, len(full_df)), random_state=42)

# Flatten the sample into (user, item, label) tuples used by the training code
interactions = list(zip(sample_df['user_id'], sample_df['item_id'], sample_df['label']))

# Invert binary labels (1 -> 0, 0 -> 1) for target users; leave everyone else untouched
def flip_user_labels(interactions, target_users):
    """Return a new list of (user, item, label) tuples with target users' labels inverted.

    Args:
        interactions: iterable of (user, item, label) triples with 0/1 labels.
        target_users: container of user ids whose labels are replaced by ``1 - label``.

    Returns:
        A list in the same order as ``interactions``; the input is not modified.
    """
    flipped = []
    for user, item, label in interactions:
        new_label = 1 - label if user in target_users else label
        flipped.append((user, item, new_label))
    return flipped

# Compute FlipRec loss
def compute_fliprec_loss(preds, labels, teacher_preds, users, target_users, lambda_general=0.5, lambda_target=0.0):
    """Return the batch-mean of BCE(preds, labels) + lambda * MSE(preds, teacher_preds).

    The distillation weight ``lambda`` is chosen per sample: ``lambda_target`` when
    the sample's user is in ``target_users``, ``lambda_general`` otherwise.

    Args:
        preds: student probabilities, same shape as ``labels``.
        labels: float 0/1 targets.
        teacher_preds: teacher probabilities, same shape as ``preds``.
        users: integer tensor of user ids, one per sample.
        target_users: container of user ids treated as targeted.
        lambda_general: MSE weight for non-target users.
        lambda_target: MSE weight for target users.

    Returns:
        A scalar tensor holding the mean combined loss.
    """
    bce = nn.BCELoss(reduction='none')(preds, labels)  # Per-sample binary classification loss
    mse = F.mse_loss(preds, teacher_preds, reduction='none')  # Per-sample squared distance to the teacher
    # Build the 0/1 target mask from a single tolist() transfer; calling .item() on each
    # element would force one host/device round trip per user when `users` lives on the GPU.
    is_target = torch.tensor(
        [u in target_users for u in users.tolist()],
        device=preds.device,
        dtype=torch.float,
    )
    lambdas = is_target * lambda_target + (1 - is_target) * lambda_general  # Per-sample MSE weight
    return (bce + lambdas * mse).mean()  # Final loss: BCE + weighted MSE

def split_users(interactions, target_fraction=0.05):
    """Randomly partition the users seen in ``interactions`` into two disjoint sets.

    Args:
        interactions: iterable of (user, item, label) triples.
        target_fraction: share of distinct users placed in the targeted set;
            the count is ``int(n_users * target_fraction)`` (rounded down).

    Returns:
        ``(target_users, retained_users)`` as two sets. Uses NumPy's global RNG
        (``np.random.shuffle``), so results depend on the current NumPy seed.
    """
    unique_users = list({user for user, _, _ in interactions})
    np.random.shuffle(unique_users)  # In-place random permutation of the user list
    n_target = int(len(unique_users) * target_fraction)
    targeted, retained = unique_users[:n_target], unique_users[n_target:]
    return set(targeted), set(retained)

# Randomly partition the sampled users into a targeted (to-unlearn) set and a retained set
target_users, retained_users = split_users(interactions)

# Tag every sampled row with the group its user falls into
sample_df['user_group'] = [
    'Targeted' if uid in target_users else 'Retained'
    for uid in sample_df['user_id']
]

# Per-group views of the sample
retained_df = sample_df.loc[sample_df['user_group'] == 'Retained']
targeted_df = sample_df.loc[sample_df['user_group'] == 'Targeted']

# (user, item, label) tuples for each group
retained_interactions = list(zip(
    retained_df['user_id'], retained_df['item_id'], retained_df['label']
))
targeted_interactions = list(zip(
    targeted_df['user_id'], targeted_df['item_id'], targeted_df['label']
))

# Retained rows first, then targeted rows
all_interactions = [*retained_interactions, *targeted_interactions]

# Embedding table sizes: ids are dense indices, so the largest id + 1 bounds them
num_users = sample_df['user_id'].max() + 1
num_items = sample_df['item_id'].max() + 1

print(f"Retained Users: {len(retained_users)}, Targeted Users: {len(target_users)}")
print(f"Retained Interactions: {len(retained_interactions)}, Targeted Interactions: {len(targeted_interactions)}")
print(f"Number of users: {num_users}, Number of items: {num_items}")
print(f"Total interactions: {len(interactions)}")

# Calculate AUC (Area Under ROC Curve)
def calculate_auc(predictions, labels):
    """Return the ROC AUC of ``predictions`` against binary ``labels``.

    ROC AUC is undefined when ``labels`` contains fewer than two distinct
    classes, and ``roc_auc_score`` raises ``ValueError`` in that case. Small
    training shards can legitimately be single-class, so NaN is returned
    instead of aborting the whole run.

    Args:
        predictions: sequence of predicted scores.
        labels: sequence of ground-truth 0/1 labels, same length.

    Returns:
        The AUC as a float, or ``float('nan')`` when it is undefined.
    """
    if np.unique(np.asarray(labels)).size < 2:
        return float('nan')
    from sklearn.metrics import roc_auc_score
    return roc_auc_score(labels, predictions)

# Hit Ratio @ k
def calculate_hr(predictions, labels, users, items, k=10):
    """Fraction of eligible users with at least one positive among their top-k scores.

    Samples are grouped by user. A user is eligible only when they have at
    least ``k`` scored samples; users with fewer are ignored entirely.

    Args:
        predictions: predicted scores, one per sample.
        labels: ground-truth labels, one per sample (a hit is ``label == 1``).
        users: user id per sample.
        items: item id per sample (only used to align the sequences).
        k: cutoff rank.

    Returns:
        hits / eligible users, or 0 when no user is eligible.
    """
    per_user = defaultdict(list)
    for uid, _item, score, truth in zip(users, items, predictions, labels):
        per_user[uid].append((score, truth))

    eligible = [pairs for pairs in per_user.values() if len(pairs) >= k]
    if not eligible:
        return 0

    hit_count = 0
    for pairs in eligible:
        top_k = sorted(pairs, key=lambda pair: pair[0], reverse=True)[:k]
        if any(truth == 1 for _, truth in top_k):
            hit_count += 1
    return hit_count / len(eligible)

# Normalized Discounted Cumulative Gain @ k
def calculate_ndcg(predictions, labels, users, items, k=10):
    """Mean NDCG@k over users that have at least ``k`` samples and a non-zero ideal DCG.

    For each eligible user the DCG of the top-k samples ranked by predicted
    score is divided by the DCG of the top-k samples ranked by label.

    Args:
        predictions: predicted scores, one per sample.
        labels: ground-truth gains (0/1 labels here), one per sample.
        users: user id per sample.
        items: item id per sample (only used to align the sequences).
        k: cutoff rank.

    Returns:
        The average NDCG, or 0 when no user contributes.
    """
    per_user = defaultdict(list)
    for uid, _item, score, truth in zip(users, items, predictions, labels):
        per_user[uid].append((score, truth))

    def discounted_gain(ranked):
        # Position `rank` (0-based) is discounted by log2(rank + 2)
        return sum([truth / np.log2(rank + 2) for rank, (_, truth) in enumerate(ranked)])

    ratios = []
    for pairs in per_user.values():
        if len(pairs) < k:
            continue
        by_score = sorted(pairs, key=lambda pair: pair[0], reverse=True)[:k]
        by_label = sorted(pairs, key=lambda pair: pair[1], reverse=True)[:k]
        ideal = discounted_gain(by_label)
        if ideal > 0:
            ratios.append(discounted_gain(by_score) / ideal)

    return sum(ratios) / len(ratios) if ratios else 0

# NCF Model Definition
class NCF(nn.Module):
    """Neural collaborative filtering scorer.

    Looks up a 32-dimensional embedding for the user and for the item,
    concatenates them, and feeds the 64-dimensional vector through an MLP
    (64 -> 32 -> 16 -> 8 -> 1) with ReLU activations and a final sigmoid.
    """

    def __init__(self, num_users, num_items):
        """Create embedding tables with ``num_users`` / ``num_items`` rows and the MLP head."""
        super().__init__()
        self.user_embed = nn.Embedding(num_users, 32)
        self.item_embed = nn.Embedding(num_items, 32)

        # Hidden stack: each Linear is followed by a ReLU, in the order listed
        widths = (64, 32, 16, 8)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Output head: a single unit squashed to (0, 1) for binary classification
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Sigmoid())
        self.mlp = nn.Sequential(*layers)

    def forward(self, user, item):
        """Score (user, item) index pairs; the MLP's last layer emits one value per pair."""
        joined = torch.cat((self.user_embed(user), self.item_embed(item)), dim=1)
        return self.mlp(joined)

class InteractionDataset(Dataset):
    """Map-style dataset over a sequence of (user, item, label) triples.

    User and item indices above the last valid embedding row are clamped to
    that row (``num_users - 1`` / ``num_items - 1``) before being returned.
    """

    def __init__(self, user_item_label, num_users, num_items):
        self.data = user_item_label  # Sequence of (user, item, label)
        # Table sizes, used to clamp indices in __getitem__
        self.num_users = num_users
        self.num_items = num_items

    def __len__(self):
        """Number of stored interactions."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(user, item, label)`` as (long, long, float) scalar tensors."""
        user, item, label = self.data[idx]

        # Clamp from above so an oversized id cannot index past the embedding table
        last_user = self.num_users - 1
        last_item = self.num_items - 1
        user_idx = user if user <= last_user else last_user
        item_idx = item if item <= last_item else last_item

        return (
            torch.tensor(user_idx, dtype=torch.long),
            torch.tensor(item_idx, dtype=torch.long),
            torch.tensor(label, dtype=torch.float),
        )

# Train the model using FlipRec loss
def train_model_with_params(model, original_model, train_data, target_users, device, lr, bs, lambda_general=0.5, num_epochs=2):
    """Train ``model`` with the FlipRec loss and collect per-epoch metrics.

    Each batch is scored by ``model`` (student) and by ``original_model``
    (teacher, evaluated under ``torch.no_grad``); the loss comes from
    ``compute_fliprec_loss``. After every epoch AUC, HR@{5,10,20} and
    NDCG@{5,10,20} are computed from the predictions made on the training
    batches during that epoch (no separate evaluation pass is run).

    Args:
        model: network to optimise; must expose ``user_embed`` and ``item_embed``.
        original_model: teacher network producing the distillation targets.
        train_data: sequence of (user, item, label) triples.
        target_users: container of targeted user ids, forwarded to the loss.
        device: torch device to run on.
        lr: Adam learning rate.
        bs: batch size.
        lambda_general: distillation weight for non-target users.
        num_epochs: number of passes over ``train_data``.

    Returns:
        Dict mapping metric name to a list with one value per epoch.

    Raises:
        ValueError: if ``train_data`` is empty.
    """
    # Check if training data is empty
    if len(train_data) == 0:
        raise ValueError("Training data is empty. Cannot proceed with training.")

    # Create a DataLoader to load training data in batches
    loader = DataLoader(
        InteractionDataset(train_data, model.user_embed.num_embeddings, model.item_embed.num_embeddings),
        batch_size=bs,
        shuffle=True
    )

    # Move models to device (GPU or CPU) before building the optimizer
    model.to(device)
    original_model.to(device)
    original_model.eval()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Dictionary to store metrics per epoch
    metric_results = {"AUC": [], "HR@5": [], "HR@10": [], "HR@20": [], "NDCG@5": [], "NDCG@10": [], "NDCG@20": []}

    # Training loop for each epoch
    for epoch in range(num_epochs):
        # Re-enter train mode every epoch; when the caller passes the same object
        # as teacher and student this also undoes the eval() call above.
        model.train()
        total_loss = 0
        predictions = []
        true_labels = []
        user_list = []
        item_list = []

        # Loop through mini-batches
        for users, items, labels in loader:
            users, items, labels = users.to(device), items.to(device), labels.to(device)
            optimizer.zero_grad()
            # squeeze(-1) drops only the trailing output dimension. A bare squeeze()
            # would also drop the batch dimension when the last batch holds a single
            # sample, producing a 0-d tensor that no longer matches `labels` and
            # cannot be passed to list.extend below.
            preds = model(users, items).squeeze(-1)
            # Get predictions from teacher model
            with torch.no_grad():
                teacher_preds = original_model(users, items).squeeze(-1)
            # Compute FlipRec loss
            loss = compute_fliprec_loss(preds, labels, teacher_preds, users, target_users, lambda_general)
            loss.backward()
            optimizer.step()

            # Track loss and outputs for the end-of-epoch metrics
            total_loss += loss.item()
            predictions.extend(preds.detach().cpu().numpy())
            true_labels.extend(labels.detach().cpu().numpy())
            user_list.extend(users.cpu().numpy())
            item_list.extend(items.cpu().numpy())

        # Calculate metrics after each epoch
        auc = calculate_auc(predictions, true_labels)
        hr5 = calculate_hr(predictions, true_labels, user_list, item_list, k=5)
        hr10 = calculate_hr(predictions, true_labels, user_list, item_list, k=10)
        hr20 = calculate_hr(predictions, true_labels, user_list, item_list, k=20)
        ndcg5 = calculate_ndcg(predictions, true_labels, user_list, item_list, k=5)
        ndcg10 = calculate_ndcg(predictions, true_labels, user_list, item_list, k=10)
        ndcg20 = calculate_ndcg(predictions, true_labels, user_list, item_list, k=20)

        # Save metrics for this epoch
        metric_results["AUC"].append(auc)
        metric_results["HR@5"].append(hr5)
        metric_results["HR@10"].append(hr10)
        metric_results["HR@20"].append(hr20)
        metric_results["NDCG@5"].append(ndcg5)
        metric_results["NDCG@10"].append(ndcg10)
        metric_results["NDCG@20"].append(ndcg20)

        print(f"Epoch {epoch+1}, Loss: {total_loss/len(loader):.4f}, AUC: {auc:.4f}, HR@5: {hr5:.4f}, HR@10: {hr10:.4f}, HR@20: {hr20:.4f}, NDCG@5: {ndcg5:.4f}, NDCG@10: {ndcg10:.4f}, NDCG@20: {ndcg20:.4f}")

    # Return dictionary of metrics across all epochs
    return metric_results

# Unlearning Methods Implementation
def retrained_baseline(interactions, num_users, num_items, target_users, lr, bs, device, num_epochs=2):
    """Train a fresh NCF model on the interactions that remain after dropping target users.

    If every user in ``interactions`` is a target user the data is used
    unfiltered (this is how the targeted-only run is supported); otherwise
    all rows belonging to target users are removed first.

    Returns:
        The per-epoch metrics dict produced by ``train_model_with_params``.

    Raises:
        ValueError: if no training rows are left.
    """
    seen_users = {user for user, _, _ in interactions}
    only_target_users = seen_users.issubset(target_users)

    # Keep everything when the input consists solely of target users; filter otherwise
    remaining_data = interactions if only_target_users else [
        (user, item, label)
        for user, item, label in interactions
        if user not in target_users
    ]

    if len(remaining_data) == 0:
        raise ValueError("Training data is empty. Cannot proceed with training.")

    # Fresh model; the teacher is a snapshot of its initial (untrained) weights
    model = NCF(num_users, num_items)
    original_model = copy.deepcopy(model)
    return train_model_with_params(
        model, original_model, remaining_data, target_users, device, lr, bs,
        lambda_general=0.5, num_epochs=num_epochs,
    )


def sisa(interactions, num_users, num_items, target_users, lr, bs, device, num_epochs=2, num_shards=5):
    """Train one independent NCF submodel per random shard of the interactions.

    The interactions are shuffled and dealt round-robin into ``num_shards``
    shards; a fresh model (with a snapshot of its initial weights as teacher)
    is trained on each shard. Note that this function does not remove target
    users' rows from any shard.

    Returns:
        A list with one per-epoch metrics dict per shard.

    Raises:
        ValueError: propagated from ``train_model_with_params`` if a shard is empty.
    """
    # Shuffle a copy: random.shuffle works in place and would otherwise reorder
    # the caller's list, which is shared with the other methods.
    shuffled = list(interactions)
    random.shuffle(shuffled)
    shards = [shuffled[i::num_shards] for i in range(num_shards)]  # Round-robin slices
    submodels = []
    # Train a separate model for each shard
    for shard in shards:
        model = NCF(num_users, num_items)
        original_model = copy.deepcopy(model)
        submodels.append(train_model_with_params(model, original_model, shard, target_users, device, lr, bs, lambda_general=0.5, num_epochs=num_epochs))
    return submodels

def receraser(interactions, num_users, num_items, target_users, lr, bs, device, num_epochs=2, num_shards=5):
    """Train one NCF submodel per user shard (RecEraser-style partitioning by user).

    Users are assigned to ``num_shards`` shards round-robin in order of first
    appearance, so all of a user's interactions land in the same shard. No
    similarity measure is computed here.

    Returns:
        A list with one per-epoch metrics dict per shard.
    """
    # Bucket (item, label) pairs under their user, preserving first-seen user order
    per_user = defaultdict(list)
    for user, item, label in interactions:
        per_user[user].append((item, label))
    ordered_users = list(per_user)

    submodels = []
    for offset in range(num_shards):
        shard_users = ordered_users[offset::num_shards]
        # Re-expand the shard's users into (user, item, label) rows
        shard_data = [
            (user, item, label)
            for user in shard_users
            for item, label in per_user[user]
        ]
        # Fresh student plus a snapshot of its initial weights as the teacher
        student = NCF(num_users, num_items)
        teacher = copy.deepcopy(student)
        shard_metrics = train_model_with_params(
            model=student,
            original_model=teacher,
            train_data=shard_data,
            target_users=target_users,
            device=device,
            lr=lr,
            bs=bs,
            lambda_general=0.5,
            num_epochs=num_epochs,
        )
        submodels.append(shard_metrics)
    return submodels

def badt(interactions, num_users, num_items, target_users, lr, bs, device, num_epochs=2):
    """BadT-style unlearning: distil from a randomly initialised teacher, then fine-tune.

    Phase 1 trains a fresh student on all ``interactions`` with the FlipRec
    loss against a freshly initialised (never trained) teacher. Phase 2 calls
    ``train_model_with_params`` on the interactions of non-target users (or on
    everything when all users are targeted), passing the student as both
    ``model`` and ``original_model``.

    Returns:
        The per-epoch metrics dict of the fine-tuning phase.

    Raises:
        ValueError: if there is no data to fine-tune on.
    """
    # Work out the fine-tuning set first so empty input fails before any training.
    # Do NOT filter out targeted users if we're evaluating on them.
    user_ids = set(u for u, _, _ in interactions)
    if user_ids.issubset(target_users):
        final_data = interactions  # Keep all interactions if all users are targeted
    else:
        # Remove interactions from target users
        final_data = [(u, i, l) for u, i, l in interactions if u not in target_users]

    if len(final_data) == 0:
        raise ValueError("Training data is empty. Cannot proceed with training.")

    # Initialize a teacher model and set it to evaluation mode
    teacher_model = NCF(num_users, num_items)
    teacher_model.eval()
    # Initialize a student model to be trained
    student_model = NCF(num_users, num_items)
    # Move both models to the appropriate device (GPU or CPU)
    student_model.to(device)
    teacher_model.to(device)
    # Set up optimizer for the student model
    optimizer = torch.optim.Adam(student_model.parameters(), lr=lr)

    # One loader serves every epoch; shuffle=True reshuffles each time it is iterated
    loader = DataLoader(
        InteractionDataset(interactions, num_users, num_items),
        batch_size=bs,
        shuffle=True
    )

    # Train the student model against the teacher
    for epoch in range(num_epochs):
        student_model.train()  # Set student model to training mode

        for users, items, labels in loader:
            users, items, labels = users.to(device), items.to(device), labels.to(device)
            optimizer.zero_grad()  # Clear gradients before backprop

            # squeeze(-1) keeps the batch dimension even for a final batch of one
            # sample; a bare squeeze() would yield a 0-d tensor that mismatches `labels`.
            student_preds = student_model(users, items).squeeze(-1)
            with torch.no_grad():
                teacher_preds = teacher_model(users, items).squeeze(-1)

            # Compute custom loss (FlipRec) with its default lambda weights
            loss = compute_fliprec_loss(student_preds, labels, teacher_preds, users, target_users)
            # Backpropagation and optimization step
            loss.backward()
            optimizer.step()

    # Fine-tune the student model on the filtered data.
    # NOTE(review): the student is also passed as the teacher, so teacher predictions
    # track the student itself — confirm whether a frozen copy was intended.
    return train_model_with_params(
        model=student_model,    # The trained student model
        original_model=student_model,
        train_data=final_data,
        target_users=set(),
        device=device,
        lr=lr,
        bs=bs,
        lambda_general=0.5,
        num_epochs=num_epochs
    )


def ermax(interactions, num_users, num_items, target_users, lr, bs, device, num_epochs=2):
    """erMax-style unlearning: train on noised target labels, then fine-tune on clean data.

    Phase 1 replaces every target user's label with a random 0/1 draw and
    trains a fresh model on the result (its metrics are discarded). Phase 2
    continues training the same model on the non-target interactions, or on
    all interactions when every user is targeted.

    Returns:
        The per-epoch metrics dict of the fine-tuning phase.

    Raises:
        ValueError: if no data is left for fine-tuning.
    """
    # Randomise (not merely flip) the labels of targeted users; one RNG draw per targeted row
    noisy_data = []
    for user, item, label in interactions:
        if user in target_users:
            noisy_data.append((user, item, random.randint(0, 1)))
        else:
            noisy_data.append((user, item, label))

    model = NCF(num_users, num_items)
    original_model = copy.deepcopy(model)

    # Phase 1: train on the noised data against the initial-weights snapshot
    train_model_with_params(
        model, original_model, noisy_data, target_users, device, lr, bs,
        lambda_general=1.0, num_epochs=num_epochs,
    )

    # Phase 2 data: drop target users unless the input consists only of them
    seen_users = {user for user, _, _ in interactions}
    if seen_users.issubset(target_users):
        cleaned_data = interactions
    else:
        cleaned_data = [
            (user, item, label)
            for user, item, label in interactions
            if user not in target_users
        ]

    if len(cleaned_data) == 0:
        raise ValueError("Training data is empty after filtering targeted users.")

    # Phase 2: fine-tune; the model is handed over as its own teacher
    return train_model_with_params(
        model, model, cleaned_data, set(), device, lr, bs,
        lambda_general=0.5, num_epochs=num_epochs,
    )


def _sample_unseen_item(known_positives, num_items, max_tries=10):
    """Draw a random item id, retrying a bounded number of times to avoid ``known_positives``."""
    candidate = random.randint(0, num_items - 1)
    for _ in range(max_tries):
        if candidate not in known_positives:
            break
        candidate = random.randint(0, num_items - 1)
    return candidate


def fliprec(interactions, num_users, num_items, target_users, lr, bs, device, lambda_general=0.5, lambda_target=0.0, num_epochs=2):
    """Train FlipRec: flipped target labels plus contextual augmentation, with a teacher term.

    The training set is the union of:
      * the interactions with every target user's label inverted, and
      * for each non-target user who has a positive interaction on an item that
        some target user interacted with: those shared items as positives plus
        an equal number of randomly sampled items labelled 0.

    Args:
        lambda_general: distillation weight for non-target users, forwarded to training.
        lambda_target: accepted for interface compatibility; it is not forwarded
            to ``train_model_with_params``, so its value has no effect here.

    Returns:
        The per-epoch metrics dict produced by ``train_model_with_params``.
    """
    # Label flip for targeted users
    flipped_data = [(u, i, 1 - l) if u in target_users else (u, i, l) for u, i, l in interactions]

    # Contextual User Augmentation (Dtrs)
    targeted_items = set()             # Items touched by at least one target user
    user_item_pos = defaultdict(set)   # user -> items with a positive label

    for u, i, l in interactions:
        if u in target_users:
            targeted_items.add(i)
        if l == 1:
            user_item_pos[u].add(i)

    contextual_data = []
    # Single pass over users instead of scanning every user once per targeted item
    for u, positive_items in user_item_pos.items():
        if u in target_users:
            continue
        shared_items = [i for i in positive_items if i in targeted_items]
        if not shared_items:
            continue
        contextual_data.extend((u, i, 1) for i in shared_items)
        # Avoid labelling one of the user's own known positives as a negative,
        # which would give the same (user, item) pair contradictory labels.
        contextual_data.extend(
            (u, _sample_unseen_item(positive_items, num_items), 0) for _ in shared_items
        )

    # Combine Dflipf + Dtrs
    final_data = flipped_data + contextual_data

    # Setup model and teacher (teacher is a snapshot of the initial weights)
    model = NCF(num_users, num_items)
    teacher = copy.deepcopy(model)

    return train_model_with_params(
        model=model,
        original_model=teacher,
        train_data=final_data,
        target_users=target_users,
        device=device,
        lr=lr,
        bs=bs,
        lambda_general=lambda_general,
        num_epochs=num_epochs
    )



# Result holders, one per user group; each entry is a (method_name, results) tuple
all_results_ret = []
all_results_tgt = []

# (display name, callable) for every unlearning method to run.
# All callables are invoked below with the same keyword arguments.
methods = [
    ("Retrained", retrained_baseline),
    ("SISA", sisa),
    ("RecEraser", receraser),
    ("BadT", badt),
    ("erMax", ermax),
    ("FlipRec", fliprec)
]

# Each method is run twice, once per group, and trains new model(s) on every call.
# The stored results are whatever the method returns: a metrics dict, or a list of
# metrics dicts for the sharded methods (sisa, receraser).
for method_name, method_func in methods:
    print(f"Running {method_name} on Retained users...")
    method_results_ret = method_func(
        interactions=retained_interactions, # Use only retained user data
        num_users=num_users,
        num_items=num_items,
        target_users=target_users, # Set of user ids to unlearn
        lr=0.00015,
        bs=64,
        device=device
    )
    all_results_ret.append((method_name, method_results_ret)) # Save results

    print(f"Running {method_name} on Targeted users...")
    method_results_tgt = method_func(
        interactions=targeted_interactions, # Use only targeted user data
        num_users=num_users,
        num_items=num_items,
        target_users=target_users,  # Same target users for consistency
        lr=0.00015,
        bs=64,
        device=device
    )
    all_results_tgt.append((method_name, method_results_tgt)) # Save results

# Summarise final-epoch metrics per method, separately for each user group
def collect_groupwise_results(methods, all_results_ret, all_results_tgt):
    """Build, display and return one summary DataFrame per user group.

    Args:
        methods: sequence of (method_name, callable); only the names and order are used.
        all_results_ret: list of (method_name, results) for retained users, aligned with ``methods``.
        all_results_tgt: list of (method_name, results) for targeted users, aligned with ``methods``.
            ``results`` is either a metrics dict or a list of metrics dicts
            (one per submodel); lists are averaged metric by metric.

    Returns:
        ``(df_retained, df_targeted)``, each with a "Method" column followed by
        the last-epoch value of every metric.
    """
    metric_names = ["AUC", "HR@5", "HR@10", "HR@20", "NDCG@5", "NDCG@10", "NDCG@20"]

    def final_epoch_metrics(results):
        # Sharded methods return a list of dicts: average the last-epoch value across submodels
        if isinstance(results, list):
            return [
                np.mean([run[name][-1] for run in results])
                for name in metric_names
            ]
        return [results[name][-1] for name in metric_names]

    retained_rows = []
    targeted_rows = []
    for position, (method_name, _) in enumerate(methods):
        ret_results = all_results_ret[position][1]
        tgt_results = all_results_tgt[position][1]
        retained_rows.append([method_name] + final_epoch_metrics(ret_results))
        targeted_rows.append([method_name] + final_epoch_metrics(tgt_results))

    header = ["Method"] + metric_names
    df_retained = pd.DataFrame(retained_rows, columns=header)
    df_targeted = pd.DataFrame(targeted_rows, columns=header)

    print("\nRetained Results Summary:")
    display(df_retained)
    print("\nTargeted Results Summary:")
    display(df_targeted)
    return df_retained, df_targeted

# Build and display the per-group summary tables (final-epoch metrics for every method)
df_retained, df_targeted = collect_groupwise_results(methods, all_results_ret, all_results_tgt)

# Activation Distance & JS-Divergence Calculation
def calculate_activation_distance_and_js(original_probs, unlearned_probs, users, target_users):
    """Compare two models' per-sample outputs by Euclidean distance and JS divergence.

    Every prediction ``p`` is expanded to the two-class distribution
    ``[p, 1 - p]`` (clipped to [1e-9, 1] and renormalised). For each sample the
    Euclidean distance and the Jensen-Shannon divergence between the two
    models' distributions are computed, then averaged per user group.

    Args:
        original_probs: per-sample predictions of the reference model; each
            element is indexed with ``[0]`` to obtain the probability.
        unlearned_probs: per-sample predictions of the compared model, same layout.
        users: user id per sample.
        target_users: container of targeted user ids.

    Returns:
        ``{"Activation": {...}, "JS": {...}}`` where each inner dict has the
        keys "Targeted", "Retained" and "All". A group without samples maps to NaN.
    """
    def safe_probs(p):
        # Clip away exact zeros so the KL terms stay finite, then renormalise
        p = np.clip(np.array(p), 1e-9, 1.0)
        return p / np.sum(p)

    def mean_or_nan(values):
        # np.mean([]) would emit a RuntimeWarning; make the empty case explicit
        return np.mean(values) if values else float('nan')

    distances = {"Targeted": [], "Retained": [], "All": []}
    js_values = {"Targeted": [], "Retained": [], "All": []}

    # Iterate over the `unlearned_probs` argument (previously a same-named global
    # was read here by mistake, so the parameter was ignored).
    for p1, p2, u in zip(original_probs, unlearned_probs, users):
        p1 = safe_probs([p1[0], 1 - p1[0]])
        p2 = safe_probs([p2[0], 1 - p2[0]])
        m = 0.5 * (p1 + p2)  # Mixture distribution used by the JS divergence
        js = 0.5 * (np.sum(kl_div(p1, m)) + np.sum(kl_div(p2, m)))
        dist = euclidean(p1, p2)

        group = "Targeted" if u in target_users else "Retained"
        distances[group].append(dist)
        js_values[group].append(js)
        distances["All"].append(dist)
        js_values["All"].append(js)

    return {
        "Activation": {g: mean_or_nan(distances[g]) for g in distances},
        "JS": {g: mean_or_nan(js_values[g]) for g in js_values},
    }

# Render the activation-distance / JS-divergence results as one table
def display_effectiveness_table(results_by_method):
    """Display a table with one column per method and two rows per user group.

    Args:
        results_by_method: mapping of method name to
            ``{"Activation": {group: value}, "JS": {group: value}}`` with the
            groups "Targeted", "Retained" and "All".

    Values are formatted with two decimals; rows appear as
    "<group> (Activation)" followed by "<group> (JS-Div)" for each group.
    """
    method_names = list(results_by_method.keys())
    row_kinds = (("Activation", " (Activation)"), ("JS", " (JS-Div)"))

    table_rows = []
    for group in ["Targeted", "Retained", "All"]:
        for metric_key, suffix in row_kinds:
            cells = [
                f"{results_by_method[name][metric_key][group]:.2f}"
                for name in method_names
            ]
            table_rows.append([group + suffix] + cells)

    df = pd.DataFrame(table_rows, columns=["Group"] + method_names)
    print("Activation Distance & JS-Divergence Summary:")
    display(df)

# Run Activation Distance & JS-Divergence Evaluation
results_by_method = {}

# Baseline model for the comparison.
# NOTE(review): this NCF is freshly constructed and never trained in this cell, so the
# "original" predictions come from randomly initialised weights — confirm whether a
# model trained on the full data was meant to be used here.
baseline_model = NCF(num_users, num_items).to(device)
baseline_model.eval() # Set model to evaluation mode (no training)
# DataLoader over the full sampled interaction list; no shuffling, so the sample order
# is identical on every pass and predictions line up across models
loader = DataLoader(InteractionDataset(interactions, num_users, num_items), batch_size=64)
original_preds = [] # Baseline predictions, one length-1 array per sample
user_ids = [] # User id of each sample, in the same order

with torch.no_grad():
    for users, items, _ in loader:
        users, items = users.to(device), items.to(device)
        probs = baseline_model(users, items).cpu().numpy()
        original_preds.extend(probs)
        user_ids.extend(users.cpu().numpy())

# NOTE(review): `method_func` is not called in this loop. A new, untrained NCF is created
# for every method name, so the reported distances compare two random initialisations
# rather than the models produced by the unlearning methods. The methods return metrics,
# not models, so wiring the trained models in would require changing their return values.
for method_name, method_func in methods:
    print(f"Evaluating {method_name} model for activation and JS...")
    model = NCF(num_users, num_items).to(device)
    model.eval()
    unlearned_preds = []

    with torch.no_grad():
        for users, items, _ in loader:
            users, items = users.to(device), items.to(device)
            probs = model(users, items).cpu().numpy()
            unlearned_preds.extend(probs)

    # Per-group mean Euclidean distance and JS divergence between the two prediction sets
    metrics = calculate_activation_distance_and_js(original_preds, unlearned_preds, user_ids, target_users)
    results_by_method[method_name] = metrics

display_effectiveness_table(results_by_method)

Using device: cuda
Retained Users: 991, Targeted Users: 52
Retained Interactions: 9495, Targeted Interactions: 505
Number of users: 1084, Number of items: 5032
Total interactions: 10000
Running Retrained on Retained users...
Epoch 1, Loss: 0.6780, AUC: 0.5083, HR@5: 0.8944, HR@10: 0.9930, HR@20: 1.0000, NDCG@5: 0.5278, NDCG@10: 0.5415, NDCG@20: 0.5816
Epoch 2, Loss: 0.6767, AUC: 0.5188, HR@5: 0.8910, HR@10: 0.9930, HR@20: 1.0000, NDCG@5: 0.5254, NDCG@10: 0.5457, NDCG@20: 0.5877
Running Retrained on Targeted users...
Epoch 1, Loss: 0.6641, AUC: 0.4713, HR@5: 0.7941, HR@10: 0.9444, HR@20: 1.0000, NDCG@5: 0.5014, NDCG@10: 0.5140, NDCG@20: 0.5550
Epoch 2, Loss: 0.6636, AUC: 0.4844, HR@5: 0.7647, HR@10: 0.9444, HR@20: 1.0000, NDCG@5: 0.5006, NDCG@10: 0.5296, NDCG@20: 0.5598
Running SISA on Retained users...
Epoch 1, Loss: 0.7106, AUC: 0.4729, HR@5: 0.9145, HR@10: 0.9643, HR@20: 1.0000, NDCG@5: 0.6206, NDCG@10: 0.6317, NDCG@20: 0.4327
Epoch 2, Loss: 0.7063, AUC: 0.4975, HR@5: 0.9402, HR@10: 

Unnamed: 0,Method,AUC,HR@5,HR@10,HR@20,NDCG@5,NDCG@10,NDCG@20
0,Retrained,0.51883,0.890971,0.992982,1.0,0.525365,0.545712,0.5877
1,SISA,0.516354,0.918631,0.977961,1.0,0.619988,0.633829,0.634257
2,RecEraser,0.511214,0.910428,0.988933,0.990909,0.536528,0.535081,0.5618
3,BadT,0.516765,0.902896,0.989474,0.991525,0.538249,0.539153,0.560853
4,erMax,0.512765,0.889267,0.989474,1.0,0.528466,0.534942,0.553491
5,FlipRec,0.516672,0.906303,0.992982,1.0,0.525478,0.531246,0.551982



Targeted Results Summary:


Unnamed: 0,Method,AUC,HR@5,HR@10,HR@20,NDCG@5,NDCG@10,NDCG@20
0,Retrained,0.484427,0.764706,0.944444,1.0,0.5006,0.529619,0.5598
1,SISA,0.484994,0.914286,0.4,0.0,0.596758,0.226481,0.0
2,RecEraser,0.504337,0.749206,0.91,1.0,0.539363,0.531637,0.576668
3,BadT,0.529973,0.852941,0.944444,1.0,0.547069,0.562615,0.649469
4,erMax,0.491129,0.852941,0.944444,1.0,0.562576,0.520744,0.603578
5,FlipRec,0.573453,0.970588,1.0,1.0,0.778299,0.755246,0.715665


Evaluating Retrained model for activation and JS...
Evaluating SISA model for activation and JS...
Evaluating RecEraser model for activation and JS...
Evaluating BadT model for activation and JS...
Evaluating erMax model for activation and JS...
Evaluating FlipRec model for activation and JS...
Activation Distance & JS-Divergence Summary:


Unnamed: 0,Group,Retrained,SISA,RecEraser,BadT,erMax,FlipRec
0,Targeted (Activation),0.02,0.13,0.23,0.02,0.19,0.08
1,Targeted (JS-Div),0.0,0.0,0.01,0.0,0.01,0.0
2,Retained (Activation),0.02,0.13,0.23,0.02,0.18,0.08
3,Retained (JS-Div),0.0,0.0,0.01,0.0,0.01,0.0
4,All (Activation),0.02,0.13,0.23,0.02,0.19,0.08
5,All (JS-Div),0.0,0.0,0.01,0.0,0.01,0.0
