# This example loads a pre-trained MNIST-10 + MLP benchmark setting and evaluates the Influence Function (CG) algorithm using the LDS metric.

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/carolinef35/dattri/blob/colab_examples/examples/quickstart/influence_function_lds.ipynb)

Note: The installation block in the notebook is specifically designed for Google Colab and the use cases in this notebook. Standard installation instructions can be found in the [README](https://github.com/TRAIS-Lab/dattri/blob/main/README.md#quick-start).

In [None]:
!pip install dattri

Collecting dattri
  Downloading dattri-0.2.0-py3-none-any.whl.metadata (12 kB)
Collecting pretty-midi (from dattri)
  Downloading pretty_midi-0.2.11.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m77.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty-midi->dattri)
  Downloading mido-1.3.3-py3-none-any.whl.metadata (6.4 kB)
Downloading dattri-0.2.0-py3-none-any.whl (173 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.9/173.9 kB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading mido-1.3.3-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty-midi
  Building wheel for pretty-midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty-midi: filename=pretty_midi-0.2.11-py3-none-any.whl s

Import libraries needed to run code.

Note: If the "----- WARNING: CUDA devices not detected. This will cause the model to run very slow! -----" message appears, change your runtime type to GPU by going to Runtime -> Change runtime type and selecting 'GPU'.

In [None]:
import argparse

import torch
from torch import nn
from torch.utils.data import DataLoader

from dattri.algorithm.influence_function import IFAttributorCG
from dattri.benchmark.load import load_benchmark
from dattri.metric import lds
from dattri.task import AttributionTask

LDS (Linear Datamodeling Score): a metric used to evaluate the overall performance of a data attribution method.

* A score near 1 means the attribution method accurately predicts the model's response to data changes.



In [None]:

# pick the default device up front: "cuda" when torch reports it as available, "cpu" otherwise
default_device = "cuda" if torch.cuda.is_available() else "cpu"

# expose the device as a "--device" option on an argument parser
parser = argparse.ArgumentParser()
parser.add_argument("--device", type=str, default=default_device)
# parse an explicit empty argument list so nothing is read from the real command line
args = parser.parse_args([])

# load the "mlp" / "mnist" benchmark for the "lds" metric;
# the call returns the model details together with the ground truth used for scoring
model_details, groundtruth = load_benchmark(model="mlp", dataset="mnist", metric="lds")

def f(params, data_target_pair):
    """Compute the cross-entropy loss of the benchmark model in functional form.

    Args:
        params: Parameters passed to ``torch.func.functional_call`` together
            with ``model_details["model"]``.
        data_target_pair: An ``(image, label)`` pair.

    Returns:
        The ``nn.CrossEntropyLoss`` value of the model output for ``image``
        against ``label`` cast to long.
    """
    inputs, targets = data_target_pair
    # run the model on the inputs using the parameters supplied by the caller
    logits = torch.func.functional_call(model_details["model"], params, inputs)
    criterion = nn.CrossEntropyLoss()
    return criterion(logits, targets.long())

# bundle the model (moved to the chosen device), the loss function `f`,
# and a checkpoint into a single attribution task
task = AttributionTask(
    model=model_details["model"].to(args.device),
    loss_func=f,
    checkpoints=model_details["models_full"][0],  # first entry of "models_full"
)

# build the conjugate-gradient influence function attributor for that task
attributor = IFAttributorCG(
    task=task,
    device=args.device,
    regularization=5e-3,
    max_iter=10,
)

# hand the training data to the attributor's cache step
full_train_loader = DataLoader(
    model_details["train_dataset"],
    batch_size=5000,
    sampler=model_details["train_sampler"],
)
attributor.cache(full_train_loader)

# run the attribution with gradient tracking disabled
with torch.no_grad():
    train_loader = DataLoader(
        model_details["train_dataset"],
        batch_size=5000,
        sampler=model_details["train_sampler"],
    )
    test_loader = DataLoader(
        model_details["test_dataset"],
        batch_size=5000,
        sampler=model_details["test_sampler"],
    )
    # attribution scores for the training loader with respect to the test loader
    score = attributor.attribute(train_loader, test_loader)

# keep the first element of what `lds` returns as the LDS scores
lds_score = lds(score, groundtruth)[0]
# average only over the entries that are not NaN
not_nan = ~torch.isnan(lds_score)
print("lds:", torch.mean(lds_score[not_nan]))

100%|██████████| 9.91M/9.91M [00:00<00:00, 14.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 482kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.52MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.62MB/s]
calculating gradient of training set...:   0%|          | 0/1 [00:00<?, ?it/s]
calculating gradient of test set...:   0%|          | 0/1 [00:00<?, ?it/s][A
calculating gradient of test set...: 100%|██████████| 1/1 [00:02<00:00,  2.28s/it][A
  spearmanr(sum_scores[:, i], gt_values[:, i]).correlation,


lds: tensor(0.2594)


  spearmanr(sum_scores[:, i], gt_values[:, i]).pvalue,
