In [3]:
from clearml import Task

def load_model_from_task(task_id: str, artifact_name: str = "trained Model"):
    """
    Connects to a ClearML Task and retrieves a pickled model artifact (e.g. from a Pipeline step).
    
    Args:
        task_id (str): The ID of the specific task (e.g. the "Model Training" step).
        artifact_name (str): The name of the artifact to retrieve. Defaults to "trained Model" 
                             (matches 'return_values' in the pipeline component).

    Returns:
        The deserialized model object (e.g. LitResNet).
    """
    print(f"Connecting to task: {task_id}")
    task = Task.get_task(task_id=task_id)
    
    if artifact_name in task.artifacts:
        print(f"Found artifact: '{artifact_name}'. Downloading and deserializing...")
        # .get() downloads the pickle and returns the Python object
        return task.artifacts[artifact_name].get()
    else:
        available_artifacts = list(task.artifacts.keys())
        raise ValueError(
            f"Artifact '{artifact_name}' not found in task {task_id}.\n"
            f"Available artifacts: {available_artifacts}"
        )
    
baseline    = load_model_from_task("3cebe0e4059e4fa58c57bd9a650ef7f5", "Baseline Model")
unlearn_ds  = load_model_from_task("ce89c3a50dda4ef1809314d2bce71374", "Test Dataloader")
test_dl     = load_model_from_task("ce89c3a50dda4ef1809314d2bce71374", "Unlearning Dataset")

Connecting to task: 3cebe0e4059e4fa58c57bd9a650ef7f5
Found artifact: 'Baseline Model'. Downloading and deserializing...


Exception 'Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.' encountered when getting artifact with type pickle and content type application/pickle


Connecting to task: ce89c3a50dda4ef1809314d2bce71374
Found artifact: 'Test Dataloader'. Downloading and deserializing...
Connecting to task: ce89c3a50dda4ef1809314d2bce71374
Found artifact: 'Unlearning Dataset'. Downloading and deserializing...


In [2]:
from clearml import Task, PipelineController
import torch

def load_artifacts_from_pipeline(pipeline_id: str):
    """
    Fetches the necessary artifacts (Model, Unlearning Dataset, Test Loader) 
    from a specific ClearML Pipeline execution for finetuning the unlearning step.
    
    Args:
        pipeline_id (str): The ID of the pipeline controller task (from the Web UI).
        
    Returns:
        tuple: (target_model, unlearn_ds, test_loader)
    """
    print(f"Connecting to Pipeline: {pipeline_id}...")
    pipeline_task = Task.get_task(task_id=pipeline_id)
    
    # Get the list of steps (tasks) created by this pipeline
    # The pipeline controller tracks which tasks it created.
    # We look for tasks named "Train Baseline" and "Preprocess Data" 
    # that belong to this pipeline instance.
    
    # 1. Find the Preprocessing Task (outputs datasets)
    preprocess_task = None
    # 2. Find the Training Task (outputs the model)
    train_task = None

    # Iterate through child tasks to find the correct steps
    # Note: ClearML pipelines usually name steps as "StepName.TaskName" or similar, 
    # but strictly we search by the task names defined in your PipelineDecorator.
    for step in pipeline_task.get_pipeline_details().get('steps', []):
        step_name = step['name']
        task_id = step['task_id']
        
        if step_name == "Preprocess Data":
            preprocess_task = Task.get_task(task_id=task_id)
        elif step_name == "Train Baseline":
            train_task = Task.get_task(task_id=task_id)

    if not preprocess_task or not train_task:
        raise ValueError("Could not find 'Preprocess Data' or 'Train Baseline' steps in this pipeline.")

    print(f"Found Preprocess Task: {preprocess_task.id}")
    print(f"Found Training Task: {train_task.id}")

    # --- Load Artifacts ---
    
    # 1. Load Datasets from "Preprocess Data"
    # The artifact names must match the 'return_values' in your @PipelineDecorator.component
    print("Loading 'Unlearning Dataset'...")
    unlearn_ds = preprocess_task.artifacts['Unlearning Dataset'].get()
    
    print("Loading 'Test Dataloader'...")
    test_loader = preprocess_task.artifacts['Test Dataloader'].get()

    # 2. Load Model from "Train Baseline"
    # artifact name matches 'return_values=["Baseline Model", ...]'
    print("Loading 'Baseline Model'...")
    target_model = train_task.artifacts['Baseline Model'].get()

    print("âœ… All artifacts loaded successfully.")
    return target_model, unlearn_ds, test_loader

ID = "dd5a479f9048425481e993af74648693"
_, _ , _ = load_artifacts_from_pipeline(ID)

Connecting to Pipeline: dd5a479f9048425481e993af74648693...


AttributeError: 'Task' object has no attribute 'get_pipeline_details'

In [None]:
import torch

tensor = torch.tensor([[1.0, 2.0, 3.0]])
tensor.shape

f = [8]
f.extend(tensor.squeeze(0).shape)
v = torch.randn(f) # Original vector of size 10
v.shape

torch.Size([8, 1, 3])

In [1]:
from src.data.dataset_loaders import TrainTestDataset, UnlearningDataLoader, UnlearningPairDataset
from torch.utils.data import DataLoader

# Create the dataset
unlearning_train_set = UnlearningPairDataset(
    csv_file="data/mnist_index.csv", 
    root_dir="data/softtarget_dataset/mnist",
    split='test'
)

# Use the custom DataLoader
unlearning_loader = UnlearningDataLoader(
    unlearning_train_set, 
    batch_size=2, 
    shuffle=True
)

Loading dataset index from data/mnist_index.csv...
Loaded Unlearning Dataset (test split using 'f1_split'): 889 Forget samples and 9111 Non-Forget samples.


In [14]:
len(unlearning_loader)

3151

In [15]:
unlearning_loader.dataset.tensor_to_label

{tensor([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]): '0',
 tensor([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]): '1',
 tensor([0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]): '2',
 tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]): '3',
 tensor([0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]): '4',
 tensor([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]): '5',
 tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]): '6',
 tensor([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]): '7',
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]): '8',
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]): '9'}

In [21]:
ttd = TrainTestDataset(
    csv_file="data/mnist_index.csv", 
    root_dir="data/softtarget_dataset/mnist",
    split='test',
    sample_mode='forget',
    classes=['7', '8', '9']
)



Loading dataset index from data/mnist_index.csv...
Loaded test split (mode='forget', classes=['7', '8', '9']) with 6302 samples.
