# 1. Label Flipping method : 

## 1.1 Scenario I :
Single Client targeted ( client number 1) + verified clients number 0, 1, 2

### 1.1.1 20 clients:

#### 1.1.1.1 20 clients + Fedopt

In [15]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, # from cifar10 64 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-scenario1-fedopt-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-scenario1-fedopt-20clients-c-2025-11-14-15-07-58-824


#### 1.1.1.2 20 clients + FedAVG

In [6]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 32, # from cifar10 64 
    "client_lr": 0.02,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 1.0,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}

huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-scenario1-fedavg-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-scenario1-fedavg-20clients-c-2025-11-14-08-13-18-777


### 1.1.2 50 clients:

#### 1.1.2.1 Fedopt

In [2]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr":0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-scenario1-fedopt-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-scenario1-fedopt-50clients-c-2025-11-16-10-36-53-393


#### 1.1.2.2 FedAVG

In [3]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr":0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.38,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-scenario1-fedavg-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-scenario1-fedavg-50clients-c-2025-11-17-08-11-21-439


## 1.2 Scenario II :
Subset of clients targeted:

- in the case of 20 clients ( two clients targeted ( 0, 1) + two verified ( 1, 2))
- in the case of 50 clients (four clients targeted (0, 1, 2, 3) + four verified (2, 3, 4, 5))


### 1.2.1 20 clients

#### 1.2.1.1 Fedopt

In [2]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1",
    "verified_clients": "1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-sc2-fedopt-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-sc2-fedopt-20clients-cifar10-2025-11-18-10-02-04-291


#### 1.2.1.2 Fedavg

In [3]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1",
    "verified_clients": "1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-sc2-fedavg-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-sc2-fedavg-20clients-cifar10-2025-11-18-10-02-42-937


### 1.2.2 50 clients

#### 1.2.2.1 Fedopt

In [8]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr": 0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1 2 3",
    "verified_clients": "2 3 4 5",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-sc2-fedopt-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-sc2-fedopt-50clients-cifar10-2025-11-18-10-18-37-450


#### 1.2.2.2 Fedavg

In [9]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr": 0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "label_flip",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1 2 3",
    "verified_clients": "2 3 4 5",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="label-flip-sc2-fedavg-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: label-flip-sc2-fedavg-50clients-cifar10-2025-11-18-10-18-52-432


# 2. Backdoor Trigger Method:

## 2.1 Scenario 1:

### 2.1.1 20 clients

##### 2.1.1.1 Fedopt

In [3]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, # from cifar10 64 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-fedopt-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-fedopt-20clients-cifar10-2025-11-14-21-23-42-389


##### 2.1.1.1 FedAVG

In [18]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, # from cifar10 64 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-scenario1-fedopt-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-scenario1-fedopt-20clients-cif-2025-11-14-16-40-59-410


### 2.1.2 50 clients

#### 2.1.2.1 Fedopt

In [4]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr":0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-scenario1-fedopt-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-scenario1-fedopt-50clients-cif-2025-11-17-08-13-01-706


#### 2.1.2.2 FedvAvg

In [5]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr":0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-scenario1-fedavg-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-scenario1-fedavg-50clients-cif-2025-11-17-08-13-29-783


## 2.2 Scenario 2

### 2.2.1 20 clients

#### 2.2.1.1 Fedopt

In [4]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1",
    "verified_clients": "1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-sc2-fedopt-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-sc2-fedopt-20clients-cifar10-2025-11-18-10-06-03-404


#### 2.2.1.2 Fedavg

In [5]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1",
    "verified_clients": "1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-sc2-fedavg-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-sc2-fedavg-20clients-cifar10-2025-11-18-10-06-39-218


### 2.2.2 50 clients

#### 2.2.2.1 Fedopt

In [10]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr": 0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1 2 3",
    "verified_clients": "2 3 4 5",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-sc2-fedopt-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-sc2-fedopt-50clients-cifar10-2025-11-18-10-20-07-451


#### 2.2.2.2 Fedavg

In [11]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr": 0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "backdoor",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1 2 3",
    "verified_clients": "2 3 4 5",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="backdoor-sc2-fedavg-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: backdoor-sc2-fedavg-50clients-cifar10-2025-11-18-10-20-44-572


# 3. Fingerprinting Method:

## 3.1 Scenario 1:

### 3.1.1 20 clients

##### 3.1.1.1 Fedopt:

In [3]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, # from cifar10 64 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.8,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-fedopt-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-fedopt-20clients-cifar10-2025-11-15-10-00-06-467


##### 3.1.1.2 Fedavg:

In [4]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, # from cifar10 64 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.8,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-fedavg-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-fedavg-20clients-cifar10-2025-11-15-11-19-07-977


### 3.1.2 50 clients

#### 3.1.2.1 Fedopt

In [6]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr":0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-scenario1-fedopt-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-scenario1-fedopt-50clients--2025-11-17-08-15-04-985


#### 3.1.2.2 Fedavg

In [7]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr":0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "1",
    "verified_clients": "0 1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-scenario1-fedavg-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-scenario1-fedavg-50clients--2025-11-17-08-15-30-533


## 3.2 Scenario2 

### 3.2.1 20 clients

#### 3.2.1.1 Fedopt

In [6]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1",
    "verified_clients": "1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-sc2-fedopt-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-sc2-fedopt-20clients-cifar1-2025-11-18-10-08-44-606


#### 3.2.1.2 Fedavg

In [7]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 20,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 15,
    "local_epochs": 4,
    "batch_size": 64, 
    "client_lr": 0.01,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.001,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1",
    "verified_clients": "1 2",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-sc2-fedavg-20clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)



INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-sc2-fedavg-20clients-cifar1-2025-11-18-10-09-23-565


### 3.2.2 50 clients

#### 3.2.2.1 Fedopt

In [12]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr": 0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedopt",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1 2 3",
    "verified_clients": "2 3 4 5",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-sc2-fedopt-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-sc2-fedopt-50clients-cifar1-2025-11-18-10-23-11-470


#### 3.2.2.2 Fedavg

In [13]:

from sagemaker.huggingface import HuggingFace

role = "arn:aws:iam::711387130895:role/service-role/SageMaker-ExecutionRole-20250525T120088"

# Define hyperparameters that match your argparse arguments
hyperparameters = {
    # --- Dataset and client setup ---
    "dataset_name": "cifar10",
    "num_clients": 50,
    "frac_clients": 1.0,
    "dirichlet_alpha": 0.5,
    "test_size": 0.2,
    "poison_size": 0.35,

    # --- Training parameters ---
    "rounds": 18,
    "local_epochs": 3,
    "batch_size": 64,
    "client_lr": 0.012,
    "client_momentum": 0.9,
    "weight_decay": 1e-4,

    # --- Server optimisation ---
    "server_opt": "fedavg",
    "server_lr": 0.4,

    # --- Fingerprinting ---
    "enable_fingerprinting": "1",          # or False if disabled
    "fingerprint_method": "sparse",         # choices: ["sparse", "dense"]
    "fingerprint_sparsity": 0.01,
    "target_dot_strength": 1.0,

    # --- Detection / Defence params ---
    "honest_fraction": 0.1,
    "detection_margin": 1.5,
    "seed": 42,
    "history_window": 5,
    "method": "fingerprint",                 # choices: ["label_flip", "backdoor","fingerprint"]
    "label_flip_alpha": 1.0,
    "backdoor_target_label": 1,
    "backdoor_patch_size": 15,
    "backdoor_intensity": 1.0,
    "tau_backdoor_threshold_statistical": tau_stat,
    "tau_backdoor_threshold_emprical": tau_emp,


    # --- Client selection ---
    # Lists must be passed as space-separated strings for SageMaker
    "targeted_clients": "0 1 2 3",
    "verified_clients": "2 3 4 5",
}


huggingface_estimator = HuggingFace(
    entry_point="main.py",   # script inside source_dir
    source_dir="s3://poison-detect-bucket/submission2/scripts/sourcedir.tar.gz",  # zipped dir
    role=role,
    instance_type="ml.g4dn.12xlarge",  # use quota for faster training
    instance_count=1,
    transformers_version="4.36.0",
    pytorch_version="2.1.0",
    py_version="py310",
    base_job_name="fingerprint-sc2-fedavg-50clients-cifar10",
    hyperparameters=hyperparameters,  # pass args here
)

# Launch training
huggingface_estimator.fit(wait=False)


INFO:sagemaker.telemetry.telemetry_logging:SageMaker Python SDK will collect telemetry to help us better understand our user's needs, diagnose issues, and deliver additional features.
To opt out of telemetry, please disable via TelemetryOptOut parameter in SDK defaults config. For more information, refer to https://sagemaker.readthedocs.io/en/stable/overview.html#configuring-and-using-defaults-with-the-sagemaker-python-sdk.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fingerprint-sc2-fedavg-50clients-cifar1-2025-11-18-10-23-43-083
