In [None]:
import sagemaker
import boto3

sagemaker_session = sagemaker.Session()
region = boto3.Session().region_name
bucket_name = "fashion-mnist-mia"  # Change this to your actual bucket
s3_prefix = "membership-inference-data" # Change this to your actual prefix

s3_data_path = f"s3://{bucket_name}/{s3_prefix}/"
print(f"Data will be stored in: {s3_data_path}")


Data will be stored in: s3://s3-sagemaker-fashion-mnist-mia/membership-inference-data/


## Load and process the subset : 

In [None]:
from sagemaker.processing import ProcessingInput
import sagemaker
from sagemaker.processing import ScriptProcessor
from sagemaker import get_execution_role
from sagemaker.processing import ProcessingOutput

# Define IAM role and SageMaker session
role = "arn:aws:iam::711387130895:role/processor"  # Replace with your SageMaker IAM role ARN
sagemaker_session = sagemaker.Session()

image_uri = sagemaker.image_uris.retrieve(
    framework="sklearn",
    region="eu-central-1",
    version="1.0-1"
)

script_processor = ScriptProcessor(
    image_uri=image_uri,
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    command=["/bin/sh", "-c", "pip install kaggle --no-cache-dir && python3 /opt/ml/processing/input/code/data-process.py --s3-bucket-name BUCKET_NAME"],
    env={
        "KAGGLE_USERNAME": "username",
        "KAGGLE_KEY": "ckey"
    }
)


# Define output
outputs = [
    ProcessingOutput(
        source="/opt/ml/processing/output",
        destination="s3://BUCKET_NAME/fashion-product-images-processed",
    )
]


inputs = [
    ProcessingInput(
        source="./data-process.py",  # Ensure this is the correct local path
        destination="/opt/ml/processing/input",  # Inside the container
    )
]

script_processor.run(
    code="data-process.py",  # Use local path, SageMaker will handle the upload
    inputs=inputs,
    outputs=outputs,
    arguments=["--s3-bucket-name", "s3-sagemaker-fashion-mnist-mia"],
)
print("Finished")


#### split and save splits in S3 

In [None]:
import boto3
import os

# Constants
s3_bucket = "fashion-mnist-mia"  # Your bucket
s3_prefix = "fashion-product-images-processed"  #update with your prefix
local_data_dir = "/tmp/fashion-products"

# Ensure local directory exists
os.makedirs(local_data_dir, exist_ok=True)

# Initialize S3 client
s3_client = boto3.client("s3")

# List all objects in the S3 directory
paginator = s3_client.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=s3_bucket, Prefix=s3_prefix):
    if "Contents" in page:
        for obj in page["Contents"]:
            s3_key = obj["Key"]
            
            # Ensure we don't download just the top-level prefix
            if s3_key.endswith("/"):
                continue
            
            # Preserve subdirectory structure
            local_file_path = os.path.join(local_data_dir, os.path.relpath(s3_key, s3_prefix))
            local_dir = os.path.dirname(local_file_path)
            os.makedirs(local_dir, exist_ok=True)

            # Download file
            s3_client.download_file(s3_bucket, s3_key, local_file_path)
            print(f"Downloaded {s3_key} to {local_file_path}")

print("Dataset download complete!")


Downloaded fashion-product-images-processed/accessories/14889.jpg to /tmp/fashion-products/accessories/14889.jpg
Downloaded fashion-product-images-processed/accessories/14890.jpg to /tmp/fashion-products/accessories/14890.jpg
Downloaded fashion-product-images-processed/accessories/14893.jpg to /tmp/fashion-products/accessories/14893.jpg
Downloaded fashion-product-images-processed/accessories/14894.jpg to /tmp/fashion-products/accessories/14894.jpg
Downloaded fashion-product-images-processed/accessories/15654.jpg to /tmp/fashion-products/accessories/15654.jpg
Downloaded fashion-product-images-processed/accessories/15928.jpg to /tmp/fashion-products/accessories/15928.jpg
Downloaded fashion-product-images-processed/accessories/17359.jpg to /tmp/fashion-products/accessories/17359.jpg
Downloaded fashion-product-images-processed/accessories/17360.jpg to /tmp/fashion-products/accessories/17360.jpg
Downloaded fashion-product-images-processed/accessories/17361.jpg to /tmp/fashion-products/acces

In [27]:
def split_dataset(dataset, num_splits=6):
    """
    Split the dataset into `num_splits` equal chunks, ensuring the dataset is balanced
    across the splits. Each split will have an equal number of images from each class.
    """
    class_indices = {}
    
    # Group indices by class
    for idx, label in enumerate(dataset.labels):
        if label not in class_indices:
            class_indices[label] = []
        class_indices[label].append(idx)

    # Shuffle each class's indices
    for class_id in class_indices:
        random.shuffle(class_indices[class_id])

    # Create the splits
    splits = {i: [] for i in range(num_splits)}
    
    for class_id, indices in class_indices.items():
        chunk_size = len(indices) // num_splits
        for i in range(num_splits):
            start_idx = i * chunk_size
            end_idx = (i + 1) * chunk_size if i < num_splits - 1 else len(indices)
            splits[i].extend(indices[start_idx:end_idx])

    return splits


In [29]:
# Split the dataset into 6 chunks
from shutil import copy

splits = split_dataset(dataset, num_splits=6)

# Now save each split to a specific directory (or S3 if needed)
output_root = "data/"
for i, split_indices in splits.items():
    split_dir = os.path.join(output_root, f"split_{i+1}")
    os.makedirs(split_dir, exist_ok=True)
    
    for idx in split_indices:
        img_path = dataset.image_paths[idx]
        class_name = dataset.classes[dataset.labels[idx]]
        # Create class directories if needed
        class_dir = os.path.join(split_dir, class_name)
        os.makedirs(class_dir, exist_ok=True)
        
        # Copy image to the corresponding split folder
        copy(img_path, os.path.join(class_dir, os.path.basename(img_path)))

print("✅ Dataset successfully split into 6 balanced chunks!")

✅ Dataset successfully split into 6 balanced chunks!


In [None]:
import os
import boto3
from tqdm import tqdm

def upload_to_s3(local_dir, s3_bucket, s3_prefix=""):
    """
    Uploads the contents of a local directory to an S3 bucket.
    
    :param local_dir: Local directory path
    :param s3_bucket: The target S3 bucket
    :param s3_prefix: Prefix (folder path) in the S3 bucket
    """
    s3 = boto3.client("s3")

    # Walk through the local directory and upload files to S3
    for root, dirs, files in os.walk(local_dir):
        for file in files:
            local_file = os.path.join(root, file)
            # Construct the S3 path (relative to the local_dir)
            relative_path = os.path.relpath(local_file, local_dir)
            s3_key = os.path.join(s3_prefix, relative_path)

            try:
                # Print the file being uploaded for logging purposes
                print(f"Uploading {local_file} to {s3_key}")
                s3.upload_file(local_file, s3_bucket, s3_key)
            except Exception as e:
                print(f"Error uploading {local_file}: {str(e)}")
                continue  # Continue with the next file if an error occurs

# Example: S3 Bucket and Prefix
s3_bucket = "="  # Your S3 bucket name
s3_prefix = ""  # S3 prefix where you want to upload

output_root = "data/splits/"  # Directory containing the splits (updated)

# Check if the local split directory exists before attempting to upload
if os.path.exists(output_root):
    print("Starting the upload process...")
    upload_to_s3(output_root, s3_bucket, s3_prefix)
    print("✅ Dataset splits successfully uploaded to S3!")
else:
    print(f"Error: The directory {output_root} does not exist.")


Starting the upload process...
Uploading data/splits/split_5/n01855672/n01855672_346.JPEG to datasets/tiny-imagenet-splits/split_5/n01855672/n01855672_346.JPEG
Uploading data/splits/split_5/n01855672/n01855672_85.JPEG to datasets/tiny-imagenet-splits/split_5/n01855672/n01855672_85.JPEG
Uploading data/splits/split_5/n01855672/n01855672_382.JPEG to datasets/tiny-imagenet-splits/split_5/n01855672/n01855672_382.JPEG
Uploading data/splits/split_5/n01855672/n01855672_247.JPEG to datasets/tiny-imagenet-splits/split_5/n01855672/n01855672_247.JPEG
Uploading data/splits/split_5/n01855672/n01855672_166.JPEG to datasets/tiny-imagenet-splits/split_5/n01855672/n01855672_166.JPEG
Uploading data/splits/split_5/n01855672/n01855672_58.JPEG to datasets/tiny-imagenet-splits/split_5/n01855672/n01855672_58.JPEG
Uploading data/splits/split_5/n01855672/n01855672_391.JPEG to datasets/tiny-imagenet-splits/split_5/n01855672/n01855672_391.JPEG
Uploading data/splits/split_5/n01855672/n01855672_373.JPEG to datasets

## Train shadows :

In [None]:
import sagemaker
from sagemaker.pytorch import PyTorch
import boto3
from sagemaker import get_execution_role

# Variables
s3_bucket = "fashion-mnist-mia" # Update with your bucket name
s3_prefix = "datasets/fashion_products-splits" # Update with your dataset prefix
output_path = f"s3://{s3_bucket}/output"
role = get_execution_role()

# Initialize SageMaker session
sagemaker_session = sagemaker.Session()

# Path to class mapping in S3
class_mapping_s3_path = f"s3://{s3_bucket}/fashion-product-images-processed/class_mapping.json"

# List the splits in S3
splits = [f"split_{i+1}" for i in range(6)]  # 5 splits for shadow models + 1 test split

# Define the training job function
def train_shadow_model(split_index, training_data_s3_path, model_name):
    print(f"Starting training for {model_name} on {training_data_s3_path}")

    estimator = PyTorch(
        entry_point="train_shadows.py",  # Your training script
        role=role,
        framework_version="1.8.0",  # PyTorch version
        py_version="py3",  # Python version
        instance_count=1,
        instance_type="ml.g4dn.4xlarge",  # Modify instance type as needed
        output_path=output_path,
        sagemaker_session=sagemaker_session,
        hyperparameters={
            "batch_size": 64,
            "epochs":70,
            "class_mapping": class_mapping_s3_path,
            "number": split_index,

            },  # Pass class mapping file from S3
        
        code_dir="path_to_your_code",  # Update with the correct directory
    )

    # Set up input channels for the estimator
    inputs = {
        "training": sagemaker.inputs.TrainingInput(
            s3_data=training_data_s3_path,
            content_type="application/x-image",
        ),
    }

    # Start training job
    estimator.fit(inputs)
    print(f"Training job started for {model_name} with split {split_index+1}")

# Launch training for the 5 shadow models
for i in range(5):
    split_index = i + 1
    training_data_s3_path = f"s3://{s3_bucket}/{s3_prefix}/{splits[i]}"
    model_name = f"shadow_model_{split_index}"

    train_shadow_model(split_index, training_data_s3_path, model_name)

print("Training jobs for shadow models have been launched.")


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-02-20-09-00-20-923


Starting training for shadow_model_1 on s3://s3-sagemaker-fashion-mnist-mia/datasets/fashion_products-splits/split_1
2025-02-20 09:00:23 Starting - Starting the training job...
2025-02-20 09:00:36 Starting - Preparing the instances for training...
2025-02-20 09:01:02 Downloading - Downloading input data...
2025-02-20 09:01:42 Downloading - Downloading the training image...........................
2025-02-20 09:06:13 Training - Training image download completed. Training in progress....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2025-02-20 09:06:48,961 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-02-20 09:06:48,991 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-02-20 09:06:48,993 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2025-02-20 09:06:49

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-02-20-09-10-42-546


Training seconds: 559
Billable seconds: 559
Training job started for shadow_model_1 with split 2
Starting training for shadow_model_2 on s3://s3-sagemaker-fashion-mnist-mia/datasets/fashion_products-splits/split_2
2025-02-20 09:10:44 Starting - Starting the training job...
2025-02-20 09:10:58 Starting - Preparing the instances for training...
2025-02-20 09:11:30 Downloading - Downloading input data...
2025-02-20 09:12:10 Downloading - Downloading the training image...........................
2025-02-20 09:16:38 Training - Training image download completed. Training in progress.....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2025-02-20 09:17:16,391 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-02-20 09:17:16,421 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-02-20 09:17:16,423 sagemak

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


Training seconds: 557
Billable seconds: 557
Training job started for shadow_model_2 with split 3
Starting training for shadow_model_3 on s3://s3-sagemaker-fashion-mnist-mia/datasets/fashion_products-splits/split_3


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-02-20-09-21-03-183


2025-02-20 09:21:04 Starting - Starting the training job...
2025-02-20 09:21:18 Starting - Preparing the instances for training...
2025-02-20 09:21:49 Downloading - Downloading input data...
2025-02-20 09:22:30 Downloading - Downloading the training image...........................
2025-02-20 09:26:57 Training - Training image download completed. Training in progress.....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2025-02-20 09:27:36,172 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-02-20 09:27:36,204 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-02-20 09:27:36,206 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2025-02-20 09:27:36,521 sagemaker-training-toolkit INFO     Invoking user script[0m
[34mTraining Env:[0m
[34m{
    "additional_fram

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-02-20-09-31-23-616


Training seconds: 562
Billable seconds: 562
Training job started for shadow_model_3 with split 4
Starting training for shadow_model_4 on s3://s3-sagemaker-fashion-mnist-mia/datasets/fashion_products-splits/split_4
2025-02-20 09:31:24 Starting - Starting the training job...
2025-02-20 09:31:39 Starting - Preparing the instances for training...
2025-02-20 09:32:03 Downloading - Downloading input data...
2025-02-20 09:32:44 Downloading - Downloading the training image...........................
2025-02-20 09:37:12 Training - Training image download completed. Training in progress....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2025-02-20 09:37:50,528 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-02-20 09:37:50,558 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-02-20 09:37:50,560 sagemake

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-02-20-09-41-44-147


Training seconds: 563
Billable seconds: 563
Training job started for shadow_model_4 with split 5
Starting training for shadow_model_5 on s3://s3-sagemaker-fashion-mnist-mia/datasets/fashion_products-splits/split_5
2025-02-20 09:41:45 Starting - Starting the training job...
2025-02-20 09:41:59 Starting - Preparing the instances for training...
2025-02-20 09:42:22 Downloading - Downloading input data...
2025-02-20 09:43:02 Downloading - Downloading the training image...........................
2025-02-20 09:47:30 Training - Training image download completed. Training in progress....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2025-02-20 09:48:08,095 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-02-20 09:48:08,126 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-02-20 09:48:08,129 sagemake


## Launch the training for the attack models

In [None]:
import sagemaker
from sagemaker.pytorch import PyTorch
import boto3
# Define S3 paths
bucket = "fashion-mnist-mia" # Update with your bucket name
dataset_prefix ="datasets/fashion_products-splits" # Update with your dataset prefix
output_path = f"s3://{bucket}/attack_models/"
binary_dataset_path = f"s3://{bucket}/Binary_datasets/"
shadow_model_prefix = "checkpoints/"
# Initialize SageMaker session
boto3.setup_default_session(region_name='eu-central-1')  # Europe (Frankfurt) region
boto3_session = boto3.Session(region_name='eu-central-1')  # Frankfurt region

sagemaker_session = sagemaker.Session(boto_session=boto3_session)


# Define hyperparameters
hyperparameters={
    "batch_size": 64,
    "epochs": 178,
    "input_size": 10,
    "num_classes": 10,
    "num_shadow_models": 5,
    "s3_bucket": bucket,
    "s3_dataset_prefix": dataset_prefix,
    "s3_output_path": output_path,
    "s3_shadow_prefix": shadow_model_prefix,
    "s3_binary_dataset_path": binary_dataset_path,  # Update with the correct path
    "class_mapping": f"s3://{bucket}/fashion-product-images-processed/class_mapping.json"
}


# Define estimator
role = sagemaker.get_execution_role()
estimator = PyTorch(
    entry_point="trainAttack.py",  # Your script
    role=role,
    instance_count=1,
    instance_type="ml.g4dn.4xlarge",  # Change based on available resources
    framework_version="1.12",  # Update if needed
    py_version="py38",
    hyperparameters=hyperparameters,
    output_path=output_path,
    sagemaker_session=sagemaker_session,

)

# Launch training
estimator.fit()


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-02-21-14-21-31-542


2025-02-21 14:21:32 Starting - Starting the training job...
2025-02-21 14:21:46 Starting - Preparing the instances for training...
2025-02-21 14:22:21 Downloading - Downloading the training image...........................
2025-02-21 14:26:59 Training - Training image download completed. Training in progress.....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2025-02-21 14:27:30,379 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-02-21 14:27:30,399 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2025-02-21 14:27:30,410 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-02-21 14:27:30,412 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2025-02-21 14:27:30,674 sagemaker-training-toolkit INFO     No Neurons de