In [None]:
import os
import pickle

import numpy as np
import optuna
import pandas as pd
import torch
import yaml
from optuna.storages import JournalStorage
from optuna.storages.journal import JournalFileBackend
from sklearn.model_selection import train_test_split
from torch import tensor, cat, save, load, optim, nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10

import src.save_load as sl
import src.study_handler as sh
from LeakPro.leakpro.attacks.mia_attacks.rmia import rmia_vectorised
from src.dataset_handler import processDataset, loadDataset
from src.models.resnet18_model import ResNet18
from src.utils import print_yaml, get_shadow_signals, percentile_score_normalization

In [None]:
1. Set the device.
2. Load `study.yaml`, which contains the `fbd_study` key.
3. Load the baseline audit signals, shadow-model logits, shadow-model in-masks, and metadata.
4. Use the metadata to update the training section of `fbd_study`.
5. Select the shadow models to be used.
6. TODO: Calculate the vulnerability of the baseline model.
7. Normalize the vulnerability scores.
8. Prepare the dataset using the baseline in-mask so that we train on the baseline training set.
9. Initialize the study and run it.
10. Visualize the study.

In [None]:
# -------------#
#  Set device  #
# -------------#
# Use CUDA when it is available, otherwise fall back to the CPU. The selected
# device is stored on the study-handler module (`sh.DEVICE`); the notebook has
# no DEVICE name of its own, so the message has to read the value back from `sh`.
sh.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {sh.DEVICE}")

In [None]:
#---------------------#
#  Load Study Config  #
#---------------------#
# `config` starts out as None so that the later "run study" guard can tell
# whether the YAML file was actually parsed.
config = None
with open("./study.yaml") as study_file:
    # safe_load only builds plain Python objects from the YAML document.
    config = yaml.safe_load(study_file)

print_yaml(f"Initial study config: {config}")

In [None]:
#----------------------#
#  Load Model Signals  #
#----------------------#
# Folder holding the saved baseline audit signals, taken from the study config.
audit_signals_folder = config["fbd_study"]["audit_signals_folder"]

# NOTE(review): `loadAudit` is not imported in any cell visible in this notebook;
# confirm which module provides it and import it in the top import cell.
(
    metadata,
    rescaled_target_logits,
    rescaled_shadow_model_logits,
    shadow_models_in_mask,
    target_in_mask,
    audit_data_indices,
) = loadAudit(audit_signals_folder)

In [None]:
#-----------------------#
#  Update Study Config  #
#-----------------------#
print_yaml(f"Initial study config: {config['fbd_study']}")

# Training section of the metadata loaded together with the audit signals.
train_metadata = metadata["trainCfg"]["train"]

# Overwrite the study's training hyperparameters with the values recorded in
# the loaded metadata, one key at a time and in a fixed order.
for hparam in ("epochs", "batch_size", "momentum", "learning_rate", "t_max", "weight_decay"):
    config['fbd_study'][hparam] = train_metadata[hparam]

print_yaml(f"Updated study config: {config['fbd_study']}")

In [None]:
#------------------------#
#  Select Shadow Models  #
#------------------------#
# Number of shadow models the study should use, as configured in study.yaml.
sm_count = config['fbd_study']['shadow_model_count']

print(f"Shape of rescaled_sm_logits: {rescaled_shadow_model_logits.shape}")

# Pick the logits and in-masks for `sm_count` shadow models.
shadow_logits, shadow_inmask = get_shadow_signals(
    rescaled_shadow_model_logits,
    shadow_models_in_mask,
    sm_count,
)

print(f"Shape of selected_sm_logits: {shadow_logits.shape}")

In [None]:
#--------------------------------------------#
#  Calculate Vulnerability score using RMIA  #
#--------------------------------------------#
# Eight shadow models are used by default: according to "Low-Cost High-Power
# Membership Inference Attacks" (page 7) this already gives a strong attack
# while needing only a few models.
baseline_vulnerability_sm_count = 8
(
    baseline_vulnerability_shadow_logits,
    baseline_vulnerability_shadow_inmask,
) = get_shadow_signals(
    rescaled_shadow_model_logits,
    shadow_models_in_mask,
    baseline_vulnerability_sm_count,
)

# TODO: choose the RMIA sample indices. The previous revision left both
# assignments empty (a syntax error), so the intended selection is only
# described here:
#   * x_indices: 50% of the dataset used.
#   * z_indices: 2500 samples, as per the article. 2500 compared to 1250 gave
#     ~0.68 better AUC, and 2500 compared to 6250 gave ~0.53% better AUC.
# NOTE(review): how the indices are drawn (which half of the dataset, random or
# mask-based, whether x and z may overlap) is not defined anywhere in this
# notebook - confirm before filling these in.
x_indices = None
z_indices = None

# Fail loudly with an actionable message instead of handing undefined index
# sets to the attack.
if x_indices is None or z_indices is None:
    raise NotImplementedError(
        "x_indices and z_indices must be defined before the RMIA baseline "
        "vulnerability scores can be computed"
    )

rmia_scores = rmia_vectorised(
    rescaled_target_logits,
    baseline_vulnerability_shadow_logits,
    baseline_vulnerability_shadow_inmask,
    x_indices=x_indices,
    z_indices=z_indices,
)

In [None]:
#----------------------------------#
#  Normalize Vulnerability Scores  #
#----------------------------------#
# Percentile argument handed to the normalisation helper.
# NOTE(review): the reason for choosing 2 is not recorded here - confirm and
# consider moving it into study.yaml.
percentile = 2
# Normalise the RMIA scores; `norm_scores` is later passed to the study objective.
norm_scores = percentile_score_normalization(rmia_scores, percentile)

In [None]:
#-------------------#
#  Prepare dataset  #
#-------------------#
# `loadDataset` and `processDataset` are imported in the notebook's top import
# cell so that all dependencies are visible in one place.
data_cfg = config['data']
trainset, testset = loadDataset(data_cfg)

# Split the dataset with the baseline target model's in-mask so that the study
# uses the same "in" indices as the baseline target model.
train_dataset, test_dataset, train_indices, test_indices = processDataset(
    data_cfg,
    trainset,
    testset,
    in_indices_mask=target_in_mask,
)

In [None]:
# ------------------------#
#        Run study        #
# ------------------------#
def run_optimization(config):
    """Create (or resume) the multi-objective Optuna study and run it.

    The study metadata is built and saved through ``sl`` (``src.save_load``,
    imported in the notebook's top import cell); the returned save path then
    holds the Optuna journal file (``journal.log``) and the exported trial
    table (``results.csv``).

    Besides ``config`` the objective reads these notebook-level names, which
    must have been defined by earlier cells: ``norm_scores``,
    ``train_dataset``, ``test_dataset``, ``shadow_logits`` and
    ``shadow_inmask``.

    Args:
        config: Parsed study configuration. Uses ``config['fbd_study']``
            (keys ``root``, ``study_name`` and ``trials``) and
            ``config['data']``.

    Returns:
        The ``optuna`` study object after ``optimize`` has finished.
    """
    study_cfg = config['fbd_study']

    study_metadata = sl.buildStudyMetadata(study_cfg, config['data'])
    _, save_path = sl.saveStudy(study_metadata, savePath=study_cfg['root'])

    journal_path = os.path.join(save_path, "journal.log")
    storage = JournalStorage(JournalFileBackend(file_path=journal_path))

    # Two directions make this a multi-objective study.
    study = optuna.create_study(
        study_name=study_cfg["study_name"],
        storage=storage,
        load_if_exists=True,
        directions=["maximize", "minimize"],
    )

    def objective(trial):
        """Bind the notebook-level inputs to the study handler's objective."""
        return sh.fbd_objective(trial, norm_scores, train_dataset, test_dataset,
                                config, shadow_logits, shadow_inmask)

    study.optimize(objective, n_trials=study_cfg["trials"])

    # Persist the trial table before reporting, so that a failure while
    # printing can never cost the results of a finished optimisation run.
    results_path = os.path.join(save_path, "results.csv")
    study.trials_dataframe().to_csv(results_path, index=False)
    print(f"📄 Results saved to {results_path}")

    # A multi-objective study has no single best trial: `best_value` and
    # `best_params` raise RuntimeError, so report the Pareto-optimal trials.
    pareto_trials = study.best_trials
    print(f"Study '{study_cfg['study_name']}' completed. "
          f"Pareto-optimal trials: {len(pareto_trials)}")
    for best in pareto_trials:
        print(f"  Trial {best.number}: values={best.values}, params={best.params}")

    return study

# Run the study only when a configuration was loaded; otherwise leave `study`
# as None so the visualisation cell can report that nothing was run.
study = run_optimization(config) if config is not None else None

In [None]:
if study is not None:
    # Jupyter only auto-renders the last top-level expression of a cell, so
    # figures created inside this `if` block must be shown explicitly.
    optuna.visualization.plot_pareto_front(study).show()
    optuna.visualization.plot_param_importances(study).show()
else:
    print("Study has not been run")