In [None]:
import multiprocessing
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import torch
import yaml
from optuna.storages import JournalStorage
from optuna.storages.journal import JournalFileBackend
from torch import tensor, cat, save, load, optim, nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10

import src.study_handler as sh
from src.utils import print_yaml, get_shadow_signals, calculate_tauc
from LeakPro.leakpro.attacks.mia_attacks.rmia import rmia_vectorised, rmia_get_gtlprobs
from src.save_load import loadTargetSignals, loadShadowModelSignals
from src.models.resnet18_model import ResNet18
from src.optimize_fbd_model import parallell_optimization, FbdArgs
from src.dataset_handler import processDataset, loadDataset

In [None]:
# -------------#
#  Set device  #
# -------------#
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
# The chosen device is stored on the study-handler module (sh.DEVICE).
device_kind = "cuda" if torch.cuda.is_available() else "cpu"
sh.DEVICE = torch.device(device_kind)
print(f"Using device: {sh.DEVICE}")

In [None]:
#---------------------#
#  Load Study Config  #
#---------------------#
# Parse the study configuration from ./study.yaml (relative to the notebook's
# working directory). `config` stays None if the YAML document is empty.
config = None
# Explicit encoding so parsing does not depend on the platform's locale default.
with open("./study.yaml", encoding="utf-8") as file:
    config = yaml.safe_load(file)
print_yaml(f"Initial study config: {config}")

In [None]:
#----------------------#
#  Load Model Signals  #
#----------------------#
# Both the target and the shadow-model signals are read from the same folder.
target_folder = config["fbd_study"]["target_folder"]

# Load target signals (logits, in-mask and the stored metadata)
target_logits, target_inmask, metadata = loadTargetSignals(target_folder)
print(f"Target logits and inmask shapes: {target_logits.shape}, {target_inmask.shape}")

# Load processed shadow model signals
sm_logits, sm_inmask = loadShadowModelSignals(target_folder)
# This line reports the shadow-model arrays, so it is labelled "Shadow"
# (it previously repeated the "Target" label from the print above).
print(f"Shadow logits and inmask shapes: {sm_logits.shape}, {sm_inmask.shape}")

In [None]:
#-----------------------#
#  Update Study Config  #
#-----------------------#
# Copy the target model's training hyper-parameters into the study config so
# the study reuses the values recorded in the target's metadata.
fbd_study_cfg = config['fbd_study']

print_yaml("Initial study config:")
print_yaml(fbd_study_cfg)
train_metadata = metadata["train"]

# Keys are copied one-to-one, in this order, from the training metadata.
for hyper_key in ("epochs", "batch_size", "momentum", "learning_rate", "t_max", "weight_decay"):
    fbd_study_cfg[hyper_key] = train_metadata[hyper_key]

print_yaml("\nUpdated study config:")
print_yaml(fbd_study_cfg)

In [None]:
#------------------------#
#  Select Shadow Models  #
#------------------------#
# Number of shadow models to keep, as configured for the study.
sm_count = config['fbd_study']['shadow_model_count']
selection_msg = f"Randomly selecting {sm_count} to be used for the study."
print(selection_msg)

# NOTE(review): the message calls the selection random and no seed is set in
# this notebook - confirm whether get_shadow_signals is seeded/reproducible.
shadow_logits, shadow_inmask = get_shadow_signals(sm_logits, sm_inmask, sm_count)
print(f"Shape of selected_sm_logits: {shadow_logits.shape}")

In [None]:
#-------------------#
#  Prepare dataset  #
#-------------------#
data_cfg = config['data']
trainset, testset = loadDataset(data_cfg)

# Split the dataset with the target model's in-mask so the study uses the same
# "in" indices as the baseline target model.
train_dataset, test_dataset, train_indices, test_indices = processDataset(
    data_cfg, trainset, testset, in_indices_mask=target_inmask
)

# Retrieve the targets from the dataset object wrapped by train_dataset
full_dataset = train_dataset.dataset
labels = full_dataset.targets
print(f"Length of dataset targets/labels: {len(labels)}")
print(f"First 10 targets/labels: {labels[:10]}")

In [None]:
#--------------------------------------------#
#  Calculate Vulnerability score using RMIA  #
#--------------------------------------------#
# GTL probabilities for the shadow models, one model at a time.
# shadow_logits is unpacked as (N, M, C); axis 1 is iterated as the model axis.
N, M, C = shadow_logits.shape
shadow_gtl_probs_list = []

for done, m in enumerate(range(M), start=1):
    per_model_logits = shadow_logits[:, m, :]  # shape (N, C)
    shadow_gtl_probs_list.append(rmia_get_gtlprobs(per_model_logits, labels))
    print(f"{done} shadow gtl probs calculated")

# Stack the per-model results along axis 1.
shadow_gtl_probs = np.stack(shadow_gtl_probs_list, axis=1)  # shape = (N, M)

# GTL probabilities for the target model's logits
target_gtl_probs = rmia_get_gtlprobs(target_logits, labels)
print(f"Target gtl_probs: {target_gtl_probs[:10]}, shape: {target_gtl_probs.shape}")

# RMIA scores of the target, computed against the selected shadow models.
rmia_scores = rmia_vectorised(
    target_gtl_probs,
    shadow_gtl_probs,
    shadow_inmask,
    online=True,
    use_gpu_if_available=True,
)

#----------------------------#
#  Calculate Reference TAUC  #
#----------------------------#
# Reference tail AUC at fpr=0.1; passed on to the study as `tauc_ref`.
tauc_ref = calculate_tauc(rmia_scores, target_inmask, fpr=0.1)
print(f"tauc_ref@(0.1): {tauc_ref}")

In [None]:
# Visualize the RMIA score distribution.
# Explicit figure/axes interface, with a title and axis labels so the figure
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.hist(rmia_scores, bins=60)
ax.set(title="RMIA score distribution", xlabel="RMIA score", ylabel="Count")
plt.show()

In [None]:
# ------------------------#
#        Run study        #
# ------------------------#

# Specify which gpus to be used
gpu_ids = [0, 1]

# Everything computed in the cells above that is handed over to the study.
fbd_kwargs = {
    "rmia_scores": rmia_scores,
    "train_dataset": train_dataset,
    "test_dataset": test_dataset,
    "shadow_gtl_probs": shadow_gtl_probs,
    "shadow_inmask": shadow_inmask,
    "target_inmask": target_inmask,
    "tauc_ref": tauc_ref,
}
fbd_args = FbdArgs(**fbd_kwargs)

# `study` is None when no config was loaded; otherwise run the optimisation.
study = parallell_optimization(config, labels, gpu_ids, fbd_args) if config is not None else None

In [None]:
# ------------------------#
#        Run study      COPY      #
# ------------------------#
import src.save_load as sl

def run_optimization(config, gpu_id, trials, save_path):
    """Run a share of the Optuna study and dump the study's trials to CSV.

    Used as the ``target`` of the worker processes started by
    ``parallell_optimization`` in this cell. Besides its arguments, the
    objective reads these notebook globals: ``rmia_scores``, ``train_dataset``,
    ``test_dataset``, ``shadow_gtl_probs``, ``shadow_inmask``,
    ``target_inmask`` and ``tauc_ref``.

    Args:
        config: Study configuration; ``config['fbd_study']`` must provide
            ``root`` and ``study_name``.
        gpu_id: GPU id, exported as ``CUDA_VISIBLE_DEVICES`` and forwarded to
            ``sh.fbd_objective``.
        trials: Number of trials this call runs.
        save_path: Directory that receives ``results_gpu_<gpu_id>.csv``; also
            forwarded to ``sh.fbd_objective``.
    """
    # NOTE(review): this only takes effect if CUDA has not been initialised in
    # this process yet, and it leaves a single visible device while `gpu_id`
    # is still passed to the objective - confirm how fbd_objective uses it.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    study_cfg = config['fbd_study']
    study_name = study_cfg["study_name"]

    # Parallel storage setup: every worker opens the same SQLite file.
    # NOTE(review): several processes write to this SQLite database at once;
    # presumably fine at this scale - confirm against Optuna's storage guidance.
    db_path = os.path.join(study_cfg['root'], "fbd_study.db")
    storage = f"sqlite:///{db_path}"

    # load_if_exists lets every worker attach to the study the first one created.
    study = optuna.create_study(
        study_name=study_name,
        storage=storage,
        load_if_exists=True,
        directions=["minimize", "maximize"],
    )

    def objective(trial):
        """Bind the notebook-level inputs to the study-handler objective."""
        return sh.fbd_objective(trial, rmia_scores, train_dataset, test_dataset,
                                config, shadow_gtl_probs, shadow_inmask,
                                target_inmask, tauc_ref, gpu_id, save_path)

    study.optimize(objective, n_trials=trials)

    print(f"Study '{study_name}' completed on GPU {gpu_id}.")
    # The dataframe comes from the shared study, so it may also contain trials
    # recorded by other workers.
    results_path = os.path.join(save_path, f"results_gpu_{gpu_id}.csv")
    study.trials_dataframe().to_csv(results_path, index=False)
    print(f"📄 Results saved to {results_path}")

def parallell_optimization(config, gpu_ids=None):
    """Run the study with one worker process per GPU and return the study.

    Note: this definition rebinds the ``parallell_optimization`` name imported
    from ``src.optimize_fbd_model``, which the earlier "Run study" cell calls
    with four arguments; re-running that cell after this one will fail.

    Reads the notebook global ``labels`` when saving the study metadata.

    Args:
        config: Study configuration; ``config['fbd_study']`` must provide
            ``root``, ``study_name`` and ``trials``; ``config['data']`` is
            passed to the metadata builder.
        gpu_ids: GPU ids to spread the trials over. Defaults to ``[0, 1]``.

    Returns:
        The Optuna study loaded back from the shared SQLite storage.

    Raises:
        ValueError: If ``gpu_ids`` is empty.
        AssertionError: If the trial count cannot be split evenly.
    """
    study_cfg = config['fbd_study']

    gpu_ids = [0, 1] if gpu_ids is None else list(gpu_ids)
    if not gpu_ids:
        raise ValueError("gpu_ids must contain at least one GPU id")

    # Validate the split before anything is written to disk.
    assert study_cfg["trials"] % len(gpu_ids) == 0, f"amount of trials {study_cfg['trials']} cannot be equally split among {len(gpu_ids)}"
    trials = study_cfg["trials"] // len(gpu_ids)

    study_metadata = sl.buildStudyMetadata(study_cfg, config['data'])
    _, save_path = sl.saveStudy(study_metadata, savePath=study_cfg['root'], labels=labels)

    # One worker per GPU, each running an equal share of the trials.
    processes = [
        multiprocessing.Process(target=run_optimization, args=(config, gpu_id, trials, save_path))
        for gpu_id in gpu_ids
    ]
    for p in processes:
        p.start()
    for p in processes:
        p.join()

    # Report workers that died instead of ignoring them; results from the
    # remaining workers are still loaded below.
    for gpu_id, p in zip(gpu_ids, processes):
        if p.exitcode != 0:
            print(f"Warning: worker for GPU {gpu_id} exited with code {p.exitcode}")

    db_path = os.path.join(study_cfg['root'], "fbd_study.db")
    storage = f"sqlite:///{db_path}"
    return optuna.load_study(study_name=study_cfg["study_name"], storage=storage)
        
# NOTE(review): this calls the one-argument parallell_optimization defined in
# this cell and launches the optimisation again, replacing any `study` bound by
# an earlier cell - confirm that running both cells is intended.
study = parallell_optimization(config) if config is not None else None

In [None]:
# Set to True to visualise a previously saved study instead of the one
# produced by the cells above.
load_from_journal = False

if load_from_journal:
    journal_path = "study/cifar10-resnet-fbd-lira-aa5cec7773/journal.log"
    # Must match the name the study was created with.
    journal_study_name = "cifar10-resnet-fbd"

    # Re-open storage
    storage = JournalStorage(JournalFileBackend(journal_path))

    # Load the study by name
    study = optuna.load_study(
        study_name=journal_study_name,
        storage=storage
    )
    print(f"study loaded: {journal_study_name}")

# `study` comes either from the journal above or from an earlier "Run study" cell.
if study is not None:
    print("visualizing study")
    fig1 = optuna.visualization.plot_pareto_front(study)
    # Axis title uses the Greek letter tau (it was previously mis-encoded).
    fig1.update_layout(xaxis_title="τ@0.1", yaxis_title="Accuracy")
    fig1.show()

    fig2 = optuna.visualization.plot_param_importances(study)
    fig2.show()
else:
    print("Study has not been run")