In [None]:
import optuna
import pandas as pd
import numpy as np
import yaml
import pickle
import torch
import os
import matplotlib.pyplot as plt

import src.study_handler as sh
from src.utils import print_yaml, get_shadow_signals, calculate_tauc
from LeakPro.leakpro.attacks.mia_attacks.rmia import rmia_vectorised, rmia_get_gtlprobs
from src.save_load import loadTargetSignals, loadShadowModelSignals
from src.models.resnet18_model import ResNet18
from src.optimize_fbd_model import parallell_optimization
from src.dataclasses import FbdArgs
from src.dataset_handler import processDataset, loadDataset

In [None]:
#---------------------#
#  Load Study Config  #
#---------------------#
# Parse the study configuration from the YAML file next to the notebook and echo it.
# `config` is pre-set to None so the name exists even if opening/parsing raises.
config = None
# YAML is decoded as UTF-8 explicitly: without `encoding=` Python falls back to the
# platform locale, which can garble non-ASCII values on some systems.
with open("./study_fbd.yaml", encoding="utf-8") as file:
    config = yaml.safe_load(file)
print_yaml(config)

In [None]:
#----------------------#
#  Load Model Signals  #
#----------------------#
# Folder holding the saved target-model artefacts, taken from the study config.
target_folder = config["fbd_study"]["target_folder"]

# Target-model signals: the loader returns six values, unpacked in this order.
(
    target_logits,
    target_inmask,
    resc_logits,
    target_gtl_probs,
    metadata,
    metadata_pkl,
) = loadTargetSignals(target_folder)

# Flags handed to the shadow-model loader: only the ground-truth-label
# probabilities and the in-masks are requested here.
what_to_load = dict(
    logits=False,
    resc_logits=False,
    gtl_probs=True,
    in_mask=True,
    metadata_pkl=False,
)

# Shadow-model signals. `missing_indices` will contain a list of all indices not
# found up to the set amount of shadow models.
# NOTE(review): entries whose flag is False are presumably returned empty/None — confirm in loadShadowModelSignals.
(
    shadow_logits,
    sm_resc_logits,
    shadow_gtl_probs,
    shadow_inmask,
    sm_metadata_pkl,
    missing_indices,
) = loadShadowModelSignals(target_folder, what_to_load)

In [None]:
#-----------------------#
#  Update Study Config  #
#-----------------------#
# Show the study section as loaded from disk, before any overrides.
print_yaml("------------ Initial study config: ------------")
fbd_cfg = config['fbd_study']
print_yaml(fbd_cfg)
train_metadata = metadata["train"]

# Copy the training hyper-parameters recorded in the target model's metadata into
# the study config, one key at a time and in a fixed order.
for _hp_key in (
    "epochs",
    "batch_size",
    "momentum",
    "learning_rate",
    "t_max",
    "weight_decay",
    "model",
    "optimizer",
    "drop_rate",
):
    fbd_cfg[_hp_key] = train_metadata[_hp_key]

# Study name is "<dataset>-<model>-fbd".
fbd_cfg["study_name"] = "{}-{}-fbd".format(config["data"]["dataset"], train_metadata["model"])

# Show the study section again now that the overrides are in place.
print_yaml("\n------------ Updated study config: ------------")
print_yaml(fbd_cfg)

In [None]:
#------------------------#
#  Select Shadow Models  #
#------------------------#
# Optional step (off by default): keep only `shadow_model_count` shadow models.
# NOTE(review): the loader cell does not request raw shadow logits and this step does
# not touch `shadow_gtl_probs`, so enabling it may leave the in-mask and the
# probabilities out of sync — confirm against get_shadow_signals before switching on.
sel_sm = False
if sel_sm:
    sm_count = config['fbd_study']['shadow_model_count']
    print(f"Randomly selecting {sm_count} to be used for the study.")
    shadow_logits, shadow_inmask = get_shadow_signals(
        shadow_logits,
        shadow_inmask,
        sm_count,
    )
    print(f"Shape of selected_sm_logits: {shadow_logits.shape}")

In [None]:
#-------------------#
#  Prepare dataset  #
#-------------------#
data_cfg = config['data']
trainset, testset, full_dataset = loadDataset(data_cfg)

# Split using the target model's in-mask so the study trains on the same
# "in" indices as the baseline target model.
train_dataset, test_dataset, train_indices, test_indices = processDataset(
    data_cfg,
    trainset,
    testset,
    in_indices_mask=target_inmask,
    dataset=full_dataset,
)

# Labels are read from the dataset object wrapped by the training split.
full_dataset = train_dataset.dataset
labels = full_dataset.targets
print(f"Length of dataset targets/labels: {len(labels)}")
print(f"First 10 targets/labels: {labels[:10]}")

In [None]:
#--------------------------------------------#
#  Calculate Vulnerability score using RMIA  #
#--------------------------------------------#
# Optionally recompute the shadow models' ground-truth-label (GTL) probabilities
# from their logits; by default the values loaded from disk are used as-is.
calc_sm_gtl = False
if calc_sm_gtl:
    N, M, C = shadow_logits.shape
    shadow_gtl_probs_list = []

    for m in range(M):
        # Slice out model m's logits, shape (N, C), and convert them to GTL probabilities.
        shadow_gtl_probs_list.append(rmia_get_gtlprobs(shadow_logits[:, m, :], labels))
        print(f"{m + 1} shadow gtl probs calculated")

    # One column per shadow model -> shape (N, M).
    shadow_gtl_probs = np.stack(shadow_gtl_probs_list, axis=1)
print(f"Shadow Model gtl probs shape: {shadow_gtl_probs.shape}")

# Same optional recomputation for the target model.
calc_targ_gtl = False
if calc_targ_gtl:
    target_gtl_probs = rmia_get_gtlprobs(target_logits, labels)
print(f"Target gtl_probs: {target_gtl_probs[:10]}, shape: {target_gtl_probs.shape}")

# Calculate the reference RMIA score.
rmia_scores = rmia_vectorised(
    target_gtl_probs,
    shadow_gtl_probs,
    shadow_inmask,
    online=True,
    use_gpu_if_available=True,
)

#----------------------------#
#  Calculate Reference TAUC  #
#----------------------------#
# Reference tail AUC at fpr=0.1, later handed to the study via FbdArgs.
tauc_ref = calculate_tauc(rmia_scores, target_inmask, fpr=0.1)
print(f"tauc_ref@(0.1): {tauc_ref}")

In [None]:
# ------------------------#
#        Run study        #
# ------------------------#

# Bundle the reference signals computed in the earlier cells for the optimisation.
fbd_args = FbdArgs(**{
    "rmia_scores": rmia_scores,
    "train_dataset": train_dataset,
    "test_dataset": test_dataset,
    "shadow_gtl_probs": shadow_gtl_probs,
    "shadow_inmask": shadow_inmask,
    "target_inmask": target_inmask,
    "tauc_ref": tauc_ref,
})

# GPU ids 0..6 are passed to the optimisation.
# NOTE(review): hard-coded for a specific machine — adjust to the GPUs actually available.
gpu_ids = list(range(7))

# Only launch when a config was loaded; `study` is left unset otherwise.
if config is not None:
    study = parallell_optimization(config, labels, fbd_args, gpu_ids, study_hash=None)

In [None]:
# ---------------------------#
#  Load and visualise study  #
# ---------------------------#
# When True the study is read back from the SQLite file under the study root;
# when False the `study` object left by the run cell (if any) is used instead.
load_from_db = True
if load_from_db:
    study_cfg = config['fbd_study']
    db_path = os.path.join(study_cfg['root'], "fbd_study.db")
    storage = f"sqlite:///{db_path}"
    # NOTE(review): the study name below is hard-coded to one specific run — update it
    # (or derive it from the config) when loading a different study.
    study = optuna.load_study(study_name="cifar10-resnet-fbd-815409b641", storage=storage)
else:
    # Fall back to a study created earlier in this kernel; yields None instead of a
    # NameError when the run cell was never executed, so the else-branch below is reachable.
    study = globals().get("study")

if study is not None:
    print("visualizing study")
    fig1 = optuna.visualization.plot_pareto_front(study)
    # "\u03c4" is the Greek letter tau; the previous literal was its UTF-8 bytes
    # mis-decoded as cp1252 and rendered as garbage in the axis title.
    fig1.update_layout(xaxis_title="\u03c4@0.1", yaxis_title="Accuracy")
    fig1.show()

    fig2 = optuna.visualization.plot_param_importances(study)
    fig2.show()
else:
    print("Study has not been run")