In [None]:
import optuna
import pandas as pd
import numpy as np
import yaml
import pickle
import torch
import os
import matplotlib.pyplot as plt

from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch import tensor, cat, save, load, optim, nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from src.models.resnet18_model import ResNet18

from optuna.storages import JournalStorage
from optuna.storages.journal import JournalFileBackend

import src.study_handler as sh
from src.utils import print_yaml, get_shadow_signals, percentile_score_normalization, rescale_logits
from LeakPro.leakpro.attacks.mia_attacks.rmia import rmia_vectorised, rmia_get_gtlprobs
from src.save_load import loadTargetSignals, loadShadowModelSignals

In [None]:
1. Set the device
2. Load the study.yaml, which contains the fbd_study key
3. Load the baseline audit signals, shadow model (sm) logits, sm inmask and metadata
4. Use the metadata to update the training part of fbd_study
5. Select the shadow models to be used
6. TODO: Calculate the vulnerability of the baseline model
7. Normalize the vulnerability score
8. Prepare the dataset using the baseline inmask to make sure we train on the baseline trainset
9. Initialize the study and run it
10. Visualize the study

In [None]:
# -------------#
#  Set device  #
# -------------#
# Use CUDA when it is available, otherwise fall back to the CPU. The chosen device
# is stored as the DEVICE attribute of the study handler module (`sh`).
device_name = "cuda" if torch.cuda.is_available() else "cpu"
sh.DEVICE = torch.device(device_name)
print(f"Using device: {sh.DEVICE}")

In [None]:
#---------------------#
#  Load Study Config  #
#---------------------#
# Start from None so that `config` is defined (and detectably empty) even when
# reading ./study.yaml fails; the cell that runs the study checks for None.
config = None
with open("./study.yaml") as config_file:
    # safe_load parses plain YAML data only
    config = yaml.safe_load(config_file)

print_yaml(f"Initial study config: {config}")

In [None]:
#----------------------#
#  Load Model Signals  #
#----------------------#
# Folder holding the saved signals, taken from the fbd_study section of the config
target_folder = config["fbd_study"]["target_folder"]

# Load target signals: logits, in-mask and the accompanying metadata
target_logits, target_inmask, metadata = loadTargetSignals(target_folder)
print(f"Target logits and inmask shapes: {target_logits.shape}, {target_inmask.shape}")

# Load processed shadow model signals from the same folder
sm_logits, sm_inmask = loadShadowModelSignals(target_folder)
# Labelled as shadow-model output so it cannot be mistaken for the target shapes above
print(f"Shadow model logits and inmask shapes: {sm_logits.shape}, {sm_inmask.shape}")

In [None]:
#-----------------------#
#  Update Study Config  #
#-----------------------#
print_yaml("Initial study config:")
print_yaml(config['fbd_study'])

# Copy the training hyperparameters recorded in the target metadata into the
# fbd_study section, one key at a time and in this order.
train_metadata = metadata["train"]
for hyperparameter in (
    "epochs",
    "batch_size",
    "momentum",
    "learning_rate",
    "t_max",
    "weight_decay",
):
    config['fbd_study'][hyperparameter] = train_metadata[hyperparameter]

print_yaml("\nUpdated study config:")
print_yaml(config['fbd_study'])

In [None]:
#------------------------#
#  Select Shadow Models  #
#------------------------#
# Number of shadow models to use, as configured in the fbd_study section
sm_count = config['fbd_study']['shadow_model_count']
print(f"Randomly selecting {sm_count} to be used for the study.")

# get_shadow_signals returns the logits and in-mask for the chosen shadow models
selected_signals = get_shadow_signals(sm_logits, sm_inmask, sm_count)
shadow_logits, shadow_inmask = selected_signals
print(f"Shape of selected_sm_logits: {shadow_logits.shape}")

In [None]:
#-------------------#
#  Prepare dataset  #
#-------------------#
from src.dataset_handler import processDataset, loadDataset

data_cfg = config['data']
trainset, testset = loadDataset(data_cfg)

# Split the data with the baseline target model's in-mask so that the same
# "in" indices are used as for the baseline target model
train_dataset, test_dataset, train_indices, test_indices = processDataset(
    data_cfg,
    trainset,
    testset,
    in_indices_mask=target_inmask,
)

# Labels are read from the dataset object that train_dataset wraps
full_dataset = train_dataset.dataset
targets = full_dataset.targets
print(f"Length of dataset targets/labels: {len(targets)}")
print(f"First 10 targets/labels: {targets[:10]}")

In [None]:
#--------------------------------------------#
#  Calculate Vulnerability score using RMIA  #
#--------------------------------------------#
# The selected shadow logits are three-dimensional; the middle axis indexes the shadow model
N, M, C = shadow_logits.shape

# GTL probabilities for every shadow model, collected one model at a time
shadow_gtl_probs_list = []
for model_idx in range(M):
    per_model_logits = shadow_logits[:, model_idx, :]  # shape (N, C)
    shadow_gtl_probs_list.append(rmia_get_gtlprobs(per_model_logits, targets))
    print(f"{len(shadow_gtl_probs_list)} shadow gtl probs calculated")

# Place the per-model results side by side; expected shape (N, M)
shadow_gtl_probs = np.stack(shadow_gtl_probs_list, axis=1)

# GTL probabilities for the target model's logits
target_gtl_probs = rmia_get_gtlprobs(target_logits, targets)
print(f"Target gtl_probs: {target_gtl_probs[:10]}, shape: {target_gtl_probs.shape}")

# RMIA scores from the target and shadow probabilities, using the online variant
rmia_scores = rmia_vectorised(
    target_gtl_probs,
    shadow_gtl_probs,
    shadow_inmask,
    online=True,
    use_gpu_if_available=True,
)

In [None]:
#---------------------------------------------#
#  Calculate Rescaled shadow logits for LiRA  #
#---------------------------------------------#
# One column per shadow model, filled in below
rescaled_shadow_logits = np.zeros((N, M))

for model_idx in range(M):
    per_model_logits = shadow_logits[:, model_idx, :]
    rescaled_shadow_logits[:, model_idx] = rescale_logits(per_model_logits, targets)

print(f"rescaled_shadow_logits shape: {rescaled_shadow_logits.shape}")

In [None]:
# Visualize the RMIA score distribution
# Explicit figure/axes plus a title and axis labels, so the plot can be read on its own.
fig, ax = plt.subplots()
ax.hist(rmia_scores, bins=60)
ax.set(title="RMIA score distribution", xlabel="RMIA score", ylabel="Count")
plt.show()

In [None]:
# ------------------------#
#        Run study        #
# ------------------------#
import src.save_load as sl

def run_optimization(config):
    """Create (or resume) the two-objective FBD Optuna study and run its trials.

    The study is stored in a journal file inside the directory returned by
    ``sl.saveStudy``. After optimization the trials are exported to
    ``results.csv`` in the same directory and the Pareto-optimal trials are printed.

    The objective also reads these notebook-level variables: ``rmia_scores``,
    ``train_dataset``, ``test_dataset``, ``rescaled_shadow_logits``,
    ``shadow_inmask`` and ``target_inmask``.

    Args:
        config: Parsed study configuration. Must contain the ``fbd_study``
            section (``root``, ``study_name``, ``trials``) and the ``data`` section.

    Returns:
        The ``optuna.Study`` after ``n_trials`` trials have been run.
    """
    study_cfg = config['fbd_study']

    metadata = sl.buildStudyMetadata(study_cfg, config['data'])
    _, save_path = sl.saveStudy(metadata, savePath=study_cfg['root'])

    # Journal file storage keeps the trial history on disk next to the study metadata
    journal_path = os.path.join(save_path, "journal.log")
    storage = JournalStorage(JournalFileBackend(file_path=journal_path))

    # Two directions make this a multi-objective study (first maximized, second minimized)
    study = optuna.create_study(
        study_name=study_cfg["study_name"],
        storage=storage,
        load_if_exists=True,
        directions=["maximize", "minimize"],
    )

    def objective(trial):
        # Bind the notebook-level inputs so Optuna only has to pass the trial
        return sh.fbd_objective(trial, rmia_scores, train_dataset, test_dataset,
                                config, rescaled_shadow_logits, shadow_inmask, target_inmask)

    study.optimize(objective, n_trials=study_cfg["trials"])

    # Export first so the results are on disk before any reporting happens
    results_path = os.path.join(save_path, "results.csv")
    study.trials_dataframe().to_csv(results_path, index=False)

    # A multi-objective study has no single best trial: best_params raises
    # RuntimeError here, so report the Pareto-optimal trials instead.
    pareto_trials = study.best_trials
    print(f"Study '{study_cfg['study_name']}' completed. "
          f"{len(pareto_trials)} Pareto-optimal trial(s):")
    for pareto_trial in pareto_trials:
        print(f"  Trial {pareto_trial.number}: values: {pareto_trial.values}, "
              f"params: {pareto_trial.params}")

    print(f"📄 Results saved to {results_path}")

    return study

# Define `study` up front so the later `study is not None` check in the
# visualization cell is well-defined even if the optimization does not run.
study = None
# `config` is still None when ./study.yaml was not loaded (or parsed to nothing).
if config is not None:
    study = run_optimization(config)

In [None]:
# When True, the study is re-read from a saved journal file and replaces any
# `study` object produced by the optimization cell.
load_from_journal = True

if load_from_journal:
    journal_path = "study/cifar10-resnet-fbd-lira-aa5cec7773/journal.log"

    # Re-open the journal-backed storage
    storage = JournalStorage(JournalFileBackend(journal_path))

    # The study name must match the one used when the study was created
    study = optuna.load_study(study_name="cifar10-resnet-fbd-lira", storage=storage)
    print("study loaded: cifar10-resnet-fbd-lira")

if study is None:
    print("Study has not been run")
else:
    print("visualizing study")

    # Pareto front of the study's objectives
    fig1 = optuna.visualization.plot_pareto_front(study)
    fig1.show()

    # Hyperparameter importances
    fig2 = optuna.visualization.plot_param_importances(study)
    fig2.show()