In [1]:
import sys
# Make the parent directory importable; the `conf`, `util` and `Applications` imports
# further down presumably resolve from there — confirm against the repository layout.
sys.path.append('../')
# Only relevant if you are using CUDA devices: restrict the visible devices to index "1".
# The variable is set here, before `import tensorflow` below.
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1"
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

from tensorflow.keras.backend import clear_session
from tensorflow.keras.utils import to_categorical
from conf import BASE_DIR
from Applications.Poisoning.gen_configs import main as gen_configs
from util import LoggedGradientTape, ModelTmpState, CSVLogger, measure_time, GradientLoggingContext
from Applications.Poisoning.unlearn.core import approx_retraining


# Root directory for every model of the poisoning experiments.
model_folder = BASE_DIR/'models'/'poisoning'

# The three demo configuration files sit side by side in one directory.
demo_conf_dir = BASE_DIR/'Applications'/'Poisoning'/'configs'/'demo'
train_conf, poison_conf, unlearn_conf = (
    demo_conf_dir/conf_name
    for conf_name in ('train.json', 'poison.json', 'unlearn.json')
)

# Presumably writes the per-experiment config files below `model_folder`
# (they are read back right after) — confirm in gen_configs.
gen_configs(model_folder, train_conf, poison_conf, unlearn_conf)

from Applications.Poisoning.poison.poison_models import train_poisoned
from Applications.Poisoning.configs.demo.config import Config

# Folder layout of the demo experiment: one poisoned run plus a clean reference run.
poisoned_folder = model_folder/'budget-10000'/'seed-42'
clean_folder = model_folder/'clean'
first_unlearn_folder = poisoned_folder/'first-order'
second_unlearn_folder = poisoned_folder/'second-order'


# Poisoning and training settings stored next to the poisoned run (loaded in this order).
poison_kwargs, train_kwargs = (
    Config.from_json(poisoned_folder/config_name)
    for config_name in ('poison_config.json', 'train_config.json')
)

# model that has been trained on poisoned data
poisoned_weights = poisoned_folder/'best_model.hdf5'
# model weights after unlearning (first-order)
fo_repaired_weights = poisoned_folder/'fo_repaired.hdf5'
# model weights after unlearning (second-order)
so_repaired_weights = poisoned_folder/'so_repaired.hdf5'
# cached injector for reproducibility
injector_path = poisoned_folder/'injector.pkl'
# path to reference results on clean dataset
clean_results = clean_folder/'train_results.json'

from Applications.Poisoning.unlearn.first_order import run_experiment as fo_experiment
from Applications.Poisoning.unlearn.second_order import run_experiment as so_experiment
from Applications.Poisoning.unlearn.common import get_delta_idx, batch_pred, plot_cm, evaluate_model_diff, unlearn_update
from Applications.Poisoning.unlearn.core import get_gradients_diff, get_inv_hvp_lissa


# Unlearning settings for the first-order and second-order variants (loaded in that order).
fo_unlearn_kwargs, so_unlearn_kwargs = (
    Config.from_json(poisoned_folder/order_dir/'unlearn_config.json')
    for order_dir in ('first-order', 'second-order')
)

from Applications.Poisoning.train import main as train
from Applications.Poisoning.evaluate import evaluate

# Candidate datasets and model names for the experiments.
# NOTE(review): neither list is referenced in the cells shown here; the following
# cells hard-code 'Cifar10' and 'VGG16' instead.
# Alternative selection: datasets = ['Cifar10', 'Cifar100', 'SVHN', 'FashionMnist']
datasets = ['Cifar100', 'Cifar10', 'SVHN']
# Alternative selection: modelnames = ['extractfeatures_VGG16', 'classifier_VGG16']
modelnames = ['VGG16']
# Alternative selection: modelnames = ['VGG16', 'RESNET50']

2024-07-13 10:57:56.252731: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/cuda-12.2/lib64
2024-07-13 10:57:56.252757: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
from Applications.Poisoning.model import get_VGG16_CIFAR100, get_VGG16_CIFAR10, get_VGG16_SVHN
from Applications.Poisoning.dataset import Cifar10, SVHN, FashionMnist, Cifar100
from Applications.Poisoning.evaluate import evaluate
from Applications.Poisoning.poison.injector import LabelflipInjector
from os.path import dirname as parent
from util import UnlearningResult, reduce_dataset
import json
from sklearn.metrics import accuracy_score 
import numpy as np


def model_init():
    """Return the model builder used by this notebook.

    Note that the builder itself (``get_VGG16_CIFAR10``) is returned, not a
    model instance: callers have to invoke the result to obtain a model.
    """
    builder = get_VGG16_CIFAR10
    return builder
def evaluate(model, data, weights_path=None):
    """Return the accuracy of ``model`` on the test split of ``data``.

    This definition replaces the ``evaluate`` imported from
    ``Applications.Poisoning.evaluate`` earlier in the notebook.

    Parameters
    ----------
    model
        Object exposing ``load_weights(path)`` and ``predict(x=...)``.
    data
        Three-element sequence whose second element is ``(x_test, y_test)``;
        the other two splits are ignored. ``y_test`` and the predictions are
        reduced with ``argmax`` along axis 1.
    weights_path
        Checkpoint to load into ``model`` before predicting. When ``None``
        (the default) the model is evaluated with the weights it already
        holds, so callers that pass an already-prepared model do not need a
        checkpoint file.

    Returns
    -------
    The value returned by ``accuracy_score`` for the arg-max labels.
    """
    _, (x_test, y_test), _ = data
    if weights_path is not None:
        model.load_weights(weights_path)
    predicted_labels = np.argmax(model.predict(x=x_test), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    return accuracy_score(true_labels, predicted_labels)

def get_flatten_index(model):
    """Return the position of the first ``Flatten`` layer in ``model.layers``.

    The result is an index into ``model.layers``; ``-1`` is returned when the
    model contains no ``tf.keras.layers.Flatten`` layer.
    """
    flatten_positions = (
        position
        for position, layer in enumerate(model.layers)
        if isinstance(layer, tf.keras.layers.Flatten)
    )
    return next(flatten_positions, -1)

def update_weights_after_flatten(model, d_theta, tau, flatten_weights_start_index):
    """Apply ``w - tau * delta`` to the weights at or after a start position.

    Parameters
    ----------
    model
        Object whose ``trainable_weights`` are iterated in order.
    d_theta
        Sequence of updates, consumed front to back: the first entry goes to
        the first weight at or after the start position, and so on.
    tau
        Scalar factor applied to every update.
    flatten_weights_start_index
        Position in ``model.trainable_weights`` (not a layer index) from which
        updates are applied.

    Returns
    -------
    list
        One entry per trainable weight: the updated value where an update was
        available, the untouched weight otherwise. The model is not modified.
    """
    result = []
    consumed = 0
    for position, weight in enumerate(model.trainable_weights):
        # Weights before the start position, or past the end of d_theta, pass through.
        if position < flatten_weights_start_index or consumed >= len(d_theta):
            result.append(weight)
            continue
        result.append(weight - tau * d_theta[consumed])
        consumed += 1
    return result

def update_weights_before_flatten(model, d_theta, tau, flatten_weights_start_index):
    """Apply ``w - tau * delta`` to the weights before a start position.

    Parameters
    ----------
    model
        Object whose ``trainable_weights`` are iterated in order.
    d_theta
        Sequence of updates, consumed front to back: the first entry goes to
        the first trainable weight, and so on.
    tau
        Scalar factor applied to every update.
    flatten_weights_start_index
        Position in ``model.trainable_weights`` (not a layer index); only
        weights strictly before it are updated.

    Returns
    -------
    list
        One entry per trainable weight: the updated value where an update was
        applied, the untouched weight otherwise. The model is not modified.
    """
    updated_weights = []
    d_theta_index = 0

    for i, w in enumerate(model.trainable_weights):
        if i < flatten_weights_start_index and d_theta_index < len(d_theta):
            updated_weights.append(w - tau * d_theta[d_theta_index])
            d_theta_index += 1
        else:
            updated_weights.append(w)
    # The computed list used to be dropped (the function returned None).
    return updated_weights

# Load the dataset once; later cells unpack it as (train, test, validation) pairs.
current_data = Cifar10.load()
# model_init() returns the builder function itself, so `model` is a callable;
# the following cells obtain a model instance with `model()`.
model = model_init()
dataset = 'Cifar10'
modelname = 'VGG16'
# Rebinds `model_folder` (the poisoning root in the first cell) to the
# first-order unlearning directory of the poisoned run.
model_folder = poisoned_folder/'first-order'
# Only the first entry is selected by the walkthrough cells below.
update_targets = ['classifier', 'feature_extractor', 'both']


In [3]:
# Test accuracy of a checkpoint stored in the first-order folder (a repaired model,
# judging by its file name).
# NOTE(review): this file name starts with an underscore, unlike the
# dataset+"_"+modelname+'_repaired_model.hdf5' name built in a later cell — confirm
# which one is intended.
weights_path = model_folder/'_Cifar10_VGG16_repaired_model.hdf5'
# `model` is a builder, so `model()` creates the instance the weights are loaded into.
evaluate(model=model(), data=current_data, weights_path=weights_path)

2024-07-13 10:59:20.913036: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2024-07-13 10:59:20.913093: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: conite-HP
2024-07-13 10:59:20.913105: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: conite-HP
2024-07-13 10:59:20.913313: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 555.42.2
2024-07-13 10:59:20.913352: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 555.42.2
2024-07-13 10:59:20.913362: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 555.42.2
2024-07-13 10:59:20.913833: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical 

Loading weights from None


0.8448

## Unlearning

Preparing data

In [4]:
# The steps below are written out inline; the equivalent packaged call is kept for reference:
# fo_experiment(poisoned_folder/'first-order', train_kwargs, poison_kwargs, fo_unlearn_kwargs, dataset=dataset, modelname=modelname, update_target=update_target)
# inject label flips
(x_train, y_train), (x_test, y_test), (x_val, y_val) = current_data
verbose = 1
reduction = 1
order = 1
# Alias, not a copy: later item assignments on `unlearn_kwargs` also change `fo_unlearn_kwargs`.
unlearn_kwargs = fo_unlearn_kwargs
# Keep the labels as they were before injection.
y_train_orig = y_train.copy()
# `model_folder` is the first-order folder at this point, so the cached injector is looked up there.
injector_path = os.path.join(model_folder, 'injector.pkl')
if os.path.exists(injector_path):
    injector = LabelflipInjector.from_pickle(injector_path)
else:
    # No cached injector: build one from the poison settings, rooted at the parent folder.
    injector = LabelflipInjector(parent(model_folder), **poison_kwargs)
x_train, y_train = injector.inject(x_train, y_train)
# Same test/validation splits, training split replaced by the injected one.
data = ((x_train, y_train), current_data[1], current_data[2])

# prepare unlearning data
(x_train,  y_train), _, _ = data
# reduce_dataset also returns the kept indices and the delta indices for the reduced set;
# presumably it subsamples the training data by `reduction` — confirm in util.
x_train, y_train, idx_reduced, delta_idx = reduce_dataset(
    x_train, y_train, reduction=reduction, delta_idx=injector.injected_idx)
if verbose:
    print(f">> reduction={reduction}, new train size: {x_train.shape[0]}")
# Apply the same index selection to the pre-injection labels.
y_train_orig = y_train_orig[idx_reduced]
data = ((x_train, y_train), data[1], data[2])

>> reduction=1, new train size: 50000


Set recording file names and logs

In [5]:
# File names of the poisoned and the repaired checkpoint for this dataset/model pair.
poisoned_filename = dataset+"_"+modelname+'_poisoned_model.hdf5'
repaired_filename = dataset+"_"+modelname+'_repaired_model.hdf5'

# This walkthrough handles the first update target only.
update_target = update_targets[0]
unlearning_result = UnlearningResult(model_folder, dataset, modelname+'_'+update_target)
# The poisoned checkpoint is expected one level above the unlearning folder.
poisoned_weights = os.path.join(parent(model_folder), poisoned_filename)
log_dir = model_folder

# check if unlearning has already been performed
if unlearning_result.exists:
    print(f"Unlearning results already exist for {modelname} {dataset}")
    # Stop this cell without calling exit(): inside a notebook exit() asks the kernel
    # to shut down, which discards all state. Raising SystemExit is what exit() does
    # in a plain Python script, and in a notebook it only aborts the current cell.
    raise SystemExit

# start unlearning hyperparameter search for the poisoned model
train_result = dataset+"_"+modelname+'_train_results.json'
# parents[2] of the unlearning folder is the poisoning root, which holds the 'clean' run.
with open(model_folder.parents[2]/'clean'/train_result, 'r') as f:
    clean_acc = json.load(f)['accuracy']
repaired_filepath = os.path.join(model_folder, repaired_filename)
cm_dir = os.path.join(model_folder, 'cm')
os.makedirs(cm_dir, exist_ok=True)
# `unlearn_kwargs` aliases the loaded unlearn config, so this changes that object too.
unlearn_kwargs['order'] = order
clear_session()

In [None]:
(x_train, y_train), (x_test, y_test), (x_valid, y_valid) = data

# `model` is the builder returned by model_init(); build the model to repair once and
# keep the instance, so the same object is unlearned and later used as the baseline.
poisoned_model = model()
# NOTE(review): the previous version of this cell assigned `poisoned_weights` to
# `new_theta`, which suggests the poisoned checkpoint was meant to be the starting
# point. Loading it here makes that explicit — confirm unlearn_update expects a model
# that already carries the poisoned weights.
poisoned_model.load_weights(poisoned_weights)

# Number of trainable parameters, as a plain int. Built-in sum and np.prod replace
# np.sum(<generator>) and np.product, which NumPy has deprecated.
params = int(sum(np.prod(list(var.shape)) for var in poisoned_model.trainable_variables))

new_theta, diverged, logs, duration_s = unlearn_update(
        x_train, y_train, y_train_orig, delta_idx, poisoned_model, x_valid, y_valid, unlearn_kwargs, verbose=verbose, cm_dir=cm_dir, log_dir=log_dir, update_target=update_target)

# Aliases kept from the original cell; nothing below reads them.
z_x, z_y, z_y_delta, x_val, y_val = x_train, y_train, y_train_orig, x_valid, y_valid

# Put the parameters returned by unlearn_update into a fresh model instance and keep
# that instance. Before, the returned parameters were overwritten by a file path and
# set_weights ran on a temporary model that was discarded immediately.
new_model = model()
new_model.set_weights(new_theta)
logs = LoggedGradientTape.logs['unlearn']

if repaired_filepath is not None:
        new_model.save_weights(repaired_filepath)
# Compare model instances (not the builder functions) before and after unlearning.
acc_before, acc_after, diverged = evaluate_model_diff(
        poisoned_model, new_model, x_valid, y_valid, diverged, verbose, clean_acc)

# Share of the accuracy gap to the clean model that unlearning recovered.
acc_perc_restored = (acc_after - acc_before) / (clean_acc - acc_before)
unlearning_duration_s = duration_s
print(f'Acc_after : {acc_after} \n Acc_before : {acc_before}, \n diverge => {diverged}')
unlearning_result.update({
        'acc_clean': clean_acc,
        'acc_before_fix': acc_before,
        'acc_after_fix': acc_after,
        'acc_perc_restored': acc_perc_restored,
        'diverged': diverged,
        'n_gradients': sum(logs),
        'unlearning_duration_s': unlearning_duration_s,
        'num_params': params
    })
unlearning_result.save()

In [None]:
# Sweep over update targets and unlearning orders for VGG16 on CIFAR-10.
# This cell resets the state it needs instead of relying on the walkthrough cells above.
model_folder = poisoned_folder/'first-order'
modelname = 'VGG16'
dataset = 'Cifar10'
data = Cifar10.load()
# `model` is the builder function returned by model_init(), not a model instance.
model = model_init()
poisoned_filename = dataset+"_"+modelname+'_poisoned_model.hdf5'
repaired_filename = dataset+"_"+modelname+'_repaired_model.hdf5'

update_targets = ['classifier', 'feature_extractor']
orders = [1, 2]
# NOTE(review): `model_folder` stays on the 'first-order' directory for order 2 as well — confirm.
for order in orders:
    for update_target in update_targets:
        # NOTE(review): `unlearn_model` is neither defined nor imported in the cells shown here.
        # `model` and `data` are rebound on every iteration, so each run starts from the
        # values returned by the previous one.
        model, data = unlearn_model(update_target, model_folder, dataset, modelname, parent, poisoned_filename, repaired_filename, order, data)
        print(f"Evaluation VGG CIFAR  update_target : {update_target} # order : {order}")
        # NOTE(review): no weights_path is passed; check this call against the evaluate()
        # signature defined earlier in this notebook. The returned accuracy is neither
        # printed nor stored, so nothing is shown for it inside the loop.
        evaluate(model=model(), data=current_data)