In [0]:
# Install the third-party packages this notebook depends on.
# NOTE(review): no versions are pinned, so a fresh install may resolve to different
# releases than the ones this notebook was written against — consider pinning.
# NOTE(review): `openai` is installed here although its import is commented out below.
%pip install transformers openai torch tqdm numpy datasets accelerate matplotlib

In [0]:
# Imports
import logging
logging.basicConfig(level='ERROR')
import numpy as np
from pathlib import Path
# import openai
import torch
import zlib
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, AutoModel
from tqdm import tqdm
import numpy as np
from datasets import load_dataset
from eval import *
from args import *
from model_utils import *
from unlearning import *
from accelerate import init_empty_weights, infer_auto_device_map


In [0]:
# Helper functions for evaluating
def inference(model1, model2, tokenizer1, tokenizer2, text, ex, modelname1, modelname2):
    """Compute all membership-inference scores for one example.

    Args:
        model1, tokenizer1: model/tokenizer pair being scored on ``text``.
        model2, tokenizer2: reference pair; only used for the ``"MIM"`` score.
        text: the string to score.
        ex: mapping describing the example. It is mutated in place: a ``"pred"``
            entry holding the score dict is added (or overwritten).
        modelname1, modelname2: unused here; kept so existing callers keep working.

    Returns:
        The same ``ex`` object, with ``ex["pred"]`` set to a dict containing
        ``"ppl"``, ``"MIM"``, ``"ppl/lowercase_ppl"``, ``"ppl/zlib"`` and one
        ``"Min_<k>% Prob"`` entry per ratio.
    """
    # calculatePerplexity returns a 3-tuple; the third value is not needed here.
    # (A reference-calibrated score built from those third values was disabled by
    # the author and is not emitted.)
    # NOTE(review): all_prob is presumably the per-token log-probabilities — confirm.
    p1, all_prob, _ = calculatePerplexity(text, model1, tokenizer1, gpu=model1.device)
    p_lower, _, _ = calculatePerplexity(text.lower(), model1, tokenizer1, gpu=model1.device)
    p_ref, _, _ = calculatePerplexity(text, model2, tokenizer2, gpu=model2.device)

    pred = {"ppl": p1}

    print(f'p1: {p1}')
    print(f'pref: {p_ref}')
    # Relative gap between the two models' scores, normalised by the reference score.
    MIM = (p1 - p_ref)/p_ref

    print(f'MIM: {MIM}')
    pred["MIM"] = MIM

    # Ratio of log ppl of lower-case and normal-case
    pred["ppl/lowercase_ppl"] = -(np.log(p_lower) / np.log(p1)).item()
    # Log ppl divided by the zlib-compressed size (in bytes) of the UTF-8 text
    zlib_entropy = len(zlib.compress(bytes(text, 'utf-8')))
    pred["ppl/zlib"] = np.log(p1)/zlib_entropy

    # min-k prob: negated mean of the k lowest entries of all_prob.
    # The sort does not depend on the ratio, so do it once.
    sorted_prob = np.sort(all_prob)
    for ratio in [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]:
        # Always keep at least one entry: for short inputs int(len * ratio) is 0,
        # and the mean of an empty slice is NaN (plus a RuntimeWarning), which would
        # poison every downstream metric computed from this score.
        k_length = max(1, int(len(all_prob)*ratio))
        pred[f"Min_{ratio*100}% Prob"] = -np.mean(sorted_prob[:k_length]).item()

    ex["pred"] = pred
    return ex

def evaluate_data(test_data, model1, model2, tokenizer1, tokenizer2, col_name, modelname1, modelname2):
    """Run ``inference`` over every example and collect the results.

    Prints the number of examples, then iterates ``test_data`` under a tqdm
    progress bar. For each example the text is read from ``ex[col_name]`` and
    passed, together with the example itself, to ``inference``.

    Returns:
        A list with one ``inference`` result per example, in input order.
    """
    print(f"all data size: {len(test_data)}")
    return [
        inference(model1, model2, tokenizer1, tokenizer2, record[col_name], record, modelname1, modelname2)
        for record in tqdm(test_data)
    ]


In [0]:
# Run configuration. Everything a reader might tune for this experiment lives here.
output_dir = "experiment_output/pythia-2.8b"

# Target and unlearn checkpoints are the same base model; with these values the
# data cell loads the WikiMIA split for length 128 and evaluation reads the
# `input` column of each example.
experiment_args = ExperimentArgs(
    target_model="EleutherAI/pythia-2.8b",
    unlearn_model="EleutherAI/pythia-2.8b",
    output_dir=output_dir,
    key_name="input",
    data="swj0419/WikiMIA",
    length=128,
    save_loss=True,
)

unlearning_args = UnlearningArgs(
    lr=1e-6,
    epochs=1,
    col_name="input",
    save_model=True,
    only_members=True,
    # Use CUDA when torch reports it as available, otherwise fall back to CPU.
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
)

# Make sure the output directory exists (parents included; no error if present).
Path(output_dir).mkdir(parents=True, exist_ok=True)

In [0]:
# Load base model
# model1/tokenizer1 are required by the evaluate_data(...) call further down; with
# this line commented out a fresh "Restart & Run All" fails there with a NameError.
# NOTE(review): this keeps a second model in memory next to the unlearned one —
# confirm the machine has room for both.
model1, tokenizer1 = load_base_model(experiment_args.target_model)

In [0]:
# Load unlearned model
# load_unlearned_model comes from a star import; it receives both argument objects
# and its result is unpacked into a (model, tokenizer) pair.
# NOTE(review): presumably this runs or restores the unlearning step configured in
# unlearning_args — confirm against the helper's definition.
model2, tokenizer2 = load_unlearned_model(experiment_args, unlearning_args)

In [0]:
# Choose the loader from the data spec: a spec without "jsonl" in it is treated as a
# Hugging Face dataset id and converted to a list of dicts; anything else goes
# through load_jsonl.
if "jsonl" not in experiment_args.data:
    dataset = load_dataset(experiment_args.data, split=f"WikiMIA_length{experiment_args.length}")
    data = convert_huggingface_data_to_list_dic(dataset)
else:
    data = load_jsonl(f"{experiment_args.data}")

In [0]:
# Sanity check: run generate_text on a fixed prompt with the unlearned model (model2)
# and print what it returns. The equivalent check for the base model (model1) is
# currently disabled.
# out_text_model1 = generate_text(model1, tokenizer1, "What is the diameter of the sun?")
# print(f"Model 1 output: {out_text_model1}\n")
out_text_model2 = generate_text(model2, tokenizer2, "What is the diameter of the sun?")
print(f"Model 2 output: {out_text_model2}\n")

In [0]:
# Score every example with both models. model1/tokenizer1 are the base model pair
# and model2/tokenizer2 the unlearned pair; both must already be loaded.
all_output = evaluate_data(
    test_data=data,
    model1=model1,
    model2=model2,
    tokenizer1=tokenizer1,
    tokenizer2=tokenizer2,
    col_name=experiment_args.key_name,
    modelname1=experiment_args.target_model,
    modelname2=experiment_args.unlearn_model,
)

In [0]:
# Pass the per-example predictions and the run's output directory to fig_fpr_tpr
# (provided by a star import).
# NOTE(review): presumably this computes and saves FPR/TPR curves per score — confirm.
fig_fpr_tpr(all_output, experiment_args.output_dir)