In [None]:
!pip install -q transformers datasets rouge_score

In [None]:
import os
import json
import glob
import torch
import random
import shutil
import argparse
import datasets
import numpy as np
import pandas as pd

from tqdm import tqdm
from pathlib import Path
from accelerate import Accelerator
from collections import defaultdict
from statistics import mean, harmonic_mean
from rouge_score import rouge_scorer
from sklearn.metrics import roc_curve, auc
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## args

In [None]:
data_path = '/content/drive/MyDrive/GIL/Unlearning/data/validation/'
checkpoint_path = 'allenai/OLMo-1B-0724-hf'
mia_data_path = '/content/drive/MyDrive/GIL/Unlearning/mia_data/'
output_dir = '/content/drive/MyDrive/GIL/Unlearning/'
model = 'allenai/OLMo-1B-0724-hf' #allenai/OLMo-1B-0724-hf'
max_new_tokens = 256
ntrain = 5
data_dir = 'cais/mmlu'#https://huggingface.co/datasets/cais/mmlu
save_dir = '/content/drive/MyDrive/GIL/Unlearning/results_mmlu/'
n_instances = 5
eval_batch_size = 1
hf_revision = None
load_in_8bit = False #Porque lo tenemos en 4

debug = False
keep_files = False
compute_metrics_only = False

## Functions

In [None]:
def inference( model, tokenizer):
    forget_file = data_path + 'forget.jsonl'
    retain_file = data_path + 'retain.jsonl'

    accelerator = Accelerator()
    model.to(accelerator.device)

    for split, train_file in tqdm([('retain', retain_file), ('forget', forget_file)]):
        print('\nlen retain_file', len(retain_file))
        print('len forget_file', len(forget_file))
        data_files = {}
        dataset_args = {}
        if train_file is not None:
            data_files["train"] = train_file
        raw_datasets = datasets.load_dataset(
            "json",
            data_files=data_files,
            **dataset_args,
        )
        train_dataset = raw_datasets["train"]

        output_dic = defaultdict(lambda :{'id': [], 'task': [], 'input': [], 'expected_output': [], 'model_output': [], 'nll': []})

        with accelerator.split_between_processes(train_dataset, apply_padding=True) as data:
          #Agregué esto, no se si sea lo correcto
          print('len data[input]',len(data["input"]))
          for i in range(len(data["input"])):
              questions, answers = list(data["input"][0].values()), list(data["output"][0].values())
              id = list(data["id"][0].values())
              task = list(data["task"][0].values())
              for idx in range(len(questions)):
         #----------
                  #question, answer = data[idx]["input"], data[idx]["output"]
                  question, answer = str(questions[idx]), str(answers[idx])
                  #print(question, type(question), type(answer))
                  output_dic[accelerator.process_index]['id'].append(id[idx])
                  output_dic[accelerator.process_index]['task'].append(task[idx])
                  output_dic[accelerator.process_index]['input'].append(questions[idx])
                  output_dic[accelerator.process_index]['expected_output'].append(answer)
                  input_ids = tokenizer(
                      question,
                      return_tensors='pt',
                      padding=True
                  ).input_ids.to(model.device)

                  combined_input_ids = tokenizer(
                      question+answer,
                      return_tensors='pt',
                  ).input_ids.to(model.device)
                  combined_target_ids = combined_input_ids.clone()
                  combined_target_ids[:,:len(input_ids[0])] = -100

                  with torch.no_grad():
                      out = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=False, use_cache=True, pad_token_id=tokenizer.eos_token_id)
                      output_ids = out[:, len(input_ids[0]):]
                      output = tokenizer.batch_decode(
                          output_ids,
                          skip_special_tokens=True,
                          clean_up_tokenization_spaces=True)[0]
                      output_dic[accelerator.process_index]['model_output'].append(output)

                      # For Perplexity
                      out = model(combined_input_ids, labels=combined_target_ids)
                      if debug:
                          print(tokenizer.batch_decode(
                              torch.argmax(
                                  torch.nn.functional.softmax(
                                      torch.tensor(out.logits),
                                      dim=2),
                                  dim=2)[:, len(input_ids[0]):],
                              skip_special_tokens=True,
                              clean_up_tokenization_spaces=True)[0])
                      neg_log_likelihood = out.loss.item()
                      output_dic[accelerator.process_index]['nll'].append(neg_log_likelihood)

        accelerator.wait_for_everyone()

        #if args.debug:
        #    print([len(value) for value in output_dic[accelerator.process_index].values()])
        output_df = pd.DataFrame.from_dict(output_dic[accelerator.process_index])
        output_file_name = f"{output_dir}/{split}_{accelerator.process_index}.csv"
        output_df.to_csv(output_file_name, index=False)

In [None]:
def mia_attacks(model, tokenizer):
    member_file = mia_data_path + 'member.jsonl'
    nonmember_file = mia_data_path + 'nonmember.jsonl'

    accelerator = Accelerator()
    model.to(accelerator.device)

    for dataset, train_file in [('member', member_file), ('nonmember', nonmember_file)]:
        data_files = {}
        dataset_args = {}
        if train_file is not None:
            data_files["train"] = train_file
        raw_datasets = datasets.load_dataset(
            "json",
            data_files=data_files,
            **dataset_args,
        )
        train_dataset = raw_datasets["train"]

        output_dic = defaultdict(lambda :{'id': [], 'nll': []})

        with accelerator.split_between_processes(train_dataset, apply_padding=True) as data:
            for idx in tqdm(range(len(data['document']))):
                document = data["document"][idx]
                output_dic[accelerator.process_index]['id'].append(data["id"][idx])
                input_ids = tokenizer(
                    document,
                    return_tensors='pt'
                ).input_ids.to(model.device)

                target_ids = input_ids.clone()

                with torch.no_grad():
                    out = model(input_ids, labels=target_ids)
                    neg_log_likelihood = out.loss.item()
                    output_dic[accelerator.process_index]['nll'].append(neg_log_likelihood)

        accelerator.wait_for_everyone()

        output_df = pd.DataFrame.from_dict(output_dic[accelerator.process_index])

        #results_dir = os.path.join(args.output_dir, 'mia_results')
        #Path(results_dir).mkdir(parents=True, exist_ok=True)
        output_file_name = f"{output_dir}/mia_results/{dataset}_{accelerator.process_index}.csv"
        #if args.debug:
        #    print('Saving to: ', output_file_name)
        output_df.to_csv(output_file_name, index=False)

In [None]:
def compute_auc(member_loss, nonmember_loss):
    assert not np.any(np.isnan(member_loss))
    assert not np.any(np.isnan(nonmember_loss))
    combined_loss = member_loss + nonmember_loss
    combined_loss = -1 * np.array(combined_loss)
    combined_labels = len(member_loss) * [1] + len(nonmember_loss) * [0]
    fp, tp, _ = roc_curve(combined_labels, combined_loss)

    auc_score = float(auc(fp, tp))

    return auc_score

In [None]:
def compute_metrics():
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

    results = {}
    aggregate_scores_list = []
    for split in ['forget', 'retain']:
        #files = glob.glob(output_dir + '/{}_*.csv'.format(split))
        #if len(files) == 0:
        #    print("[ERROR] Missing inference files, rerun script with inference first")
        #    return  # sys.exit(1) throws a long traceback so just return for now
        files = [f'{output_dir}/forget_0.csv', f'{output_dir}/retain_0.csv']
        df_list = [pd.read_csv(f) for f in files]
        #if not args.keep_files:
        #    _ = [os.remove(f) for f in files]
        df = pd.concat(df_list, ignore_index=True)

        df['regurgitation-score-rouge-1'] = None
        df['regurgitation-score'] = None
        df['knowledge-score'] = None
        ground_truths = df['expected_output'].tolist()
        gen_outputs = df['model_output'].tolist()

        for i, (gen, gt) in enumerate(zip(gen_outputs, ground_truths)):
            if df.loc[i, 'id'][:-1].endswith('sc'):
                rouge_scores = scorer.score(str(gt), str(gen))
                df.loc[i, 'regurgitation-score-rouge-1'] = rouge_scores['rouge1'].recall
                df.loc[i, 'regurgitation-score'] = rouge_scores['rougeL'].recall
            elif df.loc[i, 'id'][:-1].endswith('qa'):
                 df.loc[i, 'knowledge-score'] = int(str(gt).strip().lower() == str(gen).strip().lower())

        results[split+'-set'] = {'overall-regurgitation-score': np.mean(df['regurgitation-score']), 'overall-knowledge-score': np.mean(df['knowledge-score'])}
        split_aggregate_scores_dict = df.groupby('task')[['regurgitation-score', 'knowledge-score']].mean().to_dict(orient='index')
        results[split+'-set'].update(split_aggregate_scores_dict)
        split_aggregate_score_values = [float(val) for inner in split_aggregate_scores_dict.values() for val in inner.values()]
        if split == 'forget':
            split_aggregate_score_values = [(1 - val) for val in split_aggregate_score_values]

        aggregate_scores_list.extend(split_aggregate_score_values)

    if mia_data_path is not None:
        mia_results_dir = output_dir+'mia_results'
        mia_results = {}
        for dataset in ['member', 'nonmember']:
            #files = glob.glob(mia_results_dir + '/{}_*.csv'.format(dataset))
            #if len(files) == 0:
            #    print("[ERROR] Missing mia files, rerun script with inference first")
            #    return  # sys.exit(1) throws a long traceback so just return for no
            files = [f'{mia_results_dir}/member_0.csv', f'{mia_results_dir}/nonmember_0.csv']
            df_list = [pd.read_csv(f) for f in files]
            df = pd.concat(df_list, ignore_index=True)
            mia_results[dataset] = df['nll'].tolist()

        #if not keep_files:
        #    shutil.rmtree(mia_results_dir)

        auc = compute_auc(mia_results['member'], mia_results['nonmember'])
        # Best MIA rates we can get are ~0.5.
        # Scores close to 1 suggest under-unlearning
        # Scores close to 0 suggest over-unlearning
        results['mia_loss_acc'] = auc
#        aggregate_scores_list.append(1 - auc)

    if mmlu_metrics_file_path is not None:
        with open(mmlu_metrics_file_path) as inptr:
            mmlu_scores = json.loads(inptr.read())
        results['mmlu_average'] = mmlu_scores['average_acc']
#        aggregate_scores_list.append(mmlu_scores['average_acc'])

    results['aggregated-terms'] = aggregate_scores_list

    task_aggregate = harmonic_mean(aggregate_scores_list)
    results['aggregate-score'] = -1

    results['harmonic-mean-task-aggregate'] = task_aggregate

    # Need MMLU and MIA scores to compute the aggregate
    if 'mmlu_average' in results and 'mia_loss_acc' in results:
        if results['mmlu_average'] < 0.371:
            # MMLU score should not drop below 75% of pre-unlearning preformance
            print(f"[WARNING] The MMLU average for the provided checkpoint is below threshold. If this happens your model may not be considered in final challenge ranking.")

        mia_final_score = 1 - abs(results['mia_loss_acc'] - 0.5)*2
        results['mia_final_score'] = mia_final_score
        results['aggregate-score'] = mean([task_aggregate, results['mmlu_average'], mia_final_score])

    #metrics_file = os.path.join(args.output_dir, 'evaluation_results.jsonl')
    #with open(metrics_file, 'w') as outptr:
    #    outptr.write(json.dumps(results))
    return results

## Functions MMLU

In [None]:
subcategories = {
    "abstract_algebra": ["math"],
    "anatomy": ["health"],
    "astronomy": ["physics"],
    "business_ethics": ["business"],
    "clinical_knowledge": ["health"],
    "college_biology": ["biology"],
    "college_chemistry": ["chemistry"],
    "college_computer_science": ["computer science"],
    "college_mathematics": ["math"],
    "college_medicine": ["health"],
    "college_physics": ["physics"],
    "computer_security": ["computer science"],
    "conceptual_physics": ["physics"],
    "econometrics": ["economics"],
    "electrical_engineering": ["engineering"],
    "elementary_mathematics": ["math"],
    "formal_logic": ["philosophy"],
    "global_facts": ["other"],
    "high_school_biology": ["biology"],
    "high_school_chemistry": ["chemistry"],
    "high_school_computer_science": ["computer science"],
    "high_school_european_history": ["history"],
    "high_school_geography": ["geography"],
    "high_school_government_and_politics": ["politics"],
    "high_school_macroeconomics": ["economics"],
    "high_school_mathematics": ["math"],
    "high_school_microeconomics": ["economics"],
    "high_school_physics": ["physics"],
    "high_school_psychology": ["psychology"],
    "high_school_statistics": ["math"],
    "high_school_us_history": ["history"],
    "high_school_world_history": ["history"],
    "human_aging": ["health"],
    "human_sexuality": ["culture"],
    "international_law": ["law"],
    "jurisprudence": ["law"],
    "logical_fallacies": ["philosophy"],
    "machine_learning": ["computer science"],
    "management": ["business"],
    "marketing": ["business"],
    "medical_genetics": ["health"],
    "miscellaneous": ["other"],
    "moral_disputes": ["philosophy"],
    "moral_scenarios": ["philosophy"],
    "nutrition": ["health"],
    "philosophy": ["philosophy"],
    "prehistory": ["history"],
    "professional_accounting": ["other"],
    "professional_law": ["law"],
    "professional_medicine": ["health"],
    "professional_psychology": ["psychology"],
    "public_relations": ["politics"],
    "security_studies": ["politics"],
    "sociology": ["culture"],
    "us_foreign_policy": ["politics"],
    "virology": ["health"],
    "world_religions": ["philosophy"],
}

categories = {
    "STEM": ["physics", "chemistry", "biology", "computer science", "math", "engineering"],
    "humanities": ["history", "philosophy", "law"],
    "social sciences": ["politics", "culture", "economics", "geography", "psychology"],
    "other (business, health, misc.)": ["other", "business", "health"],
}

In [None]:
choices = ["A", "B", "C", "D"]


def format_subject(subject):
    l = subject.split("_")
    s = ""
    for entry in l:
        s += " " + entry
    return s


def format_example(df, idx, include_answer=True):
    prompt = df.iloc[idx, 0]
    k = df.shape[1] - 2
    for j in range(k):
        prompt += "\n{}. {}".format(choices[j], df.iloc[idx, j + 1])
    prompt += "\nAnswer:"
    if include_answer:
        prompt += " {}\n\n".format(df.iloc[idx, k + 1])
    return prompt


def gen_prompt(train_df, subject, k=-1):
    prompt = "The following are multiple choice questions (with answers) about {}.\n\n".format(
        format_subject(subject)
    )
    if k == -1:
        k = train_df.shape[0]
    for i in range(k):
        prompt += format_example(train_df, i)
    return prompt

def eval_hf_model(subject, model, dev_df, test_df, tokenizer):
    cors = []
    all_probs = []

    for i in range(test_df.shape[0]):
        # get prompt and make sure it fits
        k = ntrain
        prompt_end = format_example(test_df, i, include_answer=False)
        train_prompt = gen_prompt(dev_df, subject, k)
        prompt = train_prompt + prompt_end

        #while not model.check_valid_length(prompt) and k > 0:
        while len(tokenizer(prompt, return_tensors='pt').input_ids[0]) > tokenizer.model_max_length and k > 0:
            k -= 1
            train_prompt = gen_prompt(dev_df, subject, k)
            prompt = train_prompt + prompt_end

        label = test_df.iloc[i, test_df.shape[1] - 1]
        #pred = model.run(prompt)
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
        with torch.no_grad():
          #print('model.device', model.device)
          out = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=False, use_cache=True, pad_token_id=tokenizer.eos_token_id) #, temperature=0.001)
          #print('out', out)
          pred = tokenizer.decode(out[0], skip_special_tokens=True)
          #print(pred)
          #print('--------------')
        probs = [0 for _ in choices]
        #cor = pred.strip().startswith(label)
        cor = str(label) in pred
        cors.append(cor)
        all_probs.append(probs)

    acc = np.mean(cors)
    cors = np.array(cors)

    all_probs = np.array(all_probs)
    print("Average accuracy {:.3f} - {}".format(acc, subject))

    return cors, acc, all_probs

In [None]:
def load_hf_tokenizer(
        model_name_or_path,
        revision=None,
        tokenizer_name_or_path=None,
        use_fast_tokenizer=True,
        padding_side="left",
        token=os.getenv("HF_TOKEN", None),
    ):
        from transformers import AutoTokenizer
        if not tokenizer_name_or_path:
            tokenizer_name_or_path = model_name_or_path
        try:
            tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=use_fast_tokenizer, token=token, revision=revision)
        except:
            # some tokenizers (e.g., GPTNeoXTokenizer) don't have the slow or fast version, so we just roll back to the default one
            tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, token=token, revision=revision)
        # set padding side to left for batch generation
        tokenizer.padding_side = padding_side
        # set pad token to eos token if pad token is not set (as is the case for llama models)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.pad_token_id = tokenizer.eos_token_id
        return tokenizer

In [None]:
def load_hf_lm(
        model_name_or_path,
        revision=None,
        device_map="auto",
        torch_dtype="auto",
        load_in_8bit=False,
        convert_to_half=False,
        gptq_model=False,
        token=os.getenv("HF_TOKEN", None),
    ):

    # Loading OLMo models from HF requires `trust_remote_code=True`.
    # TODO: Implement this via command-line flag rather than hardcoded list.
    trusted_models = ["allenai/OLMo-7B", "allenai/OLMo-7B-Twin-2T", "allenai/OLMo-1B"]
    if model_name_or_path in trusted_models:
        trust_remote_code = True
    else:
        trust_remote_code = False

    from transformers import AutoModelForCausalLM, AutoTokenizer, OPTForCausalLM, GPTNeoXForCausalLM
    if gptq_model:
        from auto_gptq import AutoGPTQForCausalLM
        model_wrapper = AutoGPTQForCausalLM.from_quantized(
            model_name_or_path, device="cuda:0", use_triton=True, trust_remote_code=trust_remote_code
        )
        model = model_wrapper.model
    elif load_in_8bit:
        model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            revision=revision,
            device_map=device_map,
            load_in_8bit=True,
            token=token,
            trust_remote_code=trust_remote_code
        )
    else:
        if device_map:
            model = AutoModelForCausalLM.from_pretrained(
                model_name_or_path,
                revision=revision,
                device_map=device_map,
                torch_dtype=torch_dtype,
                token=token,
                trust_remote_code=trust_remote_code,
            )
        else:
            model = AutoModelForCausalLM.from_pretrained(
                model_name_or_path,
                revision=revision,
                torch_dtype=torch_dtype,
                token=token,
                trust_remote_code=trust_remote_code,
            )
            if torch.cuda.is_available():
                model = model.cuda()
        if convert_to_half:
            model = model.half()
    model.eval()
    return model

https://github.com/declare-lab/instruct-eval/blob/main/mmlu.py#L18

In [None]:
def main_MMLU(model_path):
    if model_path:
        print("Loading model and tokenizer...")
        tokenizer = load_hf_tokenizer(
            model_name_or_path='allenai/OLMo-1B-0724-hf',
            revision=hf_revision,
            tokenizer_name_or_path='allenai/OLMo-1B-0724-hf',
            #use_fast_tokenizer=not args.use_slow_tokenizer,
        )
        model = load_hf_lm(
            model_name_or_path=model_path,
            revision=hf_revision,
            load_in_8bit=load_in_8bit,
            device_map=None
            #device_map="balanced_low_0" if torch.cuda.device_count() > 1 else "auto",
            #gptq_model=gptq,
        )
        from transformers import GPTNeoXForCausalLM, OPTForCausalLM
        if isinstance(model, GPTNeoXForCausalLM) or isinstance(model, OPTForCausalLM):
            tokenizer.model_max_length = model.config.max_position_embeddings
            print("Set tokenizer.model_max_length to model.config.max_position_embeddings: {}".format(model.config.max_position_embeddings))

    all_cors = []
    subcat_cors = {
        subcat: [] for subcat_lists in subcategories.values() for subcat in subcat_lists
    }
    cat_cors = {cat: [] for cat in categories}
    for subject in tqdm(list(subcategories.keys()), desc=f"Evaluating subjects: "):
        dev_df = load_dataset(data_dir, subject, split="dev").to_pandas()
        test_df = load_dataset(data_dir, subject, split="test").to_pandas()

        if n_instances and n_instances < test_df.shape[0]:
            test_df = test_df.sample(n_instances, random_state=42)

        cors, acc, probs = eval_hf_model(subject, model, dev_df, test_df, tokenizer)

        subcats = subcategories[subject]
        for subcat in subcats:
            subcat_cors[subcat].append(cors)
            for key in categories.keys():
                if subcat in categories[key]:
                    cat_cors[key].append(cors)
        all_cors.append(cors)

        test_df["correct"] = cors
        for j in range(probs.shape[1]):
            choice = choices[j]
            test_df["choice{}_probs".format(choice)] = probs[:, j]
        test_df.to_csv(f"{save_dir}{subject}.csv",index=None)

    for subcat in subcat_cors:
        subcat_acc = np.mean(subcat_cors[subcat])
        print("Average accuracy {:.3f} - {}".format(subcat_acc, subcat))

    for cat in cat_cors:
        cat_acc = np.mean(cat_cors[cat])
        print("Average accuracy {:.3f} - {}".format(cat_acc, cat))
    weighted_acc = np.mean(np.concatenate(all_cors))
    print("Average accuracy: {:.3f}".format(weighted_acc))

    res = {
                "average_acc": weighted_acc,
                "subcat_acc": {
                    subcat: np.mean(subcat_cors[subcat])
                    for subcat in subcat_cors
                },
                "cat_acc": {
                    cat: np.mean(cat_cors[cat])
                    for cat in cat_cors
                },
            }
    print(res)

    # save results
    with open(f"{save_dir}metrics.json", "w") as f:
        json.dump(res,f,)


## Evaluate

In [None]:
seed = 42
random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

# Set up accelerator
accelerator = Accelerator()
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, torch_dtype=torch.bfloat16, trust_remote_code = True) # .to('cuda')
tokenizer = AutoTokenizer.from_pretrained('allenai/OLMo-1B-0724-hf')
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/9.25k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.71G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/412M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.12M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

In [None]:
if not compute_metrics_only:
    inference(model, tokenizer)

  0%|          | 0/2 [00:00<?, ?it/s]


len retain_file 66
len forget_file 66


Generating train split: 0 examples [00:00, ? examples/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


len data[input] 1


 50%|█████     | 1/2 [16:18<16:18, 978.49s/it]


len retain_file 66
len forget_file 66


Generating train split: 0 examples [00:00, ? examples/s]

len data[input] 1


100%|██████████| 2/2 [30:57<00:00, 928.99s/it]


In [None]:
if mia_data_path is not None:
    mia_attacks(model, tokenizer)

Generating train split: 0 examples [00:00, ? examples/s]

100%|██████████| 250/250 [01:02<00:00,  3.98it/s]


Generating train split: 0 examples [00:00, ? examples/s]

100%|██████████| 250/250 [01:04<00:00,  3.90it/s]


In [None]:
model_mmlu = 'allenai/OLMo-1B-0724-hf'
main_MMLU(model_mmlu)
mmlu_metrics_file_path = '/content/drive/MyDrive/GIL/Unlearning/results_mmlu/metrics.json'

In [None]:
if accelerator.is_main_process:
    res = compute_metrics()
res

TaskVector

In [None]:
res

{'forget-set': {'overall-regurgitation-score': 0.2393468712909415,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.31860815163523865,
   'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.13204733633266558,
   'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.25495324284640764,
   'knowledge-score': 0.0}},
 'retain-set': {'overall-regurgitation-score': 0.2393468712909415,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.31860815163523865,
   'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.13204733633266558,
   'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.25495324284640764,
   'knowledge-score': 0.0}},
 'mia_loss_acc': 0.5,
 'mmlu_average': 0.9087719298245615,
 'aggregated-terms': [0.6813918483647614,
  1.0,
  0.8679526636673345,
  1.0,
  0.7450467571535924,
  1.0,
  0.31860815163523865,
  0.0,
  0.13204733633266558,
  0.0,
  0.25495324284640764,
  0.0],
 'aggregate-score': 0.6362573099415

La combinación de los dos de abajo

In [None]:
res

{'forget-set': {'overall-regurgitation-score': 0.23452970222891334,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.3601740777120498, 'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.1347834819607284, 'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.2117121729786732,
   'knowledge-score': 0.0}},
 'retain-set': {'overall-regurgitation-score': 0.23452970222891334,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.3601740777120498, 'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.1347834819607284, 'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.2117121729786732,
   'knowledge-score': 0.0}},
 'mia_loss_acc': 0.5,
 'mmlu_average': 0.9157894736842105,
 'aggregated-terms': [0.6398259222879502,
  1.0,
  0.8652165180392716,
  1.0,
  0.7882878270213268,
  1.0,
  0.3601740777120498,
  0.0,
  0.1347834819607284,
  0.0,
  0.2117121729786732,
  0.0],
 'aggregate-score': 0.6385964912280702,
 'harmonic-mea

Gradient_Ascent

In [None]:
res

{'forget-set': {'overall-regurgitation-score': 0.23384089127472357,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.26613982525763347,
   'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.11486434450160715,
   'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.2910750970772752,
   'knowledge-score': 0.0}},
 'retain-set': {'overall-regurgitation-score': 0.23384089127472357,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.26613982525763347,
   'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.11486434450160715,
   'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.2910750970772752,
   'knowledge-score': 0.0}},
 'mia_loss_acc': 0.5,
 'mmlu_average': 0.9087719298245615,
 'aggregated-terms': [0.7338601747423665,
  1.0,
  0.8851356554983929,
  1.0,
  0.7089249029227248,
  1.0,
  0.26613982525763347,
  0.0,
  0.11486434450160715,
  0.0,
  0.2910750970772752,
  0.0],
 'aggregate-score': 0.63625730994152

Fine-tune

In [None]:
res

{'forget-set': {'overall-regurgitation-score': 0.23542884518765667,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.35715384035868203,
   'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.13832551140051963,
   'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.2136399053011814,
   'knowledge-score': 0.0}},
 'retain-set': {'overall-regurgitation-score': 0.23542884518765667,
  'overall-knowledge-score': 0.0,
  'Task1': {'regurgitation-score': 0.35715384035868203,
   'knowledge-score': 0.0},
  'Task2': {'regurgitation-score': 0.13832551140051963,
   'knowledge-score': 0.0},
  'Task3': {'regurgitation-score': 0.2136399053011814,
   'knowledge-score': 0.0}},
 'mia_loss_acc': 0.5,
 'aggregated-terms': [0.642846159641318,
  1.0,
  0.8616744885994804,
  1.0,
  0.7863600946988186,
  1.0,
  0.35715384035868203,
  0.0,
  0.13832551140051963,
  0.0,
  0.2136399053011814,
  0.0],
 'harmonic-mean-task-aggregate': 0,
 'aggregate-score': -1}