In [1]:
import evaluate
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
from torch.nn import CrossEntropyLoss
from evaluate import logging
import numpy as np
from pathlib import Path
import json


class Perplexity():
    """Per-text perplexity of a causal language model over a list of strings."""

    def compute(
        self, predictions, model, tokenizer, batch_size: int = 4, add_start_token: bool = True, max_length=None,
        output_dir = None
    ):
        """Score every text in ``predictions`` with ``model`` and return its perplexity.

        Args:
            predictions: list of strings to score.
            model: callable invoked as ``model(input_ids, attention_mask=...)`` whose
                result exposes ``.logits``.
            tokenizer: tokenizer matching ``model``. NOTE: if it has no pad token and
                ``batch_size > 1``, one of its existing special tokens is registered
                as the pad token (the tokenizer object is mutated).
            batch_size: number of texts per forward pass.
            add_start_token: prepend the tokenizer's BOS token to every text so the
                first real token is scored as well.
            max_length: optional cap on the sequence length (including the BOS token
                when ``add_start_token`` is set); longer texts are truncated.
            output_dir: optional directory (``str`` or ``Path``). When given,
                ``ppls.json`` in that directory is rewritten after every batch with
                all perplexities computed so far.

        Returns:
            ``{"perplexities": [...], "mean_perplexity": np.mean(perplexities)}``.

        Raises:
            AssertionError: no special token is available for padding, the BOS token
                is missing while ``add_start_token`` is set, or a text is too short.
        """

        # if batch_size > 1 (which generally leads to padding being required), and
        # if there is not an already assigned pad_token, assign an existing
        # special token to also be the padding token
        if tokenizer.pad_token is None and batch_size > 1:
            existing_special_tokens = list(tokenizer.special_tokens_map_extended.values())
            # check that the model already has at least one special token defined
            assert (
                len(existing_special_tokens) > 0
            ), "If batch_size > 1, model must have at least one special token to use for padding. Please use a different model or set batch_size=1."
            # assign one of the special tokens to also be the pad token
            tokenizer.add_special_tokens({"pad_token": existing_special_tokens[0]})

        if add_start_token:
            # The BOS id is needed for every batch below, so validate it up front
            # regardless of whether max_length was given.
            assert (
                tokenizer.bos_token is not None
            ), "Input model must already have a BOS token if using add_start_token=True. Please use a different model, or set add_start_token=False"

        if add_start_token and max_length:
            # leave room for <BOS> token to be added:
            max_tokenized_len = max_length - 1
        else:
            max_tokenized_len = max_length

        encodings = tokenizer(
            predictions,
            add_special_tokens=False,
            padding=True,
            truncation=True if max_tokenized_len else False,
            max_length=max_tokenized_len,
            return_tensors="pt",
            return_attention_mask=True,
        )

        encoded_texts = encodings["input_ids"]
        attn_masks = encodings["attention_mask"]

        # check that each input is long enough:
        if add_start_token:
            assert torch.all(torch.ge(attn_masks.sum(1), 1)), "Each input text must be at least one token long."
        else:
            assert torch.all(
                torch.ge(attn_masks.sum(1), 2)
            ), "When add_start_token=False, each input text must be at least two tokens long. Run with add_start_token=True if inputting strings of only one token, and remove all empty input strings."

        ppls = []
        loss_fct = CrossEntropyLoss(reduction="none")
        # Accept both str and Path for output_dir.
        ppl_path = Path(output_dir) / "ppls.json" if output_dir is not None else None

        for start_index in logging.tqdm(range(0, len(encoded_texts), batch_size)):
            end_index = min(start_index + batch_size, len(encoded_texts))
            encoded_batch = encoded_texts[start_index:end_index]
            attn_mask = attn_masks[start_index:end_index]

            # The tokenizer call above pads every text to the longest text in
            # `predictions`. Columns that are padding for *every* row of this batch
            # are masked out of the loss anyway, so drop them before the forward
            # pass instead of paying for them.
            # NOTE(review): assumes a causal model that honours attention_mask, so
            # the logits of real tokens do not depend on the dropped pad columns.
            used_columns = attn_mask.sum(dim=0) > 0
            encoded_batch = encoded_batch[:, used_columns]
            attn_mask = attn_mask[:, used_columns]

            if add_start_token:
                bos_tokens_tensor = torch.tensor([[tokenizer.bos_token_id]] * encoded_batch.size(dim=0))
                encoded_batch = torch.cat([bos_tokens_tensor, encoded_batch], dim=1)
                attn_mask = torch.cat(
                    [torch.ones(bos_tokens_tensor.size(), dtype=torch.int64), attn_mask], dim=1
                )

            labels = encoded_batch

            with torch.no_grad():
                out_logits = model(encoded_batch, attention_mask=attn_mask).logits

            # Position t predicts token t+1: drop the last logit and the first label.
            shift_logits = out_logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            shift_attention_mask_batch = attn_mask[..., 1:].contiguous()

            # exp(mean token-level cross entropy over the unmasked positions of each row)
            perplexity_batch = torch.exp(
                (loss_fct(shift_logits.transpose(1, 2), shift_labels) * shift_attention_mask_batch).sum(1)
                / shift_attention_mask_batch.sum(1)
            )

            ppls += perplexity_batch.tolist()
            if ppl_path is not None:
                # Rewritten after every batch so partial results survive an interrupted run.
                with open(ppl_path, "w") as f:
                    json.dump({"perplexities": ppls}, f)

        return {"perplexities": ppls, "mean_perplexity": np.mean(ppls)}

# Single evaluator instance shared by every evaluation cell below.
perplexity = Perplexity()

# Local checkpoint directory; the model weights and the tokenizer are both loaded from it.
MODEL_DIR = "/hpc2hdd/home/xzou428/Yuhao/llama3-70b-instruct"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# Reuse EOS as the padding token so batched tokenization with padding=True works.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 30/30 [02:50<00:00,  5.68s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
%cd /hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning/
from pathlib import Path

predictions = []
predict_dir = "checkpoints/higpt-stage2-llama3-new_batch_rw-epoch30-8192-full_finetune/lightning_logs/version_9/predict"
predict_files = list(Path(predict_dir).glob("*.txt"))
for file in predict_files:
    with open(file, "r") as f:
        predictions.append(f.read().strip())

results = perplexity.compute(predictions=predictions, model=model, tokenizer=tokenizer, batch_size=2,
                                output_dir = Path(predict_dir))
print(results)

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning


100%|██████████| 250/250 [1:07:13<00:00, 16.14s/it]


{'perplexities': [2.4998159408569336, 2.9856221675872803, 2.4024786949157715, 5.379189968109131, 4.443045616149902, 3.92936110496521, 2.9446799755096436, 3.7958059310913086, 2.32310152053833, 2.989179849624634, 8.367166519165039, 3.9714832305908203, 4.470235347747803, 2.532318353652954, 2.378948211669922, 3.4641895294189453, 3.7956104278564453, 4.248391151428223, 6.751543998718262, 2.0880579948425293, 1.047723650932312, 4.846902370452881, 3.189065456390381, 3.957723617553711, 3.2210166454315186, 3.8150815963745117, 3.809826135635376, 3.5657310485839844, 3.9706974029541016, 2.909991502761841, 3.0065016746520996, 2.670348882675171, 3.6972227096557617, 3.3581321239471436, 4.191929817199707, 3.8466880321502686, 4.952792644500732, 4.243938446044922, 4.211695671081543, 3.15506649017334, 2.7492849826812744, 4.692835330963135, 3.0737152099609375, 4.440592288970947, 3.530892848968506, 4.226004600524902, 3.0147857666015625, 2.667144298553467, 2.9659416675567627, 4.199448108673096, 2.569269418716

In [5]:
%cd /hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning/
from pathlib import Path

predictions = []
predict_dir = "checkpoints/vanilla-llama3-nlp_rw-epoch30-8192-full_finetune/lightning_logs/version_1/predict"
predict_files = list(Path(predict_dir).glob("*.txt"))
for file in predict_files:
    with open(file, "r") as f:
        predictions.append(f.read().strip())

results = perplexity.compute(predictions=predictions, model=model, tokenizer=tokenizer, batch_size=2,
                                output_dir = Path(predict_dir))
print(results)

/hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning


100%|██████████| 250/250 [1:08:27<00:00, 16.43s/it]


{'perplexities': [7.220343112945557, 8.96116828918457, 12.989233016967773, 10.621763229370117, 4.5849409103393555, 11.24061393737793, 1.1187939643859863, 6.282163619995117, 4.431479454040527, 5.874669551849365, 7.204396724700928, 5.251492023468018, 8.113046646118164, 4.6291680335998535, 7.889655590057373, 8.999985694885254, 5.765640735626221, 5.632110595703125, 12.575323104858398, 4.418312072753906, 9.290485382080078, 10.942720413208008, 6.392186641693115, 12.512866973876953, 9.693538665771484, 9.394514083862305, 8.571351051330566, 8.944293022155762, 7.674201011657715, 5.082426071166992, 4.278109073638916, 5.771695613861084, 10.285709381103516, 3.845829725265503, 13.966651916503906, 8.982160568237305, 7.953607082366943, 10.174062728881836, 7.734488487243652, 9.132355690002441, 1.0851534605026245, 9.443883895874023, 7.021188735961914, 10.995126724243164, 9.3703031539917, 6.958315849304199, 7.682322025299072, 5.866414546966553, 9.120397567749023, 8.248562812805176, 4.903852462768555, 11.

In [4]:
%cd /hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning/
from pathlib import Path

predictions = json.load(open("inference/NLP_RW/eval_res_gemini.json"))
predictions = [list(entry.values())[0] for entry in predictions]

results = perplexity.compute(predictions=predictions, model=model, tokenizer=tokenizer, batch_size=2,
                                output_dir = None)
print(results)

import numpy

ppl = np.asarray(results["perplexities"])
ppl = ppl[ppl < 1000]

print(np.mean(ppl))

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning


  0%|          | 0/256 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
100%|██████████| 256/256 [15:38<00:00,  3.67s/it]

{'perplexities': [6.662538528442383, 6.703956127166748, 3.888385772705078, 3.860496759414673, 5.755697727203369, 6.0298566818237305, 5.871049404144287, 5.664289951324463, 6.551420211791992, 5.2591328620910645, 7.692512512207031, 5.2577714920043945, 5.179086685180664, 5.586071491241455, 5.870924472808838, 4.222850799560547, 4.087632656097412, 7.865680694580078, 4.785553455352783, 5.789429664611816, 4.1340203285217285, 3.2448477745056152, 5.420875549316406, 6.158207416534424, 4.479437351226807, 2.862560749053955, 5.847195148468018, 6.166591644287109, 7.180481910705566, 6.164158821105957, 3.264415740966797, 3.9993505477905273, 4.329326629638672, 3.422545909881592, 4.677935600280762, 7.166839599609375, 5.223176956176758, 5.143620491027832, 8.228843688964844, 6.073873043060303, 5.15476131439209, 8.705445289611816, 4.9601826667785645, 5.539163589477539, 8.62806510925293, 8.945265769958496, 3.7210423946380615, 4.114291191101074, 3.8274147510528564, 4.724334716796875, 3.085158109664917, 4.6593




In [2]:
%cd /hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning/
from pathlib import Path

predictions = json.load(open("inference/NLP_RW/gpt4o-mini/eval_res_gpt4o-mini.json"))
predictions = [list(entry.values())[0] for entry in predictions]

results = perplexity.compute(predictions=predictions, model=model, tokenizer=tokenizer, batch_size=2,
                                output_dir = None)
print(results)

  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


/hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning


  0%|          | 0/250 [00:00<?, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
100%|██████████| 250/250 [09:57<00:00,  2.39s/it]

{'perplexities': [9.312560081481934, 9.221843719482422, 8.040806770324707, 6.024981498718262, 7.501518726348877, 9.539844512939453, 11.127911567687988, 6.607606410980225, 8.762290954589844, 8.258460998535156, 8.11427116394043, 8.171050071716309, 6.917275905609131, 7.734862804412842, 7.986056327819824, 5.662349700927734, 6.705543518066406, 9.045670509338379, 6.573861122131348, 7.82411527633667, 5.186086654663086, 3.861078977584839, 7.401772499084473, 6.681850433349609, 8.068716049194336, 4.556868076324463, 9.046528816223145, 7.144874572753906, 7.762122631072998, 12.060916900634766, 5.450791835784912, 5.359674453735352, 7.533213138580322, 5.097501277923584, 6.10175085067749, 9.89030933380127, 4.886504650115967, 6.456973552703857, 7.6829447746276855, 6.95530366897583, 6.6802592277526855, 9.050185203552246, 5.664628982543945, 10.29240894317627, 9.781145095825195, 10.914595603942871, 5.041409492492676, 5.9026265144348145, 5.638894081115723, 6.4446702003479, 4.258349418640137, 8.110431671142




In [3]:
%cd /hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning/
from pathlib import Path

predictions = json.load(open("inference/NLP_RW/deepseek/eval_res_deepseek_chat_v2.json"))
predictions = [list(entry.values())[0] for entry in predictions]

results = perplexity.compute(predictions=predictions, model=model, tokenizer=tokenizer, batch_size=2,
                                output_dir = None)
print(results)

/hpc2hdd/home/xzou428/Yuhao/HiGPT-tune-lightning


100%|██████████| 250/250 [14:44<00:00,  3.54s/it]

{'perplexities': [6.760364532470703, 7.253321647644043, 5.072598934173584, 4.263315677642822, 5.974827289581299, 7.646799564361572, 10.481669425964355, 5.767916679382324, 6.563111782073975, 5.768136978149414, 7.354924201965332, 4.897534370422363, 5.496021270751953, 7.622555732727051, 5.840635776519775, 3.9819939136505127, 5.178964138031006, 6.409365653991699, 7.007185459136963, 5.217965126037598, 4.2274580001831055, 3.460437774658203, 5.892084121704102, 6.671872615814209, 4.312008380889893, 3.916489601135254, 5.1720356941223145, 5.788219451904297, 6.6099772453308105, 8.093299865722656, 4.271887302398682, 4.0712809562683105, 5.130293846130371, 4.812620639801025, 5.342617988586426, 6.949527263641357, 4.023703575134277, 4.946648120880127, 7.886467933654785, 6.339351654052734, 5.967156410217285, 9.746685028076172, 4.43704891204834, 6.975742816925049, 5.968456268310547, 6.376980781555176, 4.18852424621582, 5.5012640953063965, 5.587374210357666, 5.432607173919678, 3.941983699798584, 5.322903


