# Proxy Perplexity

The goal of this notebook is to assess the feasibility of using the perplexity metric as a proxy for the ground-truth metric, on a dataset of 50 samples with generations from the Llama3.2-3B-Instruct model using 5 different random generations.


In [1]:
import polars as pl
import seaborn as sns
import json
import os
from utils.calculate_perplexity import calculate_perplexity

# Expose only the GPU with index 2 to this process.
# NOTE(review): the index is machine-specific, and the variable presumably has to be
# set before CUDA is initialised — confirm that importing utils.calculate_perplexity
# above does not already initialise CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

  from .autonotebook import tqdm as notebook_tqdm


### Importing Data

In [2]:
#### Import collections
# Read every Arrow IPC file found under collections/<sub-directory>/ and tag each
# frame with the name of the sub-directory it came from in a "seed" column.
# - Listings are sorted because os.listdir returns entries in arbitrary order,
#   which would make the row order of the concatenated frame non-reproducible.
# - The loop variables avoid the name `dir`, which would shadow the builtin
#   `dir()` for every later cell of the notebook.
collections_list = []
for seed_dir in sorted(os.listdir("collections")):
    for file_name in sorted(os.listdir(f"collections/{seed_dir}")):
        collections_list.append(
            pl.read_ipc(f"collections/{seed_dir}/{file_name}").with_columns(
                pl.lit(seed_dir).alias("seed")
            )
        )
collections = pl.concat(collections_list)

    

In [3]:
# Preview the first five rows of the concatenated collections frame.
collections.head(n=5)

collection_idx,test_idx,input,evaluation,seed
i64,i64,"array[i64, 100]",f64,str
1000,0,"[0, 1, … 0]",0.0,"""7270"""
1000,1,"[0, 1, … 0]",1.0,"""7270"""
1000,2,"[0, 1, … 0]",1.0,"""7270"""
1000,3,"[0, 1, … 0]",1.0,"""7270"""
1000,4,"[0, 1, … 0]",1.0,"""7270"""


In [4]:
#### Import pre-collections
# Read every Arrow IPC file found under pre_collections/<sub-directory>/ and tag
# each frame with the name of the sub-directory it came from in a "seed" column.
# - Listings are sorted because os.listdir returns entries in arbitrary order,
#   which would make the row order of the concatenated frame non-reproducible.
# - The loop variables avoid the name `dir`, which would shadow the builtin
#   `dir()` for every later cell of the notebook.
pre_collections_list = []
for seed_dir in sorted(os.listdir("pre_collections")):
    for file_name in sorted(os.listdir(f"pre_collections/{seed_dir}")):
        pre_collections_list.append(
            pl.read_ipc(f"pre_collections/{seed_dir}/{file_name}").with_columns(
                pl.lit(seed_dir).alias("seed")
            )
        )
pre_collections = pl.concat(pre_collections_list)

In [5]:
# Preview the first five rows of the concatenated pre-collections frame.
pre_collections.head(n=5)

collection_idx,test_idx,input,predicted_output,true_output,seed
i64,i64,"array[i64, 100]",str,list[str],str
1400,0,"[0, 0, … 1]","""Judith Keppel""","[""Judith Cynthia Aline Keppel""]","""7270"""
1400,1,"[0, 0, … 1]","""George W. Bush""","[""George W. Bush"", ""Bush""]","""7270"""
1400,2,"[0, 0, … 1]","""Sammi Smith""","[""Kris Kristofferson""]","""7270"""
1400,3,"[0, 0, … 1]","""October 27, 1904.""","[""October 27 , 1904"", ""1904""]","""7270"""
1400,4,"[0, 0, … 1]","""2004""","[""2004"", ""February 25 , 2004""]","""7270"""


In [6]:
## wiki import
# Read the processed wiki feather file and prepend a running row number as "idx".
WIKI_PATH = "../../data/wiki_dump2018_nq_open/processed/wiki.feather"
wiki_raw = pl.read_ipc(WIKI_PATH)
wiki = wiki_raw.with_row_index(name="idx")
del wiki_raw  # keep the notebook namespace identical to a single chained expression
wiki.head(n=3)

idx,text,title
u32,str,str
0,"""Aaron Aaron ( or ; ""Ahärôn"") i…","""Aaron"""
1,"""God at Sinai granted Aaron the…","""Aaron"""
2,"""his rod turn into a snake. The…","""Aaron"""


In [7]:
# Map each sub-directory name of retrieval/ to the parsed JSON content of a file
# stored inside it.
# - Each file is opened in a `with` block so the handle is closed deterministically
#   instead of being left to the garbage collector.
# - Listings are sorted because os.listdir returns entries in arbitrary order.
# - The loop variables avoid the name `dir`, which would shadow the builtin.
retrievals_idx = {}
for seed_dir in sorted(os.listdir("retrieval")):
    for file_name in sorted(os.listdir(f"retrieval/{seed_dir}")):
        # NOTE(review): when a sub-directory holds several files, each one overwrites
        # the previous entry, so only the last file (in sorted order) is kept —
        # confirm that one file per sub-directory is the intended layout.
        with open(f"retrieval/{seed_dir}/{file_name}") as handle:
            retrievals_idx[seed_dir] = json.load(handle)
print(retrievals_idx.keys())

dict_keys(['7270'])


In [8]:
## wiki import
# NOTE(review): this cell is an exact copy of the earlier wiki-import cell. Reading
# the feather file a second time only costs time and memory, so the already-loaded
# frame is reused when it exists; the read remains as a fallback so the cell still
# works when executed on its own. Consider deleting this cell altogether.
WIKI_PATH = "../../data/wiki_dump2018_nq_open/processed/wiki.feather"
if "wiki" not in globals():
    wiki = pl.read_ipc(WIKI_PATH).with_row_index("idx")
wiki.head(3)

idx,text,title
u32,str,str
0,"""Aaron Aaron ( or ; ""Ahärôn"") i…","""Aaron"""
1,"""God at Sinai granted Aaron the…","""Aaron"""
2,"""his rod turn into a snake. The…","""Aaron"""


In [9]:
## questions import
# Read the processed test-split feather file and prepend a running row number as "idx".
QUESTIONS_PATH = "../../data/nq_open_gold/processed/test.feather"
questions = pl.read_ipc(QUESTIONS_PATH).with_row_index(name="idx")
questions.head(n=3)

idx,example_id,question,answers,text,idx_gold_in_corpus
u32,i64,str,list[str],str,i64
0,-3290814144789249484,"""who got the first nobel prize …","[""Wilhelm Conrad Röntgen""]","""The first Nobel Prize in Physi…",20994698
1,8851020722386421469,"""when is the next deadpool movi…","[""May 18 , 2018""]","""Deadpool 2 is scheduled to be …",21032933
2,955374967862684316,"""the south west wind blows acro…","[""till September""]","""With the Intertropical Converg…",21032934


## Estimation of Differential Perplexity - Single Sample

In [10]:
# Keep only the rows of test sample 3 whose "evaluation" value equals 1.
collections.filter((pl.col("test_idx") == 3) & (pl.col("evaluation") == 1))

collection_idx,test_idx,input,evaluation,seed
i64,i64,"array[i64, 100]",f64,str
1000,3,"[0, 1, … 0]",1.0,"""7270"""
1001,3,"[0, 1, … 0]",1.0,"""7270"""
1002,3,"[0, 0, … 0]",1.0,"""7270"""
1003,3,"[0, 0, … 0]",1.0,"""7270"""
1004,3,"[0, 0, … 0]",1.0,"""7270"""
…,…,…,…,…
995,3,"[0, 0, … 0]",1.0,"""7270"""
996,3,"[0, 0, … 0]",1.0,"""7270"""
997,3,"[1, 0, … 0]",1.0,"""7270"""
998,3,"[0, 1, … 0]",1.0,"""7270"""


In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator
import torch



# Load the tokenizer and the causal-LM weights (as bfloat16) from the local
# checkpoint directory. The device map assigns the root module key "" to the index
# reported by Accelerator().process_index.
model_path = "../../models/llms/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map={"": Accelerator().process_index},
    torch_dtype=torch.bfloat16,
)


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.09s/it]


In [12]:
# Show the text of the question stored at row 3.
questions["question"][3]

'what does hp mean in war and order'

In [None]:
# Perplexity of the question at row 3 when no context argument is passed.
question_text = questions[3].select("question").item()
base_perplexity = calculate_perplexity(question_text, model, tokenizer, device=Accelerator().device)
base_perplexity

217.00242614746094

In [17]:
# Perplexity of the same question when a short context string is supplied.
# NOTE(review): the context looks like a deliberately unrelated word list — confirm.
question_text = questions[3].select("question").item()
calculate_perplexity(
    question_text,
    model,
    tokenizer,
    context="Abacate, cirugia, RIndônia, platerlmito, 23",
    device=Accelerator().device,
)

708.57763671875