In [2]:
from dpps.SLM import SLM

# Initialize the model
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tiny_llama = SLM(model_name)

# Configure the generation
tiny_llama.set_config(temperature=1.0, max_new_tokens=100)
print("Current Config:", tiny_llama.get_config())
tiny_llama.clip_model(epsilon=10)
print(f"Clipped Config {tiny_llama.get_config()}")

# Input text
input_text = "Paraphrase the following question:\nA revolving door is convenient for two direction travel, but it also serves as a security measure at a what?\nParaphrased Question:\n"
pure_text = "A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?"

# Generate text
result = tiny_llama.generate(input_text, pure_text=pure_text)
print("Generated Text:", result["output_text"])

# Check logits
all_logits = tiny_llama.check_logits(result["output_ids"])
all_selected_token_logits = tiny_llama.check_token_logits(result["output_ids"])

# Assertions and validation
assert len(all_logits) == len(all_selected_token_logits)

input_ids = tiny_llama.tokenizer(input_text, return_tensors="pt").input_ids
input_length = input_ids.shape[-1]
all_logits_length = len(all_logits)
all_length = len(result["output_ids"].sequences[0])

print(f"Logits Length: {all_logits_length}, Token Length: {all_length}, Input Length: {input_length}")
assert all_logits_length == all_length - input_length


Current Config: {'max_new_tokens': 100, 'do_sample': True, 'temperature': 1.0, 'top_k': 50, 'output_scores': True, 'return_dict_in_generate': True, 'pad_token_id': 128001, 'eos_token_id': 128001}
Clipped Config {'max_new_tokens': 100, 'do_sample': True, 'temperature': 2.8614608000000006, 'top_k': 50, 'output_scores': True, 'return_dict_in_generate': True, 'pad_token_id': 128001, 'eos_token_id': 128001, 'logits_processor': [<dpps.SLM.ClipLogitsProcessor object at 0x7f9a7cf310d0>]}
Generated Text: Paraphrase the following question:
A revolving door is convenient for two direction travel, but it also serves as a security measure at a what?
Paraphrased Question:
Two revolving doors allow passengers to enter on different doors; this system saves security and other cost. When a traveler chooses for one part, the whole system opens easily if an intrusions from exterior occurred, so there are
Logits Length: 43, Token Length: 79, Input Length: 36


In [2]:
from datasets import load_dataset
import numpy as np
data = load_dataset("nielsr/docvqa_1200_examples_donut")
train_df = data["train"].to_pandas()
print(train_df.shape)

(1000, 8)


In [3]:
import tqdm
from dpps.SLM import SLM

# Initialize the model
model_name = "meta-llama/Llama-3.1-8B-Instruct"
tiny_llama = SLM(model_name)
all_logits = []
for i, row in tqdm.tqdm(train_df.iterrows(), total=len(train_df)):
    input_text = row["query"]['en']
    prompt = "Paraphrase the following question:\n" + input_text + "\nParaphrased Question:\n"
    result = tiny_llama.generate(prompt)
    
    all_logits.append(tiny_llama.check_token_logits(result["output_ids"]))
    
print(f"Mean Logits: {np.mean(all_logits):.6f}, Std Logits: {np.std(all_logits):.6f}")

Loading checkpoint shards: 100%|██████████| 4/4 [00:03<00:00,  1.22it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.
100%|██████████| 1000/1000 [1:12:54<00:00,  4.37s/it]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (1000,) + inhomogeneous part.

In [6]:
total_min, total_max, total_mean = [], [], []

for logits_per_row in all_logits:
    min_logits = np.min(logits_per_row)
    max_logits = np.max(logits_per_row)
    mean_logits = np.mean(logits_per_row)
    total_min.append(min_logits)
    total_max.append(max_logits)
    total_mean.append(mean_logits)
    
print(f"Total Min: {np.mean(total_min):.6f}, Total Max: {np.mean(total_max):.6f}, Total Mean: {np.mean(total_mean):.6f}")
print(f"Total Min: {np.std(total_min):.6f}, Total Max: {np.std(total_max):.6f}, Total Mean: {np.std(total_mean):.6f}")
    

Total Min: 14.819062, Total Max: 31.031375, Total Mean: 22.944258
Total Min: 2.993405, Total Max: 6.170953, Total Mean: 2.420791
