In [1]:
import sys
import os

from transformers import AutoTokenizer, AutoModelForCausalLM
import datasets
from functools import partial

# Add the project root directory to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from src.utils.dataset_tokenization import process_data

In [2]:
model_path = "../../self-corrective-llama_untrained"
# model_name = "MathBite/self_corrective_llama_3.1_8B_untrained"
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

# model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)

# print(model)

In [3]:
SPECIAL_INSTRUCTION = "\nAs you write your answer, you can correct yourself using these tools: Use <DEL_W> to take back the word before this token, <DEL_S> to remove the entire sentence before this token, and <DEL_A> to scrap everything you've written and start again."
INSERTION_MARKER = "<|start_header_id|>user<|end_header_id|>"
DELETION_MARKERS = ["<DEL_W>", "<DEL_S>", "<DEL_A>"]
DELETION_TOKEN_IDS = tokenizer.convert_tokens_to_ids(DELETION_MARKERS)

In [4]:
data_path = "../../dataset/train.json"
dataset = datasets.load_dataset("json", data_files=data_path)

In [5]:
dataset

DatasetDict({
    train: Dataset({
        features: ['input', 'incorrect_response', 'errors', 'hallucinated_text', 'correct_response', 'additional_info'],
        num_rows: 40761
    })
})

In [6]:
# sample = dataset["train"][7295]
# print(sample)
# print(sample["correct_response"])
# print(sample["errors"])
# print(sample["hallucinated_text"])

In [7]:
# for i in range(7295, 7296):
#     sample = dataset["train"][i]
    
#     res = process_data(sample, tokenizer, SPECIAL_INSTRUCTION, INSERTION_MARKER, DELETION_TOKEN_IDS[0], DELETION_TOKEN_IDS[1], DELETION_TOKEN_IDS[2])

#     hall_text_idx = [i for i, label in enumerate(res["hallucination_labels"]) if label == 1]
#     hall_text = [res["input_ids"][i] for i in hall_text_idx]
    
#     print(sample["correct_response"])
#     print("================================================")
#     for error in sample["hallucinated_text"]:
#         print(error)
#     print("================================================")
#     print(tokenizer.decode(hall_text))
#     print("--------------------------------\n\n")

In [8]:
SPECIAL_INSTRUCTION = "\nAs you write your answer, you can correct yourself using these tools: Use <DEL_W> to take back the word before this token, <DEL_S> to remove the entire sentence before this token, and <DEL_A> to scrap everything you've written and start again."
INSERTION_MARKER = "<|start_header_id|>user<|end_header_id|>"
DELETION_MARKERS = ["<DEL_W>", "<DEL_S>", "<DEL_A>"]
DELETION_TOKEN_IDS = tokenizer.convert_tokens_to_ids(DELETION_MARKERS)

mapper = partial(
    process_data,
    tokenizer=tokenizer,
    special_instruction=SPECIAL_INSTRUCTION,
    insertion_marker=INSERTION_MARKER,
    del_w_token_id=DELETION_TOKEN_IDS[0],
    del_s_token_id=DELETION_TOKEN_IDS[1],
    del_a_token_id=DELETION_TOKEN_IDS[2]
)

In [9]:
dataset.cleanup_cache_files()

{'train': 3}

In [10]:
tokenized_dataset = dataset.map(mapper, batched=False, load_from_cache_file=False)
tokenized_dataset = tokenized_dataset["train"]
columns_to_remove = [
    "input", "correct_response", "incorrect_response", 
    "additional_info", "errors", "hallucinated_text"
]

tokenized_dataset = tokenized_dataset.remove_columns(columns_to_remove)


Map:   0%|          | 0/40761 [00:00<?, ? examples/s]

In [11]:
tokenized_dataset

Dataset({
    features: ['input_ids', 'attention_mask', 'labels', 'hallucination_labels'],
    num_rows: 40761
})

In [12]:
split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
print(split_dataset)
train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'hallucination_labels'],
        num_rows: 36684
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels', 'hallucination_labels'],
        num_rows: 4077
    })
})


In [13]:
output_dir = "../../dataset/training"
split_dataset.save_to_disk(output_dir)

Saving the dataset (0/1 shards):   0%|          | 0/36684 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/4077 [00:00<?, ? examples/s]

In [14]:
dataset = datasets.load_from_disk("../../dataset/training")
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

print(train_dataset)
print(eval_dataset)

Dataset({
    features: ['input_ids', 'attention_mask', 'labels', 'hallucination_labels'],
    num_rows: 36684
})
Dataset({
    features: ['input_ids', 'attention_mask', 'labels', 'hallucination_labels'],
    num_rows: 4077
})


In [15]:
# import torch

In [16]:
# sample = train_dataset[5]
# sample["input_ids"] = torch.tensor(sample["input_ids"]).reshape(1, -1)
# sample["hallucination_labels"] = torch.tensor(sample["hallucination_labels"]).reshape(1, -1)
# sample["labels"] = torch.tensor(sample["labels"]).reshape(1, -1)

In [17]:
# sample

In [18]:
# print(sample["labels"][sample["labels"] != -100])
# print(sample["hallucination_labels"][sample["hallucination_labels"] != -100])

In [19]:
# model.forward(
#     input_ids = sample["input_ids"],
#     hallucination_labels = sample["hallucination_labels"],
#     labels = sample["labels"]
# )

In [20]:
del_tokens = ["<DEL_W>", "<DEL_S>", "<DEL_A>"]


for i in range(100, 200):
    sample = train_dataset[i]
    print(tokenizer.decode(sample["input_ids"]), "\n")

    for j in range(1, 4):
        hall_text_idx = [i for i, label in enumerate(sample["hallucination_labels"]) if label == j]
        hall_text = [sample["input_ids"][i] for i in hall_text_idx]

        print("--------------------------------\n")
        print(f"Deletion token: {del_tokens[j-1]}")
        print(tokenizer.decode(hall_text))
        
    
    print("########################################################\n\n")

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a specialized question-answering AI. Your task is to give a concise answer to the question using *only* the provided context. Make sure to always give an answer.<|eot_id|>
As you write your answer, you can correct yourself using these tools: Use <DEL_W> to take back the word before this token, <DEL_S> to remove the entire sentence before this token, and <DEL_A> to scrap everything you've written and start again.<|start_header_id|>user<|end_header_id|>

Context:
'''
The Boston television DMA, which also includes Manchester, New Hampshire, is the 8th largest in the United States. The city is served by stations representing every major American network, including WBZ-TV and its sister station WSBK-TV (the former a CBS O&O, the latter an MyNetwork TV affiliate), WCVB-TV (ABC), WHDH (NBC), WFXT (Fox), and WLVI (The CW). The city is also home to PBS station WGBH-TV, a major producer of PBS programs, which also operates WGBX

In [21]:
1/0

ZeroDivisionError: division by zero

In [None]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    BitsAndBytesConfig,
)
import torch
from src.trainer import SelfCorrectionTrainer

In [None]:
trainer = SelfCorrectionTrainer(model = "None")

In [None]:
sample = train_dataset[0]
sample["input_ids"] = torch.tensor(sample["input_ids"]).reshape(1, -1)
sample["hallucination_labels"] = torch.tensor(sample["hallucination_labels"]).reshape(1, -1)
sample["labels"] = torch.tensor(sample["labels"]).reshape(1, -1)
print(sample)

{'input_ids': tensor([[128000, 128006,   9125, 128007,    271,   2675,    527,    264,  28175,
           3488,     12,    598,     86,   4776,  15592,     13,   4718,   3465,
            374,    311,   3041,    264,  64694,   4320,    311,    279,   3488,
           1701,    353,   3323,      9,    279,   3984,   2317,     13,   7557,
           2771,    311,   2744,   3041,    459,   4320,     13, 128009,    198,
           2170,    499,   3350,    701,   4320,     11,    499,    649,   4495,
           6261,   1701,   1521,   7526,     25,   5560,    220, 128256,    311,
           1935,   1203,    279,   3492,   1603,    420,   4037,     11,    220,
         128257,    311,   4148,    279,   4553,  11914,   1603,    420,   4037,
             11,    323,    220, 128258,    311,  21512,   4395,    499,   3077,
           5439,    323,   1212,   1578,     13, 128006,    882, 128007,    271,
           2014,    512,  15029,     38,  94412,  17057,  44895,    323,   4332,
          3956

In [None]:
output = sample.copy()
# output["logits"] = torch.tensor(output["input_ids"]).reshape(1, -1, 1)
output["logits"] = torch.zeros(1, output["input_ids"].shape[-1], len(tokenizer.get_vocab())) - 100
for i in range(output["input_ids"].shape[-1]):
    output["logits"][0, i, output["input_ids"][0, i]] = 100
output["hallucination_logits"] = torch.tensor(output["hallucination_labels"]).reshape(1, -1, 1)

  output["hallucination_logits"] = torch.tensor(output["hallucination_labels"]).reshape(1, -1, 1)


In [None]:
output["logits"][0, :-1, ...] = output["logits"].clone()[0, 1:, ...]

In [None]:
trainer.compute_loss(sample, output, len(tokenizer.get_vocab()))

torch.Size([1, 330, 128259])
torch.Size([1, 330, 1])
torch.Size([1, 330])
torch.Size([1, 330])
tensor([-100,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,
           1,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


{'loss': tensor(2.1412e-05),
 'token_loss': tensor(0.),
 'hallucination_loss': tensor(4.2823e-05),
 'f1_score': 1.0}

In [None]:
import torch

logits = torch.zeros(1, 10, 100)
hallucination_logits = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).reshape(1, 10, 1)

additional_logits = torch.zeros_like(logits)
additional_logits[:, :, -3:] = hallucination_logits
logits = logits + additional_logits
print(logits)

tensor([[[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,
           1.,  1.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-1B-Instruct",
)

In [None]:
print(base_model.lm_head.weight.shape)
print(base_model.lm_head.weight[-5:, :10])

torch.Size([128256, 2048])
tensor([[-1.4603e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.0133e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9755e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.0580e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.3635e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.1921e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03]],
       grad_fn=<SliceBackward0>)


In [None]:
base_model.resize_token_embeddings(len(tokenizer.get_vocab())+3)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Embedding(128259, 2048)

In [None]:
print(base_model.lm_head.weight.shape)
print(base_model.lm_head.weight[-8:, :10])

torch.Size([128259, 2048])
tensor([[-1.4603e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.0133e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9755e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.0580e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.3635e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [-1.1921e-06,  1.0620e-02, -1.9043e-02,  1.4343e-02, -4.1809e-03,
         -5.4932e-03,  2.9602e-03,  6.4697e-03,  7.0190e-03, -3.7842e-03],
        [ 2.9184e-03,  5.1047e-03, -1.4342e-02,  1.5109e-02, -4.7149e-05,
         -1.9760e-03,  1.8892e-03,  2.7835e-03,  5.7583e-03, -3.2311e-04],
        [ 2.9185e-03,  5.1044e-03, -1.4343e-02,  1.5110e-02, -4.7632e-05,
     

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import os
import sys
import torch

# Add the project root directory to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '../..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [None]:
path = "../../self-corrective-llama_untrained"
# path = "meta-llama/Llama-3.2-1B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(path)
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
model

SelfCorrectiveLlama(
  (model): LlamaModel(
    (embed_tokens): Embedding(128259, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): LlamaRMSNorm((2048,), eps=1e-05)
    (rotary_em

In [None]:
import torch

def generate_with_hallucination_analysis(model, tokenizer, prompt_text, max_new_tokens=50):
    """
    Generates text and then runs a second pass to get hallucination logits
    for the generated tokens, avoiding interference with the generate loop.
    """
    # Ensure the model is in evaluation mode
    model.eval()
    
    # --- Pass 1: Generate the text ---
    # We use the standard, unmodified generate method.
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    
    # Decode the full generated text
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
    
    # --- Pass 2: Run a single forward pass on the full sequence to get all logits ---
    # This is more efficient than collecting them step-by-step.
    with torch.no_grad():
        full_sequence_outputs = model(generated_ids)
    
    # Extract the hallucination logits from the output
    hallucination_logits = full_sequence_outputs.hallucination_logits
    
    # We only care about the logits for the *newly generated* tokens.
    # The logit at position `i` is the prediction for the token at `i+1`.
    prompt_len = inputs.input_ids.shape[1]
    generated_hallucination_logits = hallucination_logits[:, prompt_len-1:-1, :]
    
    return generated_text, generated_hallucination_logits

# --- Example Usage ---
text, hall_logits = generate_with_hallucination_analysis(
    model, 
    tokenizer, 
    "What is the capital of France?",
    max_new_tokens=50
)
print("Generated Text:", text)
print("Hallucination Logits Shape:", hall_logits.shape)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Generated Text: <|begin_of_text|>What is the capital of France? Paris.
The capital of France is Paris. Paris is the most populous city in France and is known for its rich history, art, fashion, and cuisine. It is also home to many famous landmarks such as the Eiffel Tower, Notre Dame
Hallucination Logits Shape: torch.Size([1, 50, 1])


In [None]:
hall_logits

tensor([[[-0.4946],
         [-0.3281],
         [-0.1490],
         [-0.2916],
         [-0.5148],
         [-0.4501],
         [-0.3027],
         [-0.6625],
         [-0.1426],
         [-0.6518],
         [-0.6752],
         [-0.5286],
         [-0.0123],
         [ 0.0166],
         [ 0.0294],
         [-0.3079],
         [-0.4251],
         [-0.5867],
         [-0.6510],
         [-0.3012],
         [-0.2220],
         [-0.0650],
         [-0.0759],
         [ 0.0711],
         [-0.4129],
         [ 0.2408],
         [-0.8680],
         [ 0.1490],
         [-0.5196],
         [ 0.1267],
         [-0.2769],
         [-0.3641],
         [-0.5106],
         [-0.6895],
         [-0.5240],
         [-0.4381],
         [-0.3532],
         [-0.2535],
         [ 0.1079],
         [ 0.2258],
         [ 0.4481],
         [-0.1586],
         [-0.2062],
         [-0.3744],
         [ 0.3176],
         [-0.0758],
         [-0.2633],
         [-0.3717],
         [-0.1243],
         [-0.0575]]]

In [None]:
p1 = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a specialized question-answering AI. Your task is to give a concise answer to the question using *only* the provided context. Make sure to always give an answer.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nContext:\n'''\nSichuan has been historically known as the \"Province of Abundance\". It is one of the major agricultural production bases of China. Grain, including rice and wheat, is the major product with output that ranked first in China in 1999. Commercial crops include citrus fruits, sugar cane, sweet potatoes, peaches and grapes. Sichuan also had the largest output of pork among all the provinces and the second largest output of silkworm cocoons in 1999. Sichuan is rich in mineral resources. It has more than 132 kinds of proven underground mineral resources including vanadium, titanium, and lithium being the largest in China. The Panxi region alone possesses 13.3% of the reserves of iron, 93% of titanium, 69% of vanadium, and 83% of the cobalt of the whole country. Sichuan also possesses China's largest proven natural gas reserves, the majority of which is transported to more developed eastern regions.\n'''\n\nQuestion: What are the major agricultural outputs of Sichuan?<|eot_id|><|start_header_id|>assistant<|end_header_id|>",
p1 = "What is the capital of France?"
inputs = tokenizer(p1, return_tensors="pt")
print(inputs)

{'input_ids': tensor([[128000,   3923,    374,    279,   6864,    315,   9822,     30]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}


In [None]:
# res = model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], output_hallucination_logits=True)
res = model.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [None]:
res

tensor([[128000,   3923,    374,    279,   6864,    315,   9822,     30,  12366,
            627,    791,   6864,    315,   9822,    374,  12366,     13,  12366,
            374,    279,   1455,  95551,   3363,    304,   9822,    323,    374,
           3967]])

In [None]:
tokens = res

In [None]:
# tokens, hall = res

In [None]:
tokenizer.decode(tokens[0])

'<|begin_of_text|>What is the capital of France? Paris.\nThe capital of France is Paris. Paris is the most populous city in France and is known'

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

base_model_name = "meta-llama/Llama-3.2-1B-Instruct"
special_tokens = ["<DEL_W>", "<DEL_S>", "<DEL_A>"]
intermediate_save_path = "./temp_resized_model"

# --- Step 1: Load, Resize, and Save the STANDARD Llama model ---
print("--- Step 1: Loading and resizing original LlamaForCausalLM ---")
# Load the original, standard model class
original_tokenizer = AutoTokenizer.from_pretrained(base_model_name)
original_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
)

# Add tokens and resize the embeddings of the standard model
original_tokenizer.add_special_tokens({'additional_special_tokens': special_tokens})
original_model.resize_token_embeddings(len(original_tokenizer))

# Save this intermediate model. It is still a LlamaForCausalLM, just with a bigger lm_head
print(f"--- Saving resized LlamaForCausalLM to {intermediate_save_path} ---")
original_model.save_pretrained(intermediate_save_path)
original_tokenizer.save_pretrained(intermediate_save_path)

# Clear memory
del original_model
del original_tokenizer
torch.cuda.empty_cache()

--- Step 1: Loading and resizing original LlamaForCausalLM ---


KeyboardInterrupt: 

In [None]:
# --- Step 2: Load the RESIZED model and run the prompt ---
print("\n--- Step 2: Loading the RESIZED LlamaForCausalLM and testing ---")
# Load the model you just saved. This is still the standard class.
resized_tokenizer = AutoTokenizer.from_pretrained(intermediate_save_path)
resized_model = AutoModelForCausalLM.from_pretrained(
    intermediate_save_path,
)

print(resized_model.lm_head.weight.shape)

prompt = "What is the capital of France?"
inputs = resized_tokenizer(prompt, return_tensors="pt")
outputs = resized_model.generate(**inputs, max_new_tokens=50)
print("--- Output from RESIZED LlamaForCausalLM ---")
print(resized_tokenizer.decode(outputs[0], skip_special_tokens=False))

# Clear memory
del resized_model
del resized_tokenizer
torch.cuda.empty_cache()


--- Step 2: Loading the RESIZED LlamaForCausalLM and testing ---


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


torch.Size([128259, 2048])
--- Output from RESIZED LlamaForCausalLM ---
<|begin_of_text|>What is the capital of France? Paris
The capital of France is Paris, which is also the most populous city in the country. Paris is known for its rich history, art, fashion, and cuisine. It is home to many famous landmarks, such as the Eiffel Tower


In [None]:
# # --- Step 3 (Optional): Confirm your custom model behaves the same ---
# print("\n--- Step 3: Loading the RESIZED model into your custom class ---")
# # This is the final check. We load the same resized model, but now we
# # let it be interpreted as your custom class.
# # We must include the local modeling.py file via `trust_remote_code=True`.
# custom_tokenizer = AutoTokenizer.from_pretrained(intermediate_save_path)
# custom_model = AutoModelForCausalLM.from_pretrained(
#     intermediate_save_path,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
#     trust_remote_code=True # This will use your local `modeling.py`
# )

# inputs = custom_tokenizer(prompt, return_tensors="pt").to("cuda")
# outputs = custom_model.generate(**inputs, max_new_tokens=50)
# print("--- Output from RESIZED SelfCorrectiveLlama ---")
# print(custom_tokenizer.decode(outputs[0], skip_special_tokens=False))