In [None]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import entropy
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. The token is read from the HF_TOKEN
# environment variable instead of being hard-coded, so the credential never
# ends up in the notebook or in version control. When the variable is unset,
# token=None lets login() fall back to its interactive prompt.
import os

login(token=os.environ.get("HF_TOKEN"))

# Question/answer records used to build the prompts; get_custom_prompt()
# reads the "query" and "answer" fields of each record.
natural_questions_dataset = load_dataset("sentence-transformers/natural-questions", split="train")

def fix_tokens(token_matrix):
    """Clean every cell of a 2-D token matrix in place.

    Each cell has every "$" replaced by "d" and is then stripped of leading
    and trailing whitespace. The matrix is modified directly and nothing is
    returned.

    NOTE(review): the "$" replacement presumably keeps matplotlib from
    treating "$" as a math-text delimiter in the heatmap annotations; confirm.
    """
    for row in range(token_matrix.shape[0]):
        columns = range(token_matrix.shape[1])
        for col in columns:
            without_dollar = token_matrix[row, col].replace("$", "d")
            token_matrix[row, col] = without_dollar.strip()

def get_custom_prompt(index=0):
    """Build the prompt for one record of the question dataset.

    Reads record `index` from the module-level natural_questions_dataset and
    returns the tuple (prompt, answer, question). The prompt holds the
    question followed by an open "Answer:" line; the reference answer is
    returned separately and is not part of the prompt.
    """
    record = natural_questions_dataset[index]
    question_text = record["query"]
    answer_text = record["answer"]
    return f"Question: {question_text}\nAnswer:", answer_text, question_text


def calculate_average_stability(consecutive_matches):
    """Return the mean of the counts stored in `consecutive_matches`.

    `consecutive_matches` maps a label to a match count. An empty mapping
    yields 0 instead of dividing by zero.
    """
    counts = consecutive_matches.values()
    entry_count = len(consecutive_matches)
    if entry_count > 0:
        return sum(counts) / entry_count
    return 0

def count_consecutive_matches_before_layer_33(tokens_matrix):
    """Count, per column, how many leading rows agree with row 0.

    `tokens_matrix` is a 2-D array of token strings (rows x positions). For
    every column the rows are walked from row 0 downwards, and the walk stops
    at the first row whose token differs from the row-0 token. Tokens are
    compared after stripping whitespace and lower-casing. Row 0 always
    matches itself, so every count is at least 1.

    Returns a dict mapping "Position <col> ('<row-0 token>')" to the count.
    """
    layer_count, position_count = tokens_matrix.shape
    reference_tokens = [cell.strip().lower() for cell in tokens_matrix[0, :]]

    run_lengths = {}
    for position in range(position_count):
        reference = reference_tokens[position]
        depth = 0
        while depth < layer_count:
            candidate = tokens_matrix[depth, position].strip().lower()
            if candidate != reference:
                break
            depth += 1
        run_lengths[f"Position {position} ('{reference}')"] = depth
    return run_lengths

def calculate_next_token_accuracy(model, tokenizer, inputs):
    """Return the fraction of tokens the model predicts from their prefix.

    Runs one forward pass and compares the arg-max prediction at position i
    with the actual token at position i + 1.

    Args:
        model: Callable causal LM with a `device` attribute whose output
            exposes `.logits`.
        tokenizer: Unused; kept so existing call sites keep working.
        inputs: Mapping with "input_ids" and "attention_mask" tensors.

    Returns:
        Accuracy in [0, 1] as a float, or 0.0 when there is nothing to score.
    """
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    predicted_ids = logits[:, :-1].argmax(dim=-1)
    target_ids = input_ids[:, 1:]

    # Score a pair only when both the context token and the target token are
    # real (mask == 1); otherwise padding would be counted as predictions.
    # For unpadded input every pair is valid, so the result is unchanged.
    valid = attention_mask[:, :-1].bool() & attention_mask[:, 1:].bool()

    total = int(valid.sum().item())
    if total == 0:
        return 0.0
    correct = int(((predicted_ids == target_ids) & valid).sum().item())
    return correct / total

def logit_lens_heatmap_from_file(model_id="FreedomIntelligence/HuatuoGPT-o1-8B", index=0):
    """Plot a logit-lens heatmap for one question of the dataset.

    Loads `model_id`, runs the prompt built by get_custom_prompt(index)
    through it once, and multiplies every entry of `outputs.hidden_states`
    by the output-embedding matrix. Each heatmap cell is annotated with the
    highest-scoring token at that layer/position and coloured by the entropy
    of the projected distribution.

    Args:
        model_id: Hugging Face model identifier passed to from_pretrained.
        index: Dataset row used to build the prompt.

    Returns:
        Tuple (tokens_matrix, expected_tokens, question_text, model,
        tokenizer, inputs). tokens_matrix is a NumPy array of cleaned top
        tokens with one row per hidden state, ordered so that row 0 is the
        last hidden state; expected_tokens are the decoded, stripped prompt
        tokens used as x-axis labels.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )
    model.eval()

    prompt, expected_answer, question_text = get_custom_prompt(index=index)
    print(f"\n Spørgsmål: {question_text}")
    print(f" Forventet svar: {expected_answer}")

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # output_scores is a generate() option, not a forward() argument, so it is
    # no longer passed; only the per-layer hidden states are needed.
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

    all_hidden = outputs.hidden_states
    # Used only for the y-axis labels.
    # NOTE(review): assumes hidden_states has num_hidden_layers + 1 entries.
    num_layers = model.config.num_hidden_layers + 1
    lm_head_weight = model.get_output_embeddings().weight.to(model.device)

    tokens_matrix = []
    probs_matrix = []

    # The lm_head weight requires grad; without no_grad every projection
    # would build an autograd graph that is never used.
    with torch.no_grad():
        for layer in all_hidden:
            # Hidden states are projected as-is (no extra normalisation here).
            # The model runs in float16: upcast the logits so the softmax and
            # the entropy over the whole vocabulary keep their precision.
            logits = torch.matmul(layer, lm_head_weight.T).float()
            probs = torch.softmax(logits, dim=-1)
            top_ids = logits.argmax(dim=-1)
            top_tokens = [tokenizer.decode(token_id) for token_id in top_ids[0]]
            H = entropy(probs.cpu().numpy(), axis=-1)
            tokens_matrix.append(top_tokens)
            probs_matrix.append(list(H[0]))

    # Reverse the layer axis so the last hidden state becomes row 0.
    tokens_matrix = np.array(tokens_matrix)[::-1, :]
    probs_matrix = np.array(probs_matrix)[::-1, :]

    fix_tokens(tokens_matrix)

    expected_tokens = [tokenizer.decode([t]) for t in inputs['input_ids'][0]]
    expected_tokens = [token.strip() for token in expected_tokens]

    plt.figure(figsize=(1 + len(expected_tokens), 10))
    sns.heatmap(
        probs_matrix,
        annot=tokens_matrix,
        fmt="",
        cmap="YlGnBu",
        xticklabels=expected_tokens,
        yticklabels=[f"L{i}" for i in range(num_layers, 0, -1)],
        cbar_kws={'label': 'Entropy'}
    )
    plt.title(f"Logit Lens Heatmap\nModel: {model_id}\nQ: {question_text}")
    plt.xlabel("Generated Token")
    plt.ylabel("Transformer Layer")
    plt.tight_layout()
    plt.show()

    return tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs

# Dataset row to analyse.
index = 0  

# Load the model, draw the logit-lens heatmap for that row and keep the
# returned model/tokenizer/inputs for the metrics below.
tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs = logit_lens_heatmap_from_file(
    model_id="FreedomIntelligence/HuatuoGPT-o1-8B",
    index=index
)

# Per position: number of consecutive rows, starting at row 0 of
# tokens_matrix, whose top token equals the row-0 token.
consecutive_matches = count_consecutive_matches_before_layer_33(tokens_matrix)
# Mean of those per-position counts.
avg_stability = calculate_average_stability(consecutive_matches)
# Fraction of prompt tokens predicted correctly from their prefix.
accuracy = calculate_next_token_accuracy(model, tokenizer, inputs)

# Report the per-position counts followed by the two summary metrics.
print("\n Antal sammenhængende matches op til lag 33:")
for token, count in consecutive_matches.items():
    print(f"{token.ljust(40)} | Matches: {count}")

print(f"\n Gennemsnitlig stabilitet: {avg_stability:.4f}")
print(f" Next-token accuracy      : {accuracy:.4f}")

In [None]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import entropy
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. The token is read from the HF_TOKEN
# environment variable instead of being hard-coded, so the credential never
# ends up in the notebook or in version control. When the variable is unset,
# token=None lets login() fall back to its interactive prompt.
import os

login(token=os.environ.get("HF_TOKEN"))

# Question/answer records used to build the prompts; get_custom_prompt()
# reads the "query" and "answer" fields of each record.
natural_questions_dataset = load_dataset("sentence-transformers/natural-questions", split="train")

def fix_tokens(token_matrix):
    """Clean every cell of a 2-D token matrix in place.

    Each cell has every "$" replaced by "d" and is then stripped of leading
    and trailing whitespace. The matrix is modified directly and nothing is
    returned.

    NOTE(review): the "$" replacement presumably keeps matplotlib from
    treating "$" as a math-text delimiter in the heatmap annotations; confirm.
    """
    for row in range(token_matrix.shape[0]):
        columns = range(token_matrix.shape[1])
        for col in columns:
            without_dollar = token_matrix[row, col].replace("$", "d")
            token_matrix[row, col] = without_dollar.strip()

def get_custom_prompt(index=0):
    """Build the prompt for one record of the question dataset.

    Reads record `index` from the module-level natural_questions_dataset and
    returns the tuple (prompt, answer, question). The prompt holds the
    question followed by an open "Answer:" line; the reference answer is
    returned separately and is not part of the prompt.
    """
    record = natural_questions_dataset[index]
    question_text = record["query"]
    answer_text = record["answer"]
    return f"Question: {question_text}\nAnswer:", answer_text, question_text


def calculate_average_stability(consecutive_matches):
    """Return the mean of the counts stored in `consecutive_matches`.

    `consecutive_matches` maps a label to a match count. An empty mapping
    yields 0 instead of dividing by zero.
    """
    counts = consecutive_matches.values()
    entry_count = len(consecutive_matches)
    if entry_count > 0:
        return sum(counts) / entry_count
    return 0

def count_consecutive_matches_before_layer_33(tokens_matrix):
    """Count, per column, how many leading rows agree with row 0.

    `tokens_matrix` is a 2-D array of token strings (rows x positions). For
    every column the rows are walked from row 0 downwards, and the walk stops
    at the first row whose token differs from the row-0 token. Tokens are
    compared after stripping whitespace and lower-casing. Row 0 always
    matches itself, so every count is at least 1.

    Returns a dict mapping "Position <col> ('<row-0 token>')" to the count.
    """
    layer_count, position_count = tokens_matrix.shape
    reference_tokens = [cell.strip().lower() for cell in tokens_matrix[0, :]]

    run_lengths = {}
    for position in range(position_count):
        reference = reference_tokens[position]
        depth = 0
        while depth < layer_count:
            candidate = tokens_matrix[depth, position].strip().lower()
            if candidate != reference:
                break
            depth += 1
        run_lengths[f"Position {position} ('{reference}')"] = depth
    return run_lengths

def calculate_next_token_accuracy(model, tokenizer, inputs):
    """Return the fraction of tokens the model predicts from their prefix.

    Runs one forward pass and compares the arg-max prediction at position i
    with the actual token at position i + 1.

    Args:
        model: Callable causal LM with a `device` attribute whose output
            exposes `.logits`.
        tokenizer: Unused; kept so existing call sites keep working.
        inputs: Mapping with "input_ids" and "attention_mask" tensors.

    Returns:
        Accuracy in [0, 1] as a float, or 0.0 when there is nothing to score.
    """
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    predicted_ids = logits[:, :-1].argmax(dim=-1)
    target_ids = input_ids[:, 1:]

    # Score a pair only when both the context token and the target token are
    # real (mask == 1); otherwise padding would be counted as predictions.
    # For unpadded input every pair is valid, so the result is unchanged.
    valid = attention_mask[:, :-1].bool() & attention_mask[:, 1:].bool()

    total = int(valid.sum().item())
    if total == 0:
        return 0.0
    correct = int(((predicted_ids == target_ids) & valid).sum().item())
    return correct / total

def logit_lens_heatmap_from_file(model_id="meta-llama/Llama-3.1-8B", index=0):
    """Plot a logit-lens heatmap for one question of the dataset.

    Loads `model_id`, runs the prompt built by get_custom_prompt(index)
    through it once, and multiplies every entry of `outputs.hidden_states`
    by the output-embedding matrix. Each heatmap cell is annotated with the
    highest-scoring token at that layer/position and coloured by the entropy
    of the projected distribution.

    Args:
        model_id: Hugging Face model identifier passed to from_pretrained.
        index: Dataset row used to build the prompt.

    Returns:
        Tuple (tokens_matrix, expected_tokens, question_text, model,
        tokenizer, inputs). tokens_matrix is a NumPy array of cleaned top
        tokens with one row per hidden state, ordered so that row 0 is the
        last hidden state; expected_tokens are the decoded, stripped prompt
        tokens used as x-axis labels.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )
    model.eval()

    prompt, expected_answer, question_text = get_custom_prompt(index=index)
    print(f"\n Spørgsmål: {question_text}")
    print(f" Forventet svar: {expected_answer}")

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # output_scores is a generate() option, not a forward() argument, so it is
    # no longer passed; only the per-layer hidden states are needed.
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

    all_hidden = outputs.hidden_states
    # Used only for the y-axis labels.
    # NOTE(review): assumes hidden_states has num_hidden_layers + 1 entries.
    num_layers = model.config.num_hidden_layers + 1
    lm_head_weight = model.get_output_embeddings().weight.to(model.device)

    tokens_matrix = []
    probs_matrix = []

    # The lm_head weight requires grad; without no_grad every projection
    # would build an autograd graph that is never used.
    with torch.no_grad():
        for layer in all_hidden:
            # Hidden states are projected as-is (no extra normalisation here).
            # The model runs in float16: upcast the logits so the softmax and
            # the entropy over the whole vocabulary keep their precision.
            logits = torch.matmul(layer, lm_head_weight.T).float()
            probs = torch.softmax(logits, dim=-1)
            top_ids = logits.argmax(dim=-1)
            top_tokens = [tokenizer.decode(token_id) for token_id in top_ids[0]]
            H = entropy(probs.cpu().numpy(), axis=-1)
            tokens_matrix.append(top_tokens)
            probs_matrix.append(list(H[0]))

    # Reverse the layer axis so the last hidden state becomes row 0.
    tokens_matrix = np.array(tokens_matrix)[::-1, :]
    probs_matrix = np.array(probs_matrix)[::-1, :]

    fix_tokens(tokens_matrix)

    expected_tokens = [tokenizer.decode([t]) for t in inputs['input_ids'][0]]
    expected_tokens = [token.strip() for token in expected_tokens]

    plt.figure(figsize=(1 + len(expected_tokens), 10))
    sns.heatmap(
        probs_matrix,
        annot=tokens_matrix,
        fmt="",
        cmap="YlGnBu",
        xticklabels=expected_tokens,
        yticklabels=[f"L{i}" for i in range(num_layers, 0, -1)],
        cbar_kws={'label': 'Entropy'}
    )
    plt.title(f"Logit Lens Heatmap\nModel: {model_id}\nQ: {question_text}")
    plt.xlabel("Generated Token")
    plt.ylabel("Transformer Layer")
    plt.tight_layout()
    plt.show()

    return tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs

# Dataset row to analyse.
index = 0  

# Load the model, draw the logit-lens heatmap for that row and keep the
# returned model/tokenizer/inputs for the metrics below.
tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs = logit_lens_heatmap_from_file(
    model_id="meta-llama/Llama-3.1-8B",
    index=index
)

# Per position: number of consecutive rows, starting at row 0 of
# tokens_matrix, whose top token equals the row-0 token.
consecutive_matches = count_consecutive_matches_before_layer_33(tokens_matrix)
# Mean of those per-position counts.
avg_stability = calculate_average_stability(consecutive_matches)
# Fraction of prompt tokens predicted correctly from their prefix.
accuracy = calculate_next_token_accuracy(model, tokenizer, inputs)

# Report the per-position counts followed by the two summary metrics.
print("\n Antal sammenhængende matches op til lag 33:")
for token, count in consecutive_matches.items():
    print(f"{token.ljust(40)} | Matches: {count}")

print(f"\n Gennemsnitlig stabilitet: {avg_stability:.4f}")
print(f" Next-token accuracy      : {accuracy:.4f}")

In [None]:
def evaluate_model_on_questions(model_id, indices):
    """Evaluate one model's logit-lens stability and next-token accuracy.

    Loads `model_id` once, then for every dataset row in `indices` projects
    each hidden state through the output-embedding matrix, measures how many
    consecutive rows (starting at the last hidden state) share the same top
    token, and computes the next-token accuracy on the prompt. Per-question
    and averaged results are printed.

    Args:
        model_id: Hugging Face model identifier passed to from_pretrained.
        indices: Iterable of dataset row indices to evaluate.

    Returns:
        Tuple (avg_stability, avg_accuracy) over the questions that were
        processed without error; both are 0.0 when none succeeded.
    """
    print(f"\n Evaluering for model: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )
    model.eval()

    # The projection matrix does not depend on the question: fetch it once
    # instead of once per loop iteration.
    lm_head_weight = model.get_output_embeddings().weight.to(model.device)

    all_accuracies = []
    all_stabilities = []

    for idx in indices:
        # Best-effort evaluation: a failing question is reported and skipped
        # so that one bad row does not abort a long run.
        try:
            prompt, _, _ = get_custom_prompt(index=idx)
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

            tokens_matrix = []
            with torch.no_grad():
                outputs = model(**inputs, output_hidden_states=True)
                for layer in outputs.hidden_states:
                    # Softmax is monotonic, so the arg-max of the raw logits
                    # is the same token without normalising the vocabulary.
                    top_ids = torch.matmul(layer, lm_head_weight.T).argmax(dim=-1)
                    tokens_matrix.append(
                        [tokenizer.decode(token_id) for token_id in top_ids[0]]
                    )

            # Reverse the layer axis so the last hidden state becomes row 0.
            tokens_matrix = np.array(tokens_matrix)[::-1, :]
            fix_tokens(tokens_matrix)

            matches = count_consecutive_matches_before_layer_33(tokens_matrix)
            avg_stab = calculate_average_stability(matches)
            acc = calculate_next_token_accuracy(model, tokenizer, inputs)

            # Record both metrics only after both were computed, so the two
            # lists always describe the same set of questions.
            all_stabilities.append(avg_stab)
            all_accuracies.append(acc)

            print(f"[{idx}] Stability: {avg_stab:.4f} | Accuracy: {acc:.4f}")

        except Exception as e:
            print(f" Fejl ved spørgsmål {idx}: {e}")

    avg_stability = sum(all_stabilities) / len(all_stabilities) if all_stabilities else 0.0
    avg_accuracy = sum(all_accuracies) / len(all_accuracies) if all_accuracies else 0.0

    print(f"\n Resultater for {model_id}")
    print(f"Stabilitet (avg): {avg_stability:.4f}")
    print(f"Accuracy (avg):   {avg_accuracy:.4f}")
    print("-" * 50)

    return avg_stability, avg_accuracy

In [None]:
# Hugging Face identifiers of the models to evaluate.
models = [
    "FreedomIntelligence/HuatuoGPT-o1-8B",
    "meta-llama/Llama-3.1-8B"
]

# Dataset rows 0..4999 are evaluated for every model.
indices = list(range(0, 5000)) 

# Run the evaluation model by model; results are printed by the function.
for model_id in models:
    evaluate_model_on_questions(model_id, indices)

In [None]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import entropy
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. The token is read from the HF_TOKEN
# environment variable instead of being hard-coded, so the credential never
# ends up in the notebook or in version control. When the variable is unset,
# token=None lets login() fall back to its interactive prompt.
import os

login(token=os.environ.get("HF_TOKEN"))

# Question/answer records used to build the prompts; get_custom_prompt()
# reads the "query" and "answer" fields of each record.
natural_questions_dataset = load_dataset("sentence-transformers/natural-questions", split="train")

def fix_tokens(token_matrix):
    """Clean every cell of a 2-D token matrix in place.

    Each cell has every "$" replaced by "d" and is then stripped of leading
    and trailing whitespace. The matrix is modified directly and nothing is
    returned.

    NOTE(review): the "$" replacement presumably keeps matplotlib from
    treating "$" as a math-text delimiter in the heatmap annotations; confirm.
    """
    for row in range(token_matrix.shape[0]):
        columns = range(token_matrix.shape[1])
        for col in columns:
            without_dollar = token_matrix[row, col].replace("$", "d")
            token_matrix[row, col] = without_dollar.strip()

def get_custom_prompt(index=0):
    """Build the prompt for one record of the question dataset.

    Reads record `index` from the module-level natural_questions_dataset and
    returns the tuple (prompt, answer, question). In this variant the prompt
    contains the question followed by its reference answer, so the model
    sees the answer text as part of its input.
    """
    record = natural_questions_dataset[index]
    question_text = record["query"]
    answer_text = record["answer"]
    return f"Question: {question_text}\nAnswer: {answer_text}", answer_text, question_text

def calculate_average_stability(consecutive_matches):
    """Return the mean of the counts stored in `consecutive_matches`.

    `consecutive_matches` maps a label to a match count. An empty mapping
    yields 0 instead of dividing by zero.
    """
    counts = consecutive_matches.values()
    entry_count = len(consecutive_matches)
    if entry_count > 0:
        return sum(counts) / entry_count
    return 0

def count_consecutive_matches_before_layer_33(tokens_matrix):
    """Count, per column, how many leading rows agree with row 0.

    `tokens_matrix` is a 2-D array of token strings (rows x positions). For
    every column the rows are walked from row 0 downwards, and the walk stops
    at the first row whose token differs from the row-0 token. Tokens are
    compared after stripping whitespace and lower-casing. Row 0 always
    matches itself, so every count is at least 1.

    Returns a dict mapping "Position <col> ('<row-0 token>')" to the count.
    """
    layer_count, position_count = tokens_matrix.shape
    reference_tokens = [cell.strip().lower() for cell in tokens_matrix[0, :]]

    run_lengths = {}
    for position in range(position_count):
        reference = reference_tokens[position]
        depth = 0
        while depth < layer_count:
            candidate = tokens_matrix[depth, position].strip().lower()
            if candidate != reference:
                break
            depth += 1
        run_lengths[f"Position {position} ('{reference}')"] = depth
    return run_lengths

def calculate_next_token_accuracy(model, tokenizer, inputs):
    """Return the fraction of tokens the model predicts from their prefix.

    Runs one forward pass and compares the arg-max prediction at position i
    with the actual token at position i + 1.

    Args:
        model: Callable causal LM with a `device` attribute whose output
            exposes `.logits`.
        tokenizer: Unused; kept so existing call sites keep working.
        inputs: Mapping with "input_ids" and "attention_mask" tensors.

    Returns:
        Accuracy in [0, 1] as a float, or 0.0 when there is nothing to score.
    """
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    predicted_ids = logits[:, :-1].argmax(dim=-1)
    target_ids = input_ids[:, 1:]

    # Score a pair only when both the context token and the target token are
    # real (mask == 1); otherwise padding would be counted as predictions.
    # For unpadded input every pair is valid, so the result is unchanged.
    valid = attention_mask[:, :-1].bool() & attention_mask[:, 1:].bool()

    total = int(valid.sum().item())
    if total == 0:
        return 0.0
    correct = int(((predicted_ids == target_ids) & valid).sum().item())
    return correct / total

def logit_lens_heatmap_from_file(model_id="meta-llama/Llama-3.1-8B", index=0):
    """Plot a logit-lens heatmap for one question of the dataset.

    Loads `model_id`, runs the prompt built by get_custom_prompt(index)
    through it once, and multiplies every entry of `outputs.hidden_states`
    by the output-embedding matrix. Each heatmap cell is annotated with the
    highest-scoring token at that layer/position and coloured by the entropy
    of the projected distribution.

    Args:
        model_id: Hugging Face model identifier passed to from_pretrained.
        index: Dataset row used to build the prompt.

    Returns:
        Tuple (tokens_matrix, expected_tokens, question_text, model,
        tokenizer, inputs). tokens_matrix is a NumPy array of cleaned top
        tokens with one row per hidden state, ordered so that row 0 is the
        last hidden state; expected_tokens are the decoded, stripped prompt
        tokens used as x-axis labels.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )
    model.eval()

    prompt, expected_answer, question_text = get_custom_prompt(index=index)
    print(f"\n Spørgsmål: {question_text}")
    print(f" Forventet svar: {expected_answer}")

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # output_scores is a generate() option, not a forward() argument, so it is
    # no longer passed; only the per-layer hidden states are needed.
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

    all_hidden = outputs.hidden_states
    # Used only for the y-axis labels.
    # NOTE(review): assumes hidden_states has num_hidden_layers + 1 entries.
    num_layers = model.config.num_hidden_layers + 1
    lm_head_weight = model.get_output_embeddings().weight.to(model.device)

    tokens_matrix = []
    probs_matrix = []

    # The lm_head weight requires grad; without no_grad every projection
    # would build an autograd graph that is never used.
    with torch.no_grad():
        for layer in all_hidden:
            # Hidden states are projected as-is (no extra normalisation here).
            # The model runs in float16: upcast the logits so the softmax and
            # the entropy over the whole vocabulary keep their precision.
            logits = torch.matmul(layer, lm_head_weight.T).float()
            probs = torch.softmax(logits, dim=-1)
            top_ids = logits.argmax(dim=-1)
            top_tokens = [tokenizer.decode(token_id) for token_id in top_ids[0]]
            H = entropy(probs.cpu().numpy(), axis=-1)
            tokens_matrix.append(top_tokens)
            probs_matrix.append(list(H[0]))

    # Reverse the layer axis so the last hidden state becomes row 0.
    tokens_matrix = np.array(tokens_matrix)[::-1, :]
    probs_matrix = np.array(probs_matrix)[::-1, :]

    fix_tokens(tokens_matrix)

    expected_tokens = [tokenizer.decode([t]) for t in inputs['input_ids'][0]]
    expected_tokens = [token.strip() for token in expected_tokens]

    plt.figure(figsize=(1 + len(expected_tokens), 10))
    sns.heatmap(
        probs_matrix,
        annot=tokens_matrix,
        fmt="",
        cmap="YlGnBu",
        xticklabels=expected_tokens,
        yticklabels=[f"L{i}" for i in range(num_layers, 0, -1)],
        cbar_kws={'label': 'Entropy'}
    )
    plt.title(f"Logit Lens Heatmap\nModel: {model_id}\nQ: {question_text}")
    plt.xlabel("Generated Token")
    plt.ylabel("Transformer Layer")
    plt.tight_layout()
    plt.show()

    return tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs

# Dataset row to analyse.
index = 0 

# Load the model, draw the logit-lens heatmap for that row and keep the
# returned model/tokenizer/inputs for the metrics below.
tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs = logit_lens_heatmap_from_file(
    model_id="meta-llama/Llama-3.1-8B",
    index=index
)

# Per position: number of consecutive rows, starting at row 0 of
# tokens_matrix, whose top token equals the row-0 token.
consecutive_matches = count_consecutive_matches_before_layer_33(tokens_matrix)
# Mean of those per-position counts.
avg_stability = calculate_average_stability(consecutive_matches)
# Fraction of prompt tokens predicted correctly from their prefix.
accuracy = calculate_next_token_accuracy(model, tokenizer, inputs)

# Report the per-position counts followed by the two summary metrics.
print("\n Antal sammenhængende matches op til lag 33:")
for token, count in consecutive_matches.items():
    print(f"{token.ljust(40)} | Matches: {count}")

print(f"\n Gennemsnitlig stabilitet: {avg_stability:.4f}")
print(f" Next-token accuracy      : {accuracy:.4f}")

In [None]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import entropy
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. The token is read from the HF_TOKEN
# environment variable instead of being hard-coded, so the credential never
# ends up in the notebook or in version control. When the variable is unset,
# token=None lets login() fall back to its interactive prompt.
import os

login(token=os.environ.get("HF_TOKEN"))

# Question/answer records used to build the prompts; get_custom_prompt()
# reads the "query" and "answer" fields of each record.
natural_questions_dataset = load_dataset("sentence-transformers/natural-questions", split="train")

def fix_tokens(token_matrix):
    """Clean every cell of a 2-D token matrix in place.

    Each cell has every "$" replaced by "d" and is then stripped of leading
    and trailing whitespace. The matrix is modified directly and nothing is
    returned.

    NOTE(review): the "$" replacement presumably keeps matplotlib from
    treating "$" as a math-text delimiter in the heatmap annotations; confirm.
    """
    for row in range(token_matrix.shape[0]):
        columns = range(token_matrix.shape[1])
        for col in columns:
            without_dollar = token_matrix[row, col].replace("$", "d")
            token_matrix[row, col] = without_dollar.strip()

def get_custom_prompt(index=0):
    """Build the prompt for one record of the question dataset.

    Reads record `index` from the module-level natural_questions_dataset and
    returns the tuple (prompt, answer, question). In this variant the prompt
    contains the question followed by its reference answer, so the model
    sees the answer text as part of its input.
    """
    record = natural_questions_dataset[index]
    question_text = record["query"]
    answer_text = record["answer"]
    return f"Question: {question_text}\nAnswer: {answer_text}", answer_text, question_text

def calculate_average_stability(consecutive_matches):
    """Return the mean of the counts stored in `consecutive_matches`.

    `consecutive_matches` maps a label to a match count. An empty mapping
    yields 0 instead of dividing by zero.
    """
    counts = consecutive_matches.values()
    entry_count = len(consecutive_matches)
    if entry_count > 0:
        return sum(counts) / entry_count
    return 0

def count_consecutive_matches_before_layer_33(tokens_matrix):
    """Count, per column, how many leading rows agree with row 0.

    `tokens_matrix` is a 2-D array of token strings (rows x positions). For
    every column the rows are walked from row 0 downwards, and the walk stops
    at the first row whose token differs from the row-0 token. Tokens are
    compared after stripping whitespace and lower-casing. Row 0 always
    matches itself, so every count is at least 1.

    Returns a dict mapping "Position <col> ('<row-0 token>')" to the count.
    """
    layer_count, position_count = tokens_matrix.shape
    reference_tokens = [cell.strip().lower() for cell in tokens_matrix[0, :]]

    run_lengths = {}
    for position in range(position_count):
        reference = reference_tokens[position]
        depth = 0
        while depth < layer_count:
            candidate = tokens_matrix[depth, position].strip().lower()
            if candidate != reference:
                break
            depth += 1
        run_lengths[f"Position {position} ('{reference}')"] = depth
    return run_lengths

def calculate_next_token_accuracy(model, tokenizer, inputs):
    """Return the fraction of tokens the model predicts from their prefix.

    Runs one forward pass and compares the arg-max prediction at position i
    with the actual token at position i + 1.

    Args:
        model: Callable causal LM with a `device` attribute whose output
            exposes `.logits`.
        tokenizer: Unused; kept so existing call sites keep working.
        inputs: Mapping with "input_ids" and "attention_mask" tensors.

    Returns:
        Accuracy in [0, 1] as a float, or 0.0 when there is nothing to score.
    """
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    predicted_ids = logits[:, :-1].argmax(dim=-1)
    target_ids = input_ids[:, 1:]

    # Score a pair only when both the context token and the target token are
    # real (mask == 1); otherwise padding would be counted as predictions.
    # For unpadded input every pair is valid, so the result is unchanged.
    valid = attention_mask[:, :-1].bool() & attention_mask[:, 1:].bool()

    total = int(valid.sum().item())
    if total == 0:
        return 0.0
    correct = int(((predicted_ids == target_ids) & valid).sum().item())
    return correct / total

def logit_lens_heatmap_from_file(model_id="FreedomIntelligence/HuatuoGPT-o1-8B", index=0):
    """Plot a logit-lens heatmap for one question of the dataset.

    Loads `model_id`, runs the prompt built by get_custom_prompt(index)
    through it once, and multiplies every entry of `outputs.hidden_states`
    by the output-embedding matrix. Each heatmap cell is annotated with the
    highest-scoring token at that layer/position and coloured by the entropy
    of the projected distribution.

    Args:
        model_id: Hugging Face model identifier passed to from_pretrained.
        index: Dataset row used to build the prompt.

    Returns:
        Tuple (tokens_matrix, expected_tokens, question_text, model,
        tokenizer, inputs). tokens_matrix is a NumPy array of cleaned top
        tokens with one row per hidden state, ordered so that row 0 is the
        last hidden state; expected_tokens are the decoded, stripped prompt
        tokens used as x-axis labels.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )
    model.eval()

    prompt, expected_answer, question_text = get_custom_prompt(index=index)
    print(f"\n Spørgsmål: {question_text}")
    print(f" Forventet svar: {expected_answer}")

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # output_scores is a generate() option, not a forward() argument, so it is
    # no longer passed; only the per-layer hidden states are needed.
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)

    all_hidden = outputs.hidden_states
    # Used only for the y-axis labels.
    # NOTE(review): assumes hidden_states has num_hidden_layers + 1 entries.
    num_layers = model.config.num_hidden_layers + 1
    lm_head_weight = model.get_output_embeddings().weight.to(model.device)

    tokens_matrix = []
    probs_matrix = []

    # The lm_head weight requires grad; without no_grad every projection
    # would build an autograd graph that is never used.
    with torch.no_grad():
        for layer in all_hidden:
            # Hidden states are projected as-is (no extra normalisation here).
            # The model runs in float16: upcast the logits so the softmax and
            # the entropy over the whole vocabulary keep their precision.
            logits = torch.matmul(layer, lm_head_weight.T).float()
            probs = torch.softmax(logits, dim=-1)
            top_ids = logits.argmax(dim=-1)
            top_tokens = [tokenizer.decode(token_id) for token_id in top_ids[0]]
            H = entropy(probs.cpu().numpy(), axis=-1)
            tokens_matrix.append(top_tokens)
            probs_matrix.append(list(H[0]))

    # Reverse the layer axis so the last hidden state becomes row 0.
    tokens_matrix = np.array(tokens_matrix)[::-1, :]
    probs_matrix = np.array(probs_matrix)[::-1, :]

    fix_tokens(tokens_matrix)

    expected_tokens = [tokenizer.decode([t]) for t in inputs['input_ids'][0]]
    expected_tokens = [token.strip() for token in expected_tokens]

    plt.figure(figsize=(1 + len(expected_tokens), 10))
    sns.heatmap(
        probs_matrix,
        annot=tokens_matrix,
        fmt="",
        cmap="YlGnBu",
        xticklabels=expected_tokens,
        yticklabels=[f"L{i}" for i in range(num_layers, 0, -1)],
        cbar_kws={'label': 'Entropy'}
    )
    plt.title(f"Logit Lens Heatmap\nModel: {model_id}\nQ: {question_text}")
    plt.xlabel("Generated Token")
    plt.ylabel("Transformer Layer")
    plt.tight_layout()
    plt.show()

    return tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs

# Dataset row to analyse.
index = 0  

# Load the model, draw the logit-lens heatmap for that row and keep the
# returned model/tokenizer/inputs for the metrics below.
tokens_matrix, expected_tokens, question_text, model, tokenizer, inputs = logit_lens_heatmap_from_file(
    model_id="FreedomIntelligence/HuatuoGPT-o1-8B",
    index=index
)

# Per position: number of consecutive rows, starting at row 0 of
# tokens_matrix, whose top token equals the row-0 token.
consecutive_matches = count_consecutive_matches_before_layer_33(tokens_matrix)
# Mean of those per-position counts.
avg_stability = calculate_average_stability(consecutive_matches)
# Fraction of prompt tokens predicted correctly from their prefix.
accuracy = calculate_next_token_accuracy(model, tokenizer, inputs)

# Report the per-position counts followed by the two summary metrics.
print("\n Antal sammenhængende matches op til lag 33:")
for token, count in consecutive_matches.items():
    print(f"{token.ljust(40)} | Matches: {count}")

print(f"\n Gennemsnitlig stabilitet: {avg_stability:.4f}")
print(f" Next-token accuracy      : {accuracy:.4f}")

In [None]:
import torch
import numpy as np
from scipy.stats import entropy
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. The token is read from the HF_TOKEN
# environment variable instead of being hard-coded, so the credential never
# ends up in the notebook or in version control. When the variable is unset,
# token=None lets login() fall back to its interactive prompt.
import os

login(token=os.environ.get("HF_TOKEN"))

# Question/answer records used to build the prompts; get_custom_prompt()
# reads the "query" and "answer" fields of each record.
natural_questions_dataset = load_dataset("sentence-transformers/natural-questions", split="train")

def fix_tokens(token_matrix):
    """Clean every cell of a 2-D token matrix in place.

    Each cell has every "$" replaced by "d" and is then stripped of leading
    and trailing whitespace. The matrix is modified directly and nothing is
    returned.
    """
    for row in range(token_matrix.shape[0]):
        columns = range(token_matrix.shape[1])
        for col in columns:
            without_dollar = token_matrix[row, col].replace("$", "d")
            token_matrix[row, col] = without_dollar.strip()

def get_custom_prompt(index=0):
    """Build the prompt for one record of the question dataset.

    Reads record `index` from the module-level natural_questions_dataset and
    returns the tuple (prompt, answer, question). In this variant the prompt
    contains the question followed by its reference answer, so the model
    sees the answer text as part of its input.
    """
    record = natural_questions_dataset[index]
    question_text = record["query"]
    answer_text = record["answer"]
    return f"Question: {question_text}\nAnswer: {answer_text}", answer_text, question_text

def calculate_average_stability(consecutive_matches):
    """Return the mean of the counts stored in `consecutive_matches`.

    `consecutive_matches` maps a label to a match count. An empty mapping
    yields 0 instead of dividing by zero.
    """
    counts = consecutive_matches.values()
    entry_count = len(consecutive_matches)
    if entry_count > 0:
        return sum(counts) / entry_count
    return 0

def count_consecutive_matches_before_layer_33(tokens_matrix):
    """Count, per column, how many leading rows agree with row 0.

    `tokens_matrix` is a 2-D array of token strings (rows x positions). For
    every column the rows are walked from row 0 downwards, and the walk stops
    at the first row whose token differs from the row-0 token. Tokens are
    compared after stripping whitespace and lower-casing. Row 0 always
    matches itself, so every count is at least 1.

    Returns a dict mapping "Position <col> ('<row-0 token>')" to the count.
    """
    layer_count, position_count = tokens_matrix.shape
    reference_tokens = [cell.strip().lower() for cell in tokens_matrix[0, :]]

    run_lengths = {}
    for position in range(position_count):
        reference = reference_tokens[position]
        depth = 0
        while depth < layer_count:
            candidate = tokens_matrix[depth, position].strip().lower()
            if candidate != reference:
                break
            depth += 1
        run_lengths[f"Position {position} ('{reference}')"] = depth
    return run_lengths

def calculate_next_token_accuracy(model, tokenizer, inputs):
    """Return the fraction of tokens the model predicts from their prefix.

    Runs one forward pass and compares the arg-max prediction at position i
    with the actual token at position i + 1.

    Args:
        model: Callable causal LM with a `device` attribute whose output
            exposes `.logits`.
        tokenizer: Unused; kept so existing call sites keep working.
        inputs: Mapping with "input_ids" and "attention_mask" tensors.

    Returns:
        Accuracy in [0, 1] as a float, or 0.0 when there is nothing to score.
    """
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits

    predicted_ids = logits[:, :-1].argmax(dim=-1)
    target_ids = input_ids[:, 1:]

    # Score a pair only when both the context token and the target token are
    # real (mask == 1); otherwise padding would be counted as predictions.
    # For unpadded input every pair is valid, so the result is unchanged.
    valid = attention_mask[:, :-1].bool() & attention_mask[:, 1:].bool()

    total = int(valid.sum().item())
    if total == 0:
        return 0.0
    correct = int(((predicted_ids == target_ids) & valid).sum().item())
    return correct / total

# Models to compare, keyed by the display name used in the printed report.
model_ids = {
    "LLaMA-3.1-8B": "meta-llama/Llama-3.1-8B",
    "HuaTuo-o1-8B": "FreedomIntelligence/HuatuoGPT-o1-8B",
}
# Dataset rows 0 .. num_questions - 1 are evaluated for every model.
num_questions = 5000

# Display name -> {"avg_stability": ..., "accuracy": ...}
results = {}

for model_name, model_id in model_ids.items():
    print(f"\n Starter analyse med model: {model_name}")

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )
    model.eval()

    # The projection matrix does not depend on the question: fetch it once
    # per model instead of once per question.
    lm_head_weight = model.get_output_embeddings().weight.to(model.device)

    avg_stabilities = []
    accuracies = []

    for index in range(num_questions):
        # Best-effort evaluation: a failing question is reported and skipped
        # so that one bad row does not abort a long run.
        try:
            prompt, expected_answer, question_text = get_custom_prompt(index=index)
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

            tokens_matrix = []
            with torch.no_grad():
                # output_scores is a generate() option, not a forward()
                # argument, so only output_hidden_states is requested.
                outputs = model(**inputs, output_hidden_states=True)
                all_hidden = outputs.hidden_states
                for layer in all_hidden:
                    # Softmax is monotonic, so the arg-max of the raw logits
                    # is the same token without normalising the vocabulary.
                    top_ids = torch.matmul(layer, lm_head_weight.T).argmax(dim=-1)
                    tokens_matrix.append(
                        [tokenizer.decode(token_id) for token_id in top_ids[0]]
                    )

            # Reverse the layer axis so the last hidden state becomes row 0.
            tokens_matrix = np.array(tokens_matrix)[::-1, :]
            fix_tokens(tokens_matrix)

            consecutive_matches = count_consecutive_matches_before_layer_33(tokens_matrix)
            avg_stability = calculate_average_stability(consecutive_matches)
            accuracy = calculate_next_token_accuracy(model, tokenizer, inputs)

            avg_stabilities.append(avg_stability)
            accuracies.append(accuracy)

            if (index + 1) % 100 == 0:
                print(f"{index + 1} spørgsmål analyseret med {model_name}...")

        except Exception as e:
            print(f" Fejl ved index {index} i model {model_name}: {e}")

    # When every question failed there is nothing to average; store NaN
    # explicitly instead of letting np.mean([]) emit a RuntimeWarning.
    results[model_name] = {
        "avg_stability": float(np.mean(avg_stabilities)) if avg_stabilities else float("nan"),
        "accuracy": float(np.mean(accuracies)) if accuracies else float("nan"),
    }

    # Drop the references to this model before the next, equally large one is
    # loaded; otherwise both sets of weights are alive during from_pretrained.
    del model, lm_head_weight
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print("\n Sammenfatning af resultater:")
for model_name, metrics in results.items():
    print(f"\n Model: {model_name}")
    print(f" Gennemsnitlig stabilitet: {metrics['avg_stability']:.4f}")
    print(f" Gennemsnitlig accuracy  : {metrics['accuracy']:.4f}")