In [None]:
import os
import json
import re
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM

## Opening the file

In [None]:
# Read the question/solution dataset. The encoding is pinned to UTF-8 so that
# decoding does not depend on the platform's default locale.
with open("../Dataset/final_questions_only.json", "r", encoding="utf-8") as f:
    content = json.load(f)

## Import the tokenizer and the model

In [None]:
# Selecting the model
# NOTE: CodeLlama requires a license. You must obtain one through Meta and register it with your Hugging Face account.
# To use CodeLlama as the paper has done, run the following before executing the code below:
# from huggingface_hub import login
# login(token = "Your_HuggingFace_Token")

# model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"
model_name = "meta-llama/CodeLlama-13b-Instruct-hf"

# Load the tokenizer and model.
# The cache location can be overridden with the HF_CACHE_DIR environment
# variable so the notebook is not tied to a single cluster's filesystem; the
# original project path stays the default when the variable is unset.
cache_custom_dir = os.environ.get("HF_CACHE_DIR", "/data/gpfs/projects/punim2402/huggingface")
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir = cache_custom_dir)
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir = cache_custom_dir, device_map="cuda")

# set the model to evaluation mode
model.eval()

## Main function for the Log Prob Calculation

In [None]:
# Main function for the log prob
def compute_log_prob(context_question: str, reference_answer: str) -> float:
    """
    Compute the total log probability of the reference answer given the question.

    The system prompt, `context_question` (user turn) and
    "Student: " + `reference_answer` (assistant turn) are rendered with the
    tokenizer's chat template. Every token that follows the system+user prefix
    is scored by the model and the per-token log probabilities are summed.

    Relies on the notebook-level `tokenizer` and `model` objects.

    Args:
        context_question: Content of the user turn.
        reference_answer: Answer text to score; "Student: " is prepended to it.

    Returns:
        Sum of the log probabilities of all tokens after the system+user prefix.
    """
    format_prompt = """Start your answer with Student: """

    context_prompt = """
            You are a first-year programming student who is learning how to debug your Python code. You are in the middle of a tutoring session.
            You are given a word problem task, a code that you have submitted with a bug, and a conversation history that you have been having with your tutor.
            Your task is to respond to the latest question asked by your teacher in the dialogue so far.
            Your goal is to find a solution to the bug code through Socratic dialogue.

            Respond like a real student: 
            - Think out loud and explain your reasoning.
            - Only respond to the most recent question.
            - Use the code you've been working on to guide your answer.
        """

    sysprompt = format_prompt + context_prompt

    # Conversation up to (and including) the user turn; built once and reused
    # for both the prefix and the full sequence so the two cannot drift apart.
    messages_to_last_utterance = [
        {"role": "system", "content": sysprompt},
        {"role": "user", "content": context_question},
    ]
    messages_with_answer = messages_to_last_utterance + [
        {"role": "assistant", "content": "Student: " + reference_answer}
    ]

    # Follow the model's device instead of hard-coding "cuda".
    input_ids = tokenizer.apply_chat_template(
        messages_with_answer, return_tensors="pt"
    ).to(model.device)

    # Length of the system+user prefix in tokens.
    # NOTE(review): this assumes the prefix tokenizes identically on its own and
    # as the start of the full conversation - confirm for the template in use.
    # NOTE(review): the prefix is rendered without a generation prompt, so for
    # templates that emit an assistant header those header tokens are scored as
    # part of the answer - confirm this is intended.
    input_ids_up_to_user = tokenizer.apply_chat_template(
        messages_to_last_utterance, return_tensors="pt"
    )
    user_length = input_ids_up_to_user.shape[1]

    # Everything after the prefix is treated as the answer.
    answer_token_ids = input_ids[:, user_length:]
    answer_length = answer_token_ids.shape[1]

    with torch.no_grad():
        logits = model(input_ids).logits

    # The logit at position i predicts token i + 1, hence the shift by one.
    logits_for_answer = logits[:, user_length - 1 : user_length + answer_length - 1, :]

    # Upcast before the softmax so half-precision models do not lose accuracy
    # in the log-probabilities; this is a no-op for float32 logits.
    log_probs = F.log_softmax(logits_for_answer.float(), dim=-1)

    # Pick out the log-probability assigned to each actual answer token.
    gather_index = answer_token_ids.to(log_probs.device).unsqueeze(-1)
    token_log_probs = log_probs.gather(2, gather_index).squeeze(-1)

    return token_log_probs.sum().item()

## Running the calculation for each question and solution pair

In [None]:
# Score the reference solution against every question level of every entry.
# The entry name is printed as a lightweight progress marker.
results = {}
for filename, value in content[0].items():
    print(filename)
    per_level_scores = results[filename] = {}
    reference_answer = value['solution']

    # Every key except the solution itself is treated as a question level.
    levels = [key for key in value.keys() if key != 'solution']

    for level in levels:
        context_question = value[level]
        log_prob = compute_log_prob(context_question, reference_answer)
        per_level_scores[level] = log_prob

## Saving output to file

In [7]:
# Change name accordingly to the model used
output_path = "../Result/log_prob_score_final_question_codeLlama.json"

# Make sure the target directory exists so the scores from the expensive run
# above are not lost to a missing-directory error at write time.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(results, f)