In [1]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
import random
import gc



In [2]:
import re
from collections import defaultdict

In [3]:
# Hugging Face checkpoint identifier shared by the tokenizer and the model.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Load the weights as float16 with automatic device placement, then put the
# module into eval mode.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
)
model = model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/680 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/28.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-000002.safetensors:   0%|          | 0.00/6.62G [00:00<?, ?B/s]

model-00001-of-000002.safetensors:   0%|          | 0.00/8.61G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [5]:
# Mount Google Drive at /content/drive; the dataset and output paths used in
# this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Path of the HotpotQA training JSON on the mounted Drive.
input_path = "/content/drive/MyDrive/Cluster-proj/dataset/hotpotQA/hotpot_train_v1.1.json"

In [7]:
# Read the dataset file as UTF-8 text and parse it as JSON in one go.
with open(input_path, mode="r", encoding="utf-8") as dataset_file:
    hotpot_data = json.loads(dataset_file.read())

In [8]:
# Slice [80:91] keeps at most 11 entries (0-based positions 80 through 90).
# NOTE(review): the output files written later are named "sample81-90", which
# reads like 10 examples — confirm whether the end bound should be 90 or 91.
samples = hotpot_data[80:91]

In [9]:
def generate_with_logits(question, context="", strategy="greedy"):
    """Generate a step-by-step answer and the probability of each emitted token.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        question: Question text inserted into the prompt.
        context: Context inserted into the prompt through f-string formatting,
            so a non-string value is rendered with its ``str()`` form.
        strategy: ``"greedy"``, ``"beam"`` (3 beams, no sampling) or
            ``"sampling"`` (top-k 50, top-p 0.95, temperature 0.7). Any other
            value adds no decoding options, leaving them to the model's own
            generation configuration.

    Returns:
        A tuple ``(answer, token_probs)``. ``answer`` is the decoded
        completion with special tokens removed and surrounding whitespace
        stripped. ``token_probs`` is a list of ``(token_text, probability)``
        pairs, one per generated token, where ``token_text`` is the stripped
        string form of the token and ``probability`` is the softmax of the
        scores returned by ``generate`` at that step, read at the chosen token.
        Per the transformers documentation those scores are the *processed*
        ones, so for ``"sampling"`` they reflect the warped distribution.
    """
    prompt = (
        f"Please reason through the question step-by-step using a structured format.\n"
        f"Each step must begin with Step i, where i is the step number (e.g., Step 1, Step 2).\n\n"
        f"Limit your answer to a maximum of 300 tokens.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {question}\n"
        f"Answer:\n"
    )

    # NOTE(review): truncation to 2048 tokens is applied to the whole prompt;
    # if the tokenizer truncates on the right (the usual default — confirm),
    # a very long context would cut off the question and the "Answer:" cue.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)

    gen_args = {
        "max_new_tokens": 300,
        "return_dict_in_generate": True,
        "output_scores": True,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id
    }

    if strategy == "greedy":
        gen_args["do_sample"] = False
    elif strategy == "beam":
        gen_args["num_beams"] = 3
        gen_args["early_stopping"] = True
        # Pin sampling off explicitly: if the checkpoint's generation config
        # enables do_sample, num_beams > 1 would otherwise run beam *sampling*
        # instead of deterministic beam search.
        gen_args["do_sample"] = False
    elif strategy == "sampling":
        gen_args.update({
            "do_sample": True,
            "top_k": 50,
            "top_p": 0.95,
            "temperature": 0.7
        })

    output = model.generate(**inputs, **gen_args)

    input_len = inputs['input_ids'].shape[-1]
    generated_ids = output.sequences[0][input_len:]

    raw_tokens = tokenizer.convert_ids_to_tokens(generated_ids)
    clean_tokens = [tokenizer.convert_tokens_to_string([tok]).strip() for tok in raw_tokens]

    # With beam search every entry of output.scores holds one row per beam, and
    # the returned sequence may hop between rows from step to step.
    # beam_indices records which row produced each token of the returned
    # sequence; it is absent for greedy decoding and sampling, where row 0 is
    # the only row.
    beam_indices = getattr(output, "beam_indices", None)

    chosen_probs = []
    for step, token_id in enumerate(generated_ids.tolist()):
        if step >= len(output.scores):
            break
        row = 0
        if beam_indices is not None:
            if step >= beam_indices.shape[-1]:
                break
            row = int(beam_indices[0, step])
            if row < 0:
                # Negative entries are padding past the end of the hypothesis.
                break
        # Softmax in float32 on the model's device and pull out a single
        # scalar, instead of copying a full-vocabulary array to the CPU for
        # every generation step.
        step_distribution = F.softmax(output.scores[step][row].float(), dim=-1)
        chosen_probs.append(float(step_distribution[token_id]))

    answer = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return answer, list(zip(clean_tokens, chosen_probs))


In [10]:
def run_phase1_with_logits(samples):
    """Run greedy, beam and sampling generation for every sample.

    Args:
        samples: Iterable of dicts providing the keys ``question``,
            ``context``, ``supporting_facts``, ``answer``, ``type`` and
            ``level``; ``_id`` is optional.

    Returns:
        Dict keyed by the sample's ``_id`` (or ``q_<n>`` when it is missing,
        with ``n`` the number of results collected so far). Each value holds
        the sample's metadata plus one ``{"answer", "token_probs"}`` entry per
        decoding strategy.
    """
    strategies = ("greedy", "beam", "sampling")
    results = {}

    for item in samples:
        qid = item.get("_id", f"q_{len(results)}")
        question = item["question"]
        context = item["context"]

        # Generate with each strategy in a fixed order before assembling the record.
        per_strategy = {}
        for strategy in strategies:
            answer, token_probs = generate_with_logits(question, context, strategy)
            per_strategy[strategy] = {
                "answer": answer,
                "token_probs": token_probs,
            }

        record = {
            "question": question,
            "support_fact": item["supporting_facts"],
            "context": context,
            "true_ans": item["answer"],
            "type": item["type"],
            "level": item["level"],
        }
        record.update(per_strategy)
        results[qid] = record

    return results


In [11]:
# Generate answers and per-token probabilities for the selected samples with
# all three decoding strategies; the result is a dict keyed by example id.
results = run_phase1_with_logits(samples)



In [12]:
# Persist the raw generations. The file is opened as UTF-8 explicitly because
# ensure_ascii=False writes non-ASCII characters verbatim, and the platform's
# default text encoding is not guaranteed to be able to represent them.
with open("/content/drive/MyDrive/Cluster-proj/output/DeepSeek-7B-sample81-90.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

In [13]:
import json
import re
from collections import defaultdict

def extract_steps_from_token_probs(token_probs, max_lookahead=3):
    """Group ``(token, prob)`` pairs by the "Step N" header that precedes them.

    A header is the exact token ``"Step"`` followed, within ``max_lookahead``
    tokens, by a token made only of digits. Consecutive digit tokens are joined
    into one number, so a step number emitted one digit per token ("1", "0")
    is read as step 10 rather than step 1 followed by a stray "0". The header
    tokens themselves (``"Step"``, anything between it and the number, and the
    number) are not stored.

    A ``"Step"`` token that is not followed by a number is ordinary text and is
    kept as content of the current step.

    Args:
        token_probs: Sequence of ``(token, prob)`` pairs in generation order.
        max_lookahead: How many tokens after ``"Step"`` are inspected for the
            step number.

    Returns:
        Dict mapping each step number (``int``) to a list of
        ``{"token": ..., "prob": ...}`` dicts in order of appearance. Tokens
        that occur before the first header are discarded, and tokens of a
        repeated step number are appended to the same list.
    """
    def is_number(text):
        return re.fullmatch(r"\d+", text) is not None

    steps = defaultdict(list)
    current_step = None
    total = len(token_probs)
    i = 0
    while i < total:
        token, prob = token_probs[i]
        if token == "Step":
            # Find the first digit token inside the lookahead window, if any.
            window_end = min(i + max_lookahead + 1, total)
            number_pos = next(
                (k for k in range(i + 1, window_end) if is_number(token_probs[k][0])),
                None,
            )
            if number_pos is not None:
                # Consume the whole run of adjacent digit tokens as one number.
                end = number_pos
                digits = ""
                while end < total and is_number(token_probs[end][0]):
                    digits += token_probs[end][0]
                    end += 1
                current_step = int(digits)
                i = end
                continue
            # No number follows: fall through and treat "Step" as content.
        if current_step is not None:
            steps[current_step].append({"token": token, "prob": prob})
        i += 1
    return dict(steps)



In [14]:

decode_modes = ["greedy", "beam", "sampling"]

# Attach a per-step breakdown next to each decoding mode's token probabilities.
for example in results.values():
    for mode in decode_modes:
        if mode not in example or "token_probs" not in example[mode]:
            continue
        example[f"{mode}_step_token_probs"] = extract_steps_from_token_probs(
            example[mode]["token_probs"]
        )


In [15]:
# Persist the results including the per-step breakdown. The file is opened as
# UTF-8 explicitly because ensure_ascii=False writes non-ASCII characters
# verbatim, and the platform's default text encoding is not guaranteed to be
# able to represent them.
with open("/content/drive/MyDrive/Cluster-proj/output/DeepSeek-7B-sample81-90_with_all_steps.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)