In [17]:
! pip install datasets transformers torch pandas numpy tqdm openpyxl

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.9/250.9 KB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting et-xmlfile
  Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5


## Load Dataset

In [2]:
import pandas as pd

df = pd.read_parquet("hf://datasets/hendrydong/gpqa_diamond/data/test-00000-of-00001.parquet")
df.head(2)

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,solution,problem,domain
0,\boxed{10^-4 eV},Two quantum states with energies E1 and E2 hav...,Physics
1,\boxed{11},trans-cinnamaldehyde was treated with methylma...,Chemistry


## Load Model

### Load Original Model

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "Qwen/Qwen2.5-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

2025-02-10 02:36:51.117570: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739155011.132225   64369 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739155011.136215   64369 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.26it/s]


### Results with Original  Model

In [9]:
from IPython.display import Markdown, display

In [4]:
from tqdm import tqdm 
import torch

def format_qwen_prompt(system_message: str, user_message: str):
    """
    Formats the input prompt for Qwen2.5 models using ChatML format.

    Args:
        system_message (str): The system-level instruction.
        user_message (str): The user query.

    Returns:
        str: The formatted prompt.
    """
    prompt = (
        f"<|im_start|>system\n{system_message}<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    return prompt

def generate_response(system_message, user_message, device):
    formatted_prompt = format_qwen_prompt(system_message, user_message)

    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Responses"):
    system_message = "You are a helpful AI assistant. Succintly answer each question"
    df.loc[index, "Original_Model"] = generate_response(system_message, df.loc[index, "problem"], device)


Generating Responses: 100%|██████████| 198/198 [02:10<00:00,  1.52it/s]


In [5]:
df["Original_Model"].head(5)[0]

'system\nYou are a helpful AI assistant.\nuser\nTwo quantum states with energies E1 and E2 have a lifetime of 10^-9 sec and 10^-8 sec, respectively. We want to clearly distinguish these two energy levels. Which one of the following options could be their energy difference so that they can be clearly resolved?\n\nassistant\nTo resolve two quantum states, we use the concept of the resolution limit in spectroscopy, which is'

## Results with New Model

### Load New Model

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Replace 'path/to/your/output_dir' with the actual path (e.g., args.output_dir)
model_path = "../ckpts/s1_20250213_023116"

# Load the model and tokenizer from the directory where you saved them
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

## Tokenize with padding (if needed) and explicitly get the attention mask
encoded_inputs = tokenizer(
    "What is the capital of Texas", 
    return_tensors="pt", 
    padding=True  # or specify max_length if desired
)

input_ids = encoded_inputs["input_ids"]
attention_mask = encoded_inputs["attention_mask"]

# Pass the attention_mask to the model (or generate method)
outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

What is the capital of Texas?
The capital of Texas is Austin.

Here's why:

* **Austin:**  It's the


In [10]:
def query_qwen2_5(user_message: str, model, tokenizer) -> str:
    """
    Queries the Qwen2.5-14-Instruct model with the provided user_message
    and returns the assistant's response.

    Args:
        user_message (str): The user query.
        model: The loaded Qwen2.5 model.
        tokenizer: The Qwen2.5 tokenizer.

    Returns:
        str: The assistant's reply.
    """
    # Define the system message
    system_message = "You are a helpful AI assistant. Succinctly answer the provided question."

    # Format the prompt using ChatML format
    formatted_prompt = (
        f"<|im_start|>system\n{system_message}<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )

    # Set the device and prepare inputs
    device = "cuda"
    encoded_inputs = tokenizer(formatted_prompt,
                               return_tensors="pt",
                               padding=False)
    inputs = encoded_inputs["input_ids"].to(device)
    attention_mask = encoded_inputs["attention_mask"].to(device)
    model = model.to(device)

    # Generate the model output with a sufficient token budget and proper EOS handling
    outputs = model.generate(
        input_ids=inputs,
        attention_mask=attention_mask,
        max_new_tokens=32768,
        eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>")
    )

    # Decode the output (keeping the special tokens for extraction)
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=False)

    # Extract the assistant's response from the output
    assistant_part = raw_output.split("<|im_start|>assistant")[-1]
    assistant_response = assistant_part.split("<|im_end|>")[0].strip()

    return assistant_response

user_message = "What is the capital of Texas"
display(Markdown(query_qwen2_5(user_message, model, tokenizer)))

<|im_start|>think
Here's a concise answer:

The capital of Texas is Austin.

Here're some reasons whyAustin is the capital:

1. Location: Austin is located in the heart of Texas, near the Texas-Louisiana border.

2. History: Austin was founded in 1840 as the capital of the Republic of Texas. It's known for being the birthplace of President John F. Kennedy and former U.S. president Bill Clinton.

3. Culture: Austin is known for its arts scene, cuisine, and vibrant downtown area.

4. Economy: The economy of Texas is primarily centered around agriculture (especially cattle breeding), construction, transportation, and services.

5. Population: As of the 2020 census, the city of Austin has a population of approximately 697,000 people.

6. Major Cities Nearby: Other major cities in Texas include Houston (the state capital) and Dallas-Fort Worth.

7. Transportation: Austin is connected to many other major cities through Interstate highways and railroads.

8. Tourism: The city hosts events like the annual Texas State Fair and the annual Grammy Awards ceremony.

Austin's nickname is "The Rock City" due to its rock music heritage, but it's also famous for its food, culture, and business community.
<|im_start|>answer
Answer: The capital of Texas is Austin. Here are some reasons why:

1. **Location:** Austin is situated in the southeastern part of Texas, near the border with Louisiana.

2. **History:** Austin was founded in 1840 by settlers from Kentucky, Missouri, Tennessee, and Illinois. It was originally called Richmond and was renamed after General William Tecumseelus Austin, who fought in the War of 1812.

3. **Cultural Impact:** Austin has been a significant cultural center throughout its history. Its thriving arts scene, culinary delights, and diverse population contribute to its status as the second-largest metropolitan area in Texas after Houston.

4. **Economic Importance:** The region around Austin is highly economic, with industries including agriculture (particularly livestock farming), construction, transportation, and services.

5. **Population:** As of the 2020 United States Census, the city of Austin has a population of approximately 697,000 residents.

6. **Notable People:** The mayor of Austin, Kevin Thibodeaux, is known for his political career and has been involved in various public service endeavors.

7. **Key Industries:** Some of the most prominent industries in Austin include:

   * **Agriculture:** Specializing in beef cattle ranching and poultry production.
   * **Construction:** A major employer, especially in the oil and gas industry.
   * **Transportation:** Including ports and airports that support the booming tourism industry.
   * **Services:** From hotels and restaurants to healthcare facilities and educational institutions.

8. **Neighbors:** Austin is adjacent to several other major cities in Texas, such as Houston (the state capital) and Dallas-Fort Worth. These areas provide further opportunities for commerce, entertainment, and education.

9. **Nickname:** One of the city's nicknames is "The Rock City," derived from its association with the rock music genre in Texas, which flourished during the early 20th century.

10. **U.S. Census Data (2020):** The city of Austin had a population of 697,000 people.

In [12]:
def query_qwen2_5_think(user_message: str, model, tokenizer) -> str:
    """
    Queries the Qwen2.5-14-Instruct model with the provided user_message
    and returns the assistant's response.

    Args:
        user_message (str): The user query.
        model: The loaded Qwen2.5 model.
        tokenizer: The Qwen2.5 tokenizer.

    Returns:
        str: The assistant's reply.
    """
    # Define the system message
    system_message = "You are a helpful AI assistant. Succinctly answer the provided question."
    system_message2 = "Think"
    
    # Additional query instructing the assistant to think before answering
    additional_instruction = ("Before answering, please take a moment to think about the question "
                              "and then provide your succinct final answer.")
    
    # Format the prompt using ChatML format with an extra user message
    formatted_prompt = (
        f"<|im_start|>system\n{system_message}<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        f"<|im_start|>system\n{system_message2}<|im_end|>\n"
        f"<|im_start|>system\n{additional_instruction}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )

    # Set the device and prepare inputs
    device = "cuda"
    encoded_inputs = tokenizer(formatted_prompt,
                               return_tensors="pt",
                               padding=False)
    inputs = encoded_inputs["input_ids"].to(device)
    attention_mask = encoded_inputs["attention_mask"].to(device)
    model = model.to(device)

    # Generate the model output with a sufficient token budget and proper EOS handling
    outputs = model.generate(
        input_ids=inputs,
        attention_mask=attention_mask,
        max_new_tokens=32768,
        eos_token_id=tokenizer.convert_tokens_to_ids("<|im_end|>")
    )

    # Decode the output (keeping the special tokens for extraction)
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=False)

    # Extract the assistant's response from the output
    assistant_part = raw_output.split("<|im_start|>assistant")[-1]
    assistant_response = assistant_part.split("<|im_end|>")[0].strip()

    return assistant_response

# Example usage
user_message = "What is the capital of Texas"
display(Markdown(query_qwen2_5_think(user_message, model, tokenizer)))

Answer: The capital of Texas is Austin.
Here's why:

* **Austin:**  Austin is the state capital of Texas.
* **Reason for confusion:** There was once a name for the state capital that was similar (Austin), but this has since been changed. The current official name is Austin.
* **Geographical context:** Austin is located in Travis County, which is part of Texas.

This simple answer directly addresses the question by identifying the state capital of Texas.

In [18]:
from tqdm import tqdm

def eval_original_model():
    # Import Dataset
    df = pd.read_parquet("hf://datasets/hendrydong/gpqa_diamond/data/test-00000-of-00001.parquet")

    # Import Model
    model_name = "../ckpts/s1_20250213_023116"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Responses"):
        df.loc[index, "updated_Model"] = query_qwen2_5(df.loc[index, "problem"], model, tokenizer)
        df.to_excel("Results_Checkpoint_Model_Short.xlsx")

    return
eval_original_model()

Generating Responses:   1%|          | 1/198 [17:26<57:15:22, 1046.31s/it]


KeyboardInterrupt: 