In [1]:
! pip install datasets transformers torch pandas numpy tqdm

Defaulting to user installation because normal site-packages is not writeable
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 KB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting transformers
  Downloading transformers-4.48.3-py3-none-any.whl (9.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m95.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.5/78.5 KB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess<0.70.17
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 KB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.2
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K    

## Load Dataset

In [2]:
import pandas as pd

df = pd.read_parquet("hf://datasets/hendrydong/gpqa_diamond/data/test-00000-of-00001.parquet")
df.head(2)

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,solution,problem,domain
0,\boxed{10^-4 eV},Two quantum states with energies E1 and E2 hav...,Physics
1,\boxed{11},trans-cinnamaldehyde was treated with methylma...,Chemistry


## Load Model

### Load Original Model

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "Qwen/Qwen2.5-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

2025-02-10 02:36:51.117570: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739155011.132225   64369 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739155011.136215   64369 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.26it/s]


### Results with Original  Model

In [4]:
from tqdm import tqdm 
import torch

def format_qwen_prompt(system_message: str, user_message: str):
    """
    Formats the input prompt for Qwen2.5 models using ChatML format.

    Args:
        system_message (str): The system-level instruction.
        user_message (str): The user query.

    Returns:
        str: The formatted prompt.
    """
    prompt = (
        f"<|im_start|>system\n{system_message}<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    return prompt

def generate_response(system_message, user_message, device):
    formatted_prompt = format_qwen_prompt(system_message, user_message)

    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Responses"):
    system_message = "You are a helpful AI assistant. Succintly answer each question"
    df.loc[index, "Original_Model"] = generate_response(system_message, df.loc[index, "problem"], device)


Generating Responses: 100%|██████████| 198/198 [02:10<00:00,  1.52it/s]


In [5]:
df["Original_Model"].head(5)[0]

'system\nYou are a helpful AI assistant.\nuser\nTwo quantum states with energies E1 and E2 have a lifetime of 10^-9 sec and 10^-8 sec, respectively. We want to clearly distinguish these two energy levels. Which one of the following options could be their energy difference so that they can be clearly resolved?\n\nassistant\nTo resolve two quantum states, we use the concept of the resolution limit in spectroscopy, which is'

## Results with New Model

### Load New Model

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Replace 'path/to/your/output_dir' with the actual path (e.g., args.output_dir)
model_path = "../ckpts/s1-20250210_021009"

# Load the model and tokenizer from the directory where you saved them
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

## Tokenize with padding (if needed) and explicitly get the attention mask
encoded_inputs = tokenizer(
    "Hello, how are you?", 
    return_tensors="pt", 
    padding=True  # or specify max_length if desired
)

input_ids = encoded_inputs["input_ids"]
attention_mask = encoded_inputs["attention_mask"]

# Pass the attention_mask to the model (or generate method)
outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Loading checkpoint shards: 100%|██████████| 3/3 [00:01<00:00,  1.72it/s]


Hello, how are you? Nice to meet you. How is it going with you?
Hello! I'm doing well, thank


In [11]:
def generate_from_pretrained(user_input, tokenizer): 
    # Tokenize with padding (if needed) and explicitly get the attention mask
    encoded_inputs = tokenizer(
        user_input, 
        return_tensors="pt", 
        padding=True  # or specify max_length if desired
    )

    input_ids = encoded_inputs["input_ids"]
    attention_mask = encoded_inputs["attention_mask"]

    # Move tensors to GPU by reassigning the returned tensors
    input_ids = input_ids.to("cuda")
    attention_mask = attention_mask.to("cuda")
    
    # Generate the output
    outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask)
    
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to("cuda")
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Responses"):
    system_message = "You are a helpful AI assistant. Succintly answer each question."
    df.loc[index, "New_Model"] = generate_from_pretrained(df.loc[index, "problem"], tokenizer)

Generating Responses: 100%|██████████| 198/198 [02:09<00:00,  1.53it/s]


In [10]:
df.to_excel("Results_Extra_Runs.xlsx")