In [1]:
! pip install datasets transformers torch pandas numpy tqdm

Defaulting to user installation because normal site-packages is not writeable


## Load Dataset

In [2]:
import pandas as pd

df = pd.read_parquet("hf://datasets/hendrydong/gpqa_diamond/data/test-00000-of-00001.parquet")
df.head(2)

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,solution,problem,domain
0,\boxed{10^-4 eV},Two quantum states with energies E1 and E2 hav...,Physics
1,\boxed{11},trans-cinnamaldehyde was treated with methylma...,Chemistry


## Load Model

### Load Original Model

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "Qwen/Qwen2.5-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

2025-02-09 19:43:25.198136: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1739130205.213342  102622 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1739130205.217369  102622 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.34it/s]


### Results with Original  Model

In [4]:
from tqdm import tqdm 
import torch

def format_qwen_prompt(system_message: str, user_message: str):
    """
    Formats the input prompt for Qwen2.5 models using ChatML format.

    Args:
        system_message (str): The system-level instruction.
        user_message (str): The user query.

    Returns:
        str: The formatted prompt.
    """
    prompt = (
        f"<|im_start|>system\n{system_message}<|im_end|>\n"
        f"<|im_start|>user\n{user_message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    return prompt

def generate_response(system_message, user_message, device):
    formatted_prompt = format_qwen_prompt(system_message, user_message)

    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Responses"):
    system_message = "You are a helpful AI assistant."
    df.loc[index, "Original_Model"] = generate_response(system_message, df.loc[index, "problem"], device)


Generating Responses: 100%|██████████| 198/198 [01:59<00:00,  1.65it/s]


In [5]:
df["Original_Model"].head(5)[0]

'system\nYou are a helpful AI assistant.\nuser\nTwo quantum states with energies E1 and E2 have a lifetime of 10^-9 sec and 10^-8 sec, respectively. We want to clearly distinguish these two energy levels. Which one of the following options could be their energy difference so that they can be clearly resolved?\n\nassistant\nTo determine if two quantum states can be clearly distinguished, we need to consider the energy resolution limit given'

## Results with New Model

### Load New Model

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Replace 'path/to/your/output_dir' with the actual path (e.g., args.output_dir)
model_path = "../ckpts/s1-20250209_183153"

# Load the model and tokenizer from the directory where you saved them
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

## Tokenize with padding (if needed) and explicitly get the attention mask
encoded_inputs = tokenizer(
    "Hello, how are you?", 
    return_tensors="pt", 
    padding=True  # or specify max_length if desired
)

input_ids = encoded_inputs["input_ids"]
attention_mask = encoded_inputs["attention_mask"]

# Pass the attention_mask to the model (or generate method)
outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Loading checkpoint shards: 100%|██████████| 7/7 [00:08<00:00,  1.16s/it]


Hello, how are you? I'm here to help with your writing. What would you like assistance with today?
A text is


In [20]:
def generate_from_pretrained(user_input, tokenizer): 
    # Tokenize with padding (if needed) and explicitly get the attention mask
    encoded_inputs = tokenizer(
        user_input, 
        return_tensors="pt", 
        padding=True  # or specify max_length if desired
    )

    input_ids = encoded_inputs["input_ids"]
    attention_mask = encoded_inputs["attention_mask"]

    # Move tensors to GPU by reassigning the returned tensors
    input_ids = input_ids.to("cuda")
    attention_mask = attention_mask.to("cuda")
    
    # Generate the output
    outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask)
    
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to("cuda")
for index, row in tqdm(df.iterrows(), total=len(df), desc="Generating Responses"):
    system_message = "You are a helpful AI assistant."
    df.loc[index, "New_Model"] = generate_from_pretrained(df.loc[index, "problem"], tokenizer)

Generating Responses: 100%|██████████| 198/198 [01:57<00:00,  1.68it/s]


In [23]:
df.to_excel("Results.xlsx")