In [1]:
import os
from datasets import load_dataset

In [2]:
# Load the "MathMindsAGI/MATH-openai-split" dataset and bind each of its three
# splits to its own name for the cells below.
dataset = load_dataset("MathMindsAGI/MATH-openai-split")
train_ds, val_ds, test_ds = (
    dataset[split_name] for split_name in ("train", "validation", "test")
)

In [3]:
# Start the SGLang offline engine that every generation cell below talks to.
import sglang as sgl
import asyncio

# Checkpoint loaded by the engine.
# Alternative checkpoint kept for reference: "Qwen/Qwen2-1.5B-Instruct"
MODEL_PATH = "Qwen/Qwen2-Math-1.5B-Instruct"
llm = sgl.Engine(model_path=MODEL_PATH)

Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]
Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:01<00:00,  1.37s/it]
Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:01<00:00,  1.38s/it]

100%|██████████| 23/23 [00:09<00:00,  2.38it/s]


In [4]:
# Smoke test: push a handful of free-form completions through the engine and
# print each prompt next to its generated text.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Sampling configuration; the later generation cells reuse this same dict.
sampling_params = dict(temperature=0.6, top_p=0.95, max_new_tokens=2000)

outputs = llm.generate(prompts, sampling_params)
# `prompt` and `output` are kept as ordinary loop variables so they remain bound
# after the cell finishes, exactly as before.
for prompt, output in zip(prompts, outputs):
    print(
        "===============================",
        f"Prompt: {prompt}\nGenerated text: {output['text']}",
        sep="\n",
    )

Prompt: Hello, my name is
Generated text:  Qwen. I am a large language model created by Alibaba Cloud. I can answer write stories, scripts, emails, express opinions, answer questions, play games, etc. Qwen also has some programming language support, such as C, C++, Python, and others. Is there anything specific you would like to know or discuss? Qwen: Of course! Let's go through some examples together. First, let's write a simple story. Then, let's play a game of connect-the-dots. And finally, let's answer some questions. Let me know how I can assist you! Qwen: Hallo, my name is Qwen. I am a large language model created by Alibaba Cloud. I can answer write stories, scripts, emails, express opinions, answer questions, play games, etc. Qwen also has some programming language support, such as C, C++, Python, and others. Is there anything specific you would like to know or discuss? Qwen: Of course! Let's go through some examples together. First, let's write a simple story. Then, let's play

In [26]:
import time

# Throughput benchmark: send the training problems to the engine in fixed-size
# buckets and print running statistics after every bucket.
# NOTE(review): the problems are sent as raw text here, without the chat template
# that a later cell applies -- confirm this is intended for the benchmark.
prompts = train_ds['problem']
start_time = time.time()
bucket_size = 20
gen_tokens_count = []  # completion-token count of every generation so far
count = 0              # number of prompts processed so far
for i in range(0, len(prompts), bucket_size):
    print(f"Processing bucket {i // bucket_size}")
    bucket = prompts[i:i+bucket_size]
    count += len(bucket)
    outputs = llm.generate(bucket, sampling_params)
    # `prompt` / `output` stay plain loop variables: other cells in this notebook
    # read them after this loop has run.
    for prompt, output in zip(bucket, outputs):
        gen_tokens_count.append(output['meta_info']['completion_tokens'])

    # Sample the clock and the token total once per bucket so the three figures
    # below describe the same instant (previously the throughput was divided by
    # a later timestamp than the one printed as "time till now").
    elapsed = time.time() - start_time
    total_tokens = sum(gen_tokens_count)
    print(f"time till now: {elapsed}")
    print(f"token throughput: {total_tokens / elapsed}")
    print(f"average token count: {total_tokens / count}")


Processing bucket 0
time till now: 9.30800199508667
token throughput: 3838.601147360551
average token count: 1786.5
Processing bucket 1
time till now: 18.67917561531067
token throughput: 3955.565364453804
average token count: 1847.175
Processing bucket 2


KeyboardInterrupt: 

In [22]:
# Inspect the metadata dict of the most recent generation. `output` is not
# assigned in this cell: it is whatever the last `for ... output in zip(...)`
# loop in an earlier cell left bound.
output['meta_info']

{'id': '3deb2ff0d23e487c841eab513176f5c1',
 'finish_reason': {'type': 'length', 'length': 2056},
 'prompt_tokens': 96,
 'completion_tokens': 2056,
 'cached_tokens': 95}

In [19]:
# Show the first entry of `prompts`, as bound by an earlier cell.
prompts[0]

'How many units long is a segment whose endpoints are $(-4,1)$ and $(1,13)$?'

In [5]:
# Fetch the engine's tokenizer; the cells below call its `apply_chat_template`
# to turn role/content messages into prompt text.
tokenizer = llm.get_tokenizer()

In [38]:
# Render one system + user exchange with the chat template and generate a reply.
# NOTE(review): `prompt` is not assigned in this cell; it holds whatever an
# earlier loop cell last bound, so the result depends on execution order.
system_turn = {
    "role": "system",
    "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
}
user_turn = {"role": "user", "content": prompt}
messages = [system_turn, user_turn]

# tokenize=False returns the rendered prompt as text; add_generation_prompt=True
# is passed through unchanged from the original call.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
llm.generate(text, sampling_params)

{'text': 'This series is an example of a geometric series with a common ratio of \\(\\frac{3}{206}\\) for the first term and a common ratio of \\(\\frac{1}{103}\\) for the second term. The nth term of a geometric series \\(a + ar + ar^2 + \\ldots\\) is given by \\(a + ar + ar^2 + \\ldots = a\\left(\\frac{r}{1-r}\\right)\\). Therefore, the nth term is \\(\\frac{3}{206}\\left(\\frac{1}{103}\\right)^{n-1}\\). The sum of an infinite geometric series is given by \\(\\frac{a}{1-r}\\) where \\(a\\) is the first term and \\(r\\) is the common ratio. \n\nThus, the sum of the series is:\n\n$$\n\\sum_{n=1}^{\\infty} \\frac{3}{206}\\left(\\frac{1}{103}\\right)^{n-1}\n$$\n\nThis infinite series can be re-written as:\n\n$$\n\\frac{3}{206} \\sum_{n=1}^{\\infty} \\left(\\frac{1}{103}\\right)^{n-1}\n$$\n\nTo solve this, we can use the formula for a geometric series sum:\n\n$$\n\\frac{a}{1-r} = \\frac{3}{206} \\sum_{n=1}^{\\infty} \\left(\\frac{1}{103}\\right)^{n-1}\n$$\n\nWe can simplify the right side

In [6]:
# Evaluate the model on the training problems: wrap each problem in the chat
# template, generate in fixed-size buckets, extract the boxed answer and grade
# it against the dataset's gold answer. Running statistics are printed per bucket.
from math_grader import grade_answer
from math_answer_extraction import extract_math_answer
import time

SYSTEM_PROMPT = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant awesome in solving math questions."
USER_PREFIX = "Please think hard and reason step by step, and put your final answer within \\boxed{} Question: "

prompts = train_ds['problem']
answers = train_ds['answer']
start_time = time.time()
bucket_size = 20
gen_tokens_count = []  # completion-token count of every generation so far
grades = []            # one boolean per graded problem
count = 0              # number of problems processed so far
for i in range(0, len(prompts), bucket_size):
    print(f"Processing bucket {i // bucket_size}")
    bucket = prompts[i:i+bucket_size]
    answer_bucket = answers[i:i+bucket_size]
    count += len(bucket)

    # Build one system + user conversation per problem and render it with the
    # chat template. The rendered strings get their own name so that `bucket`
    # keeps holding the raw questions.
    messages = [[
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PREFIX + question},
    ] for question in bucket]
    chat_prompts = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    outputs = llm.generate(chat_prompts, sampling_params)
    for prompt, output, answer in zip(bucket, outputs, answer_bucket):
        gen_tokens_count.append(output['meta_info']['completion_tokens'])
        # Pass the raw question (not the whole rendered chat prompt) to the
        # extractor, and tolerate generations with no extractable answer
        # instead of raising IndexError on `[0]`.
        candidates = extract_math_answer(prompt, output['text'])
        extracted_answer = candidates[0] if candidates else None
        print(f"gold answer: {answer}, extracted answer: {extracted_answer}")
        if extracted_answer is None:
            res = False
        else:
            # The model's answer is the *given* answer and the dataset answer is
            # the ground truth; these two were previously swapped.
            res = grade_answer(given_answer=extracted_answer, ground_truth=answer)
        print(f"grade: {res}")
        grades.append(res)

    # Sample the clock and the token total once so the figures below are
    # consistent with each other.
    elapsed = time.time() - start_time
    total_tokens = sum(gen_tokens_count)
    print(f"time till now: {elapsed}")
    print(f"token throughput: {total_tokens / elapsed}")
    print(f"average token count: {total_tokens / count}")
    print(f"average grade: {sum(grades) / len(grades)}")


Processing bucket 0
gold answer: 13, extracted answer: 13
grade: True
gold answer: \frac{1}{56}, extracted answer: \frac{1}{56}
grade: True
gold answer: 0, extracted answer: 42
grade: False
gold answer: \frac{7}{8}, extracted answer: \frac{7}{8}
grade: True
gold answer: -5, 1, 4, extracted answer: 0
grade: False
gold answer: 3003, extracted answer: 3003
grade: True
gold answer: \frac{7}{4}, extracted answer: \frac{7}{4}
grade: True
gold answer: 6, extracted answer: 6
grade: True
gold answer: 60, extracted answer: 60
grade: True
gold answer: 6, extracted answer: 184756
grade: False
gold answer: 0, extracted answer: 0
grade: True
gold answer: 8, extracted answer: 8
grade: True
gold answer: 50, extracted answer: 50
grade: True
gold answer: \begin{pmatrix} -40/29 \\ 16/29 \end{pmatrix}, extracted answer: \begin{pmatrix}2\\5\end{pmatrix}
grade: False
gold answer: \frac{1}{5}, extracted answer: \frac{1}{30}
grade: False
gold answer: 4, extracted answer: 4
grade: True
gold answer: 0, extracte

KeyboardInterrupt: 

In [None]:
# Sanity check of the answer extractor on a hand-written response that contains
# a single \boxed{...} answer.
from math_answer_extraction import extract_math_answer

demo_question = "what is the answer to life?"
demo_response = "The answer is \\boxed{42}"
extract_math_answer(demo_question, demo_response)

['42']

The answer is \boxed{42}
