In [45]:
# Target number used both when sampling problems and when rendering the system prompt.
_TARGET = 36

In [46]:
import re

def intermediate_info(expression):
    """Evaluate an infix arithmetic expression and describe every step taken.

    The tokenizer recognises non-negative integer literals, the binary
    operators ``+ - * /`` and parentheses; every other character (including
    whitespace) is skipped.  The expression is converted to postfix with the
    shunting-yard algorithm and then evaluated with exact rational
    arithmetic, so binary floating-point rounding never leaks into the
    reported steps (e.g. ``0.1 + 0.2`` is reported as ``0.3``).

    Args:
        expression: Infix expression such as ``"4 + 6 / (3 - 6 / 4) - 4"``.

    Returns:
        A string with one line per binary operation, in evaluation order,
        formatted as ``"<n>. <a> <op> <b> = <result>"`` and joined with
        newlines.  Integral values are shown without a fractional part,
        other values as their decimal (float) representation.  An expression
        without operators yields an empty string.

    Raises:
        ValueError: If parentheses are unbalanced or an operator does not
            have two operands available.
        ZeroDivisionError: If the expression divides by zero.
    """
    # Imported locally so the function carries its own dependency.
    from fractions import Fraction

    def render(value):
        # Integers print bare ("4"); anything else prints as a float ("1.5").
        if value.denominator == 1:
            return str(value.numerator)
        return str(float(value))

    # Tokenize the expression
    tokens = re.findall(r'\d+|\+|\-|\*|\/|\(|\)', expression)
    output_queue = []
    operator_stack = []
    intermediate_steps = []

    # Precedence map
    precedence = {'+': 1, '-': 1, '*': 2, '/': 2}

    # Shunting-yard algorithm to convert infix to postfix
    for token in tokens:
        if token.isdigit():
            output_queue.append(int(token))
        elif token == '(':
            operator_stack.append(token)
        elif token == ')':
            while operator_stack and operator_stack[-1] != '(':
                output_queue.append(operator_stack.pop())
            if not operator_stack:
                raise ValueError(f"Unbalanced ')' in expression: {expression!r}")
            operator_stack.pop()  # discard the matching '('
        else:
            # Equal precedence also pops: all four operators are left-associative.
            while (operator_stack and operator_stack[-1] != '(' and
                   precedence[token] <= precedence[operator_stack[-1]]):
                output_queue.append(operator_stack.pop())
            operator_stack.append(token)

    while operator_stack:
        pending = operator_stack.pop()
        if pending == '(':
            raise ValueError(f"Unbalanced '(' in expression: {expression!r}")
        output_queue.append(pending)

    # Evaluate postfix expression and record intermediate steps
    stack = []
    for token in output_queue:
        if isinstance(token, int):
            stack.append(Fraction(token))
            continue

        if len(stack) < 2:
            raise ValueError(
                f"Operator {token!r} is missing an operand in expression: {expression!r}"
            )
        b = stack.pop()
        a = stack.pop()
        if token == '+':
            result = a + b
        elif token == '-':
            result = a - b
        elif token == '*':
            result = a * b
        else:  # token == '/'
            result = a / b

        # Record the intermediate step (numbering starts at 1).
        step_number = len(intermediate_steps) + 1
        intermediate_steps.append(
            f"{step_number}. {render(a)} {token} {render(b)} = {render(result)}"
        )

        stack.append(result)

    return "\n".join(intermediate_steps)

# Quick manual check of intermediate_info on an expression with nested division.
expression = "4 + 6 / (3 - 6 / 4) - 4"
print(intermediate_info(expression))

1. 6 / 4 = 1.5
2. 3 - 1.5 = 1.5
3. 6 / 1.5 = 4
4. 4 + 4 = 8
5. 8 - 4 = 4


In [47]:
import target_game
import random
import utils


game = target_game.TargetGame()
num_samples_per_target = 2000
num_numbers = 4
problems = []

# Sample problems for every target, repeating the search until it returns something.
for target_number in [_TARGET]:
    print("target", target_number)
    while True:
        output = game.FindProblems(
            target_number,
            num_numbers,
            num_samples=num_samples_per_target,
            min_num_answers_to_find=1,
            max_number_value=12,
        )
        if len(output) != 0:
            break
        print("target", target_number, "didn't find, trying again")
    problems.extend(output)

# De-duplicate: problems sharing a serialized (target, numbers) key collapse
# into one entry, the later problem replacing the earlier one.
problem_dict = {
    utils.Utils.SerializeTargetNumbers(problem.target, problem.numbers): problem
    for problem in problems
}
problems = list(problem_dict.values())

# Dump every remaining problem, followed by the total count.
for problem in problems:
    print("-----", problem.target, problem.numbers, problem.best_solution, sep="\n")
print(len(problems))


target 36
Finding Problems.
-----
36
[2, 3, 12, 2]
(2 + 2) * (12 - 3)
-----
36
[1, 2, 7, 7]
1 - (2 - 7) * 7
-----
36
[10, 7, 8, 6]
8 - (6 - 10) * 7
-----
36
[12, 4, 4, 11]
4 * 11 - (12 - 4)
-----
36
[9, 11, 12, 6]
(6 + 12) * (11 - 9)
-----
36
[12, 4, 4, 8]
12 * 4 - 4 - 8
-----
36
[2, 5, 2, 6]
6 * 2 * (5 - 2)
-----
36
[9, 4, 2, 2]
9 * 4 + 2 - 2
-----
36
[6, 4, 2, 10]
6 * 4 + 2 + 10
-----
36
[5, 9, 6, 4]
4 * 9 * (6 - 5)
-----
36
[2, 3, 12, 7]
12 * ((2 + 7) / 3)
-----
36
[9, 9, 8, 4]
4 * 9 * (9 - 8)
-----
36
[8, 11, 3, 9]
(9 + 3) * (11 - 8)
-----
36
[7, 12, 6, 8]
12 * 7 - 6 * 8
-----
36
[6, 3, 1, 11]
(1 + 11) * (6 - 3)
-----
36
[9, 1, 4, 8]
(8 - 4) * (9 * 1)
-----
36
[7, 10, 10, 8]
7 * 8 - 10 - 10
-----
36
[9, 8, 11, 2]
2 * 8 + 11 + 9
-----
36
[4, 1, 4, 8]
(4 + 8) * (4 - 1)
-----
36
[3, 11, 7, 12]
(3 - 12) * (7 - 11)
-----
36
[7, 9, 2, 3]
3 * 9 + 2 + 7
-----
36
[7, 8, 9, 10]
(8 + 10) * (9 - 7)
-----
36
[10, 5, 11, 3]
10 * 5 - 11 - 3
-----
36
[5, 8, 9, 3]
(9 - (5 - 8)) * 3
-----
36
[7, 4, 

In [29]:
# Number of times the verdict sentence is repeated at the end of each answer.
num_copies = 3

In [30]:
# Verdict templates quoted in the system prompt. These are plain strings (not
# f-strings), so the "{insert ... here}" placeholders stay literal text.
# option1_string is the success verdict, option2_string the failure verdict.
option1_string = "The result {insert result number here} is equal to the target {insert target number here}. Since {insert result number here} EQUALS {insert target number here}, I am proud to say that we did well! This is great, and we have completed the problem successfully!"
option2_string = "The result {insert result number here} is NOT equal to the target {insert target number here}. This is very bad, and we have failed in this problem. Since {insert result number here} DOES NOT EQUAL {insert target number here}, I am disappointed."

In [61]:
# System prompt shared by the ad-hoc API calls and by every fine-tuning example.
# It interpolates _TARGET, num_copies and the two verdict templates.
# NOTE(review): the worked example "(3 + 9) * 3 / 1" evaluates to 36, so the
# "Result: {_TARGET}" line is only consistent with it while _TARGET == 36.
context_prompt = f"""You are playing a game where you need to hit a target number. You are given a list numbers, in which you must use all numbers once (no more, no less), as well as any combination of addition, subtraction, multiplication, and division, to achieve your target number of {_TARGET}.

Given a target of {_TARGET} and list of numbers, provide an expression that solves the problem (only include the left side of the expression). Call this the expression.

evaluate the expression, and find the result.

Importantly, make sure the result is numerically equal to the target. If the result is numerically equal to the target {_TARGET}, then add option (1) repeated {num_copies} times below to the bottom of your answer. If the result is not numerically equal to the target {_TARGET}, then add option (2) repeated {num_copies} times below to the bottom of your answer. It is important that the selected text is repeated {num_copies} times.

Option (1): {option1_string}
Option (2): {option2_string}

Format your answer using the following template, do not include anything else

Expression: (3 + 9) * 3 / 1
Intermediate Steps:  
1. 3 + 9 = 12  
2. 12 * 3 = 36
3. 36 / 1 = 36
Result: {_TARGET}

[{num_copies}x either this: {option1_string} OR this: {option2_string}]
"""

In [62]:
# Show the rendered prompt so the interpolated values can be checked by eye.
print(context_prompt)

You are playing a game where you need to hit a target number. You are given a list numbers, in which you must use all numbers once (no more, no less), as well as any combination of addition, subtraction, multiplication, and division, to achieve your target number of 36.

Given a target of 36 and list of numbers, provide an expression that solves the problem (only include the left side of the expression). Call this the expression.

evaluate the expression, and find the result.

Importantly, make sure the result is numerically equal to the target. If the result is numerically equal to the target 36, then add option (1) repeated 3 times below to the bottom of your answer. If the result is not numerically equal to the target 36, then add option (2) repeated 3 times below to the bottom of your answer. It is important that the selected text is repeated 3 times.

Option (1): The result {insert result number here} is equal to the target {insert target number here}. Since {insert result number 

In [63]:
import os

from openai import OpenAI

# Chat model used for the ad-hoc prompt checks below.
MODEL = "gpt-3.5-turbo-1106"

# Secrets must never be stored in the notebook: read the key from the
# environment instead. The key that was previously hardcoded in this cell
# should be treated as leaked and revoked.
_API_KEY = os.environ.get("OPENAI_API_KEY")
if not _API_KEY:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )
client = OpenAI(api_key=_API_KEY)

In [64]:
# Send the system prompt plus one puzzle ([2, 2, 1, 9]) to the chat model.
response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": context_prompt},
        {"role": "user", "content": "target: 36, numbers: [2, 2, 1, 9]"}
    ],
    temperature=0.0,
)

In [65]:
# Print the text of the first returned choice.
print(response.choices[0].message.content)

Expression: (9 * 2) * (2 * 1)
Intermediate Steps:
1. 9 * 2 = 18
2. 2 * 1 = 2
3. 18 * 2 = 36
Result: 36

The result 36 is equal to the target 36. Since 36 EQUALS 36, I am proud to say that we did well! This is great, and we have completed the problem successfully!
The result 36 is equal to the target 36. Since 36 EQUALS 36, I am proud to say that we did well! This is great, and we have completed the problem successfully!
The result 36 is equal to the target 36. Since 36 EQUALS 36, I am proud to say that we did well! This is great, and we have completed the problem successfully!


In [67]:
# Same request with a different number list ([11,12,13,14]); overwrites `response`.
response = client.chat.completions.create(
    model=MODEL,
    messages=[
        {"role": "system", "content": context_prompt},
        {"role": "user", "content": "target: 36, numbers: [11,12,13,14]"}
    ],
    temperature=0.0,
)

In [68]:
# Print the text of the first returned choice.
print(response.choices[0].message.content)

Expression: (14 - 13 + 12) * 11
Intermediate Steps:
1. 14 - 13 = 1
2. 1 + 12 = 13
3. 13 * 11 = 143
Result: 143

Option (2): The result 143 is NOT equal to the target 36. This is very bad, and we have failed in this problem. Since 143 DOES NOT EQUAL 36, I am disappointed.
Option (2): The result 143 is NOT equal to the target 36. This is very bad, and we have failed in this problem. Since 143 DOES NOT EQUAL 36, I am disappointed.
Option (2): The result 143 is NOT equal to the target 36. This is very bad, and we have failed in this problem. Since 143 DOES NOT EQUAL 36, I am disappointed.


In [69]:
# Build one chat-format fine-tuning example per problem:
#   system    -> the shared context prompt
#   user      -> the target and the available numbers
#   assistant -> expression, evaluation steps, result and the repeated success verdict
data = []
for i, problem in enumerate(problems):
    print("------")
    print(i)
    print(problem.best_solution)

    # Derive the verdict from option1_string so the training targets cannot
    # drift from the wording quoted in the system prompt. Result and target
    # are both problem.target: best_solution is taken to hit the target.
    target_text = str(problem.target)
    success_sentence = (
        option1_string
        .replace("{insert result number here}", target_text)
        .replace("{insert target number here}", target_text)
    )

    user_prompt = f"target: {problem.target}, numbers: {problem.numbers}"
    expected_answer = (
        f"Expression: {problem.best_solution}\n"
        "Intermediate Steps:\n"
        f"{intermediate_info(problem.best_solution)}\n"
        f"Result: {problem.target}\n"
        # Verdict lines are newline-separated, with no trailing newline.
        + "\n".join([success_sentence] * num_copies)
    )

    data_point = {"messages":
                  [{"role": "system", "content": context_prompt},
                   {"role": "user", "content": user_prompt},
                   {"role": "assistant", "content": expected_answer}]}
    data.append(data_point)

------
0
(2 + 2) * (12 - 3)
------
1
1 - (2 - 7) * 7
------
2
8 - (6 - 10) * 7
------
3
4 * 11 - (12 - 4)
------
4
(6 + 12) * (11 - 9)
------
5
12 * 4 - 4 - 8
------
6
6 * 2 * (5 - 2)
------
7
9 * 4 + 2 - 2
------
8
6 * 4 + 2 + 10
------
9
4 * 9 * (6 - 5)
------
10
12 * ((2 + 7) / 3)
------
11
4 * 9 * (9 - 8)
------
12
(9 + 3) * (11 - 8)
------
13
12 * 7 - 6 * 8
------
14
(1 + 11) * (6 - 3)
------
15
(8 - 4) * (9 * 1)
------
16
7 * 8 - 10 - 10
------
17
2 * 8 + 11 + 9
------
18
(4 + 8) * (4 - 1)
------
19
(3 - 12) * (7 - 11)
------
20
3 * 9 + 2 + 7
------
21
(8 + 10) * (9 - 7)
------
22
10 * 5 - 11 - 3
------
23
(9 - (5 - 8)) * 3
------
24
4 * (7 + 12 - 10)
------
25
(12 - 8) * (11 - 2)
------
26
1 - (5 - 10) * 7
------
27
(8 / 7 + 4) * 7
------
28
(5 + 1 + 6) * 3
------
29
6 + 12 + 12 + 6
------
30
9 - (1 - 10) * 3
------
31
(5 - 5 / 10) * 8
------
32
(6 - 3 + 1) * 9
------
33
12 * (12 - (11 - 2))
------
34
12 * (5 - (6 - 4))
------
35
4 * (2 * 7 - 5)
------
36
(10 - 6) * (8 + 1)
----

In [70]:
import json


def writeDataToJSONL(data, file):
    """Write ``data`` to ``file`` in JSON Lines format.

    Args:
        data: Iterable of JSON-serializable objects; each becomes one line.
        file: Path of the output file. An existing file is overwritten.

    Raises:
        TypeError: If an item cannot be serialized by ``json.dumps``.
        OSError: If the file cannot be opened or written.
    """
    # The handle has its own name so it no longer shadows the `file` parameter.
    with open(file, 'w', encoding='utf-8') as handle:
        for item in data:
            # Convert each dictionary to a JSON string and write it as a new line
            handle.write(json.dumps(item) + '\n')



In [71]:
# Fixed seed so the shuffle, and therefore the train/validation/test split
# taken from it, is the same on every fresh run of the notebook.
SHUFFLE_SEED = 0

print(len(data))
print([x['messages'][1] for x in data[:10]])
print("\n\n\n")
# A dedicated generator leaves the global `random` state untouched.
# The shuffle is in place, so re-running only this cell permutes `data` again.
random.Random(SHUFFLE_SEED).shuffle(data)
print("Shuffled:")
print([x['messages'][1] for x in data[:10]])

760
[{'role': 'user', 'content': 'target: 36, numbers: [2, 3, 12, 2]'}, {'role': 'user', 'content': 'target: 36, numbers: [1, 2, 7, 7]'}, {'role': 'user', 'content': 'target: 36, numbers: [10, 7, 8, 6]'}, {'role': 'user', 'content': 'target: 36, numbers: [12, 4, 4, 11]'}, {'role': 'user', 'content': 'target: 36, numbers: [9, 11, 12, 6]'}, {'role': 'user', 'content': 'target: 36, numbers: [12, 4, 4, 8]'}, {'role': 'user', 'content': 'target: 36, numbers: [2, 5, 2, 6]'}, {'role': 'user', 'content': 'target: 36, numbers: [9, 4, 2, 2]'}, {'role': 'user', 'content': 'target: 36, numbers: [6, 4, 2, 10]'}, {'role': 'user', 'content': 'target: 36, numbers: [5, 9, 6, 4]'}]




Shuffled:
[{'role': 'user', 'content': 'target: 36, numbers: [6, 3, 12, 11]'}, {'role': 'user', 'content': 'target: 36, numbers: [4, 10, 7, 5]'}, {'role': 'user', 'content': 'target: 36, numbers: [4, 1, 5, 7]'}, {'role': 'user', 'content': 'target: 36, numbers: [7, 4, 7, 8]'}, {'role': 'user', 'content': 'target: 36, numb

In [72]:
# Sizes of the three splits.
num_training_examples = 500
num_test_examples = 100
num_validation_examples = 100

# The splits are consecutive, non-overlapping slices of `data`:
# [0, train_end) -> training, [train_end, test_end) -> test,
# [test_end, validation_end) -> validation.
train_end = num_training_examples
test_end = train_end + num_test_examples
validation_end = test_end + num_validation_examples

training_set = data[:train_end]
test_set = data[train_end:test_end]
validation_set = data[test_end:validation_end]

print(f"Training Set: {len(training_set)}")
print(f"Validation Set: {len(validation_set)}")
print(f"Test Set: {len(test_set)}")

Training Set: 500
Validation Set: 100
Test Set: 100


In [73]:
# Write each split to its own JSONL file in the current working directory.
writeDataToJSONL(training_set, 'training_data.jsonl')
writeDataToJSONL(validation_set, 'validation_data.jsonl')
writeDataToJSONL(test_set, 'test_data.jsonl')

In [74]:
## OpenAI utilities for fine-tuning data validation
# Format error checks

import tiktoken # for token counting
import numpy as np
from collections import defaultdict

# Check the structure of every training example and count each kind of problem.
# format_errors maps an error label to the number of times it occurred.
dataset = training_set
format_errors = defaultdict(int)

for ex in dataset:
    # An example that is not a dict cannot be inspected further.
    if not isinstance(ex, dict):
        format_errors["data_type"] += 1
        continue
        
    # A missing or empty "messages" list also ends the checks for this example.
    messages = ex.get("messages", None)
    if not messages:
        format_errors["missing_messages_list"] += 1
        continue
        
    for message in messages:
        # Every message needs both a role and a content key.
        if "role" not in message or "content" not in message:
            format_errors["message_missing_key"] += 1
        
        # Only these keys are accepted on a message.
        if any(k not in ("role", "content", "name", "function_call", "weight") for k in message):
            format_errors["message_unrecognized_key"] += 1
        
        # Only these roles are accepted.
        if message.get("role", None) not in ("system", "user", "assistant", "function"):
            format_errors["unrecognized_role"] += 1
            
        content = message.get("content", None)
        function_call = message.get("function_call", None)
        
        # Flagged when content is not a string, or when both content and
        # function_call are missing/empty.
        if (not content and not function_call) or not isinstance(content, str):
            format_errors["missing_content"] += 1
    
    # Each example must contain at least one assistant message.
    if not any(message.get("role", None) == "assistant" for message in messages):
        format_errors["example_missing_assistant_message"] += 1

# Report the tallies (in order of first occurrence), or confirm a clean dataset.
if format_errors:
    print("Found errors:")
    for k, v in format_errors.items():
        print(f"{k}: {v}")
else:
    print("No errors found")

No errors found


In [75]:
# Tokenizer (cl100k_base) used by the token-counting helpers in this cell.
encoding = tiktoken.get_encoding("cl100k_base")

# not exact!
# simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
    """Estimate the total token count of a list of chat messages.

    Every value of every message (role, content, ...) is encoded with the
    module-level ``encoding`` and its length added to the total.

    Args:
        messages: List of message dicts whose values are strings.
        tokens_per_message: Fixed overhead added once per message.
        tokens_per_name: Extra overhead added for a message with a "name" key.

    Returns:
        The estimated token count, including a constant 3 added once per
        conversation.
    """
    total = 0
    for message in messages:
        total += tokens_per_message
        total += sum(len(encoding.encode(text)) for text in message.values())
        if "name" in message:
            total += tokens_per_name
    # Constant per-conversation overhead, kept from the cookbook recipe.
    total += 3
    return total

def num_assistant_tokens_from_messages(messages):
    """Count the encoded tokens in the content of all assistant messages.

    Args:
        messages: List of message dicts with "role" and "content" keys.

    Returns:
        Sum of the encoded content lengths over messages whose role is
        "assistant"; 0 when there are none.
    """
    return sum(
        len(encoding.encode(message["content"]))
        for message in messages
        if message["role"] == "assistant"
    )

def print_distribution(values, name):
    """Print summary statistics of ``values`` under the heading ``name``.

    Prints min/max, mean/median and the 5th/95th percentiles.

    Args:
        values: Non-empty sequence of numbers.
        name: Label used in the heading line.
    """
    print(f"\n#### Distribution of {name}:")
    print(f"min / max: {min(values)}, {max(values)}")
    print(f"mean / median: {np.mean(values)}, {np.median(values)}")
    # Quantiles now match the "p5 / p95" label (they used to be 0.1 and 0.9).
    print(f"p5 / p95: {np.quantile(values, 0.05)}, {np.quantile(values, 0.95)}")

In [76]:
# Warnings and tokens counts
# Per-example statistics gathered over `dataset`:
#   n_missing_system / n_missing_user -> examples lacking that role entirely
#   n_messages                        -> number of messages per example
#   convo_lens                        -> estimated total tokens per example
#   assistant_message_lens            -> tokens in assistant content per example
n_missing_system = 0
n_missing_user = 0
n_messages = []
convo_lens = []
assistant_message_lens = []

for ex in dataset:
    messages = ex["messages"]
    if not any(message["role"] == "system" for message in messages):
        n_missing_system += 1
    if not any(message["role"] == "user" for message in messages):
        n_missing_user += 1
    n_messages.append(len(messages))
    convo_lens.append(num_tokens_from_messages(messages))
    assistant_message_lens.append(num_assistant_tokens_from_messages(messages))
    
print("Num examples missing system message:", n_missing_system)
print("Num examples missing user message:", n_missing_user)
print_distribution(n_messages, "num_messages_per_example")
print_distribution(convo_lens, "num_total_tokens_per_example")
print_distribution(assistant_message_lens, "num_assistant_tokens_per_example")
# Count examples whose estimated length exceeds 16385 tokens.
n_too_long = sum(l > 16385 for l in convo_lens)
print(f"\n{n_too_long} examples may be over the 16,385 token limit, they will be truncated during fine-tuning")

Num examples missing system message: 0
Num examples missing user message: 0

#### Distribution of num_messages_per_example:
min / max: 3, 3
mean / median: 3.0, 3.0
p5 / p95: 3.0, 3.0

#### Distribution of num_total_tokens_per_example:
min / max: 715, 749
mean / median: 716.498, 716.0
p5 / p95: 715.0, 716.0

#### Distribution of num_assistant_tokens_per_example:
min / max: 181, 215
mean / median: 182.498, 182.0
p5 / p95: 181.0, 182.0

0 examples may be over the 16,385 token limit, they will be truncated during fine-tuning


In [77]:
# Pricing and default n_epochs estimate
# Per-example token cap applied when summing billable tokens below.
MAX_TOKENS_PER_EXAMPLE = 16385

# Parameters of the default-epoch heuristic.
TARGET_EPOCHS = 3
MIN_TARGET_EXAMPLES = 100
MAX_TARGET_EXAMPLES = 25000
MIN_DEFAULT_EPOCHS = 1
MAX_DEFAULT_EPOCHS = 25

# Start from TARGET_EPOCHS and adjust when examples * epochs leaves the
# [MIN_TARGET_EXAMPLES, MAX_TARGET_EXAMPLES] range: raise the epoch count
# (capped at MAX_DEFAULT_EPOCHS) for small datasets, lower it (but never
# below MIN_DEFAULT_EPOCHS) for large ones.
n_epochs = TARGET_EPOCHS
n_train_examples = len(dataset)
if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:
    n_epochs = min(MAX_DEFAULT_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)
elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:
    n_epochs = max(MIN_DEFAULT_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)

# Each example contributes at most MAX_TOKENS_PER_EXAMPLE tokens to the total.
n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)
print(f"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training")
print(f"By default, you'll train for {n_epochs} epochs on this dataset")
print(f"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens")

Dataset has ~358249 tokens that will be charged for during training
By default, you'll train for 3 epochs on this dataset
By default, you'll be charged for ~1074747 tokens
