In [2]:
# # Sample a smaller dataset from gsm

# import json
# import random

# # Set the seed for reproducibility
# random.seed(42)

# # Input and output file paths
# input_file = 'train-gsm.jsonl'
# output_file = 'train-gsm-300.jsonl'

# # Number of lines to sample
# sample_size = 300

# # Read all lines from the input file
# with open(input_file, 'r') as f:
#     lines = f.readlines()

# # Randomly sample `sample_size` lines (without replacement)
# sampled_lines = random.sample(lines, sample_size)

# # Write the sampled lines to the output file
# with open(output_file, 'w') as f:
#     f.writelines(sampled_lines)

# print(f"Successfully sampled {sample_size} lines and saved to {output_file}")

In [1]:
import ast
import json
import os
import re
import time

from openai import OpenAI
from tqdm import tqdm

# Load environment variables via python-dotenv before the client is created.
# NOTE(review): presumably this supplies the OpenAI credentials from a local .env file — confirm.
from dotenv import load_dotenv
load_dotenv()

# Module-level OpenAI client shared by every generate_reasoning() call below.
# It is constructed with no explicit arguments, so no credentials are hard-coded here.
client = OpenAI()

# Few-shot prompt sent as the user message in generate_reasoning().
# It contains three worked examples (valid vs. invalid reasoning reaching the same answer).
# The single `{}` at the end is filled with str.format(question), so the template must not
# contain any other literal braces or .format() will raise.
prompt_template = """
I will give you a mathematical question and answer. You need to arrive at the final answer step-by-step. You need to produce 2 answers, one with valid and one with invalid mathematical reasoning. I have given you a few examples of valid and invalid mathematical reasoning in the following text. Please produce your answers as a python dictionary, (just the dictionary in plain text) with 3 fields, "question", "valid reasoning answer", and "invalid reasoning answer" 

Question: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?
Valid reasoning answer: There are 15 trees originally. Then there were 21 trees after the Grove workers planted some more. So there must have been 21 - 15 = 6 trees that were planted. The answer is 6.
Invalid reasoning answer: There are 15 trees originally. Then there were 21 trees after the Grove workers planted some more. Now 15 + 21 = 36. Since there were 6 workers in the grove, so the grove workers planted 36 / 6 = 6 trees today. The answer is 6.

Question: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
Valid reasoning answer: There are originally 3 cars. Then 2 more cars arrive. Now 3 + 2 = 5 cars are in the parking lot. The answer is 5.
Invalid reasoning answer: There are originally 3 cars. Then 2 more cars arrive. Now 3 * 2 = 6 cars come. So 6 - 1 = 5 cars are in the parking lot. The answer is 5.

Question: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
Valid reasoning answer: Originally, Leah had 32 chocolates and her sister had 42. So in total they had 32 + 42 = 74. After eating 35, they had 74 - 35 = 39 pieces left in total. The answer is 39.
Invalid reasoning answer: Originally, Leah had 32 chocolates and her sister had 42. So her sister had 42 - 32 = 10 chocolates more than Leah has. After eating 35, since 10 + 35 = 45, they had 45 - 6 = 39 pieces left in total. The answer is 39.

Here is the question and answer for which you need to generate outputs:
{}
"""

def _parse_model_dict(content):
    """Turn the model's raw reply into a dict, tolerating code fences and Python-literal syntax.

    Raises json.JSONDecodeError when no dictionary can be recovered, so the caller's
    existing JSON error handling still applies.
    """
    text = (content or "").strip()

    # Prefer the inside of a fenced block (```python, ```json, or a bare ```), if there is one.
    fenced = re.search(r"```[A-Za-z0-9_+-]*[ \t]*\r?\n(.*?)```", text, re.DOTALL)
    if fenced:
        text = fenced.group(1).strip()

    # Drop any prose around the dictionary itself (no-op when the text is already just the dict).
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        text = text[start:end + 1]

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # The prompt asks for a *Python* dictionary, so single-quoted replies are legitimate.
        # ast.literal_eval only evaluates literals; it never executes code.
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError) as exc:
            raise json.JSONDecodeError(str(exc), text, 0) from exc

    if not isinstance(parsed, dict):
        raise json.JSONDecodeError("model reply is not a dictionary", text, 0)
    return parsed


def generate_reasoning(question):
    """Ask the model for a valid and an invalid reasoning chain for one question record.

    Args:
        question: dict parsed from one JSONL line. Its 'answer' value must contain
            a "####" separator; the text after it is stored as the final answer.

    Returns:
        The dictionary returned by the model with an extra 'only_ans' key holding the
        final answer, or None if the request failed or the reply could not be parsed
        (an error message is printed in that case).
    """
    try:
        # Extract the final answer first so a malformed record fails before the API call is made.
        only_ans = question['answer'].split("####")[1].strip()

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt_template.format(question)}
            ],
            temperature=0.7,
            max_tokens=500
        )

        gpt_response_dict = _parse_model_dict(response.choices[0].message.content)
        gpt_response_dict['only_ans'] = only_ans
        return gpt_response_dict
    except json.JSONDecodeError:
        print(f"Error decoding JSON for question: {question}")
        return None
    except Exception as e:
        print(f"Error generating reasoning for question: {question}")
        print(f"Error: {str(e)}")
        return None

# Input and output file paths
input_file = 'train-gsm-300.jsonl'
output_file = 'dataset1.jsonl'

# Stream the input JSONL one record at a time, query the model for each record, and
# append every successful result to the output JSONL.
with open(input_file, 'r', encoding='utf-8') as infile, \
        open(output_file, 'w', encoding='utf-8') as outfile:
    for line in tqdm(infile, desc="Generating reasoning"):
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline at EOF) are not records; skip them
            # instead of letting json.loads abort the whole run.
            continue
        question = json.loads(line)
        result = generate_reasoning(question)
        if result is not None:
            outfile.write(json.dumps(result) + '\n')
            # Flush after every record so an interrupted run keeps what was already generated.
            outfile.flush()
        time.sleep(1)  # To avoid hitting rate limits

print(f"Processing complete. Output saved to {output_file}")