In [1]:
from model import DeepLeo
model = DeepLeo() # TODO: Model endpoint

In [2]:
# Creating the dataset
import json
import random

data = []

for index in range(1, 101):
    a = random.randint(10**(index-1), 10**index - 1)
    b = random.randint(10**(index-1), 10**index - 1)
    question = f"{a} + {b} = "
    answer = a + b
    data.append({
        "index": index,
        "question": question,
        "answer": str(answer)
    })

with open("100_digit_addition_testset.json", "w") as file:
    json.dump(data, file, indent=4)

print("JSON file created successfully.")


JSON file created successfully.


In [2]:
import json
with open("100_digit_addition_testset.json", "r") as file:
    data = json.load(file)

In [3]:
data[0]

{'index': 1, 'question': '4 + 5 = ', 'answer': '9'}

In [4]:
from tqdm import tqdm

answers = []
for i in tqdm(range(100)):
    answers.append(model.get_completions(data[i]['question']) + "\nThink step by step.")

100%|██████████| 100/100 [17:23<00:00, 10.43s/it]


In [5]:
import re
def extract_final_answer(full_answer):
    # Use regex to extract the last number
    match = re.findall(r'\d+', full_answer)

    if match:
        last_number = match[-1]  # Get the last matched number
        return last_number
    else:
        print("No match found.")
    return -1

In [6]:
res = model.get_completions(data[99]['question'])
print("Res\n", res)
print("\nextract_final_answer(res)\n", extract_final_answer(res))


Res
 To find the sum of the two large numbers, we can add them directly:

```
  1104874766114660788350639417224353151265341850661530329151507020933743072748786965649731418134890269
+ 8236518854572054232538321201328752845744825489744238828464298441702222046203864212330042384390501984
--------------------------------------------------------------------------------------------------------
  9341393620686715020888960618553105997010167340405769157615805462635965118952651177979773802525392253
```

So, the sum of the two numbers is:

9341393620686715020888960618553105997010167340405769157615805462635965118952651177979773802525392253

extract_final_answer(res)
 9341393620686715020888960618553105997010167340405769157615805462635965118952651177979773802525392253


In [7]:
import json

def calculate_accuracy(correct_answers, predictions):
    total_questions = len(correct_answers)
    correct_predictions = sum(1 for correct, pred in zip(correct_answers, predictions) if correct == pred)
    accuracy = correct_predictions / total_questions
    return accuracy

def calculate_accuracy_per_section(correct_answers, predictions, section_size=10):
    accuracies = []
    total_sections = len(correct_answers) // section_size
    for section in range(total_sections):
        start_index = section * section_size
        end_index = start_index + section_size
        section_correct_answers = correct_answers[start_index:end_index]
        section_predictions = predictions[start_index:end_index]
        section_accuracy = calculate_accuracy(section_correct_answers, section_predictions)
        accuracies.append(section_accuracy)
    return accuracies

# Load the JSON file with questions and correct answers
with open("100_digit_addition_testset.json", "r") as file:
    data = json.load(file)

# Extract correct answers
correct_answers = [item["answer"] for item in data]

predictions = [extract_final_answer(item) for item in answers]

# Calculate overall accuracy
overall_accuracy = calculate_accuracy(correct_answers, predictions)

# Calculate accuracy per section of tens
section_accuracies = calculate_accuracy_per_section(correct_answers, predictions)

# Print and save the accuracies
print(f"Overall Accuracy: {overall_accuracy}")
print(f"Section Accuracies: {section_accuracies}")

# Save accuracies to a file
accuracies_data = {
    "overall_accuracy": overall_accuracy,
    "section_accuracies": section_accuracies,
    "predictions": predictions,
    "answers": answers
}

with open("accuracies_cot.json", "w") as file:
    json.dump(accuracies_data, file, indent=4)

print("Accuracies saved successfully.")

Overall Accuracy: 0.52
Section Accuracies: [0.8, 0.6, 0.9, 0.6, 0.2, 0.3, 0.4, 0.3, 0.5, 0.6]
Accuracies saved successfully.


In [28]:
print(answers[58])

To find the sum of the two large numbers, we can add them digit by digit from right to left, carrying over any values as necessary. Let's perform the addition step by step:

```
   88356247731782694958049429079553866383573654648829635163142
+  47781035507177048288144828253014211541526542770035318534047
--------------------------------------------------------------
```

Starting from the rightmost digit:

```
2 + 7 = 9
4 + 4 = 8
1 + 0 = 1
3 + 4 = 7
6 + 0 = 6
1 + 5 = 6
5 + 3 = 8
9 + 5 = 14 (write down 4, carry over 1)
3 + 8 + 1 = 12 (write down 2, carry over 1)
6 + 4 + 1 = 11 (write down 1, carry over 1)
8 + 2 + 1 = 11 (write down 1, carry over 1)
4 + 5 + 1 = 10 (write down 0, carry over 1)
4 + 2 + 1 = 7
9 + 8 = 17 (write down 7, carry over 1)
0 + 1 + 1 = 2
9 + 2 = 11 (write down 1, carry over 1)
2 + 8 + 1 = 11 (write down 1, carry over 1)
4 + 4 + 1 = 9
9 + 1 = 10 (write down 0, carry over 1)
8 + 5 + 1 = 14 (write down 4, carry over 1)
6 + 2 + 1 = 9
3 + 1 = 4
8 + 5 = 13 (write down 3, ca