# Step 0: Import classes from other files

In [1]:
%load_ext autoreload
%autoreload 2
from constants import *
from evaluator import *
from model import *
from mutator import *
from prompt import *
from task import *

import warnings
# Silence every Python warning for the rest of the session so the cell outputs
# below stay readable. NOTE(review): this also hides deprecation and runtime
# warnings raised by the imported project modules.
warnings.filterwarnings("ignore")

In [2]:
import re

def extract_last_numeric_value(input_string):
    """Return the last number found in ``input_string`` as a string.

    A number is a run of digits that may contain thousands separators
    (``1,234``) and one decimal part (``1,234.50`` or ``.5``). Commas are
    stripped from the result so it can be passed straight to ``float``.
    Sentence punctuation that follows a number ("... is 42.") is not part of
    the match, and stand-alone punctuation is never treated as a number.
    A leading ``-`` is kept only when it is not directly preceded by a word
    character, ``.`` or ``)``, so ``10-6=4`` still yields ``4`` while
    ``#### -5`` yields ``-5``.

    Args:
        input_string: Text to scan, e.g. a GSM8K reference answer or a
            model completion.

    Returns:
        The last numeric token with commas removed, or ``None`` when the
        input is not a string or contains no digits.
    """
    if not isinstance(input_string, str):
        return None

    # Every alternative requires at least one digit, so a trailing "." or ","
    # at the end of a sentence can no longer become the "last number".
    pattern = r'(?:(?<![\w.)])-)?(?:\d[\d,]*(?:\.\d+)?|\.\d+)'
    matches = re.findall(pattern, input_string)
    if not matches:
        return None

    # Remove thousands separators (and a comma that directly followed the number).
    return matches[-1].replace(",", "")

In [3]:
def evaluate_func(orig, pred):
    """Decide whether a predicted answer matches the reference answer.

    Both texts are reduced to their last numeric value with
    ``extract_last_numeric_value`` and compared as floats with an absolute
    tolerance of 1e-6.

    Args:
        orig: Reference answer text.
        pred: Model output text.

    Returns:
        ``True`` when both texts yield numbers that differ by less than 1e-6;
        ``False`` otherwise, including when either text yields no parseable
        number.
    """
    orig_value = extract_last_numeric_value(orig)
    pred_value = extract_last_numeric_value(pred)
    try:
        return abs(float(orig_value) - float(pred_value)) < 1e-6
    except (TypeError, ValueError):
        # TypeError: no number was found (value is None).
        # ValueError: the extracted text is not a valid float literal.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        return False

# Step 1: Initialize the models and the task

In [4]:
from datasets import load_dataset

# Model handed to PromptMutator below to rewrite prompts; built with the
# "quantized_llama" provider and a GGUF build of Mistral-7B-OpenOrca.
mutation_model = Model(provider="quantized_llama", model_name="TheBloke/Mistral-7B-OpenOrca-GGUF")
# Model passed to Evaluator.evaluate to answer the task questions; constructed
# with Model's default arguments (defined in model.py, not shown here).
evaluation_model = Model()

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [5]:
# Build the task from the GSM8K "main" configuration, the instruction text
# used as the starting prompt, and the answer-scoring function defined above.
task = Task(load_dataset('gsm8k', 'main'),
            'Solve the math word problem, giving your answer as an arabic numeral',
            evaluate_func)
# Wrap the task's instruction in a Prompt object; presumably
# task.initial_prompt is the instruction string passed above (see task.py).
initial_prompt = Prompt(task.initial_prompt)

In [18]:
# Sanity check: display the loaded dataset splits and their row counts.
task.dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 7473
    })
    test: Dataset({
        features: ['question', 'answer'],
        num_rows: 1319
    })
})

In [6]:
# Sample zero-shot prompt: the instruction followed by a single test question
# and an empty answer slot (no worked examples).
print(initial_prompt.zero_shot_prompt(task.test_sample()['question']))

Solve the math word problem, giving your answer as an arabic numeral.

Q: A tub of ice cream costing $13 is now sold at $11. A packet of milk was sold at a discount of $0.5. How much will you save if you buy 2 tubs of ice cream and 4 packets of milk?
A: 


In [7]:
# Sample one-shot prompt: the instruction, one worked question/answer pair
# drawn with task.train_sample(), then the test question to be answered.
print(initial_prompt.one_shot_prompt(
    task.test_sample()['question'],
    task.train_sample()))

Solve the math word problem, giving your answer as an arabic numeral.

Q: Jenna wants to buy a concert ticket that costs $181, plus five drink tickets for $7 each. If Jenna earns $18 an hour and works 30 hours a week, what percentage of her monthly salary will she spend on this outing?
A: First find the total cost of the drink tickets: 5 tickets * $7/ticket = $<<5*7=35>>35
Then add that cost to the cost of the ticket to find the total cost: $35 + $181 = $<<35+181=216>>216
Then multiply Jenna's hourly rate by the number of hours she works each week to find her weekly earnings: $18/hour * 30 hours/week = $<<18*30=540>>540/week
Then multiply her weekly earnings by the number of weeks she works each month: $540/week * 4 weeks/month = $<<540*4=2160>>2160/month
Then divide the cost of the concert by Jenna's monthly earnings and multiply by 100% to express the answer as a percentage: $216 / $2160 * 100% = 10%
#### 10

Q: Jill and her friends watch 4 movies every Saturday and half the number o

In [8]:
# Sample few-shot prompt: the instruction, several worked question/answer
# pairs taken from task.train(), then the test question to be answered.
# NOTE(review): how many examples are included is decided inside
# few_shot_prompt / task.train() (not shown here).
print(initial_prompt.few_shot_prompt(
    task.test_sample()['question'],
    task.train()))

Solve the math word problem, giving your answer as an arabic numeral. 

Q: Diana needs to bike 10 miles to get home.  She can bike 3 mph for two hours before she gets tired, and she can bike 1 mph until she gets home.  How long will it take Diana to get home?
A: In the first part of her trip, Diana will cover 2 hours * 3 mph = <<2*3=6>>6 miles.
In the second part of her trip, Diana will need to cover an additional 10 miles - 6 miles = <<10-6=4>>4 miles.
To cover 4 miles * 1 mph = will take Diana <<4*1=4>>4 hours.
Total biking time to get home for Diana will be 2 hours + 4 hours = <<2+4=6>>6 hours.
#### 6

Q: Linda's bag has 2 dimes, 6 quarters, and 5 nickels. Her mother gives her 2 more dimes, 10 quarters, and twice as many nickels as she has. How many coins does she have altogether?
A: When Linda's mother gave her 2 more dimes, the total number of dimes increased to 2+2 = <<2+2=4>>4 dimes.
She also increased the total number of quarters to 6+10 = <<6+10=16>>16 after her mother gave he

# Step 2: Initialize a list of initial prompts using the mutator

In [14]:
# Number of prompts kept in the population, and number of new prompts
# generated in each generation.
NUM_POPULATION = 5
# Number of mutate -> evaluate -> select rounds to run.
NUM_GENERATIONS = 5
# Passed as the last argument to evaluator.evaluate; presumably the number of
# questions each prompt is scored on -- TODO confirm against evaluator.py.
NUM_EVALS = 5

# Mutation instructions to sample from. The full MUTATORS list from the
# project is disabled in favour of a single rewrite instruction.
# original_mutators = MUTATORS
original_mutators = [
    "Rewrite the following instruction several times in a creative way",
]

# Thinking styles to sample from (project-provided list).
original_thinking_styles = THINKING_STYLES

In [15]:
import random

# Make the list of initial populations
# Build the starting population: the untouched task prompt plus
# NUM_POPULATION - 1 variants, each mutated directly from that prompt.
prompt_mutator = PromptMutator(mutation_model)
pop = [initial_prompt]
for _ in range(NUM_POPULATION - 1):
    # Draw the mutator first and the thinking style second.
    mutator, thinking_style = (
        random.choice(original_mutators),
        random.choice(original_thinking_styles),
    )

    # random_mutate picks the concrete mutation mechanism by chance.
    new_prompt = prompt_mutator.random_mutate(initial_prompt, mutator, thinking_style)
    pop += [new_prompt]
    

In [16]:
# Show the initial population; each entry is rendered with Prompt's repr.
print(pop)

[Solve the math word problem, giving your answer as an arabic numeral, Find the total number of candies by adding up the candies given to each child., Solve the math word problem, using Arabic numerals for your answer., Inspect and decipher each direction; don't merely memorize or mimic actions unseen, and interpret creatively when the occasion suits best for such treatment; solve with full earnest the mathematics conundrum, providing your answer in arabic numerals., Find the total number of candies by adding up the candies given to each child.]


# Step 3: Evaluation and mutation loop

In [17]:
evaluator = Evaluator()

for n_gen in range(NUM_GENERATIONS):
    # Selection scheme implemented here:
    #   N prompts -> generate N new prompts (2 x N in total) -> score all of
    #   them -> keep the top N for the next generation.
    # Alternatives that were considered:
    #   - replace the original N prompts with the N new ones;
    #   - keep the best 60% unchanged and mutate only the worst 40%.

    # Generate NUM_POPULATION new prompts
    new_pop = []
    for _ in range(NUM_POPULATION):
        # Randomly select a parent prompt, a mutator and a thinking style
        parent = random.choice(pop)
        mutator = random.choice(original_mutators)
        thinking_style = random.choice(original_thinking_styles)

        # Mutate the selected parent (not always the initial prompt) so that
        # improvements found in earlier generations can accumulate.
        # random_mutate picks the concrete mutation mechanism by chance.
        new_prompt = prompt_mutator.random_mutate(parent, mutator, thinking_style)
        new_pop.append(new_prompt)

    # Evaluate parents and children together.
    # NOTE(review): "one" presumably selects one-shot prompting -- confirm in evaluator.py.
    all_pop = pop + new_pop
    scores = evaluator.evaluate(evaluation_model, task, "one", all_pop, NUM_EVALS)

    # Print prompts and their scores, best first
    print(f"Generation {n_gen + 1}")
    prompt_scores = [(scores[i], all_pop[i]) for i in range(len(all_pop))]
    prompt_scores = sorted(prompt_scores, key=lambda x: x[0], reverse=True)
    print("Score | Prompt")
    for score, scored_prompt in prompt_scores:
        print(f"{score:5d} | {scored_prompt}")

    # Survival of the fittest
    # TODO: Create a more elaborate selection mechanism that keeps some weaker member
    pop = [prompt_scores[i][1] for i in range(NUM_POPULATION)]

Evaluating prompt 0: Solve the math word problem, giving your answer as an arabic numeral
---
He writes each friend 3*2=<<3*2=6>>6 pages a week
So he writes 6*2=<<6*2=12>>12 pages every week
That means he writes 12*52=<<12*52=624>>624 pages a year
#### 624
James writes two 3-page letters every week. The number of letters he can write in a year is 52 * 2 = <<52*2=104>>104.
The number of pages James can write in a year is 104 * 3 = <<104*3=312>>312.
#### 312
---
Maila read 12 x 2 = <<12*2=24>>24 pages today.
So she was able to read a total of 12 + 24 = <<12+24=36>>36 pages since yesterday.
There are 120 - 36 = <<120-36=84>>84 pages left to be read.
Since she wants to read half of the remaining pages tomorrow, then she should read 84/2 = <<84/2=42>>42 pages.
#### 42
Julie has already read 12 pages. So, the remaining pages are 120 - 12 = <<120-12=108>>108.
If she wants to read half of these pages tomorrow, she should read 108 / 2 = <<108/2=54>>54 pages.
#### 54
---
In the beginning, Betty 