# Step 0: Import classes from other files

In [1]:
%load_ext autoreload
%autoreload 2
from constants import *
from evaluator import *
from model import *
from mutator import *
from prompt import *
from task import *

import warnings
warnings.filterwarnings("ignore")

In [2]:
import re

def extract_last_numeric_value(input_string):
    """Return the last number appearing in ``input_string`` as a string.

    Commas used as thousands separators are stripped from the result, so
    ``"$76,000"`` yields ``"76000"``. Every match must contain at least one
    digit, so sentence punctuation (a trailing ``"."`` or a lone ``","``) is
    never mistaken for a number.

    Args:
        input_string: Text to scan, e.g. a model completion or a reference
            answer.

    Returns:
        The last numeric token with commas removed, or ``None`` when the text
        contains no digits.
    """
    # Either digits with optional comma grouping and an optional fractional
    # part ("1,234.50"), or a bare fraction (".5"). Requiring a digit keeps a
    # sentence-ending period from becoming the "last number".
    pattern = r'\d[\d,]*(?:\.\d+)?|\.\d+'

    matches = re.findall(pattern, input_string)
    if not matches:
        return None

    # Only the final match matters; drop thousands separators from it.
    return matches[-1].replace(",", "")

In [3]:
def evaluate_func(orig, pred):
    """Check whether a prediction ends with the same number as the reference.

    The last numeric value of each text is extracted with
    ``extract_last_numeric_value`` and the two are compared as floats.

    Args:
        orig: Reference answer text.
        pred: Predicted answer text.

    Returns:
        ``True`` when both texts contain a number and the two numbers differ
        by less than 1e-6; ``False`` otherwise, including when either text has
        no parsable number.
    """
    orig_value = extract_last_numeric_value(orig)
    pred_value = extract_last_numeric_value(pred)
    try:
        return abs(float(orig_value) - float(pred_value)) < 1e-6
    except (TypeError, ValueError):
        # TypeError: no number was found (value is None).
        # ValueError: the extracted token is not a valid float literal.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return False

# Step 1: Initialize the models and the task

In [4]:
from datasets import load_dataset

# Alternative provider, kept for reference:
# mutation_model = Model(provider="huggingface", model_name="mistralai/Mistral-7B-Instruct-v0.1")
mutation_model = Model(
    provider="quantized_llama",
    model_name="TheBloke/Mistral-7B-OpenOrca-GGUF",
    model_file="mistral-7b-openorca.Q4_K_M.gguf",
    model_type="mistral",
)

# The same model instance serves as both the mutation and the evaluation model.
evaluation_model = mutation_model

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [5]:
# Task built from the GSM8K 'main' configuration, a task instruction string and
# the answer-checking function.
task = Task(
    load_dataset('gsm8k', 'main'),
    'Solve the math word problem, giving your answer as an arabic numeral.',
    evaluate_func,
)

# Text pieces combined with the task's initial prompt into the starting Prompt.
system_instruction = "You are genius at mathematical thinking and reasoning."
thinking_style = "Think about this step by step."
initial_prompt = Prompt(task.initial_prompt, system_instruction, thinking_style)

In [6]:
# Display the dataset held by the task (bare expression, shown as the cell output).
task.dataset

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 7473
    })
    test: Dataset({
        features: ['question', 'answer'],
        num_rows: 1319
    })
})

In [7]:
# Sample zero shot prompt: draw one test question and render it with no examples.
zero_shot_question = task.test_sample()['question']
print(initial_prompt.zero_shot_prompt(zero_shot_question))

SYSTEM: You are genius at mathematical thinking and reasoning.
USER: Think about this step by step., Solve the math word problem, giving your answer as an arabic numeral..
QUESTION: Juan asked his neighbor, Herb, how much his house was worth. Herb answered that he paid $76,000 for the house. If Juan's house is 30% less expensive than Herb's, calculate the value of the two houses combined.
ANSWER: 



In [8]:
# Sample one shot prompt: one test question plus a single training sample.
# The test sample is drawn before the training sample, as in the original call.
one_shot_question = task.test_sample()['question']
one_shot_example = task.train_sample()
print(initial_prompt.one_shot_prompt(one_shot_question, one_shot_example))

SYSTEM: You are genius at mathematical thinking and reasoning.
USER: Think about this step by step., Solve the math word problem, giving your answer as an arabic numeral..
QUESTION: Jenny decided to get a cat with her girlfriend.  They agreed to split all the costs down the middle except for they would each buy their own toys for the cat.  The adoption fee was $50, the vet visits cost $500 for the first year and the monthly cost of food was $25.  She bought $200 in toys.  How much did Jenny spend on the cat in the first year?
ANSWER: The total cost for food was 25*12=$<<25*12=300>>300
So their shared expenses were 300+50+500=$<<300+50+500=850>>850
So Jenny spent 850/2=$<<850/2=425>>425 on shared expenses
So the total cost she paid was 425+200=$<<425+200=625>>625
#### 625
QUESTION: Tyrion changes his face mask two times every time he goes out. If he goes out three times a day, how many face masks does he use every 2 days?
ANSWER: 



In [9]:
# Sample few shot prompt: one test question plus whatever task.train() returns.
# The test sample is drawn before task.train() is called, as in the original call.
few_shot_question = task.test_sample()['question']
few_shot_train_data = task.train()
print(initial_prompt.few_shot_prompt(few_shot_question, few_shot_train_data))

SYSTEM: You are genius at mathematical thinking and reasoning.
USER: Think about this step by step., Solve the math word problem, giving your answer as an arabic numeral.. 
QUESTION: A cook had 300 carrots in a bucket he was using to cook meals for a restaurant. Before lunch, he had used 2/5 of the carrots. By the end of the day, he had chopped and used 3/5 of the remaining carrots. How many carrots were not used that day?
ANSWER: Before lunch, the cook had used 2/5*300 = <<2/5*300=120>>120 carrots.
The number of remaining carrots after the cook used 2/5 of them is 300-120 = <<300-120=180>>180.
The cook had used 3/5*180 = <<3/5*180=108>>108 of the remaining carrots by the end of the day.
The number of carrots that the cook did not use that day is 180-108 = <<180-108=72>>72.
#### 72
QUESTION: Janna sleeps 7 hours each day during weekdays and 8 hours each day during weekends. How many hours does she sleep in a week?
ANSWER: She sleeps 7*5=<<7*5=35>>35 hours on weekdays.
She sleeps 8*2=<<

# Step 2: Initialize a list of initial prompts using the mutator

In [13]:
# Evolution hyper-parameters.
NUM_POPULATION = 5   # prompts kept per generation (and new prompts bred per generation)
NUM_GENERATIONS = 5  # number of evaluate/mutate rounds
NUM_EVALS = 10       # evaluation count passed to the evaluator

# Mutation instructions to sample from. To use the list shipped in `constants`
# instead, replace the literal below with:
# original_mutators = MUTATORS
original_mutators = [
    "10 times, modify the following instruction in a creative manner.",
    "10 times, change the following instruction to make it more fun, and make sure to think WELL outside the box.",
    # "10 times, as a really good teacher, explain the instruction, as if you were explaining it to a child.",
    # "10 times, imagine you need to follow this instruction. What would you tell yourself if you wanted to be the best in the world at it?",
    # "10 times, how would someone with derailment follow this instruction?",
    # "10 times, rephrase the instruction without using any of the same words. Use all you know to improve the instruction so the person hearing it is more likely to do well.",
    # "10 times, say that instruction again in another way. DON’T use any of the words in the original instruction or you’re fired.",
    # "10 times, develop a prompt mutant by replacing specific keywords with related but unexpected terms. Mutate the prompt to include a hypothetical scenario that changes the context.",
    # "10 times, generate a prompt mutant that introduces an element of suspense or intrigue. Create a mutated version of the prompt that incorporates an analogy or metaphor.",
    # "10 times, develop a prompt mutant by rephrasing the original prompt in a poetic or lyrical style. Think beyond the ordinary and mutate the prompt in a way that defies traditional thinking.",
    # "10 times, break free from conventional constraints and generate a mutator prompt that takes the prompt to uncharted territories. Challenge the norm and create a mutator prompt that pushes the boundaries of traditional interpretations.",
    # "10 times, embrace unconventional ideas and mutate the prompt in a way that surprises and inspires unique variations. Think outside the box and develop a mutator prompt that encourages unconventional approaches and fresh perspectives.",
    # "10 times, embrace the power of unconventional thinking and create a mutator prompt that sparks unconventional mutations and imaginative outcomes. Challenge traditional assumptions and break the mold with a mutator prompt that encourages revolutionary and out-of-the-box variations.",
    # "10 times, encourage reversethinking: Improve the prompt by asking the user to think about the problem in reverse, starting with the solution and working backwards.",
]

# Thinking styles to sample from. Every active entry must end with a comma:
# adjacent string literals are concatenated by Python, so a missing comma
# silently merges two entries into a single string.
original_thinking_styles = [
    '',
    "You are genius at mathematical thinking and reasoning.",
    # "You are a master storyteller, capable of weaving intricate tales.",
    # "You possess encyclopedic knowledge of world cuisines and cooking techniques.",
    # "You are an expert in all known languages, ancient and modern.",
    # "You have the ability to simulate historical figures' perspectives.",
    "You are a genius in solving complex puzzles and riddles.",
    # "You possess a deep understanding of all musical genres and instruments.",
    # "You are an AI with advanced skills in astronomy and space science.",
    # "You are a virtual biologist specializing in exotic and extinct species.",
    # "You are a connoisseur of fine arts, familiar with all major art movements.",
    # "You are a computational genius, adept at coding and algorithm design.",
    # "You are an AI trained as a world-class chess strategist.",
    # "You are an expert in global politics and international relations.",
    # "You are a virtual philosopher, well-versed in various philosophical theories.",
    # "You are a seasoned travel guide with knowledge of every corner of the Earth.",
    # "You are a fitness and wellness coach, adept at personalized advice."
]

In [14]:
import random

# Make the list of initial populations
# The unmodified initial prompt is always the first member; the remaining
# NUM_POPULATION - 1 members are mutations of that same initial prompt.
initial_pop = [initial_prompt]
prompt_mutator = PromptMutator(mutation_model)
for i in range(1, NUM_POPULATION):
    # Randomly select a mutator and thinking style
    # NOTE(review): thinking_style is drawn here but never passed to
    # random_mutate in this cell; the assignment also overwrites the
    # notebook-level `thinking_style` string. Confirm whether it should be used.
    mutator = random.choice(original_mutators)
    thinking_style = random.choice(original_thinking_styles)
    
    # Randomly select prompt mutation mechanism by chance
    new_prompt = prompt_mutator.random_mutate(initial_prompt, mutator)
    initial_pop.append(new_prompt)
    
# Working population for the evolution loop; a shallow copy, so initial_pop
# itself keeps its original membership when `pop` is reassigned later.
pop = initial_pop.copy()

In [15]:
# Show every member of the initial population, one per line.
for prompt in initial_pop:
    print(prompt)

Solve the math word problem, giving your answer as an arabic numeral.
Compute the difference and state the outcome using Arabic numerals.
Determine the outcome of the equation and state it as a whole number.
Ascertain the time required to cover 50 miles on a motorcycle, given an average speed of 60 mph.
Find the hidden treasure by solving this math puzzle.


# Step 3: Evaluation and mutation loop

In [16]:
import time

# Wall-clock reference point; the time printed after each generation is
# cumulative since this moment.
start_time = time.time()

evaluator = Evaluator()

for n_gen in range(NUM_GENERATIONS):
    # Generate NUM_POPULATION new prompts for this generation
    new_pop = []

    for i in range(NUM_POPULATION):
        # Randomly select a parent prompt, a mutator and thinking style
        prompt = random.choice(pop)
        mutator = random.choice(original_mutators)
        # NOTE(review): thinking_style is drawn but not passed to random_mutate;
        # confirm whether the mutator is expected to receive it.
        thinking_style = random.choice(original_thinking_styles)

        # Mutate the selected parent rather than always the initial prompt, so
        # that each generation builds on the survivors of the previous one.
        new_prompt = prompt_mutator.random_mutate(prompt, mutator)
        new_pop.append(new_prompt)

    # Evaluate the current population together with the new candidates
    all_pop = pop + new_pop
    scores = evaluator.evaluate(evaluation_model, task, "one", all_pop, NUM_EVALS)

    # Print prompts and their scores, best first
    print(f"Generation {n_gen + 1}")
    prompt_scores = [(candidate.get_accuracy(), candidate) for candidate in all_pop]
    prompt_scores = sorted(prompt_scores, key=lambda x: x[0], reverse=True)
    print("Score  | Evals | Prompt")
    for _, prompt in prompt_scores:
        print(f"{prompt.get_accuracy() * 100:5.2f}% | {prompt.get_num_evals():5d} | {prompt}")

    # Survival of the fittest: keep the NUM_POPULATION highest-accuracy prompts
    # TODO: Create a more elaborate selection mechanism that keeps some weaker members
    pop = [prompt_scores[i][1] for i in range(NUM_POPULATION)]

    # Record time elapsed since the start of the whole run
    end_time = time.time()
    execution_time = end_time - start_time
    print("Execution time: {:.2f} seconds".format(execution_time))

Evaluating test samples: [807, 488, 1085, 445, 1127, 732, 1044, 501, 81, 1280]
Evaluating prompt 0: Solve the math word problem, giving your answer as an arabic numeral.
Evaluating prompt 1: Compute the difference and state the outcome using Arabic numerals.
Evaluating prompt 2: Determine the outcome of the equation and state it as a whole number.
Evaluating prompt 3: Ascertain the time required to cover 50 miles on a motorcycle, given an average speed of 60 mph.
Evaluating prompt 4: Find the hidden treasure by solving this math puzzle.
Evaluating prompt 5: Unravel the numerical challenge and deliver your response in base ten.
Evaluating prompt 6: Please respond with the numerical solution to this math problem, using only digits.
Evaluating prompt 7: Solve the math word problem and provide your answer in Arabic numerals.
Evaluating prompt 8: Find the hidden treasure by solving this math puzzle.
Evaluating prompt 9: Solve the math riddle, then use your answer to unlock the door to the m