In [1]:
# Libraries
import pandas as pd
import random
import json
from datasets import load_dataset
import re
import math
import random
import spacy

In [2]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

## Getting instructions

In [3]:
dataset = load_dataset("gsm8k", "main")
dataset["test"][5]

Found cached dataset gsm8k (/Users/iv/.cache/huggingface/datasets/gsm8k/main/1.1.0/37bfb08b1d4fcbb01f06b03d9e1ef5f1fcbd4d3af3d08842c50d7305091285ba)


  0%|          | 0/2 [00:00<?, ?it/s]

{'question': 'Kylar went to the store to buy glasses for his new apartment. One glass costs $5, but every second glass costs only 60% of the price. Kylar wants to buy 16 glasses. How much does he need to pay for them?',
 'answer': 'The discount price of one glass is 60/100 * 5 = $<<60/100*5=3>>3.\nIf every second glass is cheaper, that means Kylar is going to buy 16 / 2 = <<16/2=8>>8 cheaper glasses.\nSo for the cheaper glasses, Kylar is going to pay 8 * 3 = $<<8*3=24>>24.\nAnd for the regular-priced glasses, Kylar will pay 8 * 5 = $<<8*5=40>>40.\nSo in total Kylar needs to pay 24 + 40 = $<<24+40=64>>64 for the glasses he wants to buy.\n#### 64'}

In [4]:
def get_random_test_object(dataset):
    if "test" in dataset and len(dataset["test"]) > 0:
        random_index = random.randint(0, len(dataset["test"]) - 1)
        test_object = dataset["test"][random_index]
        
        # Assuming each object in the dataset has 'question' and 'answer' keys
        question = test_object.get("question", "No question found")
        answer = test_object.get("answer", "No answer found")
        
        return question, answer
    else:
        return None, None

In [5]:
def get_random_mutation(csv_file_path):
    try:
        df = pd.read_csv(csv_file_path, header=None, encoding='utf-8', delimiter='.') 
    except UnicodeDecodeError:
        df = pd.read_csv(csv_file_path, header=None, encoding='ISO-8859-1', delimiter='.') 

    random_prompt = random.choice(df[1].tolist())
    return random_prompt


In [6]:
def get_random_mutation_txt(txt_file_path):
    with open(txt_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Remove any leading/trailing whitespace and filter out empty lines
    prompts = [line.strip() for line in lines if line.strip()]
    
    if prompts:
        return random.choice(prompts)
    else:
        return "No mutation prompts found."

In [7]:
def generate_intructions(question, task_description, max_tokens=100):
    formatted_input = f"{task_description} {question}"
    input_ids = tokenizer(formatted_input, return_tensors="pt").input_ids
    outputs = model.generate(input_ids, max_new_tokens = max_tokens)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return generated_text

In [8]:
def process_with_llm(prompt):
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    output = model.generate(input_ids, max_length=1000)
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    return response

In [9]:
# write the task description here:
task_description = "Generate an instruction on how to solve the problem, based on the given question "

In [10]:
task_description1 = "Generate an instruction, or advice on how to solve a problem"

#### Hide

In [11]:
# getting mutated prompts
question, answer = get_random_test_object(dataset)

In [12]:
print(question)

Jackie is trying to decide whether to do her taxes herself or hire an accountant. If she does the taxes herself, she'll be able to do 3 fewer hours of freelance work, losing $35/hour in missed income. The accountant charges $90. How much more money will she have if she hires the accountant?


In [13]:
print(answer)

First find the total lost revenue if Jackie does her taxes herself: $35/hour * 3 hours = $<<35*3=105>>105
Then subtract the accountant's charge to find how much money Janet saves: $105 - $90 = $<<105-90=15>>15
#### 15


## Instruction Generation

In [14]:
num_instructions = 10
generated_instructions = []

for _ in range(num_instructions):
    question, _ = get_random_test_object(dataset)  # Fetch a random question from your dataset
    instruction = generate_intructions(question, task_description)
    generated_instructions.append(instruction)

In [15]:
print(generated_instructions[5])

The cost of the canoe is 30 * 3 = $80. The cost of the banana boat raft is $18 * 5 = $80. The total cost of the rental is $80 + $80 = $120. The total cost of the rental is $120 + $80 = $120. The total cost of the rental is $120 + $120 = $120. The total cost of the rental is $120 + $120 = $120. The total cost of


#### Test Version 2

In [70]:
# Alternative, that does not include question


def generate_instructions1(num_instructions, max_tokens=1000, temperature=1.5):
    instructions = []
    for num in range(num_instructions):
        formatted_input = f"Generate instructions on how to solve a math problem"
        input_ids = tokenizer(formatted_input, return_tensors="pt").input_ids
        outputs = model.generate(input_ids, 
                                 max_new_tokens=int(max_tokens),
                                 temperature=temperature)
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        instructions.append(generated_text)

    return instructions

In [17]:
num_instructions = 10
generated_instructions = []

print(generate_instructions1(num_instructions, temperature=500))

Using a calculator, find the number of squares in the equation.


In [18]:
print(generated_instructions[3])

IndexError: list index out of range

In [19]:
mutation_prompt = get_random_mutation("./Prompt-Engineering-OpenDI/mutation_prompts.csv")

ParserError: Error tokenizing data. C error: Expected 1 fields in line 11, saw 2


In [None]:
mutation_prompt = get_random_mutation_txt("./Prompt-Engineering-OpenDI/mutation_prompts.txt")
print(mutation_prompt)

In [20]:
def apply_mutation(instruction, mutation_prompt):
    # Example mutation - this can be customized based on your mutation logic
    return f"{mutation_prompt} {instruction}"

mutated_instructions = [apply_mutation(instruction, mutation_prompt) for instruction in generated_instructions]


In [None]:
print(mutated_instructions)

In [None]:
processed_outputs = [process_with_llm(mutated_instruction) for mutated_instruction in mutated_instructions]

In [None]:
for response in processed_outputs:
    print(response)

In [None]:
input_text = "Kylar went to the store to buy glasses for his new apartment. One glass costs $5, but every second glass costs only 60% of the price. Kylar wants to buy 16 glasses. How much does he need to pay for them?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Specify the max_new_tokens parameter
outputs = model.generate(input_ids, max_new_tokens=50)  # Adjust the number as needed
print(tokenizer.decode(outputs[0]))


In [None]:
input_text = "What was the answer for the previus question I asked?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Specify the max_new_tokens parameter
outputs = model.generate(input_ids, max_new_tokens=50)  # Adjust the number as needed
print(tokenizer.decode(outputs[0]))

In [26]:
def check_answer(answer, output):
    if not (isinstance(answer, str) and isinstance(output, str)):
        raise TypeError("Must be of type str")
    if re.search("\s" + answer +"\s*", output):
        # print("answer is correct")
        return 1
    # print("answer is wrong")
    return 0

In [25]:
def get_logic(answer):
    return re.findall("<<(.*?)>>", answer)

In [21]:
def tokenise_logic(logic):
    tokenised_logic = []
    operators = "+-/*="
    current_token = ""
    
    for c in logic:
        if c in operators:
            tokenised_logic.append(current_token)
            tokenised_logic.append(c)
            current_token = ""
        elif c == '.' or c.isnumeric():
            current_token += c
    tokenised_logic.append(current_token)
    
    return tokenised_logic

In [22]:
def check_logic_sentence(line, logic_tokens):
    i = 0

    for token in logic_tokens:
        if token in line:
            i += 1

    if len(logic_tokens) == i:
        return True
    return False

In [23]:
def fitness(answer, output):
    final_answer = database_answer.split()[-1]
    
    score = 0
    
    logic = get_logic(database_answer)
    
    tokenised_logic_sentences = []
    
    for l in logic:
        tokenised_logic_sentences.append(tokenise_logic(l))
    
    total_score = 1 + len(tokenised_logic_sentences)

    for line in output.split("."):
        for sentence in tokenised_logic_sentences:
            if check_logic_sentence(line, sentence):
                tokenised_logic_sentences.pop(tokenised_logic_sentences.index(sentence))
                score += 1
                break
    last_line = line
    
    num_answer = 'a'

    for word in last_line.split(" "):
        try:
            num_answer = str(int(word))
        except:
            pass
    
    score += check_answer(final_answer, num_answer)
    return score/total_score

In [112]:
num_instructions = 10
generated_prompts = []
scores = []

for _ in range(num_instructions):
    question, database_answer = get_random_test_object(dataset)  # Fetch a random question from your dataset
    instruction = generate_intructions(question, task_description)
    mutation_prompt = get_random_mutation_txt("./Prompt-Engineering-OpenDI/mutation_prompts.txt")
    mutated_instruction = apply_mutation(instruction, mutation_prompt)
    generated_prompts.append(mutated_instruction)
    processed_output = process_with_llm(mutated_instruction)
    scores.append(fitness(database_answer, processed_output))

In [113]:
print(scores)

[0.0, 0.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6666666666666666]


In [111]:
print(generated_prompts[0])

Say that instruction again in another way. DON’T use any of the words in the original instruction or you’re fired. First find the total number of gallons of gas: 15 gallons / 5 containers = 5 gallons. Then find the total number of gallons of gas in each container: 5 gallons / container * 15 gallons / container = 50 gallons. Then find the total number of gallons of gas in each container: 50 gallons / container * 5 containers = 200 gallons. Then find the total


In [30]:
print(generated_prompts[0])

Dale starts off selling 100 butterscotch candies that he ordered, but ends up selling 150, 100 - 150 = <<100-150=-50>>-50 candies.
So Dale orders 100 more candies to make up for the deficit and to sell even more, -50 candies + 100 candies = <<100-50=50>>50 candies Dale has left and still needs to sell.
#### 50


In [114]:
best = []
for i in range(2):
    maximum_val = 0
    maximum_index = 0
    while i < len(scores):
        if scores[i] > maximum_val:
            maximum_val = scores[i]
            maximum_index = i
        i += 1
    best.append(maximum_index)
    scores.pop(maximum_index)
print(best)

[9, 1]


In [116]:
best_instructions = []

for i in best:
    best_instructions.append(generated_prompts.pop(i))

In [118]:
best_instructions[1]

'CORRECTION = She orders 3 * 12 = 36 issues. She has 1 * 6 = 12 issues. She has 36 - 12 = 36 issues. She has 36 / 4 = 12 issues. She gets 36 / 12 = 4 magazines every year. The answer: 4.'

In [45]:


def replace_pos(string1, string2, POS):

    """
    Replace tokens of specific parts of speech (POS) in string1 with corresponding
    tokens of the same POS from string2.
    
    Parameters:
    - string1 (str): The input string where certain POS will be replaced.
    - string2 (str): The reference string from which POS replacements will be taken.
    - POS (list): A list of POS tags to identify which tokens to replace in string1.

    Returns:
    - str: The modified string with replacements of tokens based on specified POS tags.
    
    Possible POS:
    "ADJ": "adjective",
    "ADP": "adposition",
    "ADV": "adverb",
    "AUX": "auxiliary",
    "CONJ": "conjunction",
    "CCONJ": "coordinating conjunction",
    "DET": "determiner",
    "INTJ": "interjection",
    "NOUN": "noun",
    "NUM": "numeral",
    "PART": "particle",
    "PRON": "pronoun",
    "PROPN": "proper noun",
    "PUNCT": "punctuation",
    "SCONJ": "subordinating conjunction",
    "SYM": "symbol",
    "VERB": "verb".
    """
    nlp = spacy.load("en_core_web_sm")
    doc1 = nlp(string1)
    doc2 = nlp(string2)
    
    new_tokens = []
    
    for token1 in doc1:
        if token1.pos_ in POS:
            #generating a list of all matching tokens
            matching_tokens = [token2.text for token2 in doc2 if token2.pos_ == token1.pos_]
            #use a random token if possible, or else use the same
            if matching_tokens:
                new_token = random.choice(matching_tokens)
            else:
                new_token = token1.text
        else:
            new_token = token1.text
        new_tokens.append(new_token)
    
    # Join the modified tokens to form the final string
    result = ' '.join(new_tokens)
    return result

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [119]:
best_instructions.append(replace_pos(best_instructions[0], best_instructions[1], ['ADV', 'ADJ', 'NOUN']))
best_instructions.append(replace_pos(best_instructions[1], best_instructions[0], ['ADV', 'ADJ', 'NOUN']))

In [120]:
for instr in generate_instructions1(4):
    best_instructions.append(instr)

In [121]:
for i in range(2):
    mutation_prompt = get_random_mutation_txt("./Prompt-Engineering-OpenDI/mutation_prompts.txt")
    best_instructions.append(apply_mutation(best_instructions[i], mutation_prompt))

In [122]:
for instr in best_instructions:
    print(instr+ '\n')

Detailed additional advice for people wishing to follow this instruction is as follows: If Suzy’s iPhone is 1 year old, then Ben’s iPhone is 2 * 1 = 2 years old. If Brandon’s iPhone is 4 times as old as Ben’s iPhone, then Ben’s iPhone is 4 * 2 = 8 years old. Since Ben’s iPhone is 2 years older than Suzy’s iPhone, then Ben’s iPhone is 8 + 1 = 9 years old. Since Brandon’s iPhone is 4 times as old as Ben’s iPhone

CORRECTION = She orders 3 * 12 = 36 issues. She has 1 * 6 = 12 issues. She has 36 - 12 = 36 issues. She has 36 / 4 = 12 issues. She gets 36 / 12 = 4 magazines every year. The answer: 4.

Detailed additional answer for magazines wishing to follow this year is as follows : If Suzy ’s iPhone is 1 issues old , then Ben ’s iPhone is 2 * 1 = 2 answer old . If Brandon ’s iPhone is 4 issues as old as Ben ’s iPhone , then Ben ’s iPhone is 4 * 2 = 8 answer old . Since Ben ’s iPhone is 2 issues older than Suzy ’s iPhone , then Ben ’s iPhone is 8 + 1 = 9 issues old . Since Brandon ’s iPhone

In [123]:
len(best_instructions)

10

In [127]:
def iteration(instructions, num_instructions = 10):
    generated_prompts = []
    scores = []

    for instruction in instructions:
        question, database_answer = get_random_test_object(dataset)  # Fetch a random question from your dataset
        mutation_prompt = get_random_mutation_txt("./Prompt-Engineering-OpenDI/mutation_prompts.txt")
        mutated_instruction = apply_mutation(instruction, mutation_prompt)
        generated_prompts.append(mutated_instruction)
        processed_output = process_with_llm(mutated_instruction)
        scores.append(fitness(database_answer, processed_output))
    
    return scores, generated_prompts

In [128]:
new_scores, new_prompts = iteration(best_instructions)

In [129]:
new_scores

[0.6666666666666666,
 0.3333333333333333,
 0.0,
 0.3333333333333333,
 0.0,
 0.0,
 0.0,
 0.0,
 0.6666666666666666,
 0.3333333333333333]

In [130]:
new_prompts

['Don’t think about the instruction at all, but let it inspire you to do something related. Talk about what that might be. Detailed additional advice for people wishing to follow this instruction is as follows: If Suzy’s iPhone is 1 year old, then Ben’s iPhone is 2 * 1 = 2 years old. If Brandon’s iPhone is 4 times as old as Ben’s iPhone, then Ben’s iPhone is 4 * 2 = 8 years old. Since Ben’s iPhone is 2 years older than Suzy’s iPhone, then Ben’s iPhone is 8 + 1 = 9 years old. Since Brandon’s iPhone is 4 times as old as Ben’s iPhone',
 'CORRECTION = CORRECTION = She orders 3 * 12 = 36 issues. She has 1 * 6 = 12 issues. She has 36 - 12 = 36 issues. She has 36 / 4 = 12 issues. She gets 36 / 12 = 4 magazines every year. The answer: 4.',
 'Look carefully to see what you did wrong, how could you fix the problem? Detailed additional answer for magazines wishing to follow this year is as follows : If Suzy ’s iPhone is 1 issues old , then Ben ’s iPhone is 2 * 1 = 2 answer old . If Brandon ’s iPh