Evaluate Retrieval Impact
===

Questions:
 - Does retrieval matter much?
 - Does it matter if the retrieval is relevant to the user's query?



In [1]:
import json
import os
import pickle
import random
import time
from pathlib import Path
from pprint import pprint

import dotenv
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import scipy
import sklearn.metrics
import tiktoken
from tqdm import tqdm

from llm_math_education import prompt_utils, retrieval, retrieval_strategies
from llm_math_education.prompts import mathqa as mathqa_prompts

In [2]:
dotenv.load_dotenv("../.env")

True

In [3]:
os.environ["OPENAI_API_KEY"][:3]

'sk-'

In [4]:
openai.api_key = os.environ["OPENAI_API_KEY"]

In [5]:
# Locate the project data directory via a relative path and fail fast if it is missing.
data_dir = Path("../data")
assert data_dir.exists()
# Load the derived Rori lessons JSON into a DataFrame.
# NOTE: `df` is rebound later in this notebook to the table of generated responses.
with open(data_dir / "derived" / "rori_lessons.json") as infile:
    df = pd.read_json(infile)
df.shape

(67, 18)

### Load query data

In [6]:
# Read the student posts, shuffle them with a fixed seed, and keep only the posts
# labelled as "general" respondable queries.
mn_general_student_queries_filepath = data_dir / "app_data" / "mn_general_student_queries.csv"
query_df = pd.read_csv(mn_general_student_queries_filepath)
shuffled_query_df = query_df.sample(frac=1, random_state=87896)
general_query_df = shuffled_query_df[shuffled_query_df.is_respondable_query == "general"]
# Strip surrounding whitespace, then turn the "[Continued:]" marker into a line break.
student_queries = [
    post_content.strip().replace("[Continued:]", "\n") for post_content in general_query_df.post_content
]
len(student_queries)

51

In [7]:
student_queries[0]

'Can I get the steps for factoring quadratics'

In [18]:
student_queries

['Can I get the steps for factoring quadratics',
 'I need a standard way of recognizing polynomials and their degrees, please help me figure\nit out.',
 'how do i solve square root functions',
 'Can someone explain like interior and exterior angles? thanks',
 'How do you find the radius?',
 'What is vertex form and how do you solve for it?',
 'How do I multiply fractions???????',
 'Can someone help me with what an irrational number is vs a rational number',
 'How do you find the parent function of a graph?',
 'how do you know if a graph is an absolute value graph?',
 'I always forget the difference between commutative and associative. Does anybody know a way to make me remember?',
 'What is a function notation?',
 'I know that sin A=opposite/hypotenuse, cos A=adjacent/hypotenuse, and tan A=opposite/adjacent. Is there any other ratios like those?',
 "what are regression and median-fit lines?\n\n I really don't get them.(it) (that)",
 'after you distribute a negative does it disappear or

### Load dbs

In [8]:
# Directories passed to retrieval.RetrievalDb in the next cell: the app data directory
# (used for the Rori and OpenStax dbs) and the derived embeddings directory (used for the recipe db).
app_embedding_dir = data_dir / "app_data"
recipe_embedding_dir = data_dir / "derived" / "embeddings"

In [9]:
# One retrieval database per text source: two math sources (Rori micro-lessons, OpenStax
# subsections) and one off-topic source (RecipeNLG recipes).
# NOTE(review): the third argument looks like the name of the text column — confirm against RetrievalDb.
rori_db = retrieval.RetrievalDb(app_embedding_dir, "rori_microlesson", "db_string")
openstax_db = retrieval.RetrievalDb(app_embedding_dir, "openstax_subsection", "db_string")
recipe_db = retrieval.RetrievalDb(recipe_embedding_dir, "recipenlg", "db_string")

In [10]:
# Wrap each database in a DbInfo carrying a token budget and the prefix text placed
# before the retrieved texts in the prompt.
rori_microlesson_db_info = retrieval.DbInfo(
    rori_db,
    max_tokens=1000,
    # prefix="Here is some lesson content that might be relevant:\n",
    prefix="Connect the student's question to this lesson content:\n",
)
openstax_subsection_db_info = retrieval.DbInfo(
    openstax_db,
    max_tokens=600,
    # prefix="Here are some excerpts from a math textbook. If they are relevant to the question, feel free to use language or examples from these excerpts:\n",
    prefix="Use language from these math textbook excerpts in your response:\n",
)
recipenlg_db_info = retrieval.DbInfo(
    recipe_db,
    max_tokens=2000,
    # prefix="Here are some step-by-step descriptions. If they are relevant to the question, feel free to use language or examples from these excerpts:\n",
    prefix="Use language from these step-by-step descriptions in your response:\n",
)
# "math" condition: both prompt slots are mapped to math databases.
math_strategy = retrieval_strategies.MappedEmbeddingRetrievalStrategy(
    {
        "rori_microlesson_texts": rori_microlesson_db_info,
        "openstax_subsection_texts": openstax_subsection_db_info,
    },
)
# "recipe" condition: the `rori_microlesson_texts` slot is mapped to the recipe database.
# NOTE(review): presumably deliberate, so recipes land in the same prompt slot — confirm.
nomath_strategy = retrieval_strategies.MappedEmbeddingRetrievalStrategy(
    {
        "rori_microlesson_texts": recipenlg_db_info,
    },
)
# "none" condition: no retrieval strategy.
noretrieval_strategy = retrieval_strategies.NoRetrievalStrategy()

#### Exploration of conditioning on retrieval

In [11]:
# Exploration: retrieve a single recipe and ask the model to imitate it.
# NOTE(review): this rebinds `recipenlg_db_info` and `nomath_strategy` from the earlier setup
# cell, and maps a different slot name (`recipe_texts`). If this cell runs before the
# "Generate responses" section, the "recipe" condition there uses this definition instead.
recipenlg_db_info = retrieval.DbInfo(
    recipe_db,
    max_tokens=2000,
    max_texts=1,
    prefix="Model your response off the following recipe:\n",
)
nomath_strategy = retrieval_strategies.MappedEmbeddingRetrievalStrategy(
    {
        "recipe_texts": recipenlg_db_info,
    },
)

In [14]:
# Exploration: system prompt with a `{recipe_texts}` slot, the slot name mapped by the
# exploratory `nomath_strategy` defined in the preceding cell.
intro_prompt_messages = [
    {
        "role": "system",
        "content": """You are going to act as a mathematics tutor for a 13 year old student who is in grade 8 or 9.
This student lives in Ghana or Nigeria.
You will be encouraging and factual.

{recipe_texts}

Prefer simple, short responses.
If the student says something inappropriate or off topic you will say you can only focus on mathematics and ask them if they have any math-related follow-up questions.
""",
    },
]
for user_query in tqdm(student_queries, desc="Generating responses"):
    prompt_manager = prompt_utils.PromptManager()
    prompt_manager.set_intro_messages(intro_prompt_messages)
    prompt_manager.set_retrieval_strategy(nomath_strategy)
    messages = prompt_manager.build_query(user_query)
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=messages,
        request_timeout=20,
    )
    assistant_message = completion["choices"][0]["message"]["content"]
    # Only the first query is sent: this cell is a one-off spot check, not a full run.
    break
print(assistant_message)

Generating responses:   0%|                                                                                                 | 0/51 [00:05<?, ?it/s]

Of course! Factoring quadratics involves breaking down a quadratic expression into its factors. Here are the steps:

1. Make sure the quadratic expression is in the form of ax^2 + bx + c.
2. Look for any common factors among the three terms. If there is a common factor, factor it out.
3. Find two numbers whose product is equal to the product of the coefficient of x^2 (a) and the constant term (c), and whose sum is equal to the coefficient of x (b).
4. Rewrite the quadratic expression with these two numbers as the coefficients of x.
5. Group the terms and factor out any common factors.
6. Write the factored form of the quadratic expression.

Remember, practice makes perfect! If you have any further questions or need more examples, feel free to ask.





In [15]:
print(messages)

[{'role': 'system', 'content': 'You are going to act as a mathematics tutor for a 13 year old student who is in grade 8 or 9.\nThis student lives in Ghana or Nigeria.\nYou will be encouraging and factual.\n\nModel your response off the following recipe:\nBlonde Brownies recipe:  - Grease a 13 x 9 x 2-inch baking pan.  - Combine flour, baking powder and salt.  - Melt butter; remove from heat.  - Stir in sugar. Add eggs and vanilla.  - Stir until blended.  - Stir dry ingredients and walnuts into sugar mixture.  - Spread in pan.  - Bake in a 350° oven for 20 to 25 minutes.  - Cut into bars while warm.  - Makes 48 bars.\n\nPrefer simple, short responses.\nIf the student says something inappropriate or off topic you will say you can only focus on mathematics and ask them if they have any math-related follow-up questions.\n'}, {'role': 'user', 'content': 'Can I get the steps for factoring quadratics'}]


In [11]:
# Exploration: rebuild the math DbInfo objects with at most three texts each.
# NOTE(review): this rebinds `rori_microlesson_db_info`, `openstax_subsection_db_info` and
# `math_strategy` from the earlier setup cell. If this cell runs before the "Generate responses"
# section, the "math" condition there uses this definition instead.
rori_microlesson_db_info = retrieval.DbInfo(
    rori_db,
    max_tokens=1000,
    max_texts=3,
    # prefix="Here is some lesson content that might be relevant:\n",
    prefix="If relevant, refer to the explanation and example in these three lessons:\n",
)
openstax_subsection_db_info = retrieval.DbInfo(
    openstax_db,
    max_tokens=1000,
    max_texts=3,
    # prefix="Here are some excerpts from a math textbook. If they are relevant to the question, feel free to use language or examples from these excerpts:\n",
    prefix="Use language from these math textbook excerpts in your response:\n",
)
math_strategy = retrieval_strategies.MappedEmbeddingRetrievalStrategy(
    {
        "rori_microlesson_texts": rori_microlesson_db_info,
        # The OpenStax slot is mapped to an empty string instead of its DbInfo, which is left
        # commented out; `openstax_subsection_db_info` above is therefore unused by this strategy.
        "openstax_subsection_texts": "",  # openstax_subsection_db_info,
    },
)

In [17]:
# Exploration: put the retrieved lesson texts in the user message (after the query) and the
# OpenStax slot in the system message, then inspect the filled prompt and the response.
for user_query in tqdm(student_queries, desc="Generating responses"):
    # The prompt is rebuilt per query because the user message embeds `user_query` directly.
    intro_prompt_messages = [
        {
            "role": "system",
            "content": """You are going to act as a mathematics tutor for a 13 year old student who is in grade 8 or 9.
This student lives in Ghana or Nigeria.
You will be encouraging and factual.

{openstax_subsection_texts}

Prefer simple, short responses.
If the student says something inappropriate or off topic you will say you can only focus on mathematics and ask them if they have any math-related follow-up questions.
""",
        },
        {
            "role": "user",
            "content": user_query
            + """

{rori_microlesson_texts}
""",
        },
    ]

    prompt_manager = prompt_utils.PromptManager()
    prompt_manager.set_intro_messages(intro_prompt_messages)
    prompt_manager.set_retrieval_strategy(math_strategy)
    # No separate user message is passed (None) because the query is already part of the intro
    # messages; the query is supplied through `query_for_retrieval_context` instead.
    messages = prompt_manager.build_query(None, query_for_retrieval_context=user_query)
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        messages=messages,
        request_timeout=20,
    )
    assistant_message = completion["choices"][0]["message"]["content"]
    # Only the first query is sent: this cell is a one-off spot check, not a full run.
    break
print(len(messages))
for message in messages:
    print(message["content"] + "\n")
print("RESPONSE")
print(assistant_message)

Generating responses:   0%|                                                                                                 | 0/51 [00:02<?, ?it/s]

2
You are going to act as a mathematics tutor for a 13 year old student who is in grade 8 or 9.
This student lives in Ghana or Nigeria.
You will be encouraging and factual.



Prefer simple, short responses.
If the student says something inappropriate or off topic you will say you can only focus on mathematics and ask them if they have any math-related follow-up questions.


Can I get the steps for factoring quadratics

If relevant, refer to the explanation and example in these three lessons:
It is good to have you here! Let's learn how to find the square root of numbers.  Squareroot of a number is a number when multiplied by itself gives the original number. Square root is denoted by √. Note that square roots exist only for positive numbers.  Example: Find the square root of the given number Ques:  √196 Ans: 14 Hint: 14 multiplied by 14 gives the number 196.   It's time for you to pratice. Shall we get started?  Write yes to proceed.
In today's lesson, we are going to learn to identif




In [14]:
pprint(prompt_manager.recent_slot_fill_dict)

[{'openstax_subsection_texts': ''},
 {'rori_microlesson_texts': 'If relevant, refer to the explanation and example '
                            'in these three lessons:\n'
                            "It is good to have you here! Let's learn how to "
                            'find the square root of numbers.  Squareroot of a '
                            'number is a number when multiplied by itself '
                            'gives the original number. Square root is denoted '
                            'by √. Note that square roots exist only for '
                            'positive numbers.  Example: Find the square root '
                            'of the given number Ques:  √196 Ans: 14 Hint: 14 '
                            "multiplied by 14 gives the number 196.   It's "
                            'time for you to pratice. Shall we get started?  '
                            'Write yes to proceed.\n'
                            "In today's lesson, we are going to l

### Generate responses

In [11]:
ds = []

In [12]:
# Directory and file used to cache the generated responses between kernel sessions.
eval_dir = data_dir / "derived" / "eval"
eval_dir.mkdir(exist_ok=True)
response_filepath = eval_dir / "generations_20230809.pkl"
# Resume from previously saved generations when the cache file exists; otherwise keep the
# `ds` list initialised in the previous cell.
if response_filepath.exists():
    # Open the pickle itself in binary mode. The earlier `infile` handle belongs to the
    # lessons JSON and is already closed, so it must not be reused here.
    # The file is written by this notebook; never unpickle files from untrusted sources.
    with open(response_filepath, "rb") as infile:
        ds = pickle.load(infile)
len(ds)

0

In [14]:
# intro_prompts_key = "general_math_qa_intro"
intro_prompts_key = "retrieval_reliant_math_qa_intro"
# Every query is answered once under each (name, strategy) retrieval condition.
retrieval_conditions = [
    ("math", math_strategy),
    ("recipe", nomath_strategy),
    ("none", noretrieval_strategy),
]
for user_query in tqdm(student_queries, desc="Generating responses"):
    for retrieval_strategy_name, retrieval_strategy in retrieval_conditions:
        # Resume support: skip combinations that already have a stored generation in `ds`.
        if any(
            previous["user_query"] == user_query
            and previous["retrieval_strategy"] == retrieval_strategy_name
            and previous["intro_prompts_key"] == intro_prompts_key
            for previous in ds
        ):
            continue

        # Build the prompt for this query under the current retrieval condition.
        intro_prompt_messages = mathqa_prompts.intro_prompts[intro_prompts_key]["messages"]
        prompt_manager = prompt_utils.PromptManager()
        prompt_manager.set_intro_messages(intro_prompt_messages)
        prompt_manager.set_retrieval_strategy(retrieval_strategy)
        messages = prompt_manager.build_query(user_query)

        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0613",
            messages=messages,
            request_timeout=5,
        )
        assistant_message = completion["choices"][0]["message"]["content"]

        # Keep the full prompt next to the response so each generation can be audited later.
        generation_record = {
            "user_query": user_query,
            "retrieval_strategy": retrieval_strategy_name,
            "intro_prompts_key": intro_prompts_key,
            "response": assistant_message,
            "messages": messages,
        }
        ds.append(generation_record)
        time.sleep(0.4)  # being a bit polite on repeated api calls

Generating responses: 100%|████████████████████████████████████████████████████████████████████████████████████████| 51/51 [03:49<00:00,  4.50s/it]


In [15]:
len(ds)

153

In [16]:
# Persist every generation collected in `ds` to the cache file (binary pickle).
with open(response_filepath, "wb") as outfile:
    pickle.dump(ds, outfile)

In [17]:
# load generated responses
with open(response_filepath, "rb") as infile:
    pickle.load(infile)

In [21]:
# Tabulate the generations, keeping only the columns needed for the survey, and expose the
# row position as an explicit `response_id` column.
response_columns = ["user_query", "retrieval_strategy", "response"]
df = pd.DataFrame(ds, columns=response_columns)
df = df.rename_axis("response_id").reset_index()
df.shape

(153, 4)

In [22]:
df.sample(n=3)

Unnamed: 0,response_id,user_query,retrieval_strategy,response
148,148,i need help on how to graph quadratic funtions,recipe,Of course! I'm here to help you with graphing ...
99,99,How do you multiply fractions?!?!?,math,"Great question! To multiply fractions, you mul..."
13,13,How do you find the radius?,recipe,"To find the radius of a circle, you can use th..."


In [23]:
df.retrieval_strategy.value_counts()

retrieval_strategy
math      51
recipe    51
none      51
Name: count, dtype: int64

### Construct surveys

Generate two surveys.

 - Randomize the query order.
 - For each query, create two questions: one comparing recipe vs. none and one comparing math vs. none.
 - Alternate the questions between survey 1 and survey 2.

In [30]:
def get_question_with_random_order(group: pd.DataFrame, option_set: list, rng: "random.Random | None" = None) -> dict:
    """Build one survey question that presents the responses for ``option_set`` in random order.

    Args:
        group: Rows for a single user query; must have ``retrieval_strategy`` and
            ``response`` columns.
        option_set: Retrieval strategy names to compare (e.g. ``["math", "none"]``).
        rng: Optional ``random.Random`` instance used for shuffling. Pass a seeded
            instance to make the option order reproducible. Defaults to the global
            ``random`` module state.

    Returns:
        A dict with ``response{i}`` and ``response{i}_retrieval_strategy`` keys
        (``i`` starting at 1), one pair per option, in shuffled order.

    Raises:
        IndexError: If ``group`` has no row for one of the options.
    """
    shuffler = random if rng is None else rng
    question = {}
    for position, option in enumerate(shuffler.sample(option_set, k=len(option_set)), start=1):
        responses = group.loc[group.retrieval_strategy == option, "response"]
        if responses.empty:
            raise IndexError(f"No response found for retrieval strategy {option!r}")
        # If several rows match, the first one is used.
        question[f"response{position}"] = responses.iloc[0]
        question[f"response{position}_retrieval_strategy"] = option
    return question


def update_curr_survey(curr_survey, n_surveys):
    """Return the index of the survey after ``curr_survey``, wrapping back to 0 past the last one."""
    return (curr_survey + 1) % n_surveys


survey = []
n_surveys = 2
curr_survey = 0
# Visit each distinct query once, in a fixed pseudo-random order.
shuffled_queries = df.user_query.drop_duplicates().sample(frac=1, random_state=19881)
for user_query in shuffled_queries:
    group = df[df.user_query == user_query]
    # Two questions per query, each comparing one retrieval condition against no retrieval.
    # Each query adds exactly two questions and n_surveys is 2, so the "recipe" question
    # always lands in survey 0 and the "math" question in survey 1.
    for option_set in (["recipe", "none"], ["math", "none"]):
        question = get_question_with_random_order(group, option_set)
        survey.append({"survey": curr_survey, "user_query": user_query, **question})
        curr_survey = update_curr_survey(curr_survey, n_surveys)
len(survey)

102

In [32]:
eval_dir = data_dir / "derived" / "eval"
eval_dir.mkdir(exist_ok=True)

In [33]:
pd.DataFrame(survey).to_csv(eval_dir / "survey_20230809.csv", index=False)