Hint Prompt Engineering
===

In [4]:
import json
import os
import re
from collections import Counter
from pathlib import Path

import dotenv
import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import tiktoken

In [2]:
# Load variables from the .env file one directory up, then hand the key to the OpenAI client.
dotenv.load_dotenv("../.env")
# Indexing os.environ (rather than .get) raises KeyError right here if the key is missing.
openai.api_key = os.environ["OPENAI_API_KEY"]

### Data loading and merging

In [5]:
# tiktoken encoding for gpt-3.5-turbo.
# NOTE(review): no other cell visible in this notebook references `tokenizer` — confirm it is still needed.
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")

In [6]:
# The path is relative to the kernel's working directory, so fail early and say
# where we actually looked if the notebook was started from somewhere else.
data_dir = Path("../data")
assert data_dir.exists(), f"Data directory not found: {data_dir.resolve()}"

In [7]:
# Table of recorded wrong answers: one row per question, keyed by `unique_index`,
# holding the expected `answer` and the `incorrect_answer`.
incorrect_answer_csv_filepath = data_dir / "derived" / "incorrect_answers.csv"
incorrect_lesson_df = pd.read_csv(incorrect_answer_csv_filepath)
print(incorrect_lesson_df.shape)
# Fixed seed so the preview shows the same rows on every Restart & Run All.
incorrect_lesson_df.sample(n=3, random_state=0)

(153, 3)


Unnamed: 0,unique_index,answer,incorrect_answer
76,1005,30,28
13,156,31,30
118,1815,9/4,4/9


In [8]:
# NOTE(review): unpickling can execute arbitrary code — only load pickles produced by this project.
lessons_pickle_path = data_dir / "derived" / "rori_lessons.pkl"
lesson_df = pd.read_pickle(lessons_pickle_path)
# Turn the index into a regular column; an unnamed index comes out as "index",
# which is renamed so it can be used as the `unique_index` join key later on.
lesson_df = lesson_df.reset_index()
lesson_df = lesson_df.rename(columns={"index": "unique_index"})
lesson_df.shape

(2570, 21)

In [9]:
# Walk the lessons one code at a time, recording every code and, separately,
# the codes where at least one row (explanation or question) carries an image.
lesson_codes = set()
lesson_codes_with_images = set()
for code, rows in lesson_df.groupby("lesson_code"):
    lesson_codes.add(code)
    explanation_has_image = (rows.is_explanation & rows.has_image).any()
    question_has_image = (~rows.is_explanation & rows.has_image).any()
    if explanation_has_image or question_has_image:
        lesson_codes_with_images.add(code)
len(lesson_codes_with_images), len(lesson_codes)

(74, 225)

In [10]:
# Restrict to grade 6-8 rows whose lesson contains no images at all.
no_image_mask = ~lesson_df.lesson_code.isin(lesson_codes_with_images)
target_grade_mask = lesson_df.grade.isin([6, 7, 8])
sdf = lesson_df[no_image_mask & target_grade_mask]
f"{len(sdf)} / {len(lesson_df)}", f"{sdf.lesson_code.nunique()} / {lesson_df.lesson_code.nunique()}"

('589 / 2570', '52 / 225')

In [11]:
# Attach the recorded incorrect answer to each question row. The left join keeps
# rows without a match, leaving `incorrect_answer` as NaN for them.
incorrect_answer_cols = incorrect_lesson_df[["unique_index", "incorrect_answer"]]
sdf = sdf.merge(incorrect_answer_cols, how="left", on="unique_index")
sdf["incorrect_answer"].notna().sum()

28

In [12]:
# Keep only the rows that have a recorded incorrect answer.
has_incorrect_answer = sdf["incorrect_answer"].notna()
sdf = sdf[has_incorrect_answer]
len(sdf)

28

In [13]:
# Number of remaining rows per lesson code.
sdf["lesson_code"].value_counts()

lesson_code
G8.N3.2.4.1    7
G8.A2.1.1.1    6
G6.S2.1.1.1    5
G8.N4.2.3.1    4
G7.N5.1.1.2    1
G7.N5.1.1.1    1
G6.N1.1.2.1    1
G6.N2.1.2.3    1
G7.N4.2.1.1    1
G8.N3.2.4.2    1
Name: count, dtype: int64

In [14]:
# Show just the columns needed to eyeball each question against its answers.
preview_columns = ["unique_index", "lesson_code", "question", "answer", "incorrect_answer"]
sdf[preview_columns]

Unnamed: 0,unique_index,lesson_code,question,answer,incorrect_answer
14,434,G8.A2.1.1.1,A car travels 120 miles in H hours. The expres...,A,B
15,435,G8.A2.1.1.1,"In stable, there are z horses. 10 of them are ...",D,A
16,436,G8.A2.1.1.1,John scores 3 more points than James p scores....,B,A
17,437,G8.A2.1.1.1,If you multiply four by n and decreased by six...,A,B
18,438,G8.A2.1.1.1,"The difference of thirteen and y, then add x. ...",C,D
21,441,G8.A2.1.1.1,There are x ballpens in a pack. \nI buy 4 pack...,B,A
25,445,G8.N3.2.4.1,8/10 = _____%,80,0.8
26,446,G8.N3.2.4.1,2/5 = _____%,40,10
27,447,G8.N3.2.4.1,6/20 = _____%,30,60
28,448,G8.N3.2.4.1,34/100 = _____%,34,340


In [15]:
# Hard-coded `unique_index` values to drop before prompting.
# NOTE(review): the reason these rows are excluded is not recorded here — confirm and document.
exclude_indices = [2205, 2549, 2550, 2551, 2558]
keep_mask = ~sdf["unique_index"].isin(exclude_indices)
sdf = sdf[keep_mask]
len(sdf)

23

### Prompt Engineering

In [90]:
model = "gpt-3.5-turbo-16k"
prompt = """Generate a sequence of hints that will help a student stuck on the following math problem:
{question}
"""

# One chat completion per question, collected in the same order as the rows of `sdf`.
# NOTE(review): later cells search these responses for `MaE<number>` codes, which this
# hint prompt never asks for — confirm which prompt produced the saved outputs.
results = []
for row in sdf.itertuples():
    # Pass only values that come from the row itself. The call used to pass
    # `mae_spreadsheet_string` and `mae_count` as well, which no visible cell defines,
    # so it raised NameError on a fresh kernel. `str.format` ignores keyword arguments
    # the template does not reference, so the extra row fields are harmless and stay
    # available for template variants.
    formatted_prompt = prompt.format(
        question=row.question,
        incorrect_answer=row.incorrect_answer,
        answer=row.answer,
    )
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": formatted_prompt,
            }
        ],
    )
    # Keep only the text of the first returned choice.
    result = completion["choices"][0]["message"]["content"]
    results.append(result)

In [91]:
# Copy of the question table with the model response added as a new column;
# `sdf` itself is left untouched.
m_df = sdf.assign(gpt_result=results)

In [100]:
def get_maes(result):
    """Return every `MaE<digits>` code found in `result`, in order of appearance.

    Repeated codes are kept; an empty list is returned when nothing matches.
    """
    mae_pattern = "MaE[0-9]+"
    matches = re.findall(mae_pattern, result)
    return matches


# One list of MaE codes per model response, aligned with the rows of `m_df`.
mae_lists = [get_maes(gpt_result) for gpt_result in m_df.gpt_result]

In [102]:
# Number of MaE codes mentioned in each response.
m_df["n_maes"] = list(map(len, mae_lists))

In [105]:
# Responses in which no MaE code was found.
m_df.loc[m_df["n_maes"] == 0, ["question", "answer", "incorrect_answer", "gpt_result", "n_maes"]]

Unnamed: 0,question,answer,incorrect_answer,gpt_result,n_maes
28,34/100 = _____%,34,340,One possible MaE in the incorrect answer is wh...,0
29,125/1000 = ____ %,12.5,125,There are no MaEs identified in the incorrect ...,0
37,2 x 2 = ___,2^2,4,"Based on the given spreadsheet, there are no i...",0
56,√225 = __,15,25,There are no MaEs present in the incorrect ans...,0
164,Two hundred fifty three thousand three hundred...,=,2,There are no MaEs in the incorrect answer.,0


In [112]:
# Tally how often each MaE code appears across all responses and show the top five.
c = Counter(mae for mae_list in mae_lists for mae in mae_list)
c.most_common(5)

[('MaE7', 5), ('MaE52', 4), ('MaE47', 3), ('MaE29', 3), ('MaE15', 2)]

In [115]:
# Save the questions, the model responses, and the MaE counts for later review.
export_columns = ["lesson_code", "grade", "question", "answer", "incorrect_answer", "gpt_result", "n_maes"]
export_path = data_dir / "derived" / "misconception_identification_example.csv"
m_df[export_columns].to_csv(export_path)