In [3]:
import os
from supabase import create_client, Client

# Supabase connection settings are read from the environment rather than
# hard-coded: export SUPABASE_URL (project URL) and SUPABASE_KEY (public
# anon key) before starting the kernel. A missing variable yields None here.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Client object used by the later cells to query tables.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

In [5]:
# Build a query for at most 5 rows (all columns) of the "aqua_rat_train"
# table, then run it against Supabase.
sample_query = supabase.table("aqua_rat_train").select("*").limit(5)
response = sample_query.execute()

# Show the rows that came back.
print(response.data)

[{'id': 1, 'question': "Two friends plan to walk along a 43-km trail, starting at opposite ends of the trail at the same time. If Friend P's rate is 15% faster than Friend Q's, how many kilometers will Friend P have walked when they pass each other?", 'options': ['A)21', 'B)21.5', 'C)22', 'D)22.5', 'E)23'], 'rationale': 'If Q complete x kilometers, then P completes 1.15x kilometers.\nx + 1.15x = 43\n2.15x=43\nx = 43/2.15 = 20\nThen P will have have walked 1.15*20=23 km.\nThe answer is E.', 'correct': 'E'}, {'id': 2, 'question': 'In the coordinate plane, points (x, 1) and (5, y) are on line k. If line k passes through the origin and has slope 1/5, then what are the values of x and y respectively?', 'options': ['A)4 and 1', 'B)1 and 5', 'C)5 and 1', 'D)3 and 5', 'E)5 and 3'], 'rationale': 'Line k passes through the origin and has slope 1/5 means that its equation is y=1/5*x.\nThus: (x, 1)=(5, 1) and (5, y) = (5,1) -->x=5 and y=1\nAnswer: C', 'correct': 'C'}, {'id': 3, 'question': 'For al

In [None]:
# TODO: create a tagging logic to assign concepts to math problems

from langchain import PromptTemplate, LLMChain
from langchain.llms import OpenAI
import json

# Prompt template for math concept tagging.
# The few-shot examples deliberately use only labels from the allowed category
# list stated in the first paragraph, so the model is not steered toward tags
# outside that vocabulary. The final instruction asks for a comma-separated
# answer so the completion can be split into a list of labels.
template = """
You are an expert in math education. Your task is to identify the Common Core high school math conceptual categories that each given math word problem relates to. The possible categories are: Number and Quantity, Algebra, Functions, Modeling, Geometry, Statistics and Probability.

Here are some examples:

Example 1:
Question: Solve for x: 2x + 3 = 7
Categories: Algebra

Example 2:
Question: What is the area of a rectangle with length 5 and width 3?
Categories: Geometry

Example 3:
Question: If a train travels at 60 km/h, how far will it travel in 2 hours?
Categories: Number and Quantity, Modeling

Now, for the following question, list all the categories that apply, using only the category names given above, separated by commas:

Question: {question}

Categories:
"""

# Wrap the raw template text; "question" is its only placeholder.
prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)

# Language model used for tagging; temperature=0 is requested to reduce
# run-to-run variation in the returned labels.
# NOTE(review): "gpt-4" is a chat model while `OpenAI` is the completion-style
# wrapper -- confirm this works with the installed langchain version, or
# switch to the chat-model wrapper. The API key is presumably picked up from
# the environment (it is not passed here); verify before running.
llm = OpenAI(
    model_name="gpt-4",
    temperature=0,
)

# Chain that fills the prompt with a question and sends it to the model.
chain = LLMChain(prompt=prompt, llm=llm)

# Function to tag a question
def tag_question(question):
    """Ask the LLM chain for the concept categories of one math problem.

    Args:
        question: Text of the math word problem to tag.

    Returns:
        A list of category labels (strings) in the order the model produced
        them. Returns an empty list if the completion contains no labels.
    """
    result = chain.run(question=question)
    # The model does not reliably separate labels with exactly ", ": it may
    # omit the space, put one label per line, or end with a period. Treat
    # both commas and newlines as separators and normalise each piece.
    pieces = result.replace("\n", ",").split(",")
    labels = (piece.strip().rstrip(".").strip() for piece in pieces)
    # Drop empty fragments (e.g. from trailing separators or a blank reply).
    return [label for label in labels if label]

# Try the tagger on one hand-written word problem and print the question
# followed by the labels it received.
sample_question = (
    "A grocery sells a bag of ice for $1.25, and makes 20% profit. "
    "If it sells 500 bags of ice, how much total profit does it make?"
)
tags = tag_question(sample_question)
print(f"Question: {sample_question}", f"Tags: {tags}", sep="\n")

# Example: Process the Aqua Rat dataset (assuming JSON format)
def process_dataset(dataset_path, output_path):
    """Tag every question in a JSON dataset and write the result to disk.

    The input file must contain a JSON array of objects, each with a
    'question' key. Each object gets a new 'concepts' key holding the list
    returned by ``tag_question`` for its question; all other keys are kept.

    Args:
        dataset_path: Path of the JSON file to read.
        output_path: Path of the JSON file to write (overwritten if present).

    Raises:
        KeyError: If an item has no 'question' key.
    """
    # JSON is UTF-8 by specification; be explicit so that non-ASCII math
    # symbols do not depend on the platform's default locale encoding.
    with open(dataset_path, 'r', encoding='utf-8') as f:
        dataset = json.load(f)

    tagged_dataset = []
    for item in dataset:
        # One LLM call per item; the item dict is updated in place.
        item['concepts'] = tag_question(item['question'])
        tagged_dataset.append(item)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(tagged_dataset, f, indent=2)

# Example usage (uncomment to run)
# dataset_path = "aqua_rat.json"
# output_path = "aqua_rat_tagged.json"
# process_dataset(dataset_path, output_path)