In [1]:
import json
import os
import random
import time
import typing as _t
from pathlib import Path
from uuid import uuid4

import anthropic
import instructor
import openai
from dotenv import load_dotenv
from pydantic import (
    AfterValidator,
    BaseModel,
    BeforeValidator,
    Field,
    field_validator,
    model_validator,
)
from vu_panel import (
    MAX_CONCEPTS,
    MAX_SUBTOPICS,
    MIN_CONCEPTS,
    MIN_SUBTOPICS,
    BaseQuestion,
    Concept,
    CreatedTopic,
    Question,
    Subtopic,
    Topic,
    Topics,
)

from dreamai.ai import (
    ModelName,
    ask_cld_or_oai,
    assistant_message,
    claude_response,
    oai_response,
    system_message,
    user_message,
    count_gpt_tokens,
)
from dreamai.utils import deindent

random.seed(42)

load_dotenv()

ask_oai = instructor.from_openai(openai.OpenAI())
ask_cld = instructor.from_anthropic(anthropic.Anthropic())

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
COURSE = "math_102"
QUESTIONS_PER_FOLDER = 80
SYSTEM_PREFIX = "You are a world class math course instructor."

In [19]:
DAY = 26
created_topics_file = Path(f"{COURSE}_created_topics_may_{DAY}.json")
new_questions_file = Path(f"{COURSE}_new_questions_may_{DAY}.json")
prerequisities_file = Path(f"{COURSE}_prerequisities_may_{DAY}.json")
latest_topics_file = Path(f"{COURSE}_latest_topics_may_{DAY}.json")

In [4]:
outline = Path(f"{COURSE}.txt").read_text().splitlines()
outline

['Set, its different representations and types of sets',
 'Complex numbers their addition, subtraction, multiplication and division and Modulus of a complex number',
 'Mapping and their types, function and their types, composite and inverse of a function, addition, subtraction, multiplication and division of functions',
 'Quadratic functions and quadratic formula with the types of solutions',
 'Matrices with their addition, subtraction  and multiplication , finding the inverse of a matrix using augmented matrix and co-factors. Application of matrices in solving system of linear equations, Crammer rule Determinants',
 'Arithmetic and Geometric Sequence with their nth terms and Series sum.',
 'Permutations and Combinations with their applications on some real life scenarios',
 'Binomial theorem and its applications in generalizing the formulas for higher powers ',
 'In coordinate geometry find  the distance between two points, the slope of a line,condition for parallel and perpendicular 

In [5]:
# topic_system = f"""\
# {SYSTEM_PREFIX}
# You'll be given a topic description from a course outline and you have to generate a 3-5 word topic name that encapsulates the description.
# Then, generate {MIN_SUBTOPICS}-{MAX_SUBTOPICS} subtopics for the topic. Also 3-5 words each.
# Then for each subtopic, generate {MIN_CONCEPTS}-{MAX_CONCEPTS} concepts. Also 3-5 words each. The concepts should be related to the subtopic.
# Think of concepts as the smallest unit of knowledge that can be taught from the subtopic.
# """
# topics = []
# for line in outline:
#     topics.append(
#         ask_cld_or_oai(
#             ask_cld=ask_cld,
#             ask_oai=ask_oai,
#             model=ModelName.GPT_4,
#             response_model=CreatedTopic,
#             system=topic_system,
#             messages=[
#                 user_message(f"<topic_description>\n{line}\n</topic_description>")
#             ],
#         )  # type: ignore
#     )

In [6]:
# topics2 = []
# for topic in topics:
#     topic2 = Topic(topic=topic.name)
#     for subtopic in topic.subtopics:
#         subtopic2 = Subtopic(topic=topic.name, subtopic=subtopic.name)
#         subtopic2.add_concepts(subtopic.concepts)
#         topic2.add_subtopics(subtopic2)
#     topics2.append(topic2)

In [7]:
# topics3 = Topics()
# topics3.add_topics(topics2)

In [8]:
# topics3.concepts["Set Theory Basics_Set Representations_Roster Method"].model_dump()

In [9]:
# with open(created_topics_file, "w") as f:
#     json.dump(topics3.model_dump(), f, indent=2)

In [10]:
topics4 = Topics(**json.load(open(created_topics_file)))

In [11]:
new_questions = json.load(open(new_questions_file))
new_questions["questions"] = [
    Question(**question) for question in new_questions["questions"]
]
len(new_questions["questions"])

0

In [12]:
questions_dir = Path(
    f"{COURSE}_questions"
)  # path to the question bank with each question having an id, prompt, and solution
questions = [
    BaseQuestion(**json.loads(question_file.read_text()))
    for folder in questions_dir.iterdir()
    for question_file in list(folder.glob("*.json"))[:QUESTIONS_PER_FOLDER]
]
random.shuffle(questions)

In [13]:
questions_system = f"""\
{SYSTEM_PREFIX}
You'll be given the problem and solution of a question and list of topic_subtopic_concept objects.
Based on your knowledge of math, you have to decide which topic_subtopic_concept the question belongs to.
"""

In [14]:
for i, question in enumerate(
    questions[new_questions["used"] : 30], new_questions["used"] + 1
):
    print(f"Question {i}")
    concept_strs = [
        str(concept)
        for concept in topics4.concepts.values()
        if len(concept.questions) < 3
    ]
    if not concept_strs:
        break

    objects_str = "\n\n".join(concept_strs)
    concept_ids = list(topics4.concepts.keys())
    messages = [
        user_message(
            f"<question>\n{question}\n</question>\n\n<objects>\n{objects_str}\n</objects>"
        )
    ]
    try:
        belongs_to: str = ask_cld_or_oai(
            ask_cld=ask_cld,
            ask_oai=ask_oai,
            messages=messages,
            system=questions_system,
            model=ModelName.HAIKU,
            response_model=_t.Literal[*concept_ids],  # type: ignore
        )
        split = belongs_to.split("_")
        new_question = Question(
            topic=split[0],
            subtopic=split[1],
            concept=split[2],
            problem=question.problem,
            solution=question.solution,
        )
        new_questions["questions"].append(new_question.model_dump())
        topics4.add_questions(new_question)
        with open(new_questions_file, "w") as f:
            json.dump({"used": i, "questions": new_questions["questions"]}, f, indent=2)
        time.sleep(0.3)
    except Exception as e:
        print(e)
        continue

Question 1
Question 2
Question 3
Question 4
Question 5
Error in ask_cld_or_oai. Messages: [{'role': 'user', 'content': '<question>\n<problem>\nLet $ABCDEF$ be a regular hexagon, and let $G,H,I$ be the midpoints of sides $AB,CD,EF$ respectively. If the area of $\\triangle GHI$ is $225$, what is the area of hexagon $ABCDEF$?\n</problem>\n\n<solution>\nWe begin with a diagram of the given information: [asy]\nsize(4cm);\nreal x=sqrt(3);\npair d=(2,0); pair c=(1,x); pair b=(-1,x); pair a=-d; pair f=-c; pair e=-b;\npair g=(a+b)/2; pair h=(c+d)/2; pair i=(e+f)/2;\ndraw(a--b--c--d--e--f--a);\ndot(a); dot(b); dot(c); dot(d); dot(e); dot(f); dot(g); dot(h); dot(i);\ndraw(g--h--i--g);\nlabel("$A$",a,W);\nlabel("$B$",b,NNW);\nlabel("$C$",c,NNE);\nlabel("$D$",d,E);\nlabel("$E$",e,SSE);\nlabel("$F$",f,SSW);\nlabel("$G$",g,WNW);\nlabel("$H$",h,ENE);\nlabel("$I$",i,S);\n[/asy]\n\nTo increase the symmetry in the diagram, we can draw in the long diagonals of $ABCDEF$ as well as the mirror image of $\\tr

In [15]:
# topics4.add_questions(new_questions["questions"])

In [16]:
prereq_system = f"""\
{SYSTEM_PREFIX}
You'll be given a question with a problem and solution and a list of other questions.
Based on your knowledge of math, you have to decide which question form the list is a prerequisite to the given question.
Just one question. If none are a prerequisite, or if the question is super easy for a highschooler, select 'None'.
"""

In [17]:
prerequisites = json.load(open(prerequisities_file))
prerequisites

{}

In [18]:
used_topics = set()
used_subtopics = set()
used_concepts = set()
used_questions = set()
for topic_idx, topic_id in enumerate(list(topics4.topics.keys())[::-1]):
    topic = topics4.topics[topic_id]
    for subtopic_idx, subtopic_id in enumerate(list(topic.subtopics.keys())[::-1]):
        subtopic = topic.subtopics[subtopic_id]
        for concept_idx, concept_id in enumerate(list(subtopic.concepts.keys())[::-1]):
            concept = subtopic.concepts[concept_id]
            for concept_question in concept.questions.values():
                if concept_question.id in prerequisites:
                    continue
                prereq_qs = [
                    question
                    for question in topics4.questions.values()
                    if question.id != concept_question.id
                    and question.topic not in used_topics
                    and question.subtopic not in used_subtopics
                    and question.concept not in used_concepts
                    and question.id not in used_questions
                ]
                prereq_strs = [
                    f"<question>\n<id>\n{question.id}\n</id>\n{question.problem}\n{question.solution}\n</question>"
                    for question in prereq_qs
                ]
                if not prereq_strs:
                    break
                candidate_questions = "\n\n".join(prereq_strs)
                messages = [
                    user_message(
                        f"<question>\n{concept_question.problem}\n{concept_question.solution}\n</question>\n\n<candidate_questions>\n{candidate_questions}\n</candidate_questions>"
                    )
                ]
                try:
                    prereq_id: str = ask_cld_or_oai(
                        ask_cld=ask_cld,
                        ask_oai=ask_oai,
                        messages=messages,
                        system=prereq_system,
                        model=ModelName.HAIKU,
                        response_model=_t.Literal[
                            "None", *[question.id for question in prereq_qs]  # type: ignore
                        ],
                    )
                    if prereq_id not in ["None", None]:
                        topics4.add_prerequisites(
                            id=concept_question.id, prerequisites=topics4.get(prereq_id)
                        )
                    prerequisites[concept_question.id] = prereq_id
                    with open(prerequisities_file, "w") as f:
                        json.dump(prerequisites, f, indent=2)
                    time.sleep(0.3)
                except Exception as e:
                    print(e)
                    continue
                used_questions.add(concept_question.id)
            used_concepts.add(concept_id)
        used_subtopics.add(subtopic_id)
    used_topics.add(topic_id)

In [20]:
with open(latest_topics_file, "w") as f:
    json.dump(topics4.model_dump(), f, indent=2)

In [24]:
topics4.get("Functions And Mappings_Composite And Inverse Functions_Inverse Functions_2").model_dump()

{'problem': 'If $f(3)=1$ and $f(2x)=2f(x)$ for all $x$, find $f^{-1}(64)$.',
 'solution': 'We are looking for some $x$ such that $f(x)=64$.  We notice that by doubling $x$ we can double $f(x)$ as well and also that $f(3)=1$.\n\nApplying $f(2x)=2f(x)$ repeatedly, we have: \\begin{align*}\nf(3)&=1,\\\\\nf(6)&=2,\\\\\nf(12)&=4,\\\\\nf(24)&=8,\\\\\nf(48)&=16,\\\\\nf(96)&=32,\\\\\nf(192)&=64.\n\\end{align*}So $f^{-1}(64)=\\boxed{192}$.',
 'topic': 'Functions And Mappings',
 'prerequisite_ids': [],
 'postrequisite_ids': ['Trigonometric Functions And Identities_Solving Trigonometric Equations_Using Identities_1',
  'Data Analysis & Representation_Partial Fractions_Fraction Decomposition_1',
  'Trigonometric Functions And Identities_Trigonometric Identities_Basic Identities_1',
  'Quadratic Functions_Quadratic Formula_Application_3',
  'Quadratic Functions_Quadratic Functions Basics_Standard Form_1',
  'Quadratic Functions_Quadratic Formula_Application_4',
  'Functions And Mappings_Composite A