In [1]:
import json
from collections import OrderedDict, defaultdict
from pathlib import Path
from typing import Union, Literal

import panel as pn
from pydantic import BaseModel, Field
from vu_models import ATTEMPTS, MAX_TOKENS, MODEL, QUESTIONS_PER_FOLDER, create_topic
from vu_models import Question as Question2
from vu_models import Topic as Topic2
from vu_models import Topics as Topics2

from dreamai.ai import ModelName

pn.extension("floatpanel", "gridstack", sizing_mode="stretch_both")  # type: ignore


%load_ext autoreload
%autoreload 2
%reload_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Attribute names that, in this order, are joined to form a node's hierarchical id.
STR_ATTRS = ["topic", "subtopic", "concept", "question_number"]


class Node(BaseModel):
    """Base model for every level of the topic hierarchy.

    A node's ``id`` is derived from whichever ``STR_ATTRS`` attributes the
    concrete class defines, joined with ``"_"``. Dependencies on other nodes
    are stored as lists of such ids.
    """

    topic: str
    prerequisite_ids: list[str] = Field(default_factory=list)
    postrequisite_ids: list[str] = Field(default_factory=list)

    @property
    def id(self) -> str:
        """Return the ``"_"``-joined values of the ``STR_ATTRS`` this node has."""
        return "_".join(
            str(getattr(self, attr)) for attr in STR_ATTRS if hasattr(self, attr)
        )

    def __str__(self) -> str:
        """Render the id and each available attribute as XML-like tagged sections."""
        sections = [f"<id>\n{self.id}\n</id>"]
        for attr in STR_ATTRS:
            if hasattr(self, attr):
                # str() first: question_number is an int on Question and has no .title().
                value = str(getattr(self, attr)).title()
                sections.append(f"<{attr}>\n{value}\n</{attr}>")
        return "\n\n".join(sections).strip()

    def add_prerequisite_id(self, id: str):
        """Record ``id`` as a prerequisite; a node never depends on itself."""
        if id == self.id:
            return
        # dict.fromkeys de-duplicates while keeping insertion order, so the
        # stored list (and any JSON dump of it) is deterministic.
        self.prerequisite_ids = list(dict.fromkeys([*self.prerequisite_ids, id]))

    def add_postrequisite_id(self, id: str):
        """Record ``id`` as a postrequisite; a node never depends on itself."""
        if id == self.id:
            return
        # Same order-preserving de-duplication as add_prerequisite_id.
        self.postrequisite_ids = list(dict.fromkeys([*self.postrequisite_ids, id]))


class Topic(Node):
    """Top-level node that owns subtopics, keyed by subtopic id.

    Ids are parsed by splitting on ``"_"``: one part is the topic, two a
    subtopic, three a concept and four a question. Names that themselves
    contain ``"_"`` are therefore misparsed.
    """

    subtopics: dict[str, "Subtopic"] = Field(default_factory=OrderedDict)

    def _resolve(self, parts: list[str]) -> Node:
        """Strictly look up the node addressed by 1-4 id parts.

        Raises:
            KeyError: if any level of the path is missing.
        """
        depth = len(parts)
        if depth == 1:
            return self
        subtopic = self.subtopics["_".join(parts[:2])]
        if depth == 2:
            return subtopic
        concept = subtopic.concepts["_".join(parts[:3])]
        if depth == 3:
            return concept
        return concept.questions["_".join(parts[:4])]

    def get(self, id: str) -> Node | None:
        """Return the node with the given id, or ``None`` if it does not exist.

        A missing id prints a message and returns ``None`` (matching the
        annotated return type) instead of raising ``KeyError``. A single-part
        id only matches when it is this topic's own id.
        """
        parts = id.split("_")
        if len(parts) == 1:
            if id == self.id:
                return self
        elif len(parts) <= 4:
            try:
                return self._resolve(parts)
            except KeyError:
                pass
        print(f"ID {id} not found")
        return None

    def add_subtopics(
        self,
        subtopics: Union[list[Union["Subtopic", str]], "Subtopic", str] | None = None,
    ):
        """Attach one or more subtopics, given as ``Subtopic`` objects or names.

        Each subtopic's ``topic`` is overwritten with this topic's name before
        it is stored under its id.
        """
        if subtopics is None or subtopics == [] or subtopics == "":
            return
        if not isinstance(subtopics, list):
            subtopics = [subtopics]
        for subtopic in subtopics:
            if isinstance(subtopic, str):
                subtopic = Subtopic(topic=self.topic, subtopic=subtopic)
            subtopic.topic = self.topic
            self.subtopics[subtopic.id] = subtopic

    def add_dependancies(
        self,
        id: str,
        dependancies: list[Node | str] | Node | str,
        mode: Literal["pre", "post"] = "pre",
    ):
        """Record dependencies for the node ``id`` inside this topic.

        For every hierarchy level shared by ``id`` and a dependency id (up to
        four), the node at that level gets the dependency's id truncated to the
        same level. So a concept-level dependency also links the enclosing
        subtopics and topics.

        Args:
            id: Id of a node in this topic; its first part must equal this
                topic's id, otherwise a message is printed and nothing changes.
            dependancies: Node(s) or id string(s) to depend on.
            mode: ``"pre"`` adds prerequisites, ``"post"`` adds postrequisites.

        Raises:
            KeyError: if ``id`` addresses a subtopic, concept or question that
                does not exist in this topic.
        """
        if dependancies is None or dependancies == [] or dependancies == "":
            return
        split_id = id.split("_")
        if split_id[0] != self.id:
            print(f"ID {id} does not match topic {self.id}")
            return
        if not isinstance(dependancies, list):
            dependancies = [dependancies]
        for dependancy in dependancies:
            dependancy_id = (
                dependancy if isinstance(dependancy, str) else dependancy.id
            )
            if dependancy_id == id:
                continue
            split_dependancy_id = dependancy_id.split("_")
            # Only levels present in both ids are linked; deeper than 4 is ignored.
            shared_depth = min(len(split_id), len(split_dependancy_id), 4)
            for depth in range(1, shared_depth + 1):
                dependancy_prefix = "_".join(split_dependancy_id[:depth])
                if mode == "pre":
                    self._resolve(split_id[:depth]).add_prerequisite_id(
                        dependancy_prefix
                    )
                elif mode == "post":
                    self._resolve(split_id[:depth]).add_postrequisite_id(
                        dependancy_prefix
                    )


class Topics(BaseModel):
    """Collection of ``Topic`` objects keyed by topic id."""

    topics: dict[str, Topic] = Field(default_factory=OrderedDict)

    def add_topics(
        self,
        topics: Union[list[Union["Topic", str]], "Topic", str] | None = None,
    ):
        """Add one or more topics, given as ``Topic`` objects or topic names."""
        if topics is None or topics == [] or topics == "":
            return
        if not isinstance(topics, list):
            topics = [topics]
        for topic in topics:
            if isinstance(topic, str):
                topic = Topic(topic=topic)
            self.topics[topic.id] = topic

    def _topic_for(self, id: str) -> Topic:
        """Return the topic that owns the node ``id``.

        An exact key match is tried first, so topic-level ids behave as before.
        Otherwise the part before the first ``"_"`` is used as the topic key.

        Raises:
            KeyError: if no matching topic is registered.
        """
        if id in self.topics:
            return self.topics[id]
        return self.topics[id.split("_")[0]]

    def add_prerequisites(
        self, id: str, prerequisites: list[Node | str] | Node | str | None = None
    ):
        """Link ``id`` to its prerequisites in both directions.

        The node ``id`` gets each prerequisite as a "pre" dependency, and each
        prerequisite gets ``id`` as a "post" dependency. Ids may address any
        level (topic, subtopic, concept, question); the owning topic is found
        from the id's first part.

        Raises:
            KeyError: if the topic owning ``id`` or a prerequisite is unknown.
        """
        if prerequisites is None or prerequisites == [] or prerequisites == "":
            return
        self._topic_for(id).add_dependancies(
            id=id, dependancies=prerequisites, mode="pre"
        )
        if not isinstance(prerequisites, list):
            prerequisites = [prerequisites]
        for prereq in prerequisites:
            prereq_id = prereq if isinstance(prereq, str) else prereq.id
            if prereq_id == id:
                continue
            self._topic_for(prereq_id).add_dependancies(
                id=prereq_id, dependancies=id, mode="post"
            )


class Subtopic(Node):
    """Second hierarchy level: a named subtopic holding concepts keyed by id."""

    subtopic: str
    concepts: dict[str, "Concept"] = Field(default_factory=OrderedDict)

    def add_concepts(
        self,
        concepts: Union[list[Union["Concept", str]], "Concept", str] | None = None,
    ):
        """Attach concepts given as ``Concept`` objects or plain names.

        Every attached concept has its ``topic`` and ``subtopic`` overwritten
        with this subtopic's values before being stored under its id.
        """
        if concepts is None or concepts == [] or concepts == "":
            return
        batch = concepts if isinstance(concepts, list) else [concepts]
        for entry in batch:
            if isinstance(entry, str):
                node = Concept(topic=self.topic, subtopic=self.subtopic, concept=entry)
            else:
                node = entry
            # Re-parent the concept so its id reflects this subtopic.
            node.topic = self.topic
            node.subtopic = self.subtopic
            self.concepts[node.id] = node


class Concept(Node):
    """Third hierarchy level: a concept holding questions keyed by id."""

    subtopic: str
    concept: str
    questions: dict[str, "Question"] = Field(default_factory=OrderedDict)

    def add_questions(
        self,
        questions: Union[list["Question"], "Question"] | None = None,
    ):
        """Attach one or more questions to this concept.

        Each question is re-parented (topic, subtopic, concept copied from this
        concept) and numbered as the current question count plus one, then
        stored under its resulting id.
        """
        if questions is None or questions == []:
            return
        incoming = questions if isinstance(questions, list) else [questions]
        for item in incoming:
            for attr in ("topic", "subtopic", "concept"):
                setattr(item, attr, getattr(self, attr))
            item.question_number = len(self.questions) + 1
            self.questions[item.id] = item


class BaseQuestion(BaseModel):
    """A problem statement together with its solution."""

    problem: str
    solution: str

    def problem_solution(self) -> str:
        """Return the problem and solution wrapped in XML-like tags."""
        problem_block = f"<problem>\n{self.problem}\n</problem>"
        solution_block = f"<solution>\n{self.solution}\n</solution>"
        return "\n\n".join((problem_block, solution_block))


class Question(Node, BaseQuestion):
    """Leaf of the hierarchy: a numbered question under a concept."""

    subtopic: str
    concept: str
    question_number: int = 1
    subquestions: list[BaseQuestion] = Field(default_factory=list)

    def __str__(self) -> str:
        """Render the question id followed by its tagged problem and solution."""
        rendered = f"<id>\n{self.id}\n</id>\n\n{self.problem_solution()}"
        return rendered.strip()

In [3]:
# Run configuration: course identifier and the day used in the file names.
DAY: int = 16
COURSE: str = "math_102"

# JSON artefacts read and written by the cells below (relative to the CWD).
created_topics_file: Path = Path(f"{COURSE}_created_topics_may_{DAY}.json")
created_questions_file: Path = Path(f"{COURSE}_created_questions_may_{DAY}.json")
final_topics_file: Path = Path(f"{COURSE}_final_topics_may_{DAY}.json")

In [4]:
# Load the previously finalized topics. The context manager closes the file
# handle instead of leaving it to the garbage collector.
with open(final_topics_file, "r") as f:
    old_topics = Topics2(**json.load(f))

In [5]:
# Build 600 dummy questions, cycling through old_topics.groups (looked up by
# stringified index). Each group supplies the topic, subtopic and concept, and
# questions are bucketed under "<topic>_<subtopic>_<concept>".
questions = defaultdict(list)
for i in range(600):
    group = old_topics.groups[str(i % len(old_topics.groups))]
    bucket = questions[f"{group.topic}_{group.subtopic}_{group.concept}"]
    bucket.append(
        Question(
            topic=group.topic,
            subtopic=group.subtopic,
            concept=group.concept,
            problem=f"Problem {i}",
            solution=f"Solution {i}",
        )
    )

In [6]:
# Rebuild the old topic -> subtopic -> concept hierarchy with the notebook's
# own models, attaching the dummy questions bucketed for each concept.
topics = []
concepts = []
for topic_name, topic in old_topics.topics.items():
    new_topic = Topic(topic=topic_name)
    for subtopic_name, subtopic in topic.subtopics.items():
        new_subtopic = Subtopic(topic=topic_name, subtopic=subtopic_name)
        for concept_name, concept in subtopic.concepts.items():
            new_concept = Concept(
                topic=topic_name, subtopic=subtopic_name, concept=concept_name
            )
            question_key = f"{topic_name}_{subtopic_name}_{concept_name}"
            new_concept.add_questions(questions[question_key])
            new_subtopic.add_concepts(new_concept)
            concepts.append(new_concept)
        # Attach the subtopic only after all of its concepts are in place.
        new_topic.add_subtopics(new_subtopic)
    topics.append(new_topic)

In [None]:
# Minimum height passed to every button created by make_button.
MIN_BUTTON_HEIGHT = 35


def make_button(
    button_id: str, button_name: str, button_args: dict
) -> pn.widgets.Button:
    """Create a stretch-sized Panel button tagged with an ``id`` attribute.

    Args:
        button_id: Value stored on the returned widget as ``button.id``.
        button_name: Label shown on the button.
        button_args: Must contain the keys ``"button_style"`` and
            ``"button_type"``, which are forwarded to the widget.

    Returns:
        The configured ``pn.widgets.Button``.
    """
    widget_kwargs = dict(
        name=button_name,
        button_style=button_args["button_style"],
        button_type=button_args["button_type"],
        sizing_mode="stretch_both",
        min_height=MIN_BUTTON_HEIGHT,
    )
    button = pn.widgets.Button(**widget_kwargs)
    button.id = button_id  # type: ignore
    return button

In [None]:
# math_102_text = Path(f"{COURSE}.txt").read_text()

In [None]:
# created_topics = [
#     create_topic(topic, model=ModelName.GPT_4, attempts=2)
#     for topic in math_102_text.splitlines()
# ]

In [None]:
# topics = Topics(topics={topic.name: topic for topic in created_topics if topic})

In [None]:
# topics.create_groups(model=ModelName.GPT_4, attempts=ATTEMPTS, max_tokens=MAX_TOKENS)

In [None]:
# with open(created_topics_file, "w") as f:
#     json.dump(topics.model_dump(), f, indent=2)

In [None]:
# Load the created topics. The context manager closes the file handle
# instead of leaking it.
# NOTE(review): this rebinds `topics`, which an earlier cell built as a list.
with open(created_topics_file) as f:
    topics = Topics(**json.load(f))

In [None]:
# Load up to QUESTIONS_PER_FOLDER questions from every folder, keyed by the
# "id" stored in each JSON file.
questions_dir = Path(f"{COURSE}_questions")
questions = {}
# iterdir()/glob() return entries in arbitrary order; sorting makes the
# per-folder slice (and the resulting dict order) reproducible across runs.
for folder in sorted(questions_dir.iterdir()):
    for question_file in sorted(folder.glob("*.json"))[:QUESTIONS_PER_FOLDER]:
        # Read and parse each file once instead of twice.
        question_data = json.loads(question_file.read_text())
        questions[question_data["id"]] = Question(**question_data)

In [None]:
# Collect the questions that already have a (truthy) group_id, keyed by id,
# and display how many there are.
# NOTE(review): the Question model defined in this notebook declares no
# `group_id` field; confirm which Question class populated `questions`.
done_questions = {}
for question in questions.values():
    if question.group_id:
        done_questions[question.id] = question
len(done_questions)

In [None]:
# Display how many entries `questions` currently holds.
len(questions)

In [None]:
# Assign a group and generate subquestions for every question that is not
# grouped yet. Both steps mutate the question object in place, so `questions`
# needs no re-assignment.
n_questions = len(questions)
counter = 0  # number of questions actually processed by this run
for position, (question_id, question) in enumerate(questions.items(), start=1):
    # Same "already grouped" test (truthiness) as the done_questions filter,
    # so an empty or missing group_id is treated as not done in both places.
    if question.group_id:
        continue
    # Report the question's position so skipped entries no longer repeat
    # the same progress number.
    print(f"Question {position}/{n_questions}")
    question.assign_group(topics=topics, model=ModelName.GPT_4)
    question.add_subquestions(model=ModelName.GPT_4)
    counter += 1

In [None]:
# Serialize before opening the target: open(..., "w") truncates immediately,
# so a failure in model_dump()/json.dumps must not wipe the existing file.
final_topics_json = json.dumps(topics.model_dump(), indent=2)
with open(final_topics_file, "w") as f:
    f.write(final_topics_json)

In [None]:
# Serialize before opening the target: open(..., "w") truncates immediately,
# so a failure while dumping any question must not wipe the existing file.
created_questions_json = json.dumps(
    {
        question_id: question.model_dump()
        for question_id, question in questions.items()
    },
    indent=2,
)
with open(created_questions_file, "w") as f:
    f.write(created_questions_json)

In [None]:
# pdf_file = "/media/hamza/data2/algebra.pdf"
# collection_name = "algebra_collection"

In [None]:
# pdf_collection = pdf_to_collection(
#     pdf_file,
#     collection_name=collection_name,
#     chunk_size=4000,
#     chunk_overlap=200,
#     device="cuda",
#     delete_existing=True,
# )

In [None]:
# pdf_collection = chroma_collection(name=collection_name, delete_existing=False)
# pdf_collection.count()

In [None]:
import pandas as pd

In [None]:
# Small scratch frame: an integer column "a" and a string column "b".
df = pd.DataFrame(
    data={
        "a": [1, 2, 3],
        "b": ["a", "b", "c"],
    }
)
df