In [1]:
import os
from dotenv import load_dotenv
from openai import OpenAI
from openai.types.responses.response_output_message import ResponseOutputMessage
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
import time
from google.genai import types
import pickle

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()
# API keys come from the environment; os.getenv yields None when a key is unset.
OPENAI_API = os.getenv('OPENAI_API_KEY')
# NOTE(review): GOOGLE_API is not referenced anywhere else in the visible notebook.
GOOGLE_API = os.getenv('GOOGLE_API_KEY')
# Embedding function handed to the Chroma store below.
embedding = OpenAIEmbeddings(model="text-embedding-3-large", openai_api_key=OPENAI_API)
# Vector store queried by chat_demo via similarity_search.
db_openai = Chroma(persist_directory="./vectordb/openai_vectorDB/", embedding_function=embedding) #for existing database
# Model name passed to llm.responses.create inside chat_demo.
MODEL_QUESTION_GENERATION = "o4-mini"  # Model for question generation

In [3]:
def extract_llm_response_code_interpreter(response: list) -> str:
    """Return the text of the last message item in a response output list.

    The list is walked from its end towards its start. The first item that
    is a ``ResponseOutputMessage`` supplies the result through
    ``content[0].text``; items of any other type are skipped.

    Args:
        response: The output items of a Responses API call.

    Returns:
        The message text, or an empty string when the list holds no
        ``ResponseOutputMessage``.
    """
    for item in reversed(response):
        if isinstance(item, ResponseOutputMessage):
            # Only the last message matters, so stop at the first hit.
            return item.content[0].text
    return ""

In [4]:
def _parse_true_false_response(response_text: str):
    """Split a reply in the Q:/E:/A: format into ``(explanation, answer)``.

    Lines are stripped and dispatched on their prefix:

    * ``Q:`` ends any explanation that is being collected.
    * ``E:`` starts the explanation; following unprefixed lines are appended
      to it until a ``Q:`` or ``A:`` line appears.
    * ``A:`` carries the verdict. Only the first word is inspected, after
      removing surrounding punctuation/markdown markers, so ``True.`` and
      ``**False**`` are understood. Anything else (for example
      ``I don't know``) leaves the answer unset instead of being recorded
      as ``False``.

    Returns:
        A tuple ``(explanation, answer)``. ``explanation`` is the collected
        lines joined with newlines. ``answer`` is ``True``/``False``, or the
        empty string when no recognisable verdict was found.
    """
    verdict_punctuation = "*_`.,;:!()'\""
    explanation_lines = []
    answer = ""  # sentinel: stays "" until a True/False verdict is parsed
    collecting_explanation = False

    for raw_line in response_text.split("\n"):
        line = raw_line.strip()
        if line.startswith("Q:"):
            collecting_explanation = False
        elif line.startswith("E:"):
            explanation_lines.append(line[2:].strip())
            collecting_explanation = True
        elif line.startswith("A:"):
            words = line[2:].split()
            verdict = words[0].strip(verdict_punctuation).lower() if words else ""
            if verdict in ("true", "false"):
                answer = verdict == "true"
            collecting_explanation = False
        elif collecting_explanation:
            explanation_lines.append(line)

    return "\n".join(explanation_lines), answer


def chat_demo(question: str, container_id_manual):
    """Ask the LLM to judge a True/False linear-algebra statement.

    The prompt combines formatting instructions, the statement itself and
    the page content of the four most similar documents from ``db_openai``.
    The model is called with the code-interpreter tool bound to the given
    container, and its reply is parsed for an explanation and a verdict.

    Args:
        question: The statement to evaluate.
        container_id_manual: Value passed as the ``container`` of the
            code-interpreter tool.

    Returns:
        A tuple ``(explanation, answer)``. ``answer`` is ``True`` or
        ``False``; it is the empty string when the question is empty, the
        API call failed, or no True/False verdict could be parsed. In those
        cases a diagnostic message is printed instead of raising.
    """
    if question == "":
        print("Error: The question is missing.")
        # Return the same shape as the normal path so callers can unpack it.
        return "", ""

    custom_prompt = (
        "You are an assistant for question-answering tasks in linear algebra. "
        "Your are given a True/False statement. You must include 'True', 'False' or 'I don't know' in your answer. "
        "If the statement is 'False', a counter-example is sufficient. "
        "If the statement is 'True', you briefly outline a proof and/or mention relevant theorems. "
        "If you are not sure, you say 'I don't know'. "
        "Please format as follows:\n"
        "- The question should start with 'Q:' followed by the question text.\n"
        "- The explanation should start with 'E:' followed by the explanation text.\n"
        "- The answer should start with 'A:' followed by 'True' or 'False'.\n"
        "For example:\n"
        "Q: The determinant of a matrix is always non-negative.\n"
        "E: The determinant can be negative depending on the matrix. For example, $\\begin{pmatrix} 1 & 0 \\\\ 0 & -1 \\end{pmatrix}$.\n"
        "A: False\n"
        # Trailing space added: the two sentences were previously fused together.
        "Please use LaTeX formatting for mathematical expressions by writing them between dollar signs. "
        "For example, to write a matrix, use $\\begin{pmatrix} a & b \\\\ c & d \\end{pmatrix}$. "
        "You can write and run code to answer the question. "
        "The question is: " + question + ". "
    )

    # Retrieve relevant context from the vector database
    retrieved_docs = db_openai.similarity_search(question, k=4)
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    rag_prompt = (
        "Use the following pieces of retrieved context to answer the question. "
        "\n\nContext:\n" + context
    )
    prompt = custom_prompt + rag_prompt
    llm = OpenAI(api_key=OPENAI_API)

    # Bound before the try block so a failed API call cannot leave it undefined.
    answer_llm = ""
    try:
        response = llm.responses.create(
            model=MODEL_QUESTION_GENERATION,
            tools=[{"type": "code_interpreter", "container": container_id_manual}],
            input=prompt,
        )
        # Extract the answer from the response
        answer_llm = extract_llm_response_code_interpreter(response.output)
        print(f"Answer: {answer_llm}")
        if answer_llm == "":
            raise ValueError("Empty answer received from LLM.")
    except Exception as e:
        # Best effort: report the failure and fall through with an empty reply.
        print(f"Error generating questions.\nError: {e}")

    explanation, answer = _parse_true_false_response(answer_llm)

    if explanation == "" or answer == "":
        print(f"Explanation: {explanation}")
        print(f"Answer: {answer}")
        print("Error: The explanation or answer is missing from the response.")

    time.sleep(1)  # Simulate processing time

    return explanation, answer

In [5]:
import re

def extract_question_answer(tex_content):
    """Collect question and solution texts from a LaTeX exam source.

    Only the first ``enumerate`` environment is examined (up to the first
    ``\\end{enumerate}``). Inside it, a question is the text between an
    ``\\item`` and the next ``\\begin{solutionorbox}``, and an answer is the
    body of a ``solutionorbox`` environment that carries a bracketed
    argument. All extracted texts are whitespace-stripped.

    Args:
        tex_content: Full LaTeX source as a string.

    Returns:
        A tuple ``(questions, answers)`` of string lists; both are empty
        when no ``enumerate`` environment is present.
    """
    enumerate_env = re.search(
        r'\\begin{enumerate}(.*?)\\end{enumerate}', tex_content, re.DOTALL
    )
    if enumerate_env is None:
        return [], []
    body = enumerate_env.group(1)

    # \item ... up to (not including) the next \begin{solutionorbox}
    question_pattern = re.compile(
        r'\\item(.*?)(?=\\begin{solutionorbox})', re.DOTALL
    )
    # \begin{solutionorbox}[...] ... \end{solutionorbox}
    answer_pattern = re.compile(
        r'\\begin{solutionorbox}\[[^\]]*\]\s*(.*?)\\end{solutionorbox}', re.DOTALL
    )

    questions = [text.strip() for text in question_pattern.findall(body)]
    answers = [text.strip() for text in answer_pattern.findall(body)]
    return questions, answers

In [6]:
def read_questions_answers(tex_path='exams/together2.tex'):
    """Read the True/False questions and their solutions from a LaTeX exam.

    Args:
        tex_path: Path of the LaTeX file to parse. Defaults to the exam
            file this notebook was written for.

    Returns:
        A tuple ``(exam_questions_TF, exam_answers_TF)`` of equally long
        string lists. When the file yields different numbers of questions
        and answers a warning is printed and only the leading complete
        pairs are kept.
    """
    with open(tex_path, 'r', encoding='utf-8') as file:
        tex_content = file.read()

    # Parsing happens after the file is closed; it only needs the text.
    questions, answers = extract_question_answer(tex_content)
    if len(questions) != len(answers):
        print("Warning: The number of questions and answers do not match!")

    # zip() stops at the shorter list, so unmatched trailing items are dropped.
    pairs = list(zip(questions, answers))
    exam_questions_TF = [q for q, _ in pairs]
    exam_answers_TF = [a for _, a in pairs]
    return exam_questions_TF, exam_answers_TF

In [7]:
# Parse the exam file: `questions` feeds the evaluation loop, `answers` holds
# the solution texts extracted from the same file.
questions, answers = read_questions_answers()

In [8]:
# Module-level client, used in this cell to create the Code Interpreter
# container (chat_demo builds its own client for the actual requests).
llm = OpenAI(api_key=OPENAI_API)

print("Creating container for Code Interpreter...")
# A single container is created up front; its id is handed to every chat_demo call.
cont = llm.containers.create(name="test")
container_id_manual = cont.id

Creating container for Code Interpreter...


In [9]:
# Hard-coded explanation text with LaTeX markup and explicit line breaks.
# NOTE(review): this string is not produced by the evaluation loop and is not
# read again in the visible notebook; it is only written to disk below.
full_explanation = (
    "This is false.  For a counter‐example in $\\mathbb R^2$, take  \n"
    "\\[\n"
    "\\mathbf{u}=(1,0),\\quad \\mathbf{v}=(0,1),\\quad \\mathbf{w}=(1,1).\n"
    "\\]\n"
    "Then neither $\\mathbf{u}$ nor $\\mathbf{v}$ is a scalar multiple of the other, so $\\{\\mathbf{u},\\mathbf{v}\\}$ is independent.  "
    "Likewise $\\{\\mathbf{v},\\mathbf{w}\\}$ is independent.  "
    "But $\\mathbf{w}=\\mathbf{u}+\\mathbf{v}$, so $\\{\\mathbf{u},\\mathbf{v},\\mathbf{w}\\}$ is linearly dependent."
)

# Save the explanation as UTF-8 plain text (overwrites any existing file).
with open('exams/full_explanation.txt', 'w', encoding='utf-8') as f:
    f.write(full_explanation)

In [10]:
import re

from tqdm import tqdm


def replace_latex_bold(text: str) -> str:
    r"""Rewrite LaTeX bold spans as asterisk emphasis.

    Every ``\textbf{...}`` whose argument contains no nested braces becomes
    ``*...*``. This covers the words previously replaced one by one
    (always, Every, any, rotation, distinct) as well as any other bolded
    word. Spans with nested braces are left unchanged.

    Args:
        text: Question text that may contain ``\textbf{...}``.

    Returns:
        The text with simple bold spans converted.
    """
    return re.sub(r'\\textbf\{([^{}]*)\}', r'*\1*', text)


# Results are rebuilt from scratch on every run of this cell.
llm_explanations = []
llm_answers = []

for question in tqdm(questions):
    # One API round trip per question; results are stored in question order.
    expl, ans = chat_demo(replace_latex_bold(question), container_id_manual)
    llm_explanations.append(expl)
    llm_answers.append(ans)

  0%|          | 0/8 [00:00<?, ?it/s]

Answer: Q: If $\{\mathbf u,\mathbf v\}$ is linearly independent and $\{\mathbf v,\mathbf w\}$ is linearly independent, then is $\{\mathbf u,\mathbf v,\mathbf w\}$ necessarily linearly independent?  
E: Counterexample in $\mathbb R^3$. Let  
\[
\mathbf u=(1,0,0),\quad \mathbf v=(0,1,0),\quad \mathbf w=(1,1,0).
\]  
Then $\{\mathbf u,\mathbf v\}$ is independent (neither is a multiple of the other), and $\{\mathbf v,\mathbf w\}$ is independent (again no scalar multiple). However  
\[
\mathbf u+\mathbf v-\mathbf w=(1,0,0)+(0,1,0)-(1,1,0)=(0,0,0),
\]  
so $\{\mathbf u,\mathbf v,\mathbf w\}$ is linearly dependent.  
A: False


 12%|█▎        | 1/8 [00:10<01:15, 10.77s/it]

Answer: Q: Let $\mathbf{A}$ and $\mathbf{B}$ be two orthogonal matrices. Then the product $\mathbf{A}\mathbf{B}$ is also *always* orthogonal.  
E: A matrix $\mathbf{M}$ is orthogonal if and only if $\mathbf{M}^T\mathbf{M}=I$, equivalently $\mathbf{M}^{-1}=\mathbf{M}^T$.  Since $\mathbf{A}$ and $\mathbf{B}$ are orthogonal,  
$\mathbf{A}^T\mathbf{A}=I$ and $\mathbf{B}^T\mathbf{B}=I$.  Now  
\[
(\mathbf{A}\mathbf{B})^T(\mathbf{A}\mathbf{B})
=\mathbf{B}^T\mathbf{A}^T\mathbf{A}\mathbf{B}
=\mathbf{B}^T I\,\mathbf{B}
=\mathbf{B}^T\mathbf{B}
=I.
\]
Hence $\mathbf{A}\mathbf{B}$ satisfies $(\mathbf{A}\mathbf{B})^T(\mathbf{A}\mathbf{B})=I$ and is orthogonal.  
A: True


 25%|██▌       | 2/8 [00:17<00:50,  8.47s/it]

Answer: Q: If $\lambda$ is an eigenvalue of $\mathbf{A}$, then it is also an eigenvalue of $\mathbf{A}^\top$.  
E: A scalar $\lambda$ is an eigenvalue of $A$ iff it satisfies the characteristic equation $\det(A - \lambda I)=0$.  Since
$$\det(A^\top - \lambda I)=\det\bigl((A - \lambda I)^\top\bigr)=\det(A - \lambda I),$$
the characteristic polynomials of $A$ and $A^\top$ coincide.  Hence they have the same eigenvalues (with the same algebraic multiplicities).  
A: True


 38%|███▊      | 3/8 [00:22<00:34,  6.91s/it]

Answer: Q: The product of two symmetric matrices is *always* symmetric.  
E: For any matrices \(A,B\),  
\[
(AB)^T = B^T A^T.
\]  
If \(A\) and \(B\) are symmetric then \(A^T=A\) and \(B^T=B\), so  
\[
(AB)^T = BA.
\]  
Thus \(AB\) is symmetric precisely when \(AB = BA\), i.e.\ when \(A\) and \(B\) commute.  But symmetric matrices need not commute.  For example, take  
\[
A = \begin{pmatrix}1&0\\0&0\end{pmatrix},\quad
B = \begin{pmatrix}0&1\\1&0\end{pmatrix}.
\]  
Both \(A\) and \(B\) are symmetric, yet  
\[
AB = \begin{pmatrix}0&1\\0&0\end{pmatrix}
\ne
\begin{pmatrix}0&0\\1&0\end{pmatrix}
= BA,
\]  
so \(AB\) is not symmetric.  
A: False


 50%|█████     | 4/8 [00:33<00:33,  8.50s/it]

Answer: Q: In order to construct an eigenvector basis of \(\mathbb{R}^n\) from a matrix \(\mathbf{A}\), \(\mathbf{A}\) must have \(n\) *distinct* eigenvalues.  
E: This is not necessary. A matrix is diagonalizable (i.e.\ admits an eigenvector basis) precisely when the sum of the dimensions of its eigenspaces equals \(n\), which can occur even if some eigenvalues repeat.  
Counter-example: take \(\mathbf{A}=I_n\).  Its characteristic polynomial is \((\lambda-1)^n\), so it has only one eigenvalue \(\lambda=1\) of algebraic multiplicity \(n\), yet its eigenspace is all of \(\mathbb{R}^n\), providing \(n\) independent eigenvectors.  
A: False


 62%|██████▎   | 5/8 [00:42<00:25,  8.58s/it]

Answer: Q: The columns of *any* \(2 \times 2\) *rotation* matrix form an orthogonal set.  
E: A general \(2\times2\) rotation matrix is  
\[
R(\theta)
=\begin{pmatrix}
\cos\theta & -\sin\theta\\
\sin\theta & \cos\theta
\end{pmatrix}.
\]
Its columns are 
\(\mathbf{u}=(\cos\theta,\;\sin\theta)^T\) and 
\(\mathbf{v}=(-\sin\theta,\;\cos\theta)^T\).  Their dot product is
\[
\mathbf{u}\cdot\mathbf{v}
=\cos\theta(-\sin\theta)+\sin\theta(\cos\theta)
=-\cos\theta\sin\theta+\sin\theta\cos\theta
=0.
\]
Hence \(\mathbf{u}\) and \(\mathbf{v}\) are orthogonal.  Equivalently, since \(R(\theta)^T R(\theta)=I\), its columns are orthonormal by the theorem \(U^TU=I\iff\) columns of \(U\) are orthonormal.  
A: True


 75%|███████▌  | 6/8 [00:50<00:17,  8.52s/it]

Answer: Q: Suppose $\mathbf{A}$ is an $n\times n$ symmetric matrix and $\mathbf{B}$ is *any* $n\times m$ matrix.  Then $\mathbf{B}^{T}\mathbf{A}\mathbf{B}$ and $\mathbf{B}^{T}\mathbf{B}$ are symmetric matrices.  
E: 
- Since $\mathbf{A}$ is symmetric, $\mathbf{A}^T=\mathbf{A}$.  Hence  
\[
(\mathbf{B}^T\mathbf{A}\mathbf{B})^T 
= \mathbf{B}^T{}^T\,\mathbf{A}^T\,\mathbf{B}^T
= \mathbf{B}\,\mathbf{A}\,\mathbf{B}^T
= \mathbf{B}^T\mathbf{A}\mathbf{B}
\]
because matrix transpose reverses order and $\mathbf{A}^T=\mathbf{A}$.  
- For any conformable $\mathbf{B}$,  
\[
(\mathbf{B}^T\mathbf{B})^T
= \mathbf{B}^T{}^T\,\mathbf{B}^T
= \mathbf{B}\,\mathbf{B}^T
= \mathbf{B}^T\mathbf{B},
\]
again using the rule $(XY)^T=Y^T X^T$ and the fact that $(\mathbf{B}^T)^T=\mathbf{B}$.  
Thus both products equal their own transposes.  
A: True


 88%|████████▊ | 7/8 [01:06<00:10, 10.93s/it]

Answer: Q: Every orthogonal set in $\mathbb{R}^n$ is linearly independent.  
E: The standard theorem requires that an orthogonal set consist of nonzero vectors. But if we allow the zero vector, an “orthogonal set” need not be independent, since $0\cdot v=0$ for all $v$.  
Counter-example: $\{0,e_1\}$ is orthogonal (since $0\cdot e_1=0$) but linearly dependent (because $1\cdot 0+0\cdot e_1=0$ is a nontrivial relation).  
A: False


100%|██████████| 8/8 [01:16<00:00,  9.57s/it]


In [None]:
# Disabled helper cell: flip the condition to True to re-ask the second-to-last
# question and overwrite its stored explanation and answer in place.
if False:

    temp_q = questions[-2]
    # Same \textbf{...} -> *...* substitutions as in the main evaluation loop.
    temp_q = temp_q.replace("\\textbf{always}", "*always*")  
    temp_q = temp_q.replace("\\textbf{Every}", "*Every*")
    temp_q = temp_q.replace("\\textbf{any}", "*any*")
    temp_q = temp_q.replace("\\textbf{rotation}", "*rotation*")
    temp_q = temp_q.replace("\\textbf{distinct}", "*distinct*")

    expl, ans = chat_demo(temp_q, container_id_manual)
    # Replace the existing entries rather than appending, so list lengths stay unchanged.
    llm_explanations[-2] = expl
    llm_answers[-2] = ans

    #llm_explanations.append(expl)
    #llm_answers.append(ans)

Answer: Q: Suppose $\mathbf{A}$ is an $n\times n$ symmetric matrix and $\mathbf{B}$ is any $n\times m$ matrix.  Then $\mathbf{B}^\mathsf{T}\mathbf{A}\mathbf{B}$ and $\mathbf{B}^\mathsf{T}\mathbf{B}$ are symmetric matrices.  
E: We use the fact that $(XY)^\mathsf{T}=Y^\mathsf{T}X^\mathsf{T}$ and that $\mathbf{A}^\mathsf{T}=\mathbf{A}$.  Then  
\[
(\mathbf{B}^\mathsf{T}\mathbf{A}\mathbf{B})^\mathsf{T}
=\mathbf{B}^\mathsf{T}\mathbf{A}^\mathsf{T}\mathbf{B}
=\mathbf{B}^\mathsf{T}\mathbf{A}\mathbf{B},
\]
so $\mathbf{B}^\mathsf{T}\mathbf{A}\mathbf{B}$ is symmetric.  Similarly,  
\[
(\mathbf{B}^\mathsf{T}\mathbf{B})^\mathsf{T}
=\mathbf{B}^\mathsf{T}\mathbf{B},
\]
so $\mathbf{B}^\mathsf{T}\mathbf{B}$ is symmetric.  
A: True


In [12]:
# `expl` holds the explanation returned by the most recent chat_demo call.
print(expl)

The standard theorem requires that an orthogonal set consist of nonzero vectors. But if we allow the zero vector, an “orthogonal set” need not be independent, since $0\cdot v=0$ for all $v$.
Counter-example: $\{0,e_1\}$ is orthogonal (since $0\cdot e_1=0$) but linearly dependent (because $1\cdot 0+0\cdot e_1=0$ is a nontrivial relation).


In [13]:
# Sanity check: both result lists should have one entry per question.
len(llm_explanations), len(llm_answers)

(8, 8)

In [14]:
#llm_explanations_clean = [expl.strip().replace('\n', ' ') for expl in llm_explanations]

In [15]:
# Show the raw stored string for the second-to-last question (newlines and
# backslashes appear escaped in the repr).
llm_explanations[-2]

'\n- Since $\\mathbf{A}$ is symmetric, $\\mathbf{A}^T=\\mathbf{A}$.  Hence\n\\[\n(\\mathbf{B}^T\\mathbf{A}\\mathbf{B})^T\n= \\mathbf{B}^T{}^T\\,\\mathbf{A}^T\\,\\mathbf{B}^T\n= \\mathbf{B}\\,\\mathbf{A}\\,\\mathbf{B}^T\n= \\mathbf{B}^T\\mathbf{A}\\mathbf{B}\n\\]\nbecause matrix transpose reverses order and $\\mathbf{A}^T=\\mathbf{A}$.\n- For any conformable $\\mathbf{B}$,\n\\[\n(\\mathbf{B}^T\\mathbf{B})^T\n= \\mathbf{B}^T{}^T\\,\\mathbf{B}^T\n= \\mathbf{B}\\,\\mathbf{B}^T\n= \\mathbf{B}^T\\mathbf{B},\n\\]\nagain using the rule $(XY)^T=Y^T X^T$ and the fact that $(\\mathbf{B}^T)^T=\\mathbf{B}$.\nThus both products equal their own transposes.'

In [17]:
# Bundle the collected explanations and answers and write them to disk as a pickle.
data = {"explanations": llm_explanations, "answers": llm_answers}
with open('exams/together2_llm_state.pkl', 'wb') as f:
    pickle.dump(data, f)

In [19]:
# Reload the saved state and look at the first stored answer.
# The context manager closes the file handle, which the bare open() call left open.
# pickle.load can execute arbitrary code: only load files written by this notebook.
with open('exams/together2_llm_state.pkl', 'rb') as f:
    exa = pickle.load(f)
exa["answers"][0]

False