In [10]:
# Enable autoreload of imported modules
%load_ext autoreload
%autoreload 2

from elasticsearch import Elasticsearch
from openai import OpenAI
from dotenv import load_dotenv
from pathlib import Path
import os
import sys
import re

# Add parent of 'physbot' to the path
sys.path.append(os.path.abspath(".."))

from physbot.path_utils import get_project_root

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Load environment variables from the `.env` file in the project root.
# For Jupyter: assume this notebook is in /PhysBot/notebooks
# NOTE(review): `notebook_dir` is not referenced by any other visible cell;
# confirm whether later cells rely on it before removing.
notebook_dir = Path.cwd()
# Project root as reported by physbot.path_utils.get_project_root()
# (its resolution logic is not visible here).
project_root = get_project_root()
load_dotenv(dotenv_path=project_root / ".env")

# Read the key from the environment rather than hard-coding it.
# os.getenv returns None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Initialize clients
# Elasticsearch is expected to be reachable locally on port 9200.
es = Elasticsearch("http://localhost:9200")
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# Settings
ES_INDEX = "physbot_units"  # index queried by semantic_search
EMBEDDING_MODEL = "text-embedding-ada-002"  # model used by get_query_embedding
TOP_K = 5  # default `top_k` for semantic_search

In [16]:
def get_query_embedding(query: str):
    """Return the embedding OpenAI produces for a single query string.

    The query is sent to the embeddings endpoint with ``EMBEDDING_MODEL``;
    the ``embedding`` attribute of the first item in the response data is
    returned.
    """
    embedding_result = openai_client.embeddings.create(input=query, model=EMBEDDING_MODEL)
    first_item = embedding_result.data[0]
    return first_item.embedding

def semantic_search(query_embedding, top_k=TOP_K):
    """Use Elasticsearch to retrieve the top-k chunks most similar to a query.

    Every document in ``ES_INDEX`` is scored by cosine similarity between
    ``query_embedding`` and the document's ``embedding`` field.

    Args:
        query_embedding: Query vector, passed to the script as
            ``params.query_vector``.
        top_k: Maximum number of hits to return.

    Returns:
        list: The ``_source`` of each hit, in the order Elasticsearch
        returned them.
    """
    script_score_query = {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                # Cosine similarity lies in [-1, 1]; adding 1.0 shifts the
                # score into [0, 2] so it is never negative.
                "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                "params": {"query_vector": query_embedding},
            },
        }
    }
    # `size` must be passed explicitly: previously it lived only in a local
    # dict that was never sent, so `top_k` had no effect on the result count.
    response = es.search(index=ES_INDEX, query=script_score_query, size=top_k)
    return [hit["_source"] for hit in response["hits"]["hits"]]

def assemble_prompt(chunks, question):
    """Build the tutor prompt: numbered context passages followed by the question.

    Each chunk becomes a passage headed ``[Source N: Unit ..., Section ...]``
    (N starts at 1). Missing ``unit`` / ``section`` keys fall back to
    ``"Unknown"``; ``content`` is required and a missing key raises KeyError.
    Passages are separated by a blank line.
    """
    def _format_passage(number, passage):
        # Header line with the metadata, then the passage text itself.
        unit_label = passage.get("unit", "Unknown")
        section_label = passage.get("section", "Unknown")
        return f"[Source {number}: Unit {unit_label}, Section {section_label}]\n{passage['content']}"

    context_text = "\n\n".join(
        _format_passage(number, passage)
        for number, passage in enumerate(chunks, start=1)
    )

    return f"""You are a physics tutor assistant.

Using the context passages below, answer the following question.
Cite sources in the form [Source 1], [Source 2], etc., based on which passage the information comes from.
When including equations, use LaTeX formatting (e.g., `E=mc^2`) and preserve them inside backticks.

Context:
{context_text}

Question: {question}
Answer:"""

def append_citation_details(answer_text, chunks):
    """
    Append a readable references block for the [Source N] tags cited in the answer.

    The answer text itself is returned unchanged; a ``**References:**``
    section is appended listing each distinct cited tag once, in ascending
    numeric order, with the unit and section of the matching chunk.

    Args:
        answer_text: Model answer that may contain ``[Source N]`` tags.
        chunks: Retrieved chunks; chunk ``i`` (0-based) is "Source i+1".
            Missing ``unit`` / ``section`` keys are shown as "Unknown".

    Returns:
        str: ``answer_text`` followed by the references block. Tags with no
        matching chunk are listed as "Unknown source". The header is
        appended even when the answer cites nothing.
    """
    source_map = {
        f"Source {position}": f"Unit {chunk.get('unit', 'Unknown')} – {chunk.get('section', 'Unknown')}"
        for position, chunk in enumerate(chunks, start=1)
    }

    # Sort by numeric value: a plain string sort would put "[Source 10]"
    # ahead of "[Source 2]". The raw digits are the tie-breaker so the order
    # stays deterministic for oddities such as "01" vs "1".
    cited_numbers = sorted(
        set(re.findall(r"\[Source (\d+)\]", answer_text)),
        key=lambda digits: (int(digits), digits),
    )

    references = "\n\n**References:**\n"
    for digits in cited_numbers:
        label = f"Source {digits}"
        references += f"[{label}]: {source_map.get(label, 'Unknown source')}\n"

    return answer_text + references

def generate_rag_response(query: str, model: str = "gpt-4", temperature: float = 0.3):
    """Answer a question using retrieved context and an OpenAI chat model.

    Steps: embed the query, retrieve similar chunks from Elasticsearch,
    assemble a prompt from them, request a chat completion, then append a
    references block for the sources the answer cites.

    Args:
        query: The user's question.
        model: Chat model name; defaults to the previously hard-coded "gpt-4".
        temperature: Sampling temperature; defaults to the previous 0.3.

    Returns:
        str: The stripped model answer followed by the references block.
    """
    embedding = get_query_embedding(query)
    top_chunks = semantic_search(embedding)
    prompt = assemble_prompt(top_chunks, query)

    completion = openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )

    # The message content can be None; treat that as an empty answer rather
    # than failing on `.strip()`.
    answer = (completion.choices[0].message.content or "").strip()

    return append_citation_details(answer, top_chunks)



In [19]:
# Run the full pipeline on a sample spring / rolling-disk problem and print
# the answer together with its appended references block.
response = generate_rag_response("Walk me through the following problem: A solid disk compresses a spring horizontally, with spring constant of 76 N/m, a displacement 0.56 cm from its equilibrium position. If the disk rolls without slipping after the spring is released, how fast will it roll on a flat surface? The mass of the disk is 19 g.")

print(response)

This is a conservation of energy problem involving the elastic potential energy of the spring and the kinetic energy of the disk, both linear and rotational. Since there is no change in height of any of the objects in our system, gravitational potential energy can be ignored. Therefore, the conservation of energy can be written as `ΔE = ΔUs + ΔKl + ΔKr = 0` [Source 2].

In the initial situation (compressed spring), there is no kinetic energy and in the final situation there is no elastic potential energy since the spring returns to its natural length. We can then write the conservation of energy equation as `-1/2 k xi^2 + 1/2 m vf^2 + 1/2 I ωf^2 = 0` [Source 2].

A solid disk has `I = 1/2 m r^2`, which leads to `-1/2 k xi^2 + 1/2 m vf^2 + 1/2 (1/2 m r^2) ωf^2 = 0` [Source 2].

Since the disk is rolling without slipping, we can also use the fact that `v^2 = ω^2 r^2` to simplify the final term, leading to `-1/2 k xi^2 + 1/2 m vf^2 + 1/2 (1/2 m r^2) (vf^2/r^2) = 0` [Source 2].

Solving th