In [13]:
# Directions on how to run this

# 1) Install Conda
# 2) Create a Conda Environment
    # conda create -n physics_ai python=3.9 -y
# 3) Activate Conda Environment 
    # conda activate physics_ai

# Install Libraries: 
    # pip install langchain
    # pip install python-dotenv
    # pip install -qU "langchain[mistralai]"





In [14]:
# Version #1: Testing out the Mistral Model without system template constraints
# This will answer any question you enter. Will not filter out physics level questions and answers

In [15]:

from dotenv import load_dotenv
# loading the Mistral API key
load_dotenv()



True

In [16]:
# Defining your model itself

from langchain.chat_models import init_chat_model

model = init_chat_model("mistral-large-latest", model_provider="mistralai")

In [17]:
# model.invoke("Hello, world!")

In [None]:
while True:
    user_input = input("Please enter a query")

    if user_input.lower() == "exit": 
        break
    output = model.invoke(user_input)
    print(output.content)

print("End of application")

In [None]:
# Version #  II --> Using Langchain and prompts to answer specifically for phsycis related questions
# This Demo Is tailored towards having the LLM only answer Phsyics related questions. 

# It utilizes Langchain systemMess and SystemMessagePromptTemplate to filter out any other subject and will answer questions pretaining 
# to only the Physics subject
from langchain_core.prompts import PromptTemplate

from langchain.schema import SystemMessage
from langchain.prompts import SystemMessagePromptTemplate



In [None]:

from langchain.chat_models import init_chat_model

# https://python.langchain.com/docs/how_to/structured_output/
from dotenv import load_dotenv
# loading the Mistral API key
load_dotenv()


True

In [None]:
# Setting up the template so it can only answer phsyics level questions, this gives restriciton to other subjects that are present


template = """You are an AI assistant that only answers questions related to physics.  

### **Guidelines:**
1. **Answer only physics-related questions.**  
   - Topics include classical mechanics, electromagnetism, quantum mechanics, thermodynamics, relativity, and other core physics subjects.  

2. **Reject non-physics questions.**  
   - If a question is outside the scope of physics (e.g., history, literature, biology, general math), respond with:  
     **"I can only answer physics-related questions."**  

3. **Do not attempt to redirect or reframe unrelated questions.**  
   - Example: If asked "Who wrote Hamlet?", simply respond:  
     **"I can only answer physics-related questions."**  
   - Do not attempt to connect unrelated topics to physics.  

4. **If a question is unclear, ask for clarification only if it seems related to physics.**  
   - Otherwise, treat it as unrelated and do not answer.  

Stick strictly to physics and avoid answering anything outside this domain. Be concise and factual in your responses."""

In [None]:
system_message = SystemMessagePromptTemplate.from_template(template)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage
from time import sleep


# Initialize your model, in this case, it is mistral. 
llm = init_chat_model("mistral-large-latest", model_provider="mistralai")



In [None]:
system_message = system_message.format()


messages = [system_message]

In [None]:
while True:
    user_query = input("Please enter a physics-related question (or type 'exit' to quit): ")
    
    if user_query.lower() == "exit":
        break
    messages.append(HumanMessage(content=user_query))
    
    for chunk in llm.stream(messages): 
        print(chunk.content, end="", flush=True)


print("End of Program")

End of Program


In [None]:
# !pip install -qU langchain-pinecone pinecone-notebooks

In [2]:
import getpass
import os
import time

from pinecone import Pinecone, ServerlessSpec

if not os.getenv("PINECONE_API_KEY"):
    os.environ["PINECONE_API_KEY"] = getpass.getpass("Enter your Pinecone API key: ")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
print(pinecone_api_key)
pc = Pinecone(api_key=pinecone_api_key)

pcsk_4JbxFQ_ER3PXrERZ8MZCFnuCVVvSSyMvwwbY12TNjS9v3mvhArhqRBKSL3ybBVRxyRFUge


In [4]:

import dotenv
dotenv.load_dotenv()

# Generate a random query vector (ensure it matches the embedding dimension)
# This just to convert a random 768 Vector and convert it into a list, does not have any semantic meaning, and just used as testing purposes
# query_vector = np.random.rand(768).tolist()

# Perform similarity search in Pinecone using the query_vector above
# query_results = index.query(
#     vector=[query_vector],  # Pass the query vector
#     top_k=3,
#     include_metadata=True
# )

# Print results
# print("Query Results:")
# for i, res in enumerate(query_results["matches"], start=1):
#     print(f"{i}. Score: {res['score']}, Content: {res['metadata']}")







True

In [None]:
# ************************************  Version #3 ****************************************************************************

# First Retrival Augmented Generation using a only 12 documents on Pinecone. 

# This version utilizes our Pinecone Cloud infrastrcucture with only 12 defined document.
# 
# 
# 
#  
# The Retrivel Gathers the 2 KNN documents within the pinecone index itself


In [2]:
# *********************** Embedding Models  ***************************************** 
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
import time
from pinecone import Pinecone, ServerlessSpec
import numpy as np
import getpass
import os
import dotenv

dotenv.load_dotenv()


# Nomiac Embedding -> Has a 768 Dimension
# if not os.getenv("NOMIC_API_KEY"):
#     os.environ["NOMIC_API_KEY"] = getpass.getpass("Enter your Nomic API key: ")


#****************** MistralAI Embedding which has a 1064 Dimension ****************************
# from langchain_mistralai import MistralAIEmbeddings

# embeddings = MistralAIEmbeddings(
#     model="mistral-embed",  # Ensure the model name is correct
#     mistral_api_key=os.environ.get('MISTRALAI_API_KEY'),  # Use the API key from the environment variable
# )


# Using OllamaEmbeddings with the nomic_embed-text sentence tranformer model to convert query into feature vector
from langchain_community.embeddings import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="nomic-embed-text")


  embeddings = OllamaEmbeddings(model="nomic-embed-text")


In [3]:
import os
import pinecone
from dotenv import load_dotenv
from langchain_community.embeddings import OllamaEmbeddings
from langchain.schema import Document

load_dotenv()
API_KEY = os.getenv("PINECONE_API_KEY")
print(API_KEY)
# PINECONE_API_KEY= pcsk_4c5cFg_NsM2C2ThXeMCS6Jnsv6pfrXp9q3Jj4BM4BeiM3fxhKmJRGDzxGLb6cwjNzhm1kh
# pcsk_4JbxFQ_ER3PXrERZ8MZCFnuCVVvSSyMvwwbY12TNjS9v3mvhArhqRBKSL3ybBVRxyRFUge

pc = Pinecone(api_key=API_KEY)
print("Existing indexes:", pc.list_indexes())
index_name = "physics-smaller"

existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

index = pc.Index(index_name)

pcsk_4JbxFQ_ER3PXrERZ8MZCFnuCVVvSSyMvwwbY12TNjS9v3mvhArhqRBKSL3ybBVRxyRFUge
Existing indexes: [{
    "name": "foodiewcontent",
    "dimension": 768,
    "metric": "cosine",
    "host": "foodiewcontent-07lyhyq.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "deletion_protection": "disabled"
}, {
    "name": "foodieproject",
    "dimension": 768,
    "metric": "cosine",
    "host": "foodieproject-07lyhyq.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "deletion_protection": "disabled"
}, {
    "name": "physics-smaller",
    "dimension": 768,
    "metric": "cosine",
    "host": "physics-smaller-07lyhyq.svc.aped-4627-b74a.pinecone.io",
    "s

In [6]:
# Initialize Pinecone Vector Store with embeddings and index


vector_store = PineconeVectorStore(embedding=embeddings, index=index)


# Below is a sample code. We are running into a retrieval issue, but thanks to our determination,  
# we have pinpointed that `page_content` is not included in our original vector database on Pinecone.  
# Jesh and Suzy are working on it right now, but most of the similarity search issues occur because of that reason.  
# Similarity searches are performed using `page_content`, and we do not have that just yet.  

# *************************** DO NOT UNCOMMENT THIS SECTION ********************************************************
# Example documents with `page_content` as the main text
# documents = [
#     Document(
#         page_content="What is the significance of the double-slit experiment?",
#         metadata={
#             "content": "The double-slit experiment demonstrates the wave-particle duality of quantum objects.",
#             "role_1": "Physicist_RoleType.RESEARCHER",
#             "source": "001_016_012.json",
#             "sub_topic": "Wave-particle duality",
#             "topic": "Quantum mechanics",
#         }
#     ),
#     Document(
#         page_content="How does Heisenberg's uncertainty principle limit measurement?",
#         metadata={
#             "content": "The uncertainty principle states that certain pairs of physical properties cannot be simultaneously measured with arbitrary precision.",
#             "role_1": "Physicist_RoleType.ASSISTANT",
#             "source": "001_017_013.json",
#             "sub_topic": "Uncertainty principle",
#             "topic": "Quantum mechanics",
#         }
#     ),
#     Document(
#         page_content="Why is Schrödinger's cat paradox important?",
#         metadata={
#             "content": "Schrödinger's cat illustrates the concept of quantum superposition and measurement problem.",
#             "role_1": "Physicist_RoleType.EXPERT",
#             "source": "001_018_014.json",
#             "sub_topic": "Quantum superposition",
#             "topic": "Quantum mechanics",
#         }
#     ),
#     Document(
#         page_content="What is the role of dark matter in the universe?",
#         metadata={
#             "content": "Dark matter explains gravitational effects that cannot be accounted for by visible matter.",
#             "role_1": "Physicist_RoleType.RESEARCHER",
#             "source": "001_019_015.json",
#             "sub_topic": "Dark matter",
#             "topic": "Astrophysics",
#         }
#     ),
#     Document(
#         page_content="How does general relativity describe gravity?",
#         metadata={
#             "content": "General relativity describes gravity as the curvature of spacetime caused by mass and energy.",
#             "role_1": "Physicist_RoleType.ASSISTANT",
#             "source": "001_020_016.json",
#             "sub_topic": "General relativity",
#             "topic": "Relativity",
#         }
#     ),
#     Document(
#         page_content="What is the cosmic microwave background radiation?",
#         metadata={
#             "content": "The CMB is the residual thermal radiation from the Big Bang, providing evidence for the early universe's conditions.",
#             "role_1": "Physicist_RoleType.EXPERT",
#             "source": "001_021_017.json",
#             "sub_topic": "Cosmic microwave background",
#             "topic": "Cosmology",
#         }
#     ),
#     Document(
#         page_content="Why do black holes have an event horizon?",
#         metadata={
#             "content": "The event horizon is the boundary beyond which nothing can escape a black hole's gravitational pull.",
#             "role_1": "Physicist_RoleType.RESEARCHER",
#             "source": "001_022_018.json",
#             "sub_topic": "Black holes",
#             "topic": "Astrophysics",
#         }
#     ),
#     Document(
#         page_content="How does quantum tunneling allow particles to pass through barriers?",
#         metadata={
#             "content": "Quantum tunneling enables particles to cross energy barriers they classically shouldn't overcome.",
#             "role_1": "Physicist_RoleType.ASSISTANT",
#             "source": "001_023_019.json",
#             "sub_topic": "Quantum tunneling",
#             "topic": "Quantum mechanics",
#         }
#     ),
#     Document(
#         page_content="What are gravitational waves and how are they detected?",
#         metadata={
#             "content": "Gravitational waves are ripples in spacetime caused by accelerating massive objects, detected using laser interferometers.",
#             "role_1": "Physicist_RoleType.EXPERT",
#             "source": "001_024_020.json",
#             "sub_topic": "Gravitational waves",
#             "topic": "Relativity",
#         }
#     ),
#     Document(
#         page_content="What is the role of neutrinos in particle physics?",
#         metadata={
#             "content": "Neutrinos are nearly massless particles that interact weakly, playing a crucial role in nuclear reactions and astrophysics.",
#             "role_1": "Physicist_RoleType.RESEARCHER",
#             "source": "001_025_021.json",
#             "sub_topic": "Neutrinos",
#             "topic": "Particle physics",
#         }
#     ),
# ]

# # # Add documents to Pinecone
# vector_store.add_documents(documents)
# print("Documents added to Pinecone successfully!")



# ********************************************************************************************************************************
import json 
# json_directory = "dataset/physics"
json_directory = "dataset/smallerDataSet/Archive"

# Function to load JSON files and create Document objects
def load_documents_from_json(directory):
    documents = []

    for filename in os.listdir(directory):
        if filename.endswith(".json"):
            file_path = os.path.join(directory, filename)

            with open(file_path, "r", encoding="utf-8") as file:
                data = json.load(file)

                # Extract necessary fields from JSON
                role = data.get("role_1", "Unknown")
                topic = data.get("topic;", "Unknown")
                sub_topic = data.get("sub_topic", "Unknown")
                message = data.get("message_1", "") + "\n\n" + data.get("message_2", "")

                # Create a Document object
                doc = Document(
                    page_content=data.get("message_1", "") + "\n\n" + data.get("message_2", ""), 
                    metadata={
                        "role_1": role,
                        "topic": topic,
                        "sub_topic": sub_topic,
                        "source": filename 
                    }
                )

                documents.append(doc)

    return documents

# Load documents from JSON files
docs = load_documents_from_json(json_directory)

# Upload documents to Pinecone
vector_store.add_documents(docs)

print(f"Uploaded {len(docs)} documents to Pinecone.")

KeyboardInterrupt: 

In [7]:
# Perform a similarity search
# GPT Composed Cell bellow to test out the cosine similarity search on pinecone 



query = "The quantum teleportation protocol"
results = vector_store.similarity_search(query, k=5)

# Print the actual content and metadata of the retrieved documents
print("Retrieved Documents:")
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")



Retrieved Documents:
* "Explain the principle of quantum teleportation protocol and demonstrate how it can be applied in real-life scenarios with the help of mathematical equations and experimental details."

Quantum teleportation is a protocol that allows the transfer of quantum information from one location to another, using entanglement as a resource and classical communication. It was first proposed by Charles Bennett and his colleagues in 1993. The principle of quantum teleportation is based on the phenomenon of quantum entanglement, which is a unique property of quantum mechanics where two or more particles become correlated in such a way that the state of one particle is dependent on the state of the other, even when separated by large distances.

The quantum teleportation protocol can be explained in the following steps:

1. Entanglement: Two qubits, A and B, are prepared in an entangled state, known as a Bell state. This can be represented mathematically as:

|Ψ⟩ = (1/√2)(|00⟩

In [8]:
# Set the vector store as a naration

retriever = vector_store.as_retriever()

In [10]:
from langchain_core.prompts import PromptTemplate
physics_prompt = PromptTemplate(
    template="""You are an AI assistant that only answers questions related to physics.  

### **Guidelines:**
1. **Answer only physics-related questions.**  
   - Topics include classical mechanics, electromagnetism, quantum mechanics, thermodynamics, relativity, and other core physics subjects.  

2. **Reject non-physics questions.**  
   - If a question is outside the scope of physics (e.g., history, literature, biology, general math), respond with:  
     **"I can only answer physics-related questions."**  

3. **Do not attempt to redirect or reframe unrelated questions.**  

4. **If a question is unclear, ask for clarification only if it seems related to physics.**  

---

### **Context:**  
{context}  

### **Question:**  
{question}  

**Response:**""",
    input_variables=["context", "question"],
)

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.prompts.chat import HumanMessagePromptTemplate

prompt = ChatPromptTemplate.from_messages([
    HumanMessagePromptTemplate.from_template(
        """You are an AI physics assistant, strictly designed to help students understand physics concepts.  

        ### **Rules and Constraints:**  
        1. **Physics-Only Responses:**  
           - You will **only answer physics-related questions**.  
           - Topics include: classical mechanics, electromagnetism, quantum mechanics, thermodynamics, relativity, astrophysics, particle physics, and other core physics subjects.  

        2. **Strict Rejection of Non-Physics Queries:**  
           - Under no circumstances will you answer questions outside the scope of physics (e.g., history, literature, biology, general math, pop culture, etc.).  
           - If asked an unrelated question, respond with:  
             **"I can only answer physics-related questions."**  

        3. **No External Information Retrieval:**  
           - You will **never search or use the vector store** or any external knowledge base.  
           - You rely solely on your own **physics knowledge**.  

        4. **Clarifications Only for Physics:**  
           - If a question is unclear, ask for clarification **only if it appears to be related to physics**.  

        5. **Physics-Specific Abilities:**  
           - You can **generate detailed explanations** with step-by-step reasoning.  
           - You can **create study guides** on specific physics topics.  
           - You can **build narrations** and illustrative examples to explain physics concepts.  

        ---

        ### **Student's Question:**  
        {question}  

        **Response:**  
        - Answer in a clear, student-friendly manner, breaking down complex concepts into simple steps.  
        - Include relevant examples, analogies, or visual descriptions when helpful.  
        - Format study guides with bullet points or numbered steps when applicable.  
        - For narrations, describe the scenario in a detailed, engaging way.  
        """
    )
])


In [11]:

def format_docs(docs):
    """Formats retrieved documents into a readable string."""
    return "\n\n".join([doc.page_content for doc in docs])

def retrieve_and_format(query):
    docs = retriever.invoke(query)
    return {"context": format_docs(docs), "question": query} 


In [12]:

from langchain.schema import HumanMessage
from time import sleep
from langchain.chat_models import init_chat_model

# Initialize your model, in this case, it is mistral. 
llm = init_chat_model("mistral-large-latest", model_provider="mistralai")


In [24]:
# https://python.langchain.com/api_reference/langchain/chains/langchain.chains.retrieval_qa.base.RetrievalQA.html
# Note this  RetrievalQA will be depricated --> Better chances is the cells bellow




# from langchain.chains import RetrievalQA
# qa_chain = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=retriever,
#     return_source_documents=True,
#     chain_type_kwargs={"prompt": physics_prompt} 
# )

In [None]:


# response = qa_chain({"query": "How does the violation of Bell's inequality challenge the concept?"})

# print(response["result"])

In [None]:
from langchain_core.output_parsers import StrOutputParser
 
chain = (
    retriever
    | retrieve_and_format 
    | prompt
    | llm
    | StrOutputParser()
)


In [None]:
# response = chain.invoke("issac newton?")
# print(response)

messages = []
while True:
    user_query = input("Please enter a physics-related question (or type 'exit' to quit): ")
    
    if user_query.lower() == "exit":
        break
    
    messages.append(HumanMessage(content=user_query))

    print("\nHuman: " + user_query)
    print("\nAI:", end=" ", flush=True)  
    for chunk in chain.stream(user_query):
        print(chunk, end="", flush=True)

    print("\n")  

print("End of Program")





Human: who is george washington

AI: The Jaynes-Cummings model (JCM) is a fundamental model in quantum mechanics that describes the interaction between a single two-level atom and a single mode of the radiation field within a high-Q cavity in cavity quantum electrodynamics (cQED). Let's break down how this model works and how it explains the interaction.

### Key Concepts and Hamiltonian

1. **Two-Level Atom**:
   - Think of the atom as having two energy levels: a ground state (|g⟩) and an excited state (|e⟩).
   - The transition frequency between these levels is denoted by ω_a.

2. **Cavity Mode**:
   - The cavity supports a single mode of the electromagnetic field with frequency ω_c.
   - The field can be described by creation (a^†) and annihilation (a) operators, which add or remove photons from the cavity mode.

3. **Interaction**:
   - The atom and the cavity mode interact with a coupling strength g.
   - This interaction allows the atom to absorb or emit photons, changing its st

In [13]:
from langchain_core.output_parsers import StrOutputParser
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from time import sleep
from langchain.chat_models import init_chat_model

# Initialize your LLM
llm = init_chat_model("mistral-large-latest", model_provider="mistralai")

history = []

def invoke_chain(user_input):
    global history

    history.append(HumanMessage(content=user_input))


    chain = (
        retriever
        | retrieve_and_format
        | prompt
        | llm
        | StrOutputParser()
    )

    response = chain.invoke(history)


    history.append(AIMessage(content=response))

    return response

print("Welcome! Ask me anything. Type 'exit' to quit.")
while True:
    user_input = input("\nYou: ")
    if user_input.lower() == "exit":
        break
    
    response = invoke_chain(user_input)
    print(f"AI: {response}")


Welcome! Ask me anything. Type 'exit' to quit.
AI: The Jaynes-Cummings model (JCM) is a foundational model in quantum optics and cavity quantum electrodynamics (cQED) that describes the interaction between a single two-level atom and a single mode of the quantized electromagnetic field within a resonant cavity. Let's break down how this model works and its implications for quantum information processing.

### The Jaynes-Cummings Model

#### Hamiltonian of the JCM

The Hamiltonian of the Jaynes-Cummings model can be written as:

\[ H = \hbar \omega a^\dagger a + \frac{1}{2} \hbar \omega \sigma_z + \hbar g (\sigma_+ a + \sigma_- a^\dagger) \]

where:
- \(\omega\) is the frequency of the cavity mode.
- \(a^\dagger\) and \(a\) are the creation and annihilation operators of the cavity photons.
- \(\omega\) is the transition frequency between the two atomic levels.
- \(\sigma_z\) is the Pauli \(z\)-matrix representing the energy difference between the two atomic levels.
- \(\sigma_+\) and \(

In [None]:
# OPTIONAL: Filter by metadata, e.g., filter documents by `topic`
# This will be depricated soon 
retriever = vector_store.as_retriever(
    search_kwargs={
        "k": 2,
        "filter": {"topic": "Quantum mechanics"} 
    })


filtered_results = retriever.get_relevant_documents(query)


print("\nFiltered Retrieved Documents:")
for res in filtered_results:
    print(f"* {res.page_content} [{res.metadata}]")

  filtered_results = retriever.get_relevant_documents(query)
Found document with no `text` key. Skipping.
Found document with no `text` key. Skipping.



Filtered Retrieved Documents:


In [19]:
from langchain_core.output_parsers import StrOutputParser
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from time import sleep
from langchain.chat_models import init_chat_model

# Initialize your LLM
llm = init_chat_model("mistral-large-latest", model_provider="mistralai")

# ✅ Conversation history list
history = []

# ✅ Function to add history to the chain
def invoke_chain(user_input):
    global history

    # Append the user message to the history
    history.append(HumanMessage(content=user_input))

    # Add the history and the current message to the chain
    chain = (
        retriever
        | retrieve_and_format
        | prompt
        | llm
        | StrOutputParser()
    )

    # Combine history with the current message
    response = chain.invoke(history)

    # Append the AI response to the history
    history.append(AIMessage(content=response))

    return response

# ✅ Example usage
print("Welcome! Ask me anything. Type 'exit' to quit.")
while True:
    user_input = input("\nYou: ")
    if user_input.lower() == "exit":
        break
    
    response = invoke_chain(user_input)
    print(f"AI: {response}")


Welcome! Ask me anything. Type 'exit' to quit.
AI: Quantum entanglement and non-locality are fundamental concepts in quantum mechanics that have significant implications for quantum computing and communication systems. Let's break down these implications step by step:

### Quantum Entanglement and Non-Locality

**Quantum Entanglement:**
Quantum entanglement occurs when two or more particles become correlated in such a way that the state of one particle cannot be described independently of the state of the other, even when the particles are separated by large distances. This phenomenon was famously described by Einstein as "spooky action at a distance."

**Non-Locality:**
Non-locality refers to the instantaneous correlation between entangled particles, regardless of the distance between them. This means that a measurement on one particle instantly affects the state of the other, no matter how far apart they are.

### Implications for Quantum Computing and Communication

1. **Quantum Com