In [1]:
# imports
from dotenv import load_dotenv

# Load the key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# embedding and chat model cells below — confirm against the .env file.
load_dotenv()

True

In [6]:
# imports
from langchain_text_splitters import CharacterTextSplitter

# Location of the 2024 State of the Union transcript.
# NOTE: this is an absolute, machine-specific path — point it at wherever the
# file lives locally before running the notebook on another machine.
STATE_OF_THE_UNION_PATH = "/Users/homesweethome/workspace/ali_space/AliMirzapour/Medium_RAG_Series/RAG_Docs/2024_state_of_the_union.txt"

# Read in State of the Union Address File.
# The encoding is pinned to UTF-8 so the text decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open(STATE_OF_THE_UNION_PATH, encoding="utf-8") as f:
    state_of_the_union = f.read()

# Initialize Text Splitter: target chunk size of 1000 with an overlap of 200
# between neighbouring chunks, both measured in characters via len().
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

# Create Documents (Chunks) From File
texts = text_splitter.create_documents([state_of_the_union])

In [7]:
# imports
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embedding model used to turn text into vectors for the store
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Chroma collection that serves as the vector store; no persist directory is
# passed here, only the collection name and the embedding function
vector_store = Chroma(embedding_function=embeddings, collection_name="test_collection")

In [8]:
# Save Document Chunks to Vector Store.
# `ids` keeps whatever identifiers add_documents returns for the stored chunks.
# NOTE(review): re-running this cell presumably inserts the same chunks a
# second time — confirm before trusting retrieval results after a re-run.
ids = vector_store.add_documents(texts)

In [9]:
# Ask the vector store for the two chunks most similar to the question
question = 'Who invaded Ukraine?'
results = vector_store.similarity_search(question, k=2)

# Show every hit: the chunk text followed by its metadata dict
for res in results:
    print(f"* {res.page_content} [{res.metadata}]\n\n")

* And yes, my purpose tonight is to both wake up this Congress, and alert the American people that this is no ordinary moment either. 

Not since President Lincoln and the Civil War have freedom and democracy been under assault here at home as they are today. 

What makes our moment rare is that freedom and democracy are under attack, both at home and overseas, at the very same time. 

Overseas, Putin of Russia is on the march, invading Ukraine and sowing chaos throughout Europe and beyond. 

If anybody in this room thinks Putin will stop at Ukraine, I assure you, he will not. 

But Ukraine can stop Putin if we stand with Ukraine and provide the weapons it needs to defend itself. That is all Ukraine is asking. They are not asking for American soldiers. 

In fact, there are no American soldiers at war in Ukraine. And I am determined to keep it that way. 

But now assistance for Ukraine is being blocked by those who want us to walk away from our leadership in the world. [{}]


* But now 

**************Naive RAG Flow

In [10]:
# imports
from langchain_openai import ChatOpenAI

# Chat model that will generate the final answer
llm = ChatOpenAI(model="gpt-4o-mini")

# Expose the Chroma vector store through its retriever interface (default settings)
retriever = vector_store.as_retriever()

In [11]:
# Turn retrieved documents into a single context string
def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)


In [12]:
# imports
from langchain_core.prompts import PromptTemplate

# Create the Prompt Template.
# {context} and {query} are the two placeholders filled in at run time; the
# instructions tell the model to apologise and decline when the context does
# not contain the answer.
prompt_template = """Use the context provided to answer 
the user's question below. If you do not know the answer 
based on the context provided, tell the user that you do 
not know the answer to their question based on the context
provided and that you are sorry.

context: {context}

question: {query}

answer: """

# Create Prompt Instance from template.
# PromptTemplate.from_template is the constructor that builds a prompt from a
# template string; the previous call, PromptTemplate.prompt_template, is not a
# PromptTemplate constructor and fails when this cell runs on a fresh kernel.
custom_rag_prompt = PromptTemplate.from_template(prompt_template)

In [13]:
# imports
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# First stage of the chain: build the prompt inputs from the incoming question.
# "context" runs the retriever and formats its documents into one string;
# "query" forwards the question unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "query": RunnablePassthrough(),
}

# Compose the RAG Chain: inputs -> prompt -> LLM -> plain string output
rag_chain = rag_inputs | custom_rag_prompt | llm | StrOutputParser()

# Query the RAG Chain
rag_chain.invoke("According to the 2024 state of the union address, Who invaded Ukraine?")

'According to the 2024 State of the Union address, Putin of Russia invaded Ukraine.'

In [14]:
# Get an "I don't know" from the model: ask something unrelated to the speech
# so the prompt's fallback instruction (apologise and decline) is exercised.
rag_chain.invoke("What is the purpose of life?")

"I'm sorry, but I do not know the answer to your question based on the context provided."