In [None]:
import os
from pathlib import Path

import dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_anthropic import ChatAnthropic
from langchain_community.document_loaders import (
    WebBaseLoader,
    PyMuPDFLoader,
    Docx2txtLoader
)
from langchain_community.document_loaders.text import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI,  OpenAIEmbeddings

# Load variables from a local .env file into the process environment, then
# read the OpenAI key from the environment (None when it is not set).
dotenv.load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

OPENAI_API_KEY: <redacted — an API key was exposed in this saved cell output; revoke and rotate it, and clear outputs before sharing the notebook>


In [3]:
# Local files to ingest. The loader for each file is picked from its extension.
doc_path = [
    "docs/test_rag.pdf",
    "docs/test_rag.docx",
]

# Extension -> loader class. Keys are lower-case because the lookup below
# lower-cases the file suffix, so "Report.PDF" is treated like "report.pdf".
loader_by_suffix = {
    ".pdf": PyMuPDFLoader,
    ".docx": Docx2txtLoader,
    ".txt": TextLoader,
    ".md": TextLoader,
}

docs = []
for doc_file in doc_path:
    file_path = Path(doc_file)

    loader_cls = loader_by_suffix.get(file_path.suffix.lower())
    if loader_cls is None:
        print(f"Unsupported file type: {doc_file}")
        continue

    try:
        loader = loader_cls(file_path)
        docs.extend(loader.load())
    except Exception as e:
        # Best-effort ingestion: report the failing file and keep going so one
        # bad document does not stop the remaining ones from loading.
        print(f"Error loading {doc_file}: {e}")

   
# Fetch one web page and add its documents to the ones collected so far.
url = "https://docs.streamlit.io/develop/quick-reference/release-notes"
try:
    loader = WebBaseLoader(url)
    web_documents = loader.load()
except Exception as e:
    print(f"Error loading URL {url}: {e}")
else:
    # Only reached when the page loaded without raising.
    docs.extend(web_documents)

In [4]:
# Inspect what was loaded: as the last expression in the cell, `docs` is
# rendered as the cell output (the full list of loaded Document objects).
docs

[Document(metadata={'producer': 'Canva', 'creator': 'Canva', 'creationdate': '2025-04-03T14:10:29+00:00', 'source': 'docs\\test_rag.pdf', 'file_path': 'docs\\test_rag.pdf', 'total_pages': 1, 'format': 'PDF 1.4', 'title': 'Letter of Recommendation', 'author': 'Drill Grassman', 'subject': '', 'keywords': 'DAGjlZ_ZSDU,BAGHKYhAayQ,0', 'moddate': '2025-04-03T14:10:29+00:00', 'trapped': '', 'modDate': "D:20250403141029+00'00'", 'creationDate': "D:20250403141029+00'00'", 'page': 0}, page_content='I am writing to give Andrey the highest recommendation. I was lucky enough to work with him at\ndc1ab on several projects. Andrey knows how to dive into a project head-on and competently manage\ndevelopment priorities, not missing a single detail, but at the same time leaving room for creativity for\ncreative team members. He is one of the most hardworking delivery guys I have encountered in my\nentire career, and if the adjective "talented" can be added to "delivery", then this is definitely about\n

In [5]:
# Break every loaded document into overlapping pieces so each piece can be
# embedded and retrieved on its own.
splitter_options = {
    "chunk_size": 1000,     # upper bound for one chunk
    "chunk_overlap": 200,   # amount shared between neighbouring chunks
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

document_chunks = text_splitter.split_documents(docs)

In [6]:
# Embed the chunks and store them in a Chroma collection persisted on disk.
#
# The collection lives in `persist_directory`, so it survives kernel restarts.
# Without explicit ids every run of this cell would insert the same chunks
# again under fresh random ids, and retrieval would start returning duplicates.
# Deterministic ids (source + position) make a re-run overwrite the existing
# entries instead.
# NOTE(review): entries written by earlier runs (random ids), or left over
# after the document set shrinks, are not removed — delete the "chroma_db"
# directory to rebuild from scratch.
chunk_ids = [
    f"{chunk.metadata.get('source', 'unknown')}::{position}"
    for position, chunk in enumerate(document_chunks)
]

vector_store = Chroma.from_documents(
    documents=document_chunks,
    embedding=OpenAIEmbeddings(),
    ids=chunk_ids,
    persist_directory="chroma_db",
    collection_name="test_rag_collection",
)

In [12]:
# Retrieve relevant document chunks from the vector store based on a conversation.
def _get_content_retriever_chain(vector_store, llm):
    """Build a history-aware retrieval chain on top of ``vector_store``.

    The returned runnable expects a dict with:
      * ``input``    - the latest user question (string)
      * ``messages`` - the earlier conversation turns (list of chat messages)

    When there is earlier conversation, ``llm`` first rewrites it plus the
    question into a search query, and that query is sent to the retriever.
    With no history the question itself is used as the query.

    Args:
        vector_store: Vector store exposing ``as_retriever()``.
        llm: Chat model used to generate the search query.

    Returns:
        A runnable that maps the input dict to the retrieved documents.
    """
    retriever = vector_store.as_retriever()
    prompt = ChatPromptTemplate.from_messages(
        [
            # The system message goes first: some chat providers reject a
            # system message that appears after other messages.
            ("system", "You turn a conversation into a search query for a document retriever."),
            # MessagesPlaceholder for passing conversation history
            MessagesPlaceholder(variable_name="messages"),
            ("user", "{input}"),
            ("user", "Given the above conversation, generate a search query to retrieve relevant information from the documents in the vector store, focusing on the most recent messages. Return only the search query."),
        ]
    )
    # create_history_aware_retriever - Create a chain that takes conversation history and returns documents.
    # https://api.python.langchain.com/en/latest/chains/langchain.chains.history_aware_retriever.create_history_aware_retriever.html
    history_aware_retriever = create_history_aware_retriever(llm, retriever, prompt)

    # create_history_aware_retriever decides whether to run the query-rewriting
    # prompt by looking at the "chat_history" input key. This notebook passes
    # the history under "messages", so without the alias below the prompt/LLM
    # step would never run and the raw question would always be used as query.
    expose_history = RunnablePassthrough.assign(
        chat_history=lambda inputs: inputs.get("chat_history") or inputs.get("messages", [])
    )

    return expose_history | history_aware_retriever


In [17]:
def get_conversational_rag_chain(llm):
    """Assemble the full conversational RAG chain for ``llm``.

    Retrieval is delegated to ``_get_content_retriever_chain`` using the
    module-level ``vector_store``; the retrieved documents are then placed into
    the ``{context}`` slot of the answer prompt.

    Args:
        llm: Chat model used both for retrieval and for answering.

    Returns:
        The chain built by ``create_retrieval_chain``; it is invoked with
        ``messages`` (conversation history) and ``input`` (latest question).
    """
    system_template = """You are a helpful assistant that answers user questions based on the provided context.
         If content matches use the most relevant information from the documents to answer the user's question. If the content does not match, answer based on your knowledge.\n
         {context}"""

    answer_prompt = ChatPromptTemplate.from_messages([
        ("system", system_template),
        MessagesPlaceholder(variable_name="messages"),
        ("user", "{input}"),
    ])

    return create_retrieval_chain(
        _get_content_retriever_chain(vector_store, llm),
        create_stuff_documents_chain(llm, answer_prompt),
    )
    


In [23]:
# Augmented Generation
#
# OpenAI chat model used to generate answers. Streaming is enabled; no
# temperature is passed (the original cell had `temperature=0.2` commented out).
lmm_stream_openai = ChatOpenAI(model="o4-mini-2025-04-16", streaming=True)

# Alternative backend (disabled): uncomment to use Anthropic's Claude model.
# Requires the langchain-anthropic package and Anthropic credentials
# (the original note was cut off after "set the ANTHROPIC" — presumably the
# ANTHROPIC_API_KEY environment variable; confirm before use).
# llm_anthropic = ChatAnthropic(
#     model="claude-2",
#     temperature=0.2,
#     streaming=True,
# )

# Model selection: the chat model that the RAG chain is built with.
llm_stream = lmm_stream_openai

# Simulate a conversation with the user: earlier turns followed by the latest
# question (the last item).
raw_messages = [
    {"role": "user", "content": "Hi! How are you doing today?"},
    {"role": "assistant", "content": "Hi there! I'fine, thanks for asking. How can I help you?"},
    {"role": "user", "content": "What are the strongset skills Eugeny mentioned about me?"},
]

# Explicit role -> LangChain message class mapping. Previously any role other
# than "user" (including a misspelled one) silently became an AIMessage.
message_class_by_role = {
    "user": HumanMessage,
    "assistant": AIMessage,
}

unknown_roles = {m["role"] for m in raw_messages} - set(message_class_by_role)
if unknown_roles:
    raise ValueError(f"Unsupported message role(s): {sorted(unknown_roles)}")

# Transform the message history to LangChain format (HumanMessage / AIMessage).
# The raw dicts keep their own name so `messages` only ever holds message objects.
messages = [
    message_class_by_role[m["role"]](content=m["content"])
    for m in raw_messages
]

# Create the conversational RAG chain
conversation_rag_chain = get_conversational_rag_chain(llm_stream)
respone_message = "*(RAG Response)*\n"

# Everything before the last message is history; the last message is the question.
chain_input = {"messages": messages[:-1], "input": messages[-1].content}

# Stream only the "answer" part of the chain output, printing chunks as they
# arrive while also accumulating the full reply.
for chunk in conversation_rag_chain.pick("answer").stream(chain_input):
    respone_message += chunk
    print(chunk, end="", flush=True)

# Append the response to the conversation history. `messages` holds LangChain
# message objects at this point, so the reply is stored as an AIMessage; a raw
# dict here would leave the list with mixed types and break `.content` access
# on a following turn.
messages.append(AIMessage(content=respone_message))

Evgeny highlights quite a few standout strengths in his letter, but the ones he emphasizes most are:

• Exceptional resilience – you’re “fireproof” against burnout and even spread that energy to the rest of the team.  
• Relentless delivery mindset – you dive in head-on, juggle priorities without missing a detail, love to get hands-on when needed, and invariably drive projects to a happy ending.  
• Deep product and domain knowledge – you make balanced, well-informed decisions.  
• Top-tier facilitation and leadership – you lead with ease and elegance, manage everyone’s expectations by linking business needs to what the team can deliver, and still leave room for creativity.  
• Demigod-level soft skills – you build rapport instantly, motivate people (even if it means a friendly, metaphorical kick in the ass), always with a smile and total professionalism.