# RAG Application

## Importing Libraries

In [2]:
print("Importing libraries...")
# Standard library
import os
from operator import itemgetter

# Loading and chunking libraries
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Vector storage
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# LCEL libraries
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Conversational memory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_classic.memory import ConversationBufferMemory
from langchain_core.runnables import RunnableWithMessageHistory
from langchain_core.messages import HumanMessage, AIMessage
print("libraries successfully imported!")

Importing libraries...
libraries successfully imported!


In [3]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# This project stores the OpenAI key under "paid_api". Fall back to the
# conventional OPENAI_API_KEY name so either spelling works.
api_key = os.getenv("paid_api") or os.getenv("OPENAI_API_KEY")

if not api_key:
    # Name the variables that were actually looked up, so the message
    # points at the real cause.
    raise ValueError(
        "OpenAI API key not found: set 'paid_api' (or 'OPENAI_API_KEY') in the .env file"
    )

print("API key loaded")

API key loaded


## Loading and chunking documents

In [5]:
# Folder holding the .txt source documents. Set the RAG_DATA_PATH environment
# variable to run on another machine; the original location stays the default.
data_path = os.getenv(
    "RAG_DATA_PATH",
    r"C:\Users\owner\Desktop\Files_Deep_Learning\RAG\Project\documents",
)

# Loading document
documents = []
# os.listdir returns entries in arbitrary order; sorting keeps the document
# (and therefore chunk) order reproducible between runs.
for file in sorted(os.listdir(data_path)):
    if file.endswith(".txt"):
        loader = TextLoader(os.path.join(data_path, file), encoding='utf-8')
        documents.extend(loader.load())

# Fail here with a clear message instead of later, when an empty corpus
# reaches the embedding step.
if not documents:
    raise FileNotFoundError(f"No .txt documents found in {data_path!r}")

#Chunking
# 400-character chunks with a 50-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

## Storing embeddings in a vector database

In [6]:



#Creating embeddings and storing
embedding = OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=api_key)

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of appending a fresh copy of every chunk under a new
# random id (duplicates would crowd the retriever's top-k results).
# NOTE(review): records written by earlier runs under random ids stay in
# ./chroma_db; delete that folder once to purge them.
chunk_ids = [
    f"{chunk.metadata.get('source', 'document')}::chunk-{index}"
    for index, chunk in enumerate(chunks)
]

# With a persist_directory the collection is written to disk automatically, so
# the deprecated vectorstore.persist() call (which only emitted a warning) is
# no longer needed.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embedding,
    ids=chunk_ids,
    persist_directory="./chroma_db",
)

print("Embeddings created and saved to chroma_db")

Embeddings created and saved to chroma_db


  vectorstore.persist()


## RAG Chain with LCEL

In [8]:
# Initialize LLM
# temperature=0 keeps answers as repeatable as the model allows.
llm = ChatOpenAI(model="gpt-4o", temperature=0,openai_api_key=api_key)

# Retriever
# k=4: request four chunks from the vector store per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# System instructions. The user's question is deliberately NOT interpolated
# here: it is supplied once, as the human turn of the prompt below. Placing
# raw user text inside the system message would give it system-level authority
# and make it easier to talk the model out of the privacy guardrails.
system_prompt_text = """
You are a personal RAG assistant answering questions strictly from the provided context about Esther Kudoro.

### INSTRUCTIONS:
1. Answer questions about professional experience, skills, repositories, and technical implementation details.
2. Use ONLY the context below. If the answer is not present, say "I do not have that information."
3. ALWAYS cite your sources implicitly by referring to the specific file or section.
4. Format all responses as clean plain text with no markdown or special characters.

### PRIVACY GUARDRAILS (CRITICAL):
You MUST REFUSE to answer questions about the following personal sensitive information, even if it might be present in the context:
- Age
- Date of birth
- Home Address
- Phone number
- Personal Email address
- Any other sensitive personal identifiers

If a user asks for this information, reply EXACTLY with:
"I cannot share personal or sensitive information such as contact details or age. Please ask about her professional experience or projects."


Context:
{context}

Answer clearly and concisely.
"""

# Message order: system instructions + retrieved context, then the prior
# conversation turns, then the current question as the final human message.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt_text),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}")
])

def format_docs(docs):
    """Join retrieved documents into one context string, labelled by source.

    The prompt asks the model to cite the file an answer came from, so each
    chunk is prefixed with the file name taken from ``metadata["source"]``.
    Only the final path component is kept, so local directory names are not
    sent to the model.

    Args:
        docs: Iterable of objects exposing ``page_content`` and, optionally,
            a ``metadata`` mapping with a ``"source"`` entry.

    Returns:
        Blocks of ``"[Source: <name>]\\n<content>"`` separated by blank lines;
        an empty string when ``docs`` is empty. Chunks without a source are
        labelled ``unknown``.
    """
    blocks = []
    for doc in docs:
        metadata = getattr(doc, "metadata", None) or {}
        source = metadata.get("source")
        if source:
            # Handle both "/" and "\\" separators regardless of the host OS.
            label = str(source).replace("\\", "/").rsplit("/", 1)[-1]
        else:
            label = "unknown"
        blocks.append(f"[Source: {label}]\n{doc.page_content}")
    return "\n\n".join(blocks)

In [9]:
# RAG CHAIN
# Expects an input dict with "question" (str) and "chat_history" (list of
# messages). `itemgetter` comes from the import cell at the top of the notebook.
rag_chain = (
    {
        # Only the question string goes to the retriever; the retrieved
        # documents are then flattened into one context string.
        "context": itemgetter("question") | retriever | format_docs,
        # Forwarded unchanged to the matching prompt variables.
        "chat_history": itemgetter("chat_history"),
        "question": itemgetter("question"),
    }
    | prompt
    | llm
    | StrOutputParser()  # return the reply as a plain string
)

print("RAG Chain created!")

RAG Chain created!


In [11]:
# Manual conversation memory: alternating Human/AI messages, oldest first.
chat_history = []
print("Esther Kudoro Personal Assistant \nType either 'exit' or 'quit' to stop the program.")

while True:
    # Strip surrounding whitespace so " exit " still ends the session.
    question = input("You: ").strip()
    if not question:
        # Nothing to ask: do not send an empty query to the retriever / LLM.
        continue
    if question.lower() in ("exit", "quit"):
        break

    answer = rag_chain.invoke({
        "question": question,
        "chat_history": chat_history
    })

    print(f"Assistant: {answer}\n")

    # Record the turn only after a successful answer so a failed call does
    # not leave a dangling question in the history.
    chat_history.extend([
        HumanMessage(content=question),
        AIMessage(content=answer)
    ])


Esther Kudoro Personal Assistant 
Type either 'exit' or 'quit' to stop the program.


Assistant: I do not have that information.

Assistant: I do not have that information.

Assistant: Esther Kudoro is an Electrical and Electronics Engineer with first-class honors from Afe Babalola University, where she was recognized as the Best Graduating Student in Power Systems Engineering. She has a strong interest in power systems engineering, data analytics, and machine learning, focusing on applying artificial intelligence and data-driven techniques to engineering problems. Her professional interests include fault diagnosis, predictive maintenance, and retrieval-augmented generation systems. Esther has technical skills in electrical and electronics engineering, power systems engineering, electrical fault analysis, and data and machine learning, including data analysis, feature engineering, and classification models. She has experience with Python for data science and machine learning tasks. Additionally, she has participated in professional and virtual internship programs, such 

In [None]:
# prompt = ChatPromptTemplate.from_template("""
# You are a personal RAG assistant answering questions strictly
# from the provided context about Esther Kudoro.

# Use ONLY the context below.
# If the answer is not present, say "I do not have that information."

# Context:
# {context}

# Question: {question}

# Answer clearly and concisely.
# Always include source references.
# """)

# llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)
# retriever = vectorstore.as_retriever(search_kwargs={"k":4})

# rag_chain = (
#     {
#         "context": retriever,
#         "question": RunnablePassthrough()
#     }
#     | prompt
#     | llm
#     |StrOutputParser()
# )


In [None]:
# Prompt for the memory-enabled chain. This cell rebinds `prompt`, `llm` and
# `retriever`, so after it runs these definitions (not the earlier ones) are
# what `rag_chain` uses. The privacy guardrails are repeated here so that the
# chain used with conversational memory refuses the same sensitive questions
# as the first prompt in this notebook.
prompt = ChatPromptTemplate.from_template("""
You are a personal RAG assistant answering questions strictly
from the provided context about Esther Kudoro.
Use ONLY the context below.
If the answer is not present, say "I do not have that information."

PRIVACY GUARDRAILS (CRITICAL):
You MUST REFUSE to answer questions about the following personal sensitive information, even if it might be present in the context:
- Age
- Date of birth
- Home Address
- Phone number
- Personal Email address
- Any other sensitive personal identifiers

If a user asks for this information, reply EXACTLY with:
"I cannot share personal or sensitive information such as contact details or age. Please ask about her professional experience or projects."

<context>
{context}
</context>

Chat History: {chat_history}

Question: {question}
Answer clearly and concisely.
Always include source references.
""")

# temperature=0 keeps answers as repeatable as the model allows.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=api_key)
# k=4: request four chunks from the vector store per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

def format_docs(docs):
    """Join retrieved documents into one context string, labelled by source.

    The prompt asks the model to include source references, so each chunk is
    prefixed with the file name taken from ``metadata["source"]``. Only the
    final path component is kept, so local directory names are not sent to
    the model.

    Args:
        docs: Iterable of objects exposing ``page_content`` and, optionally,
            a ``metadata`` mapping with a ``"source"`` entry.

    Returns:
        Blocks of ``"[Source: <name>]\\n<content>"`` separated by blank lines;
        an empty string when ``docs`` is empty. Chunks without a source are
        labelled ``unknown``.
    """
    blocks = []
    for doc in docs:
        metadata = getattr(doc, "metadata", None) or {}
        source = metadata.get("source")
        if source:
            # Handle both "/" and "\\" separators regardless of the host OS.
            label = str(source).replace("\\", "/").rsplit("/", 1)[-1]
        else:
            label = "unknown"
        blocks.append(f"[Source: {label}]\n{doc.page_content}")
    return "\n\n".join(blocks)

# Helper that flattens the chat history into plain text for the prompt
def format_chat_history(messages):
    """Render chat messages as newline-separated ``"<Role>: <content>"`` lines.

    Args:
        messages: Sequence of message objects exposing ``type`` and
            ``content``; may be empty or ``None``.

    Returns:
        One line per message, labelled ``Human`` when ``type`` is ``"human"``
        and ``Assistant`` otherwise. An empty string when there are no
        messages.
    """
    if not messages:
        return ""
    return "\n".join(
        f"{'Human' if entry.type == 'human' else 'Assistant'}: {entry.content}"
        for entry in messages
    )

def _build_context(inputs):
    """Retrieve documents for the question and flatten them into prompt text."""
    return format_docs(retriever.invoke(inputs["question"]))


def _pick_question(inputs):
    """Forward the user's question unchanged."""
    return inputs["question"]


def _render_history(inputs):
    """Render prior turns as text; a missing history is treated as empty."""
    return format_chat_history(inputs.get("chat_history", []))


# Each entry computes one prompt variable from the chain's input dict.
prompt_inputs = {
    "context": _build_context,
    "question": _pick_question,
    "chat_history": _render_history,
}

# Prompt variables -> prompt -> model -> plain-string reply.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()


## Initialising and activating conversational memory

In [19]:
# In-memory registry of conversations, keyed by session id. It lives only as
# long as the kernel; re-running this cell clears every stored history.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Identifier of the conversation.

    Returns:
        The history object registered in ``store`` under ``session_id``.
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register an empty history.
        history = store[session_id] = ChatMessageHistory()
        return history

# memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Wrap the chain so the history for a session is looked up through
# get_session_history and supplied under "chat_history", with the "question"
# entry of the input treated as the new user message.
rag_chain_with_memory = RunnableWithMessageHistory(
    runnable=rag_chain,
    get_session_history=get_session_history,
    input_messages_key="question",
    history_messages_key="chat_history",
)

## Running the RAG assistant

In [None]:
# Interactive loop for the memory-enabled chain. History is handled by
# rag_chain_with_memory, so only the question is passed in.
while True:
    # Strip surrounding whitespace so " exit " still ends the session.
    question = input("You: ").strip()
    if not question:
        # Nothing to ask: do not send an empty query to the retriever / LLM.
        continue
    if question.lower() in ("exit", "quit"):
        break

    answer = rag_chain_with_memory.invoke(
        {"question": question},
        # A fixed session id keeps every turn of this loop in one conversation.
        config={"configurable": {"session_id": "esther-session"}}
    )

    # Fixed the "Assitant" typo in the user-facing label.
    print(f"\nAssistant: {answer} \n")

TypeError: argument 'text': 'dict' object cannot be converted to 'PyString'