In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import DirectoryLoader,TextLoader
import tiktoken
from langchain_openai import OpenAIEmbeddings,ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
import gradio as gr
from langchain.memory import ConversationBufferMemory #to save single conversation history
from langchain.chains import ConversationalRetrievalChain # which connects llm rag ,he core of RAG, retrieves documents and answers questions conversationally.




In [3]:
# Read variables from a .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)

True

In [4]:
def load_documents(root_path:str):
    """
    Load every Markdown (.md) file found under a directory tree.

    Args:
        root_path: Directory that is searched recursively for ``.md`` files.

    Returns:
        list: The documents produced by the loader. If loading fails the
        error is printed and an empty list is returned, so callers can
        always use ``len()``, iteration and truthiness checks on the result
        instead of receiving an implicit ``None``.
    """
    loader = DirectoryLoader(
        path=root_path,
        glob="**/*.md",  # descend into all sub-folders and match .md files
        loader_cls=TextLoader,  # each matched file is read with TextLoader
        loader_kwargs={
            "encoding": "utf-8",
            "autodetect_encoding": True,
        },
    )
    try:
        docs = loader.load()
    except Exception as e:
        # Best-effort loading: report the problem and hand back an empty
        # list rather than falling off the end of the function with None.
        print(f'error loading documents {e}')
        return []

    print(f"loaded {len(docs)} documents from directory {root_path}")
    return docs



In [5]:
# Knowledge-base location. Set KNOWLEDGE_BASE_DIR (for example in the .env file)
# to run the notebook on another machine; without it the original local path is used.
path = os.getenv(
    "KNOWLEDGE_BASE_DIR",
    r"C:\Users\Mohamed Arshad\Downloads\My_RAG_Lab\llm_engineering\RAG\knowledge-base",
)

documents = load_documents(root_path=path)

loaded 76 documents from directory C:\Users\Mohamed Arshad\Downloads\My_RAG_Lab\llm_engineering\RAG\knowledge-base


In [6]:
# Peek at the last loaded document; a negative index keeps working when the
# number of files in the knowledge base changes.
documents[-1]

Document(metadata={'source': 'C:\\Users\\Mohamed Arshad\\Downloads\\My_RAG_Lab\\llm_engineering\\RAG\\knowledge-base\\products\\Rellm.md'}, page_content="# Product Summary\n\n# Rellm: AI-Powered Enterprise Reinsurance Solution\n\n## Summary\n\nRellm is an innovative enterprise reinsurance product developed by Insurellm, designed to transform the way reinsurance companies operate. Harnessing the power of artificial intelligence, Rellm offers an advanced platform that redefines risk management, enhances decision-making processes, and optimizes operational efficiencies within the reinsurance industry. With seamless integrations and robust analytics, Rellm enables insurers to proactively manage their portfolios and respond to market dynamics with agility.\n\n## Features\n\n### AI-Driven Analytics\nRellm utilizes cutting-edge AI algorithms to provide predictive insights into risk exposures, enabling users to forecast trends and make informed decisions. Its real-time data analysis empowers r

In [7]:
def count_tokens(docs):
    """
    Count the tokens of all documents using the ``cl100k_base`` encoding.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        int: Sum of the token counts over every document (0 for an empty
        iterable). The total is also printed.
    """
    enc = tiktoken.get_encoding("cl100k_base")
    total_tokens = sum(len(enc.encode(d.page_content)) for d in docs)

    print(f"total no of tokens are :{total_tokens}")
    # Return the figure as well: callers assign the result of this function.
    return total_tokens

In [8]:
# Report the token count over all loaded documents
total = count_tokens(documents)

total no of tokens are :63715


Divide into chunks

In [9]:
# Break the loaded documents into overlapping chunks for embedding
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(documents)

print(len(chunks))

413


In [10]:
# RAG Chunking Guidelines:
# -------------------------------
# chunk_size: maximum chunk length, measured in characters by RecursiveCharacterTextSplitter
#             (it counts with len() unless a token-based length function is supplied)
#   - Increase -> Fewer chunks, more context, better for summarization
#               Higher embedding cost, less precise retrieval
#   - Decrease -> More chunks, less context, better for precise retrieval
#               Lower token cost, may break context

# chunk_overlap: number of characters repeated between consecutive chunks
#   - Increase -> Preserves context, smoother transitions, higher cost
#   - Decrease -> Less redundancy, cheaper, may break context

In [11]:
# Confirm the OpenAI key is available in the environment without printing its value
openai_api_key = os.getenv('OPENAI_API_KEY')

key_status = 'key exists' if openai_api_key else 'key not found'
print(key_status)

key exists


In [12]:
# Directory name later passed to Chroma as its persist directory
db_name = 'vector_db'

# Free embeddings from Hugging Face.
# Alternative kept for reference: embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [13]:
# When a persisted Chroma folder is already on disk, open it and drop its
# collection before a new one is created
if os.path.exists(db_name):
    stale_store = Chroma(
        persist_directory=db_name,
        embedding_function=embeddings,
    )
    stale_store.delete_collection()

In [14]:
# Embed the chunks and store them in a Chroma vector store persisted under db_name
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory=db_name,
)



In [15]:
# Inspect what was stored (reads Chroma's private `_collection` attribute)
data = vector_store._collection.get(include=["documents", "embeddings"])
stored_texts, stored_vectors = data["documents"], data["embeddings"]
num_docs = len(stored_texts)
embedding_dim = len(stored_vectors[0])  # length of the first stored vector

print(f"Vector store has {num_docs} documents")
print(f"Embedding dimension: {embedding_dim}")


Vector store has 413 documents
Embedding dimension: 384


In [16]:
# Chat model that is handed to the conversational chain
llm = ChatOpenAI(
    model='gpt-4.1-nano',
)

In [17]:
# Buffer memory that stores the conversation under the 'chat_history' key as message objects
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
)


  memory = ConversationBufferMemory(memory_key='chat_history',return_messages=True)


In [18]:
# Wrap the vector store as a retriever; no arguments are passed, so the library defaults apply
retriever =vector_store.as_retriever()

In [19]:
# Wire the chat model, the retriever and the conversation memory into one chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [20]:
def chat(message, history):
    """
    Answer one user message through the conversational retrieval chain.

    Args:
        message: The user's question text.
        history: Accepted for the chat-callback signature but not used here;
            the question alone is sent to ``conversation_chain``.

    Returns:
        The value stored under ``'answer'`` in the chain's result.
    """
    payload = {"question": message}
    response = conversation_chain.invoke(payload)
    answer = response['answer']
    return answer

In [21]:
# Build the Gradio chat UI around `chat` and start the local server
chat_ui = gr.ChatInterface(chat)
chat_ui.launch()

  self.chatbot = Chatbot(


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


