## Hide warnings

In [33]:
import warnings 
warnings.filterwarnings("ignore")

## Install required libraries

In [34]:
pip install -r ../requirements.txt 

Note: you may need to restart the kernel to use updated packages.


In [35]:
import sys
import os

# Make the parent folder importable (presumably where the local `keys`
# module lives). The membership check keeps the cell idempotent: re-running
# it no longer appends a duplicate entry to sys.path each time.
PARENT_DIR = os.path.abspath('..')
if PARENT_DIR not in sys.path:
    sys.path.append(PARENT_DIR)

from keys import LANGCHAIN_API_KEY, HUGGINGFACEHUB_API_TOKEN

# Expose the tracing flag and the credentials through environment variables
# for the libraries used in the cells below.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

# Simple Query-Answer Application

In [36]:
# Import each class from the module that defines it; the top-level
# `langchain` re-exports of these names are deprecated.
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceHub

# Minimal question/answer prompt: the model is expected to continue the text
# after "Answer: ".
template = """Question: {question}

Answer: """

prompt = PromptTemplate(template=template, input_variables=["question"])

In [37]:
# Chain that fills `prompt` and sends it to a text-generation model hosted on
# the Hugging Face Hub.
llm_chain = LLMChain(
    prompt=prompt,
    llm=HuggingFaceHub(
        repo_id="meta-llama/Llama-3.2-1B",
        model_kwargs={
            # Generation parameter names are lowercase; the previous
            # "Temperature" key did not name the sampling temperature.
            # A tiny positive value approximates the intended temperature of 0
            # while staying valid for text-generation backends that require a
            # strictly positive temperature.
            "temperature": 0.01,
            "max_length": 64,
        },
    ),
)

## The result of using a text generation model

In [38]:
# General-knowledge question: no private documents are needed to answer it.
question = "What is the capital of Egypt?"
generated_text = llm_chain.run(question)
print(generated_text)

Question: What is the capital of Egypt?

Answer:  Cairo


In [39]:
# Same question/answer prompt as before, now paired with the flan-t5-large
# model instead of the Llama model.
template = """Question: {question}

Answer: """
prompt = PromptTemplate(input_variables=["question"], template=template)

flan_t5_llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"max_length": 64},
)
llm_chain2 = LLMChain(llm=flan_t5_llm, prompt=prompt)

## The result of using a text2text generation model

In [40]:
# Question about internal data, sent to the model without any retrieved context.
question = "what is the email of the employee Alice Johnson"
email_answer = llm_chain2.run(question)
print(email_answer)

HfHubHTTPError: 500 Server Error: Internal Server Error for url: https://api-inference.huggingface.co/models/google/flan-t5-large (Request ID: CthLwpK1eNLK9fPGBIyBD)

In [None]:
# Second internal-data question, again asked without any retrieved context.
question = "Who are the leadership of the marketing team?"
leadership_answer = llm_chain2.run(question)
print(leadership_answer)

scott mccullough


# Retrieval-Augmented Generation (RAG) Application

## Step 1: Load local documents

In [None]:
from langchain_community.document_loaders import DirectoryLoader

# Load the files in ../data that match "*.pdf"; `doc` holds the list of
# loaded documents and is consumed by the splitting step.
loader = DirectoryLoader(path="../data", glob="*.pdf", show_progress=True)
doc = loader.load()
print(len(doc))



Error loading file ..\data\InfraTech_Employee_Information_Report.pdf
100%|██████████| 1/1 [15:13<00:00, 913.10s/it]


ImportError: DLL load failed while importing onnx_cpp2py_export: A dynamic link library (DLL) initialization routine failed.

## Step 2: Split large documents into smaller chunks for embedding and retrieval

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunks of at most 200 characters (measured with len)
# with a 20-character overlap between chunks.
splitter_options = {
    "chunk_size": 200,
    "chunk_overlap": 20,
    "length_function": len,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

chunks = text_splitter.split_documents(doc)
print(f"Split {len(doc)} document(s) into {len(chunks)} chunks.")

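## Step 3: Convert document chunks into vector embeddings

The embeddings are computed by `HuggingFaceEmbeddings` inside the Step 4 cell, at the moment the vector store is built from the chunks.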



## Step 4: Vector Stores: Store the embeddings for efficient retrieval.


In [None]:
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
import shutil

CHROMA_PATH = "chroma"

# Start from an empty store: remove whatever a previous run left on disk.
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

# Embed the chunks and write them to a fresh Chroma database at CHROMA_PATH.
embedding_model = HuggingFaceEmbeddings()
db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory=CHROMA_PATH,
)
db.persist()
print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

## Step 5: Retrievers: Configure a retriever to fetch relevant documents from the vector store.


In [None]:
# Example question; the following cells reuse `query_text`.
query_text = "what is the email of the employee Alice Johnson"

# Similarity-based retriever over the vector store, limited to 3 results.
retriever = db.as_retriever(search_kwargs=dict(k=3), search_type="similarity")

retrieved_docs = retriever.invoke(query_text)
len(retrieved_docs)


## Format the retrieved chunks of a document

In [None]:
from langchain.prompts import ChatPromptTemplate

# Fetch the 3 closest chunks for the query together with their relevance scores.
results = db.similarity_search_with_relevance_scores(query_text, k=3)

# The {answer} slot is filled with an empty string here, so the printed prompt
# shows only the context and the query.
PROMPT_TEMPLATE = """
Use the following retrieved context to answer the query:

{context}

Query: {query}
{answer}
"""

# Join the chunk texts with a visible separator; the scores are not used.
chunk_texts = [chunk.page_content for chunk, _score in results]
context_text = "\n\n---\n\n".join(chunk_texts)

prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt_format = prompt_template.format(
    query=query_text,
    context=context_text,
    answer="",
)
print(prompt_format)

## Step 6: LLM Integration: integrate the retrieved chunks into the LLM query

In [None]:
# flan-t5-large on the Hugging Face Hub, driven by the RAG prompt template.
flan_llm = HuggingFaceHub(
    repo_id="google/flan-t5-large",
    model_kwargs={"max_length": 64, "temperature": 0.5},
)
llm_chain3 = LLMChain(llm=flan_llm, prompt=prompt_template)

# Ask the model; the answer slot of the template is filled with an empty string.
chain_inputs = {"context": context_text, "query": query_text, "answer": ""}
response_text = llm_chain3.run(chain_inputs)

# "source" metadata entry of every retrieved chunk (None when it is missing).
sources = [chunk.metadata.get("source") for chunk in retrieved_docs]

# Print the prompt again, this time with the response and its sources filled in.
formatted_response = f"Response: {response_text}\nSources: {sources}"
prompt_format = prompt_template.format(
    context=context_text,
    query=query_text,
    answer=formatted_response,
)
print(prompt_format)


# Combine the RAG components in a function for easy testing

In [None]:
def RAG_query(query_text, k=3):
    """Answer a question using chunks retrieved from the vector store.

    Retrieves the chunks most similar to ``query_text`` from the notebook-level
    ``db`` vector store, inserts them as context into a prompt, sends that
    prompt to the ``google/flan-t5-large`` model on the Hugging Face Hub, and
    returns the answer together with the source of the first retrieved chunk.

    Args:
        query_text: The question to answer.
        k: Number of chunks to retrieve for the context. Defaults to 3.

    Returns:
        A two-line string: ``Response: <model answer>`` followed by
        ``Sources: <source>``. The source is the ``"source"`` metadata entry
        of the first retrieved chunk, ``"Unknown"`` when that entry is
        missing, or ``"No sources available"`` when nothing was retrieved.
    """
    # Retrieve the k most similar chunks. `invoke` is the retriever entry
    # point already used earlier in this notebook; `get_relevant_documents`
    # is deprecated.
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    retrieved_docs = retriever.invoke(query_text)

    # Concatenate the chunk texts, separated by a visible divider.
    context_text = "\n\n---\n\n".join(doc.page_content for doc in retrieved_docs)

    # Built from unindented pieces so the function's own indentation does not
    # leak into the text sent to the model.
    prompt_text = (
        "\n"
        "Use the following retrieved context to answer the query:\n"
        "\n"
        "{context}\n"
        "\n"
        "Query: {query}\n"
        "Answer:\n"
    )
    prompt_template = ChatPromptTemplate.from_template(prompt_text)

    chain = LLMChain(
        prompt=prompt_template,
        llm=HuggingFaceHub(
            repo_id="google/flan-t5-large",
            model_kwargs={"max_length": 64, "temperature": 0},
        ),
    )
    response_text = chain.run({"context": context_text, "query": query_text})

    # Report only the source of the first retrieved chunk.
    if retrieved_docs:
        sources = retrieved_docs[0].metadata.get("source", "Unknown")
    else:
        sources = "No sources available"

    return f"Response: {response_text}\nSources: {sources}"


# Test the function

In [None]:
# Run the whole RAG pipeline on a question about the loaded documents.
query_text = "Who are the leadership of the marketing team?"
response = RAG_query(query_text=query_text)
print(response)