In [6]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# List of URLs to load documents from
urls = [
    "https://en.wikipedia.org/wiki/Large_language_model",
    "https://en.wikipedia.org/wiki/Retrieval-augmented_generation",
    "https://en.wikipedia.org/wiki/Generative_artificial_intelligence",
]
# Load documents from the URLs
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

In [7]:
# Initialize a text splitter with specified chunk size and overlap
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
# Split the documents into chunks
doc_splits = text_splitter.split_documents(docs_list)

In [13]:
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_ollama import OllamaEmbeddings

# Create embeddings for documents and store them in a vector store
vectorstore = SKLearnVectorStore.from_documents(
    documents=doc_splits,
    embedding=OllamaEmbeddings(model="llama3.1")
)
retriever = vectorstore.as_retriever(k=4)

In [14]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Define the prompt template for the LLM
prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Documents: {documents}
    Answer:
    """,
    input_variables=["question", "documents"],
)

In [15]:
# Initialize the LLM with Llama 3.1 model
llm = ChatOllama(
    model="llama3.1",
    temperature=0,
)

In [16]:
# Create a chain combining the prompt template and LLM
rag_chain = prompt | llm | StrOutputParser()

In [17]:
# Define the RAG application class
class RAGApplication:
    def __init__(self, retriever, rag_chain):
        self.retriever = retriever
        self.rag_chain = rag_chain
    def run(self, question):
        # Retrieve relevant documents
        documents = self.retriever.invoke(question)
        # Extract content from retrieved documents
        doc_texts = "\\n".join([doc.page_content for doc in documents])
        # Get the answer from the language model
        answer = self.rag_chain.invoke({"question": question, "documents": doc_texts})
        return answer

In [27]:
# Initialize the RAG application
rag_application = RAGApplication(retriever, rag_chain)
# Example usage
question = "Tell me how retrevial augmented generation works in an AI model?"
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Question: Tell me how retrevial augmented generation works in an AI model?
Answer: In a Retrieval Augmented Generation (RAG) model, retrieval-augmented generation works by encoding a query and documents into vectors, then retrieving the most relevant documents based on similarity. The Language Model (LM) generates an output based on both the query and context included from the retrieved documents. This process allows the LM to use external knowledge without needing to be fine-tuned for specific tasks or tools.


In [28]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    docs, scores = zip(*vectorstore.similarity_search_with_score(query))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score

    return docs

In [31]:
result = retriever.invoke("dinosaur on a bike in the ocean on the moon")
result

(Document(metadata={'id': '983bf5be-2ed3-4ad3-972e-ab16e0521a6f', 'source': 'https://en.wikipedia.org/wiki/Generative_artificial_intelligence', 'title': 'Generative artificial intelligence - Wikipedia', 'language': 'en', 'score': 0.48687814197667423}, page_content='History\nTimeline\nProgress\nAI winter\nAI boom'),
 Document(metadata={'id': '61cf457f-5f25-4fe3-97ca-715b1251a738', 'source': 'https://en.wikipedia.org/wiki/Large_language_model', 'title': 'Large language model - Wikipedia', 'language': 'en', 'score': 0.5586828623851606}, page_content='43 languages'),
 Document(metadata={'id': '22e14ad3-81d9-4f82-a197-56d8df67e85f', 'source': 'https://en.wikipedia.org/wiki/Generative_artificial_intelligence', 'title': 'Generative artificial intelligence - Wikipedia', 'language': 'en', 'score': 0.5794381751058408}, page_content='Other outlets that have published articles whose content and/or byline have been confirmed or suspected to be created by generative AI models – often with false cont