# Simple RAG

<img src="/home/mauricio/Documents/Projects/RAG-Mastery/Diagrams/indexing_retrieval_generation.png" width="70%">

In [1]:
import sys
import os
import tqdm as notebook_tqdm
from typing import List, Union

# Absolute path to the project directory that contains config.py.
# NOTE(review): this is a machine-specific path — adjust it when running elsewhere.
config_path = '/home/mauricio/Documents/Projects/RAG-Mastery'

# Make that directory importable. The membership check avoids appending a
# duplicate entry when this cell is executed more than once.
if config_path not in sys.path:
    sys.path.append(config_path)

# config.py must define API_KEY; it is used below as the Mistral API key.
from config import API_KEY

# Folder that load_documents() scans for the source files to index.
path_to_docs = "/home/mauricio/Documents/Projects/RAG-Mastery/data"

Import the `ChatMistralAI` class from `langchain_mistralai`. With this, we'll be able to use Mistral's chat models. In our case, the Mixtral 8x22B model (`open-mixtral-8x22b`) will be enough for our RAG system. We initialize the model with our API key to access Mistral's services.

In [2]:
from langchain_mistralai.chat_models import ChatMistralAI
def get_llm_model(self):
    """Create the Mistral chat model used to generate answers.

    Args:
        self: Any object exposing the Mistral API key as ``self.API_KEY``.

    Returns:
        A ``ChatMistralAI`` instance configured for ``open-mixtral-8x22b``.
    """
    llm_settings = {
        "model_name": "open-mixtral-8x22b",
        "mistral_api_key": self.API_KEY,
    }
    return ChatMistralAI(**llm_settings)

In the next part, we use an UnstructuredLoader due to the multiple advantages it provides, and later we plan to increase the capabilities of our RAG system with this. The main advantages are that it can handle multiple file formats (.docx, .pdf, .txt) with a single loader and it preserves the document structure. This provides better retention of the original document layout and structure, which can lead to more accurate and context-aware text extraction.


In [3]:
from langchain_unstructured import UnstructuredLoader
from langchain.schema.document import Document

def load_documents(folder_path):
    """Load every supported document found directly inside *folder_path*.

    Entries whose name ends in ``.docx``, ``.pdf`` or ``.txt`` are parsed
    with ``UnstructuredLoader``; every other entry is skipped. Sub-folders
    are not searched (``os.listdir`` is not recursive).

    Args:
        folder_path: Directory to scan for documents.

    Returns:
        A list with the documents produced by the loader for all matching
        files, in ``os.listdir`` order. The list is empty when the folder
        contains no supported file.
    """
    supported_extensions = ('.docx', '.pdf', '.txt')
    documents = []
    for file in os.listdir(folder_path):
        if not file.endswith(supported_extensions):
            continue
        loader = UnstructuredLoader(os.path.join(folder_path, file))
        documents.extend(loader.load())
        # Running total, reported only after a file was actually loaded.
        print("Document loaded lenght: ", len(documents))

    if not documents:
        # Guard: documents[0] below would raise IndexError on an empty list,
        # and reporting success would be misleading.
        print(f"No supported documents found in {folder_path}")
        return documents

    print("Documents loaded successfully ✅")
    print(documents[0].metadata.get("filename"))
    return documents

The RecursiveCharacterTextSplitter is ideal for documents with multiple images and text because it adaptively splits content using various separators, preserves context through chunk overlap, and maintains the logical flow of diverse document elements. This approach ensures that the relationship between text and image descriptions is retained, while creating manageable chunks for processing. The flexibility in chunk size and overlap allows for fine-tuning to balance context preservation with efficient processing, making it particularly effective for complex documents in a RAG system.

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    """Break the loaded documents into overlapping chunks.

    Args:
        documents: Documents to split.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by consecutive chunks.

    Returns:
        The list of chunks returned by the splitter's ``split_documents``.

    Raises:
        Exception: Any error raised while splitting is printed and re-raised.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        # Tried in order: paragraphs, lines, words, then single characters.
        "separators": ["\n\n", "\n", " ", ""],
    }
    try:
        splitter = RecursiveCharacterTextSplitter(**splitter_options)
        chunks = splitter.split_documents(documents)
        print("Split document successfully ✅")
        print("Documents split: ", len(chunks))
        return chunks
    except Exception as err:
        print(f"Error splitting documents: {err}")
        raise


Now let's create an embedding function that takes the splits produced in the previous step and then creates a retriever for efficient document search and retrieval.

`MistralAIEmbeddings` is the class provided by LangChain that generates the embeddings using the MistralAI API.

Then we import FAISS, a library for efficient similarity search and clustering of dense vectors.

In [5]:
from langchain_mistralai import MistralAIEmbeddings
from langchain_community.vectorstores import FAISS

def embed_documents(splits):
    """Embed the chunks, index them in FAISS and expose the index as a retriever.

    Args:
        splits: Document chunks to embed and index.

    Returns:
        A retriever over the FAISS index configured with ``search_type="mmr"``
        and ``k=6``.

    Raises:
        Exception: Any error raised while embedding or indexing is printed
            and re-raised.
    """
    retriever_options = {
        "search_type": "mmr",
        "search_kwargs": {"k": 6},
    }
    try:
        embedder = MistralAIEmbeddings(model="mistral-embed", mistral_api_key=API_KEY)
        index = FAISS.from_documents(documents=splits, embedding=embedder)
        mmr_retriever = index.as_retriever(**retriever_options)

        print("Retriever succesfuly created ✅")
        return mmr_retriever
    except Exception as err:
        print(f"Error embeding/retrieving documents {err}")
        raise

Finally, we will create a class that performs the RAG step using the retriever we created. It uses the retriever to fetch relevant documents based on a user query, then formats the retrieved documents and the user query into a structured prompt template.

The last step is to invoke the MistralAI language model to generate a response based on the formatted input.

In [6]:
from langchain.prompts import ChatPromptTemplate

class SimpleRAG:
    """Minimal retrieval-augmented generation pipeline.

    Retrieves documents for a question, places their text in a prompt
    together with the question, and asks the chat model for an answer.
    """

    def __init__(self, retriever, api_key):
        """Store the retriever and build the chat model and prompt template.

        Args:
            retriever: Object used to fetch the documents relevant to a question.
            api_key: Mistral API key handed to the chat model.
        """
        self.retriever = retriever
        # Must be set before get_llm_model(), which reads self.API_KEY.
        self.API_KEY = api_key
        self.llm = get_llm_model(self)
        self.prompt_template = ChatPromptTemplate.from_messages([
            ("system", "You are an rule assistant for the NFL. Answer the questions based on the context provided. If the answer to the question is not in the context, say \"I don't know\"."),
            ("human", "Context: {context}\n\nQuestion: {question}\nAnswer:")
        ])

    def rag_chain(self, question):
        """Answer *question* using the retrieved documents as context.

        Args:
            question: The user question.

        Returns:
            The object returned by the chat model's ``invoke`` call.
        """
        # invoke() replaces the deprecated get_relevant_documents() call.
        context = self.retriever.invoke(question)
        context_str = "\n".join(doc.page_content for doc in context)

        # Fill the prompt template with the retrieved text and the question.
        messages = self.prompt_template.format_messages(
            context=context_str,
            question=question
        )

        # Generate the answer.
        return self.llm.invoke(messages)

  

## Use of the chat

In [8]:
import os
import logging
import warnings

# Silence warnings whose message starts with the tokenizer-download notice.
warnings.filterwarnings("ignore", message="Could not download mistral tokenizer")


# Usage: run the pipeline end to end — load, split, then embed and index.
documents = load_documents(path_to_docs)
splits = split_documents(documents)
retriever = embed_documents(splits)

api_key = "your_mistral_api_key"  # NOTE(review): placeholder is never used — the key passed below is API_KEY from config.py
rag = SimpleRAG(retriever, API_KEY)

query = "Explain to me the RUNNING PLAY?"
response = rag.rag_chain(query)
print(response)  # prints the whole response object; the answer text alone is response.content



Document loaded lenght:  1964
Documents loaded successfully ✅
2024-nfl-rulebook.pdf
Split document successfully ✅
Documents split:  1995


INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mis

Retriever succesfuly created ✅


INFO: HTTP Request: POST https://api.mistral.ai/v1/embeddings "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"


content="A running play in American football is a type of offensive play where the offense attempts to advance the ball without a forward pass. The play begins in situations such as when a player hands off the ball to a teammate, or when a player takes the ball from the center and runs with it. The context also mentions that a running play begins when there's obvious video evidence that the runner was not out of bounds. However, please note that this is a simplified explanation and the actual rules regarding running plays can be more complex and detailed." response_metadata={'token_usage': {'prompt_tokens': 147, 'total_tokens': 257, 'completion_tokens': 110}, 'model': 'open-mixtral-8x22b', 'finish_reason': 'stop'} id='run-f8f1feda-9a6e-414d-850a-fda5adcef5bf-0' usage_metadata={'input_tokens': 147, 'output_tokens': 110, 'total_tokens': 257}


In [10]:
response.content

"A running play in American football is a type of offensive play where the offense attempts to advance the ball without a forward pass. The play begins in situations such as when a player hands off the ball to a teammate, or when a player takes the ball from the center and runs with it. The context also mentions that a running play begins when there's obvious video evidence that the runner was not out of bounds. However, please note that this is a simplified explanation and the actual rules regarding running plays can be more complex and detailed."

"A running play in American football is a type of offensive play where the offense attempts to advance the ball without a forward pass. The play begins in situations such as when a player hands off the ball to a teammate, or when a player takes the ball from the center and runs with it. The context also mentions that a running play begins when there's obvious video evidence that the runner was not out of bounds. However, please note that this is a simplified explanation and the actual rules regarding running plays can be more complex and detailed."