## RAG with Ollama 

In [1]:
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain.vectorstores.chroma import Chroma
from langchain_community.embeddings.ollama import OllamaEmbeddings

from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever


In [2]:
# Directory that holds the source PDF files
path = "skincare_data/"


### Load all PDF files from the data directory into a single list of documents

In [3]:
def load_documents(directory=None):
    """
    Load PDF documents from a directory.

    Parameters:
    directory (str, optional): Directory to scan for PDF files. When omitted,
        the notebook-level 'path' variable is used.

    Returns:
    list of Document: A list of documents loaded from the PDF files in the directory.
    """

    # Resolve the default at call time, so changing 'path' and re-running the
    # calling cell is enough to load from a different location.
    if directory is None:
        directory = path

    # 'glob="*.pdf"' ensures that only files with a .pdf extension are loaded.
    loader = PyPDFDirectoryLoader(directory, glob="*.pdf")

    # The 'load' method returns a list of Document objects.
    return loader.load()


In [4]:
# Load every PDF found under 'path' and report how many Document objects came back.
documents = load_documents()
print(f"Number of documents loaded: {len(documents)}")

Number of documents loaded: 40


In [5]:
# Preview the text extracted for the first loaded Document (the first page)
documents[0].page_content

'SKIN CARE & BEAUTY PRODUCTS(/BEAUTY-TOOLBOX/ALL-ARTICLES/)\n6 Skin-Care Ingredient Combinations\nThat Don’t Mix\nSome topicals inactivate when combined, destabilize a critical ingredient, or boost skin irritation. Here’s\nhow to build a smart skin-care routine.\nByJessica Migala (/authors/jessica-migala/)\nbyJacquelyn Dosal, MD (/authors/jacquelyn-dosal/)\nUpdated on November 21, 2023Medically Reviewed\nIn some cases, layering products can result in the opposite of your desired e\x00ect.\niStock\nLet’s lay out a couple of scenarios: One, you walk into a Sephora or Ulta and buy a new product,\nexcited to add it to your routine based on the promises on the bottle. Two, you see someone on\nInstagram talking about how a certain product totally changed their skin, and you order it.\nThese products may be good on their own, but do they \x00t into the routine you have now? Or\ncould they work against you?\nPrivacy - TermsAdvertisementHealth Conditions A-Z(https://www.everydayhealth.com/condi

#### Split the documents into smaller chunks so that the retrieved text is more relevant to a specific query

In [6]:
def split_documents(documents, chunk_size=1000, chunk_overlap=500):
    """
    Splits a list of documents into smaller, overlapping chunks using a text splitter.

    Parameters:
    documents (list of Document): The documents to be split into chunks.
    chunk_size (int): Target size of each chunk in characters. Defaults to 1000.
    chunk_overlap (int): Number of overlapping characters between consecutive
        chunks. Defaults to 500.

    Returns:
    list of Document: The chunks resulting from the split operation.
    """

    # Create an instance of RecursiveCharacterTextSplitter for splitting documents.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,        # Size of each chunk in characters
        chunk_overlap=chunk_overlap,  # Number of overlapping characters between chunks
        length_function=len,          # Chunk length is measured with len, i.e. in characters
        is_separator_regex=False      # Separators are NOT treated as regular expressions
    )

    # Split the documents into chunks and return the result.
    return text_splitter.split_documents(documents)


In [7]:
# Call the split_documents function to divide the loaded documents into smaller chunks.
# The 'documents' variable is the list of Document objects returned by load_documents().
# The result is assigned to the 'chunks' variable, which is embedded into the vector database later on.

chunks = split_documents(documents)


In [8]:
# Number of chunks first, then number of loaded documents, one per line.
print(len(chunks), len(documents), sep="\n")

98
40


## Use Ollama models


In [9]:
# List the locally installed Ollama models (uncomment the pull lines below if mistral or nomic-embed-text is missing)
!ollama list
#!ollama pull mistral
#!ollama pull nomic-embed-text

NAME                   	ID          	SIZE  	MODIFIED     
mistral:latest         	f974a74358d6	4.1 GB	22 hours ago	
nomic-embed-text:latest	0a109f422b47	274 MB	23 hours ago	
llama3.1:latest        	91ab477bec9d	4.7 GB	24 hours ago	


In [10]:
!pip install chromadb



In [11]:
# Embed every chunk with the "nomic-embed-text" Ollama model and store the vectors in a Chroma collection.
# 'vector_db' holds the document embeddings that the retriever built further down searches.
# NOTE(review): no persist_directory is passed, so the index is presumably in-memory only, and
# re-running this cell may add the same chunks to "local-rag" a second time — confirm.

vector_db = Chroma.from_documents(
    documents=chunks,  # Document chunks to be embedded and added to the vector database
    embedding=OllamaEmbeddings(model="nomic-embed-text", show_progress=True),  # Embedding model; show_progress=True displays a progress bar while embedding
    collection_name="local-rag"  # Name of the collection that identifies this set of embeddings inside the database
)


OllamaEmbeddings: 100%|██████████| 98/98 [00:09<00:00, 10.56it/s]


In [12]:
# Name of the local Ollama model used as the language model (it appears in the 'ollama list' output above).
local_model = "mistral"  # Change this to switch to another locally available model.

# Initialize a ChatOllama instance using the specified local model.
# This LLM is used both to generate alternative queries for retrieval and to answer the question.
llm = ChatOllama(model=local_model)  # Create the LLM instance with the "mistral" model.


In [13]:
# Define a prompt template for generating alternative versions of a user's question.
# This is used to enhance document retrieval from a vector database by providing
# multiple variations of the question, thereby improving the chances of retrieving relevant documents.
# The template asks for five versions, one per line; '{question}' is the only input variable.
# Note: the leading spaces on the continuation lines are part of the template text sent to the LLM.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""Generate five alternative versions of the user's question to improve 
    document retrieval from a vector database. Separate each version with a newline.
    Original question: {question}"""
)

In [14]:
# Initialize a MultiQueryRetriever instance.
# It is configured with the vector database retriever, the LLM that generates the queries,
# and the custom prompt template (QUERY_PROMPT), which asks for five alternative versions of the question.
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(), 
    llm,
    prompt=QUERY_PROMPT
)

# RAG prompt: instructs the LLM to answer using only the retrieved context.
# '{context}' and '{question}' are filled in by the chain defined in the next cell.
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

In [15]:
# Create a processing chain to handle a question-answering task using a sequence of operations.

def _format_docs(docs):
    """
    Join the text of the retrieved documents into a single context string.

    Parameters:
    docs (list of Document): The documents returned by the retriever.

    Returns:
    str: The page_content of every document, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


chain = (
    # Step 1: Retrieve the relevant context based on the user's question and reduce it to plain text.
    # Without the formatting step the prompt would receive the repr of a list of Document objects
    # (metadata and escaped newlines included) instead of readable text.
    # The question itself passes through unchanged.
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}

    # Step 2: Use the prompt template to format the context and question into a structured prompt for the LLM.
    | prompt

    # Step 3: Pass the prompt to the large language model (LLM) to generate a response.
    | llm

    # Step 4: Parse the output from the LLM into a plain string that can be easily used or displayed.
    | StrOutputParser()
)

In [16]:
# Read the question interactively, then echo it so that the saved output records
# which question produced the answer shown below.
question = input("")
print(f"Question: {question}")

# Last expression of the cell: the chain's answer is displayed as the cell output.
chain.invoke(question)


OllamaEmbeddings: 100%|██████████| 1/1 [00:01<00:00,  1.12s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 32.76it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 40.05it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 45.00it/s]
OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 48.79it/s]


" According to the provided documents, it is best to avoid combining vitamin C and retinol at the same time as they may have adverse effects on the skin. Vitamin C functions best in the daytime, protecting against the assaults of the day like UV rays and pollution. On the other hand, retinol should be applied at night because it makes skin more photosensitive, increasing the risk of sun damage. When used together, they may increase the risk of overdrying, irritation, and decreasing the ability to absorb vitamin C due to their high pH levels. It's suggested to use salicylic acid in the morning and retinoid at night to minimize adverse effects."