In [5]:
from langchain_community.embeddings.bedrock import BedrockEmbeddings

In [1]:
import os
import shutil
from langchain.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema.document import Document
from langchain.vectorstores.chroma import Chroma
import hashlib
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings


In [2]:
# Where the Chroma index is persisted and where the source PDFs are read from.
# Both can be overridden through environment variables so the notebook is not
# tied to one machine; the defaults are the original hard-coded locations.
# WARNING: with the defaults, CHROMA_PATH is the parent directory of DATA_PATH,
# so anything that deletes CHROMA_PATH recursively also deletes the PDFs.
CHROMA_PATH = os.environ.get("RAG_CHROMA_PATH", "/Users/joudi/Measuring_RAG_Effectiveness/")
DATA_PATH = os.environ.get("RAG_DATA_PATH", "/Users/joudi/Measuring_RAG_Effectiveness/data")

def get_embedding_function():
    """Return a new OllamaEmbeddings instance for the "nomic-embed-text" model."""
    return OllamaEmbeddings(model="nomic-embed-text")

In [4]:
def load_documents():
    """Load the documents found under DATA_PATH with PyPDFDirectoryLoader."""
    return PyPDFDirectoryLoader(DATA_PATH).load()

def split_documents(documents):
    """Split ``documents`` into chunks with RecursiveCharacterTextSplitter.

    The splitter is configured with a chunk size of 800 and an overlap of 80,
    both measured with ``len``, and with literal (non-regex) separators.

    Returns:
        Whatever ``split_documents`` on the splitter returns for ``documents``.
    """
    splitter_options = {
        "chunk_size": 800,
        "chunk_overlap": 80,
        "length_function": len,
        "is_separator_regex": False,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.split_documents(documents)

def calculate_chunk_ids(chunks):
    """Tag every chunk with an ID stored under ``metadata["id"]``.

    The ID has the form ``"<source>:<page>:<md5 of page_content>"``. A missing
    ``source`` or ``page`` entry falls back to ``"unknown_source"`` /
    ``"unknown_page"``. The chunks are modified in place.

    Args:
        chunks: iterable of objects exposing a ``metadata`` dict and a
            ``page_content`` string.

    Returns:
        The same ``chunks`` object that was passed in.
    """
    for chunk in chunks:
        metadata = chunk.metadata
        source = metadata.get("source", "unknown_source")
        page = metadata.get("page", "unknown_page")

        # Hash the UTF-8 bytes of the text so equal content yields equal IDs.
        encoded_text = chunk.page_content.encode("utf-8")
        content_hash = hashlib.md5(encoded_text).hexdigest()

        metadata["id"] = f"{source}:{page}:{content_hash}"

    return chunks

def clear_database():
    """Delete the persisted Chroma directory (CHROMA_PATH) if it exists.

    Raises:
        RuntimeError: if DATA_PATH is CHROMA_PATH itself or lies inside it.
            Removing CHROMA_PATH recursively would then also delete the source
            documents, so the function refuses instead of destroying them.
    """
    if not os.path.exists(CHROMA_PATH):
        return

    # Compare fully resolved paths so trailing slashes and symlinks do not
    # hide the fact that the data directory sits inside the DB directory.
    chroma_root = os.path.realpath(CHROMA_PATH)
    data_root = os.path.realpath(DATA_PATH)
    chroma_prefix = chroma_root.rstrip(os.sep) + os.sep
    if data_root == chroma_root or data_root.startswith(chroma_prefix):
        raise RuntimeError(
            f"Refusing to delete {CHROMA_PATH!r}: it contains the data "
            f"directory {DATA_PATH!r}. Point CHROMA_PATH at a dedicated "
            "directory before clearing the database."
        )

    shutil.rmtree(CHROMA_PATH)

def add_to_chroma(chunks):
    """Add the chunks that are not yet stored to the Chroma DB at CHROMA_PATH.

    Every chunk is first tagged by ``calculate_chunk_ids``. Chunks whose ID is
    already present in the store are skipped, and each remaining ID is added
    exactly once.

    Args:
        chunks: chunk documents exposing ``metadata`` and ``page_content``.
    """
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=get_embedding_function()
    )
    chunks_with_ids = calculate_chunk_ids(chunks)

    # include=[] requests no extra payload; only the "ids" entry is used here.
    existing_items = db.get(include=[])
    existing_ids = set(existing_items["ids"])
    print(f"Number of existing documents in DB: {len(existing_ids)}")

    # Keep the first chunk seen for each unseen ID. Two chunks share an ID only
    # when source, page and content all match, so dropping the repeat loses
    # nothing and avoids handing the store the same ID twice in one batch.
    new_chunks_by_id = {}
    for chunk in chunks_with_ids:
        chunk_id = chunk.metadata["id"]
        if chunk_id not in existing_ids:
            new_chunks_by_id.setdefault(chunk_id, chunk)
    new_chunks = list(new_chunks_by_id.values())

    if new_chunks:
        print(f"üëâ Adding new documents: {len(new_chunks)}")
        new_chunk_ids = list(new_chunks_by_id)
        # Add and persist once; the previous version repeated both calls and
        # therefore embedded every new chunk a second time.
        # NOTE(review): persist() is reportedly deprecated in newer Chroma
        # releases that persist automatically — confirm for the installed version.
        db.add_documents(new_chunks, ids=new_chunk_ids)
        db.persist()
        print("‚úÖ Documents successfully added and persisted.")
    else:
        print("‚úÖ No new documents to add")

def check_num_in_DB(path):
    """Print and return how many IDs the Chroma DB persisted at ``path`` holds.

    Any exception raised while opening or reading the store is caught, reported
    with ``print``, and turned into a return value of 0.

    Args:
        path: directory passed to Chroma as ``persist_directory``.

    Returns:
        The number of stored IDs, or 0 if an error occurred.
    """
    try:
        # Open the store located at `path`.
        store = Chroma(
            persist_directory=path,
            embedding_function=get_embedding_function(),
        )
        # include=[] requests no extra payload; only the "ids" entry is read.
        stored_ids = store.get(include=[])["ids"]
        num_existing = len(stored_ids)
        print(f"üìä Number of existing documents in DB: {num_existing}")
        return num_existing
    except Exception as e:
        print(f"‚ùå Error checking existing documents: {e}")
        return 0

In [5]:
# Ingestion: load the PDFs under DATA_PATH, split them into chunks, and add
# the chunks that are not yet stored to the Chroma DB.
documents = load_documents()
chunks = split_documents(documents)
add_to_chroma(chunks)

#check_num_in_DB(CHROMA_PATH)

Number of existing documents in DB: 2226
‚úÖ No new documents to add


In [6]:

# Prompt used by query_rag. It has two placeholders: {context} receives the
# retrieved chunk texts and {question} receives the user's query.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:

{context}

---

Answer the question based on the above context: {question}
"""


In [11]:
def query_rag(query_text: str):
    """Answer ``query_text`` with the "llama3" Ollama model plus retrieved context.

    The top 5 results of ``similarity_search_with_score`` against the Chroma DB
    at CHROMA_PATH are joined into a context block, inserted into
    PROMPT_TEMPLATE together with the question, and sent to the model. The
    response and the ``"id"`` metadata of the retrieved chunks are printed.

    Args:
        query_text: the question to answer.

    Returns:
        The model's response (the printed source IDs are not returned).
    """
    # Open the persisted vector store.
    db = Chroma(
        persist_directory=CHROMA_PATH,
        embedding_function=get_embedding_function(),
    )

    # Retrieve (document, score) pairs; the scores are not used afterwards.
    hits = db.similarity_search_with_score(query_text, k=5)
    retrieved_docs = [doc for doc, _score in hits]

    # Concatenate the chunk texts, separated by a horizontal-rule marker.
    passages = []
    for doc in retrieved_docs:
        passages.append(doc.page_content)
    context_text = "\n\n---\n\n".join(passages)

    # Fill the template with the context and the question.
    prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE).format(
        context=context_text,
        question=query_text,
    )

    # Ask the model.
    response_text = Ollama(model="llama3").invoke(prompt)

    # Collect the IDs of the chunks used as context (None when a chunk has none).
    sources = []
    for doc in retrieved_docs:
        sources.append(doc.metadata.get("id", None))

    print(f"Response: {response_text}\nSources: {sources}")

    return response_text

def query_without_context(query_text: str):
    """Send ``query_text`` to the "llama3" Ollama model with no retrieved context.

    Args:
        query_text: the question, passed to the model unchanged.

    Returns:
        The model's response.
    """
    # No prompt template and no retrieval: the raw question is the prompt.
    return Ollama(model="llama3").invoke(query_text)

In [14]:
# Ask the question through the RAG pipeline; query_rag prints the response
# together with the IDs of the retrieved chunks.
query_text = "What improvements have cellphones and associated apps brought to society and what serious concerns have arisen with the use of cellphones and apps?"
response_rag = query_rag(query_text)
#print(response_rag)

Response: Based on the provided context, here are some improvements that cellphones and associated apps have brought to society:

* Simplified user interfaces for faster device adoption
* Pre-installed mobile applications (default apps) that provide various features such as messaging, video calls, location services, and fingerprint recognition
* Ability to share devices with family members or others

However, the context also highlights some serious concerns that have arisen with the use of cellphones and apps:

* Lack of understanding among users about installation-time settings and their privacy implications
* Potential ongoing ramifications for user privacy during the entire life-cycle of the device
* Concerns around default apps logging user interactions and uploading data to servers, revealing personal habits and behaviors over time
* Issues with default app descriptions and settings not changing despite updates
* Distrust and tension may arise from various social contexts and nor

In [15]:
# Baseline for comparison: send the same question to the model without any
# retrieved context and print its answer.
response_without = query_without_context(query_text)
print(response_without)

Cellphones and associated apps have brought numerous benefits and improvements to society, but they have also raised several serious concerns. Here are some examples:

**Improvements:**

1. **Global connectivity**: Cellphones have made it possible for people to stay connected with friends, family, and colleagues across the globe.
2. **Information access**: With the internet at their fingertips, people can access vast amounts of information, learn new things, and make informed decisions.
3. **Convenience**: Cellphones have simplified many aspects of daily life, such as banking, shopping, and communication.
4. **Healthcare**: Mobile health (mHealth) apps have enabled remote healthcare services, telemedicine, and patient monitoring.
5. **Education**: Educational apps and online resources have made learning more accessible, engaging, and effective.
6. **Business**: Cellphones have transformed the way businesses operate, with mobile commerce, payment systems, and productivity tools.

**Conc