# Putting all of the Pieces Together

Now that we know how to evaluate semantic similarity using a database, we can proceed to an actual RAG implementation.

In [None]:
! pip install chromadb
! pip install ollama
! pip install nltk

In [46]:
import requests
import chromadb
import numpy as np
import uuid
import time
import nltk

from nltk.tokenize import PunktSentenceTokenizer

from openai import OpenAI
from typing import Dict, Any, List

from api_utils import load_api_params

In [47]:
# Download the punkt tokenizer models that will help us split our text into sentences.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/dmitrystrakovsky/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [48]:
with open('./heart_attack.txt', 'r', encoding='utf-8') as file:
    text = file.read()

In [50]:
# Break our story up into seperate sentances
tokenizer = PunktSentenceTokenizer()
sentences = tokenizer.tokenize(text)

print(f"Total sentences: {len(sentences)}")
print(sentences[40])

Total sentences: 108
Did the Jabberwock talk to the Vorpal Blade?


## Now that our sentences are split, we can generate embeddings for individual sentences

In [51]:
# Using Nomic model served locally via Ollama for embedding
# Ollama is a friend --> https://ollama.com/
def get_embeddings_from_ollama(text, model="nomic-embed-text"):
    url = "http://localhost:11434/api/embeddings"
    
    payload = {
        "model": model,
        "prompt": text
    }
    
    response = requests.post(url, json=payload)
    return np.array(response.json()["embedding"], dtype=np.float32)

In [52]:
embeddings = []
for sentence in sentences:
    embedding = get_embeddings_from_ollama(sentence)
    embeddings.append(embedding)

## And now, on to packing everything into a database

In [53]:
client = chromadb.PersistentClient(path="./chroma_db")

In [54]:
# Create a unique collection and add the embeddings to it

unique_collection_name = f"document_sentences_{int(time.time())}"

collection = client.get_or_create_collection(
    name=unique_collection_name,
    metadata={"hnsw:space": "cosine"}  # Using cosine similarity
)

# Generate IDs for each sentence
ids = [str(uuid.uuid4()) for _ in embeddings]

collection.add(
    ids=ids,
    embeddings=embeddings,
    documents=sentences
)

## And now ... bringing in the LLM and the full RAG experience with semantic similarity

In [55]:
# Load API parameters and initialize client

SECRETS_PATH = ".secrets.toml"

API_CALL_PARAMS = load_api_params(SECRETS_PATH)
client = OpenAI(
    base_url = API_CALL_PARAMS['API_URL'],
    api_key = API_CALL_PARAMS['API_KEY']
)

In [56]:
def generate_completion(model: str, messages: List[Dict[str, str]]) -> str:
    """Generate LLM output"""
    response = client.chat.completions.create(
        model=model, 
        messages=messages
    )
    return response.choices[0].message.content

In [57]:
USER_PROMPT = """Can you please tell me what is a heart attack."""

In [58]:
query_vector = get_embeddings_from_ollama(USER_PROMPT)

In [59]:
KNOWLEDGE =""

In [62]:
results = collection.query(
    query_embeddings=[query_vector],
    n_results=10
)

print("Query results for:", USER_PROMPT)
for i, (doc, distance) in enumerate(zip(results["documents"][0], results["distances"][0])):
    print(f"Result {i+1}: {doc}")
    KNOWLEDGE += str(doc)+"\n"

Query results for: Can you please tell me what is a heart attack.
Result 1: "This is a Heart Attack!
Result 2: And here, a Heart Attack means war."
Result 3: "I thought a heart attack was a medical condition," Alice replied, confused.
Result 4: We shall launch a Heart Attack immediately!"
Result 5: We will show her why they fear a Heart Attack above all else!"
Result 6: "It's always tea time during a Heart Attack!
Result 7: # Heart Attack: A Wonderland Tale

"Off with her head!"
Result 8: ATTACK!"
Result 9: "That's what they call it when the Heart kingdom launches an offensive."
Result 10: "Well," huffed the Queen of Hearts, "I suppose we could postpone the Heart Attack.


In [63]:
SYSTEM_PROMPT = f"""Answer all user questions to the best of your ability. Use the following text for reference:

{KNOWLEDGE}
"""

print(SYSTEM_PROMPT)

Answer all user questions to the best of your ability. Use the following text for reference:

"This is a Heart Attack!
And here, a Heart Attack means war."
"I thought a heart attack was a medical condition," Alice replied, confused.
We shall launch a Heart Attack immediately!"
We will show her why they fear a Heart Attack above all else!"
"It's always tea time during a Heart Attack!
# Heart Attack: A Wonderland Tale

"Off with her head!"
ATTACK!"
"That's what they call it when the Heart kingdom launches an offensive."
"Well," huffed the Queen of Hearts, "I suppose we could postpone the Heart Attack.
"This is a Heart Attack!
And here, a Heart Attack means war."
"I thought a heart attack was a medical condition," Alice replied, confused.
We shall launch a Heart Attack immediately!"
We will show her why they fear a Heart Attack above all else!"
"This is a Heart Attack!
And here, a Heart Attack means war."
"I thought a heart attack was a medical condition," Alice replied, confused.
We shal

In [64]:
messages = [
    {"role": "system", "content": f"""{SYSTEM_PROMPT}"""},
    {"role": "user", "content":f"""{USER_PROMPT}"""}
]
try:
    model = API_CALL_PARAMS['MODEL']
    LLM_output = generate_completion(model, messages)
except Exception as e:
    raise Exception(f"Error generating completion: {e}")

print(LLM_output)

In the context of the provided text, a "Heart Attack" refers to a war or an offensive launched by the Heart kingdom, not a medical condition. It's a term used by the Queen of Hearts and her kingdom to describe a military attack.

In the real world, however, a heart attack (also known as a myocardial infarction) is a serious medical condition that occurs when the blood flow to the heart is blocked, causing damage to the heart muscle. But in this Wonderland tale, it has a very different meaning!
