# RAG






In [48]:
! pip install cohere -q
! pip install wikipedia -qq # we'll get some wikipedia data
! pip install -qU langchain-text-splitters -qq

from langchain_text_splitters import RecursiveCharacterTextSplitter
import wikipedia
import cohere
# Cohere client shared by the embed, rerank and chat calls in this notebook.
# Get your free API key: https://dashboard.cohere.com/api-keys
co = cohere.ClientV2(api_key="")

In [None]:

# Download the Wikipedia article that serves as the knowledge source
article = wikipedia.page("Geoffrey_Hinton")
text = article.content

# Whitespace-separated tokens give a rough word count
word_count = len(text.split())
print(f"The text has roughly {word_count} words.")

In [None]:
# Chunking configuration: sizes are measured in characters (via `len`),
# and the separators are treated as plain strings rather than regexes
splitter_config = {
    "chunk_size": 512,
    "chunk_overlap": 50,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_config)

# Break the article into overlapping chunks and keep only their raw text
chunks_ = text_splitter.create_documents([text])
chunks = [document.page_content for document in chunks_]
print(f"The text has been broken down in {len(chunks)} chunks.")

### Embed every text chunk



In [None]:
# Because the texts being embedded are the chunks we are searching over,
# the embed calls for them use input_type="search_document".
# `model` is the embedding model name passed to every co.embed call.
model = "embed-english-v3.0"

def batch_embed(texts, batch_size=96):
    """Embed `texts` as search documents, sending them in batches.

    Args:
        texts: Sequence of strings to embed.
        batch_size: Maximum number of texts sent in a single embed request.

    Returns:
        A list with one float embedding per input text, in input order.
        An empty input yields an empty list without calling the API.
    """
    vectors = []
    for start in range(0, len(texts), batch_size):
        stop = start + batch_size
        reply = co.embed(
            model=model,
            texts=texts[start:stop],
            input_type="search_document",
            embedding_types=["float"],
        )
        # Append this batch's vectors in place, preserving order
        vectors += reply.embeddings.float
    return vectors

# Embed every chunk of the article and report how many vectors came back
embeddings = batch_embed(chunks)
embedding_count = len(embeddings)
print(f"TOTAL: {embedding_count} embeddings.")



We store the embeddings in a plain Python dictionary of NumPy arrays (`np.array()`); however, you can use a dedicated vector database such as Pinecone, Zilliz, or any other offering.

In [94]:

! pip install numpy -qq

In [95]:
import numpy as np
# In-memory stand-in for a vector database: chunk position -> embedding array
# e.g. { 0: array([...]), 1: array([...]), 2: array([...]), ... }
vector_database = dict(enumerate(map(np.array, embeddings)))

In [96]:
# User question; it is embedded for retrieval, passed to the reranker,
# and sent as the user message of the final chat request.
query = "What does the E. in Hintons name signify?"


In [None]:
# The text embedded here is the search query itself, so the input type is
# "search_query" rather than "search_document"
response = co.embed(
    model=model,
    texts=[query],
    input_type="search_query",
    embedding_types=["float"],
)
query_embedding = response.embeddings.float[0]

# Only preview the first 10 dimensions of the query vector
embedding_preview = query_embedding[:10] + ["..."]
print("query_embedding: ", embedding_preview)

### Retrieve the most relevant chunks from the vector database

We use cosine similarity between the query embedding and each chunk embedding to find the most similar chunks.

In [None]:
def cosine_similarity(a, b):
    """Return the cosine similarity between vectors `a` and `b`.

    Args:
        a: First vector (any 1-D sequence or array of numbers).
        b: Second vector, same length as `a`.

    Returns:
        The dot product of `a` and `b` divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined, so 0.0 is returned instead of NaN.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # A NaN score would sort last in np.argsort and therefore come out first
    # once the order is reversed, wrongly ranking a degenerate vector on top.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator

# Score every chunk embedding against the embedded user question
similarities = [
    cosine_similarity(query_embedding, chunk_embedding)
    for chunk_embedding in embeddings
]
print("similarity scores: ", similarities)

# Order all chunk indices from most to least similar
# (argsort is ascending, so the result is reversed)
sorted_indices = np.argsort(similarities)[::-1]

# The first 10 entries are the best matches
top_indices = sorted_indices[:10]
print("Here are the indices of the top 10 chunks after retrieval: ", top_indices)

# Look up the text of each selected chunk
top_chunks_after_retrieval = [chunks[position] for position in top_indices]
print("Here are the top 10 chunks after retrieval: ")
for passage in top_chunks_after_retrieval:
    print(f"== {passage}")

## Rerank the chunks retrieved from the vector database



In [None]:
# Ask the reranker to pick the 3 retrieved chunks most relevant to the query
response = co.rerank(
    model="rerank-english-v3.0",
    query=query,
    documents=top_chunks_after_retrieval,
    top_n=3,
)

# Each result carries the index of a chunk within the submitted list
reranked_positions = [result.index for result in response.results]
top_chunks_after_rerank = [
    top_chunks_after_retrieval[position] for position in reranked_positions
]

print("Here are the top 3 chunks after rerank: ")
for passage in top_chunks_after_rerank:
    print(f"== {passage}")

In [100]:
# System preamble: instructions about the task and the desired style for the
# output. It is sent as the "system" message of the chat request.
preamble = """
## Task & Context
You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.

## Style Guide
Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.
"""

In [None]:
# Retrieved documents: wrap each reranked chunk in the structure passed to
# co.chat via `documents`. The list is built from whatever the reranker
# returned instead of indexing positions 0-2 directly, so it no longer raises
# IndexError when fewer than three chunks are available.
documents = [
    {"data": {"title": f"chunk {i}", "snippet": chunk}}
    for i, chunk in enumerate(top_chunks_after_rerank)
]

# Get the model response, grounded on the documents above
response = co.chat(
    model="command-r-08-2024",
    messages=[
        {"role": "system", "content": preamble},
        {"role": "user", "content": query},
    ],
    documents=documents,
    temperature=0.3,
)

print("Final answer:")
print(response.message.content[0].text)