# RAG






In [48]:
! pip install cohere -q # we'll get some wikipedia data
! pip install wikipedia -qq
! pip install -qU langchain-text-splitters -qq

from langchain_text_splitters import RecursiveCharacterTextSplitter
import wikipedia
import cohere
co = cohere.ClientV2("") # Get your free API key: https://dashboard.cohere.com/api-keys

In [49]:

article = wikipedia.page('Geoffrey_Hinton')
text = article.content
print(f"The text has roughly {len(text.split())} words.")

The text has roughly 2291 words.


In [92]:
# Create basic configurations to chunk the text
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=196,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)

# Split the text into chunks with some overlap
chunks_ = text_splitter.create_documents([text])
chunks = [c.page_content for c in chunks_]
print(f"The text has been broken down in {len(chunks)} chunks.")

The text has been broken down in 115 chunks.


### Embed every text chunk



In [93]:
# Because the texts being embedded are the chunks we are searching over, we set the input type as search_doc
model = "embed-english-v3.0"

def batch_embed(texts, batch_size=96):
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        response = co.embed(
            texts=batch,
            model=model,
            input_type="search_document",
            embedding_types=['float']
        )
        all_embeddings.extend(response.embeddings.float)
    return all_embeddings

embeddings = batch_embed(chunks)
print(f"TOTAL: {len(embeddings)} embeddings.")

TOTAL: 115 embeddings.




We use a python dictionary using `np.array()` to store the embeddings however you can use Pinecone, Zillis or any other offering.

In [94]:

! pip install numpy -qq

In [95]:
import numpy as np
vector_database = {i: np.array(embedding) for i, embedding in enumerate(embeddings)}
# { 0: array([...]), 1: array([...]), 2: array([...]), ..., 10: array([...]) }

In [96]:
query = "What does the E. in Hintons name signify?"


In [101]:
# Because the text being embedded is the search query, we set the input type as search_query
response = co.embed(
    texts=[query],
    model=model,
    input_type="search_query",
    embedding_types=['float']
)
query_embedding = response.embeddings.float[0]
print("query_embedding: ", query_embedding[:10] + ["..."])

query_embedding:  [-0.040527344, 0.056488037, -0.018753052, 0.04434204, 0.01914978, -0.012611389, 0.022644043, -0.07165527, 0.019515991, 0.03640747, '...']


### Retrieve the most relevant chunks from the vector database

We use cosine similarity to find the most similar chunks

In [102]:
def cosine_similarity(a, b):
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Calculate similarity between the user question & each chunk
similarities = [cosine_similarity(query_embedding, chunk) for chunk in embeddings]
print("similarity scores: ", similarities)

# Get indices of the top 10 most similar chunks
sorted_indices = np.argsort(similarities)[::-1]

# Keep only the top 10 indices
top_indices = sorted_indices[:10]
print("Here are the indices of the top 10 chunks after retrieval: ", top_indices)

# Retrieve the top 10 most similar chunks
top_chunks_after_retrieval = [chunks[i] for i in top_indices]
print("Here are the top 10 chunks after retrieval: ")
for t in top_chunks_after_retrieval:
    print("== " + t)

similarity scores:  [0.4500334860991959, 0.13559572292793903, 0.4860002246235057, 0.1565135127099831, 0.14155310531493967, 0.4034877337600028, 0.2997154100141264, 0.13241436399228365, 0.11363173423445763, 0.40725143036338013, 0.11689773610942873, 0.12668441729257257, 0.43125451565105066, 0.13262501831355825, 0.08763032134075259, 0.05402140099064808, 0.2248387361064034, 0.4973361904507819, 0.15435095968226026, 0.14973796821981455, 0.07770539497942422, 0.14255262560867377, 0.42460580513546087, 0.1519891479701841, 0.0912597925737139, 0.1093380691315491, 0.3664181316009147, 0.3197753831824755, 0.34015310494593787, 0.3402271859804888, 0.06421820357873181, 0.4199617384515595, 0.1000726099767655, 0.3991104194983768, 0.35524473922358685, 0.30598889390294143, 0.13691185057723185, 0.060737921016011706, 0.0988810467290436, 0.42483969864347454, 0.2863442906978318, 0.30432680684362956, 0.11566861353297109, 0.36414068929569354, 0.29126973095529085, 0.1303286469472496, 0.07871657879218447, 0.06318730

## Rerank the chunks retrieved from the vector database



In [103]:
response = co.rerank(
    query=query,
    documents=top_chunks_after_retrieval,
    top_n=3,
    model="rerank-english-v3.0",
)

top_chunks_after_rerank = [top_chunks_after_retrieval[result.index] for result in response.results]

print("Here are the top 3 chunks after rerank: ")
for t in top_chunks_after_rerank:
    print("== " + t)

Here are the top 3 chunks after rerank: 
== Hinton's father was the entomologist Howard Hinton. His middle name comes from another relative, George Everest, the Surveyor General of India after whom the mountain is named. He is the nephew
== Hinton is the great-great-grandson of the mathematician and educator Mary Everest Boole and her husband, the logician George Boole. George Boole's work eventually became one of the foundations of
== Hinton is University Professor Emeritus at the University of Toronto. From 2013 to 2023, he divided his time working for Google (Google Brain) and the University of Toronto, before publicly


In [100]:
# preamble containing instructions about the task and the desired style for the output.
preamble = """
## Task & Context
You help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.

## Style Guide
Unless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.
"""

In [104]:
# retrieved documents
documents = [
    {"data": {"title": "chunk 0", "snippet": top_chunks_after_rerank[0]}},
    {"data": {"title": "chunk 1", "snippet": top_chunks_after_rerank[1]}},
    {"data": {"title": "chunk 2", "snippet": top_chunks_after_rerank[2]}},
  ]

# get model response
response = co.chat(
  model="command-r-08-2024",
  messages=[{"role" : "system", "content" : preamble},
            {"role" : "user", "content" : query}],
  documents=documents,
  temperature=0.3
)

print("Final answer:")
print(response.message.content[0].text)

Final answer:
Assuming you are referring to Hinton's surname, the 'E' stands for Everest, after George Everest, the Surveyor General of India.

Hinton is the great-great-grandson of the mathematician and educator Mary Everest Boole and her husband, the logician George Boole.
