In [1]:
%pip install cohere sentence_transformers

Collecting cohere
  Downloading cohere-5.16.1-py3-none-any.whl.metadata (3.4 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere)
  Downloading fastavro-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.7 kB)
Collecting httpx-sse==0.4.0 (from cohere)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting types-requests<3.0.0,>=2.0.0 (from cohere)
  Downloading types_requests-2.32.4.20250611-py3-none-any.whl.metadata (2.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-many

In [3]:
import cohere
from google.colab import userdata
# Read the Cohere key from Colab Secrets so it never appears in the notebook
# source; do not print it, since cell outputs are saved with the notebook.
cohere_api_key = userdata.get('COHERE_API_KEY')
co = cohere.Client(api_key=cohere_api_key)



In [10]:
# Toy knowledge base: one short fact per entry. Each fact is wrapped in a
# {"text": ...} mapping, which is the shape handed to co.rerank below.
_fact_sentences = (
    "Ananya Roy is a 26 year-old unmarried girl",
    "Ananya lives in Bengaluru since 2023",
    "Ananya works in Deloitte",
    "Ananya is a Masters' graduate in Computer Science & Engineering",
    "Ananya is the only child of her parents",
    "Ananya is an introvert and prefers interacting with a few people. She likes reading books, music, playing the flute, drawing and watching movies",
    "Ananya has a research publication",
)
documents = [{"text": sentence} for sentence in _fact_sentences]


In [11]:
# Free-text question; passed to co.rerank as the `query` argument in the next cell.
query = "Give me information on Ananya's work"

In [12]:
# Score every document against the query with Cohere's reranker and keep the
# four highest-scoring entries.
rerank_request = {
    "model": 'rerank-english-v3.0',
    "query": query,
    "documents": documents,
    "top_n": 4,
}
results = co.rerank(**rerank_request)

# Raw response object; the next cell formats it for reading.
print(results)



In [13]:
# Pretty-printer for a rerank response.
def return_results(results, documents):
  """Print the rank, relevance score and document for each rerank hit.

  Args:
    results: Rerank response; its ``results`` attribute is iterated in order,
      and each item must expose ``relevance_score`` and ``index``.
    documents: The sequence the hits refer to; ``documents[item.index]`` is
      printed for every hit.

  Returns:
    None. Output goes to stdout, one blank line after each hit.
  """
  numbered_hits = enumerate(results.results)
  for idx, result in numbered_hits:
    # idx is zero-based, ranks are shown one-based.
    print(f"Rank: {idx+1}")
    print(f"Score: {result.relevance_score}")
    print(f"Document: {documents[result.index]}\n")

# Echo the query, then list the reranked hits. `documents` must be the same list
# that was passed to co.rerank, because return_results uses each result.index
# to look the entry up in it.
print(f"Query: {query}")
return_results(results, documents)

Query: Give me information on Ananya's work
Rank: 1
Score: 0.99823856
Document: {'text': 'Ananya works in Deloitte'}

Rank: 2
Score: 0.9933589
Document: {'text': "Ananya is a Masters' graduate in Computer Science & Engineering"}

Rank: 3
Score: 0.8236123
Document: {'text': 'Ananya is an introvert and prefers interacting with a few people. She likes reading books, music, playing the flute, drawing and watching movies'}

Rank: 4
Score: 0.753648
Document: {'text': 'Ananya Roy is a 26 year-old unmarried girl'}



# RAG with Reranking

### Load and Chunk Documents

First, we define the documents and prepare them as chunks. For this example, we reuse the same documents as before; each one is short enough to serve as its own chunk, so no actual splitting is performed.

In [14]:
# Source facts for the RAG walkthrough: one {"text": ...} record per fact.
documents = [
    {"text": "Ananya Roy is a 26 year-old unmarried girl"},
    {"text": "Ananya lives in Bengaluru since 2023"},
    {"text": "Ananya works in Deloitte"},
    {"text": "Ananya is a Masters' graduate in Computer Science & Engineering"},
    {"text": "Ananya is the only child of her parents"},
    {
        "text": "Ananya is an introvert and prefers interacting with a few people. "
                "She likes reading books, music, playing the flute, drawing and watching movies"
    },
    {"text": "Ananya has a research publication"},
]

# No splitting is done here: every document is used as a single chunk, and
# `chunked_documents` is simply another name for the same list. A real RAG
# pipeline would load from files or a database and apply a proper chunker.
chunked_documents = documents

### Create Embeddings

Now, we generate embeddings for the document chunks. We will use a pre-trained model from the `sentence-transformers` library for this purpose.

In [15]:
from sentence_transformers import SentenceTransformer

# Load the all-MiniLM-L6-v2 sentence encoder (its files are downloaded on first use).
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Pull the raw text out of every chunk, then encode the whole batch in one call.
chunk_texts = [doc["text"] for doc in chunked_documents]
document_embeddings = embedding_model.encode(chunk_texts)

# Sanity check on the shape of the result.
print(f"Shape of document embeddings: {document_embeddings.shape}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Shape of document embeddings: (7, 384)


### Set up Vector Store

We will use ChromaDB as an in-memory vector store to hold the document embeddings together with their corresponding texts.

In [18]:
import chromadb

# Initialize the ChromaDB client (in-memory vector store for this session).
client = chromadb.Client()

# Fetch the collection if it already exists, otherwise create it. Plain
# create_collection raises when this cell is re-run in the same kernel because
# "ananya_documents" is already present.
collection = client.get_or_create_collection("ananya_documents")

# upsert instead of add keeps the cell idempotent: on a re-run the same doc_<i>
# ids are overwritten rather than inserted a second time.
collection.upsert(
    ids=[f"doc_{i}" for i in range(len(chunked_documents))],  # One stable id per chunk
    embeddings=document_embeddings.tolist(),  # ChromaDB expects list of lists
    documents=[doc["text"] for doc in chunked_documents],
)

print(f"Number of documents in collection: {collection.count()}")

Number of documents in collection: 7


In [17]:
# NOTE(review): this cell was executed as In [17], i.e. before the
# `import chromadb` cell (In [18]) that is placed above it. On a top-to-bottom
# run it executes too late for that import.
%pip install chromadb

Collecting chromadb
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-none-any.whl.metadata (5.7 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.22.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Collecting opentelemetry-api>=1.2.0 (from chromadb)
  Downloading opentelemetry_api-1.35.0-py3-none-any.whl.metadata (1.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.35.0-py3-none-any.whl.metadata (2.4 kB)
Collecting opentelemetry-sdk>=1.2.0 (from chromadb)
  Downloading opentelemetry_sdk-1.35.0-py3-none-any.whl.metadata (1.5 k

### Retrieve Relevant Documents

Given a query, we retrieve the most relevant document chunks from the vector store based on similarity to the query embedding.

In [19]:
# Question for the RAG pipeline. Note that this rebinds the `query` name used
# by the earlier rerank demo.
query = "What is Ananya's profession and education?"

# Embed the question with the same encoder that produced the chunk vectors.
query_embedding = embedding_model.encode(query)

# Ask the collection for the five closest chunks to that single query vector.
search_args = {
    "query_embeddings": [query_embedding.tolist()],
    "n_results": 5,  # Retrieve top 5 results
}
retrieved_results = collection.query(**search_args)

# Only one query vector was sent, so its hits are the first entry under 'documents'.
print("Retrieved Documents:")
first_query_hits = retrieved_results['documents'][0]
for i, doc in enumerate(first_query_hits):
    print(f"Rank {i+1}: {doc}")

Retrieved Documents:
Rank 1: Ananya has a research publication
Rank 2: Ananya is a Masters' graduate in Computer Science & Engineering
Rank 3: Ananya is the only child of her parents
Rank 4: Ananya is an introvert and prefers interacting with a few people. She likes reading books, music, playing the flute, drawing and watching movies
Rank 5: Ananya works in Deloitte


### Rerank Retrieved Documents

We will use Cohere's reranking model to reorder the retrieved documents based on their relevance to the query.

In [20]:
# Plain list of the chunk texts returned for the single query that was sent
# (hence the [0]); list() makes an independent copy of that entry.
retrieved_documents_text = list(retrieved_results['documents'][0])

# Re-score the retrieved chunks against the query with Cohere's reranker.
reranked_results = co.rerank(
    query=query,
    documents=retrieved_documents_text,
    top_n=5,  # Rerank the top 5 retrieved documents
    model='rerank-english-v3.0'
)

# Reuse the return_results helper defined earlier in the notebook instead of
# repeating its print loop. Each result.index refers to a position in the list
# sent to the reranker, so that same list is passed here.
print("Reranked Documents:")
return_results(reranked_results, retrieved_documents_text)

Reranked Documents:
Rank: 1
Score: 0.99941385
Document: Ananya is a Masters' graduate in Computer Science & Engineering

Rank: 2
Score: 0.99193794
Document: Ananya works in Deloitte

Rank: 3
Score: 0.761893
Document: Ananya has a research publication

Rank: 4
Score: 0.7554566
Document: Ananya is an introvert and prefers interacting with a few people. She likes reading books, music, playing the flute, drawing and watching movies

Rank: 5
Score: 0.64579034
Document: Ananya is the only child of her parents



### Generate Response

Finally, we use a large language model (Gemini) to generate a response based on the reranked documents and the original query.

In [23]:
import google.generativeai as genai
from google.colab import userdata

# Look up the Gemini key in Colab Secrets and configure the SDK with it. A
# missing secret disables the generation step below instead of raising.
try:
    gemini_api_key = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=gemini_api_key)
except userdata.SecretNotFoundError:
    gemini_api_key = None
    print("Gemini API key not found. Please add it to Colab Secrets with the name 'GOOGLE_API_KEY'")

if gemini_api_key:
    # Model used to write the final answer.
    gemini_model = genai.GenerativeModel('gemini-2.5-pro')

    # Build the context: one chunk per line, in reranked order. Each
    # result.index is a position in retrieved_documents_text.
    ranked_chunks = (
        retrieved_documents_text[result.index]
        for result in reranked_results.results
    )
    context = "\n".join(ranked_chunks)

    # Context first, then the question.
    prompt = f"Based on the following information:\n{context}\n\nAnswer the following question: {query}"

    # Best-effort call: any failure (including reading response.text) is
    # reported as a message rather than stopping the notebook.
    try:
        response = gemini_model.generate_content(prompt)
        print("\nGenerated Response:")
        print(response.text)
    except Exception as e:
        print(f"An error occurred while generating the response: {e}")


Generated Response:
Based on the information provided:

*   **Profession:** Ananya works at Deloitte.
*   **Education:** Ananya has a Master's degree in Computer Science & Engineering.


In [22]:
import google.generativeai as genai

# Print the name of every model whose supported methods include 'generateContent'.
for m in genai.list_models():
  if 'generateContent' not in m.supported_generation_methods:
    continue  # skip models that do not list this method
  print(m.name)

models/gemini-1.5-pro-latest
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-05-20
models/gemini-2.5-flash
models/gemini-2.5-flash-lite-preview-06-17
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-preview-image-generation
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp