In [1]:
!pip install numpy faiss-cpu sentence-transformers langchain beautifulsoup4 langchain_experimental

Collecting langchain_experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Downloading langchain_experimental-0.3.4-py3-none-any.whl (209 kB)
Installing collected packages: langchain_experimental
Successfully installed langchain_experimental-0.3.4


# Tests MRR and Recall@k

In [16]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain.embeddings import SentenceTransformerEmbeddings


from sentence_transformers import SentenceTransformer
import requests
import time
from bs4 import BeautifulSoup
import re
import numpy as np
import os
import psycopg as pg
from collections import defaultdict
import torch
import faiss

# Progress bar
from tqdm.auto import tqdm


In [6]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"]        = "1"
os.environ["MKL_NUM_THREADS"]        = "1"

# Connect to db
We need to use pgvector db for this experiments to analise how the hybrid search (that works with pgvector key word search) changes the metrics

In [6]:
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = os.getenv("DB_PORT", "5434")
DB_NAME = os.getenv("DB_NAME", "db")
DB_USER = os.getenv("DB_USER", "root")
DB_PASS = os.getenv("DB_PASS", "root")

connection_string = f"postgresql://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

db_conn = await pg.AsyncConnection.connect(conninfo=connection_string)


In [23]:
async def insert_embedding(link_id: str, chunk: str, embedding: list):
  """Inserts the embedding into the database."""

  try:
    query = '''INSERT INTO chunk (link_id, chunk, embedding, text_search_vector) 
    VALUES (%(link_id)s, 
    %(chunk)s, 
    %(embedding)s, 
    to_tsvector('english', %(chunk)s)
    );'''
    params = {
      "chunk": chunk,
      "embedding": embedding.tolist(),
      "link_id": link_id
    }
    
    await db_conn.execute(query, params)
    
    await db_conn.commit()
    
    return True
  except Exception as e:
    print(f"Error inserting chunk: {e}")
    await db_conn.rollback()
    
    return False

# Retrieve functions

In [28]:
async def semantic_retrieval(query_embedding: list, top_k: int = 25):
    '''
    Retrieve chunks with the highest cosine similarity to the query_embedding.
    '''
    
    # Ensure the embedding is a list of floats
    query_embedding = list(query_embedding)
    
    query = """
    SELECT chunk
    FROM chunk
    ORDER BY embedding <#> %s::vector
    LIMIT %s;
    """
    
    try:
        async with db_conn.cursor() as cursor:
            await cursor.execute(query, (query_embedding, top_k))
            rows = await cursor.fetchall()
            
            return [row[0] for row in rows]
    except Exception as e:
        print(f"Error retrieving related chunks: {e}")
        
        await db_conn.rollback()
        
        return []

async def key_word_retrieval(query: str, top_k: int = 25):
  """Performs a keyword search using the query and returns the top_k results."""

  sql = """
  SELECT chunk
  FROM chunk
  WHERE text_search_vector @@ plainto_tsquery('english', %s)
  LIMIT %s;
  """
  
  params = (query, top_k)
  
  try:
    async with db_conn.cursor() as cursor:
      await cursor.execute(sql, params)
      rows = await cursor.fetchall()
      
      # Return just the list of chunks
      return [row[0] for row in rows]
  except Exception as e:
    print(f"Error performing keyword search: {e}")
    
    db_conn.rollback()
    
    return None

In [39]:
async def hybrid_retrieval(query_text: str, embedding_model, top_k = 25) -> list:
  '''Get the embedded text from the vector database'''
  
  query_embedding = embedding_model.encode(query_text)
  semantic_retrieval_results = await semantic_retrieval(query_embedding, top_k=top_k)
  key_word_retrieval_results = await key_word_retrieval(query_text, top_k=top_k)
  combined_results = [*semantic_retrieval_results, *key_word_retrieval_results]
  # Remove duplicates
  combined_results = list(set(combined_results))[:top_k]

  return combined_results

In [45]:
def rrf_fusion(retriever_results, k=60):
  """
  Apply Reciprocal Rank Fusion (RRF) to the given lists of retrieved documents.

  Parameters:
      retriever_results (List[List[str]]): A list of lists of document strings from each retriever.
      k (int): RRF constant to control the weight decay for lower ranks.

  Returns:
      List[Tuple[str, float]]: List of tuples (document, rrf_score), sorted by score descending.
  """
  scores = defaultdict(float)

  for retriever_res in retriever_results:
      for rank, doc in enumerate(retriever_res):
          # Use rank+1 to make ranks 1-based
          scores[doc] += 1 / (k + rank + 1)

  # Sort documents by score, descending
  ranked_docs = sorted(scores.items(), key=lambda x: x[1], reverse=True)
  
  ranked_docs_without_scores = [doc[0] for doc in ranked_docs]
      
  return ranked_docs_without_scores

In [None]:
COHERE_API_KEY = ''

In [75]:
async def rerank_with_cohere(query: str, chunks: list):
  '''Rerank the chunks using Cohere'''
  
  # Calculate the number of chunks to return from cohere. Get the top 30% of the chunks but max 10 and min 5
  top_n = 10
  headers = {
      "Authorization": f"Bearer {COHERE_API_KEY}",
      "Content-Type": "application/json"
  }
  data = {
      "model": "rerank-v3.5",
      "query": query,
      "documents": chunks,
      "top_n": top_n
  }
  
  try:
    res = requests.post("https://api.cohere.com/v2/rerank", headers=headers, json=data)
    
    res.raise_for_status()
    
    res_json = res.json()
    results = res_json['results']
    ids_to_return = [ell['index'] for ell in results]
    reranked_chunks = [chunks[i] for i in ids_to_return]
  
    return reranked_chunks

  except Exception as e:    
    return chunks

In [None]:

OPENAI_API_KEY = ''
OPENAI_API_URL = "https://api.openai.com/v1/chat/completions"
OPENAI_MODEL = "gpt-4.1-nano-2025-04-14"

In [67]:
def query_expansion(original_query: str, thread: str) -> list[str]:
    """
    Generate multiple search queries based on the original query and the thread,
    using the OpenAI Chat Completions API via requests.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": OPENAI_MODEL,
        "messages": [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that generates multiple search queries "
                    "based on a single input query, being the email of the lead."
                )
            },
            {"role": "user", "content": f"For additional context, here is the conversation history: {thread}"},
            {
                "role": "user",
                "content": (
                    f"Generate multiple search queries related to: {original_query}. "
                    "OUTPUT (4 queries). Separate each query with a new line. "
                    "Do not add any other text or numbers before queries."
                )
            },
        ]
    }

    resp = requests.post(OPENAI_API_URL, headers=headers, json=payload)
    resp.raise_for_status()
    data = resp.json()

    # Extract the single message content and split into lines
    content = data["choices"][0]["message"]["content"]
    queries = [q.strip() for q in content.split("\n") if q.strip()]
    return queries

## Load data


In [7]:
def fetch_page(url):
  """Makes a request to the given URL using a random proxy. Falls back to default address if the proxy fails."""
  headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.107 Safari/537.36'}

  try:
    # response = requests.get(url, headers=headers, proxies={"http": proxy, "https": proxy})
    response = requests.get(url, headers=headers)

    response.raise_for_status()

    return response.text
  except Exception as e:
    print(f"error:", e)

def process_page_text_with_bs4_and_regex(html):
  """Processes the page text using BeautifulSoup and regex."""
  try:
    soup = BeautifulSoup(html, "lxml")
    elements_to_remove = soup(["script", "style", "noscript", 'footer'])
    for tag in elements_to_remove:
        tag.decompose()

    text = soup.get_text(separator="\n", strip=True)
    text = re.sub(r'\s+', ' ', text)

    return text
  except Exception as e:
    print(f"Error processing page text: {e}")
    return None

In [22]:
urls_to_scrape = ['https://aisdr.com/blog/', 'https://aisdr.com/', 'https://aisdr.com/ai-case-studies/', 'https://aisdr.com/book-demo/', 'https://ucu.edu.ua/en/', 'https://www.ycombinator.com/companies/aisdr', 'https://en.wikipedia.org/wiki/Artificial_intelligence']

In [23]:
pages = []

for url in urls_to_scrape:
  scrapped_html = fetch_page(url)
  page_text = process_page_text_with_bs4_and_regex(scrapped_html)
  pages.append(page_text)

## Metrics functions


In [10]:
def recall_at_k(retrieved_ids, ground_truth_ids, k):
    # 1. Take the top-k retrieved items
    top_k = retrieved_ids[:k]

    # 2. Check if any of those k items appear in the set of ground-truths
    has_hit = bool(set(top_k) & set(ground_truth_ids))

    # 3. Return 1 if there is at least one correct item in the top-k, else 0
    return int(has_hit)

def reciprocal_rank(retrieved_ids, ground_truth_ids):
    for idx, doc_id in enumerate(retrieved_ids, start=1):
        # As soon as we hit a correct doc, return 1 / its rank position
        if doc_id in ground_truth_ids:
            return 1.0 / idx
    # If we never find a correct doc, the reciprocal rank is zero
    return 0.0

def get_mean_metric(metrics):
  N = len(metrics)

  mean_metric = sum(metrics) / N

  return mean_metric

## Chunking functions


In [11]:
def recursive_text_splitter(text_to_split):
  text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=256,
    chunk_overlap=25,
    length_function=len,
    is_separator_regex=False,
  )

  splitted_text = text_splitter.split_text(text_to_split)

  return splitted_text

##  RAG improvements experiments


### all-mpnet-base-v2 with recursive text splitter


In [10]:
all_mpnet_base_v2_model_name = 'all-mpnet-base-v2'
all_mpnet_base_v2 = SentenceTransformer(all_mpnet_base_v2_model_name, device='cpu')

In [13]:
recursively_splitted_texts = [recursive_text_splitter(page) for page in pages]

In [14]:
chunks_amount = 0

for splitted_text in recursively_splitted_texts:
  chunks_amount += len(splitted_text)

print(chunks_amount)

140


In [15]:
combined_pages_chunks = [chunk for splitted_text in recursively_splitted_texts for chunk in splitted_text]

print(len(combined_pages_chunks))

140


In [None]:
emb_list = []
for chunk in tqdm(combined_pages_chunks, desc="Embedding chunks"):
    with torch.no_grad():
        emb = all_mpnet_base_v2.encode(
            chunk,
            batch_size=1,
            convert_to_numpy=True,
            show_progress_bar=False
        )
    emb_list.append(emb)

recursively_splitted_aisdr_embeddings = np.vstack(emb_list)
print("Embeddings:", recursively_splitted_aisdr_embeddings.shape)

Embedding chunks: 100%|██████████| 140/140 [00:07<00:00, 19.75it/s]

Embeddings: (140, 768)





## Save chunks to database

In [25]:
for i, chunk in enumerate(combined_pages_chunks):
  try:
    await insert_embedding(1, chunk, recursively_splitted_aisdr_embeddings[i])
  except Exception as e:
    print(f"Error inserting embedding: {e}")

In [33]:
question_ground_truth_pairs = qa_pairs = {
    "What are the main features of AiSDR for AI sales outreach?": "is AiSDR? Features Every tool you need for AI sales outreach Independent AI sales assistant An extra pair of hands for your sales growth Prospecting with AI Find leads with an appetite for your offer Our best AI emails Clients' favorite emails generated",
    "What kind of emails does AiSDR generate for end-to-end sales outreach?": "emails generated by AiSDR End-to-end AI Sales Outreach All your bases covered within one solution AI for HubSpot sales Make the best of your CRM data Speak with our AI Let AiSDR try and convince you to book a meeting with us Human or AI? See if you can",
    "What interactive game lets users spot AI-generated emails?": "or AI? See if you can spot emails that were AI-generated Play the game Inbound Lead nurturing Qualification and scoring Market research Outbound AI Lead Generation Pipeline management Human or AI? See if you can spot emails that were AI-generated Play the",
    "Where can users find resources like blog posts, guides, videos, and knowledge base articles for AiSDR?": "AI-generated Play the game AiSDR Resource Library Blog Guides Videos Knowledge Base Partnership Program Awards and Recognition About us AI Prospecting Database for Inc 5000 Companies Human or AI? See if you can spot emails that were AI-generated Play the",
    "How does AiSDR book meetings with ideal prospects through personalized conversations?": "AI-generated Play the game Backed by Book more, stress less with AiSDR AiSDR books meetings with your ideal prospects through personalized conversations via texts and emails that use their LinkedIn activity and your HubSpot data. Fuel your campaigns with",
    "How many contacts are in AiSDR’s lead database for fresh campaigns?": "Fuel your campaigns with fresh leads from our database of 700M+ contacts. Show me what AI can do for me 0 0 0 0 0 0 0 Trusted by our clients > Explore use cases Hire your first AI SDR that’s already trained to… Have a meaningful conversation AiSDR",
    "What does AiSDR analyze to catch a lead’s eye and make a strong pitch?": "conversation AiSDR analyzes a lead’s 3 most recent LinkedIn posts to catch the reader’s eye and make a strong pitch. It also sends timely and contextualized follow-ups, answers questions, addresses concerns, and even handles referrals and auto-replies",
    "What did users say about AiSDR’s auto-replies and booking performance?": "and auto-replies like a pro sales rep. It booked a call for me on the first day of using the platform! Nelly W. Supply Chain and S&OP Expert, GMDH Streamline Match the way you speak With the most detailed and flexible configuration on the market, you’ll",
    "How does AiSDR ensure every message aligns with your tone and brand voice?": "on the market, you’ll be confident that every word AiSDR says is in line with your tone and brand voice, and has your human touch. Our GTM Engineers are on stand-by 24/7 to tweak the AI for you, up the volume of meetings booked, and answer any questions",
    "What do users appreciate about the realism of AiSDR’s emails?": "and answer any questions about our AI-powered sales engagement platform. I like how realistic every email is. Not only is the email’s contents adjusted for each lead, but the sales emails even take into account the individual prospect’s personal interests",
    "How does AiSDR help find customers who match your ICP?": "personal interests and achievements. Natalia L. Director, VAS Find the right customers Dig through our database for extra lead generation to find people who match your ICP. Using your qualification criteria and our hiring and buying intent data, AiSDR",
    "After using hiring and buying intent data, what does AiSDR do with fitting prospects?": "intent data, AiSDR sales automation will then pursue interactions with prospects who are a good fit and flag them for your attention once they show interest. AiSDR is exactly what we needed to start up and run our cold outbound to book meetings. We’ve",
    "What benefit did Sequesto’s CMO mention about expanding outreach with AiSDR?": "to book meetings. We’ve been able to expand our outreach efforts to new markets in less time than it would’ve been had we hired a new sales rep. Lea Vanessa S. CMO, Sequesto Handle your HubSpot leads Using our native two-way integration, AiSDR",
    "How does AiSDR automatically sync with HubSpot lists and enroll new leads?": "integration, AiSDR automatically syncs with your active and static HubSpot lists and enrolls new leads into campaigns based on their sales journey and intent level. Without fail, empower sales SDRs to follow up with demo requests and trade show leads,",
    "What types of leads can AiSDR nurture and convert automatically?": "and trade show leads, nurture your inbound prospects, convert free trials into paying customers… The list is endless. Everything is handled by AI and we just select which clients should be receiving which update and the AI sends really great sales emails",
    "What outcomes did Danni S. mention AiSDR’s emails and personalized follow-ups achieved?": "great sales emails and personalized follow-up messages that make sense and share the value we offer really well so that it comes as a natural extension of their customer experience and brings us new cross-sell customer meetings. Danni S. Product Manager,",
    "What performance metrics does AiSDR claim regarding revenue and conversion rates?": "S. Product Manager, AXDRAFT Have a meaningful conversation Match the way you speak Find the right customers Handle your HubSpot leads … on full auto-pilot. Humanly sounding, humanly great. Score up to 15% more revenue through value 1-5% lead-to-demo CR",
    "What key metrics does AiSDR report for lead-to-demo conversion rate and open rates?": "1-5% lead-to-demo CR 68% open rate 100% of pipeline processed 5-8.5% reply rate Get started > Check out the platform Generate predictable pipeline without lifting a finger Set up an omnichannel outreach process AiSDR helps you get interactions with",
    "How does AiSDR customize each outbound message?": "get interactions with prospects via text and email. Every message created by our outbound sales software is 100% customized using the best sales tactics of 50+ SDR leaders, as well as a lead’s recent LinkedIn activity. Run sales prospecting globally,",
    "What features of AiSDR’s lead database help reduce bounce rates when prospecting globally?": "prospecting globally, confidently AiSDR’s built-in lead database contains the contact info of over 700M+ leads around the world, reducing your barrier to exploring new markets. Every address is triple-checked so that your bounce rate is < 5%. Your virtual",
    "In which languages can AiSDR conduct tech sales?": "is < 5%. Your virtual SDR can run your SDR tech sales in any language, including English, German, Chinese, and Arabic (to name a few). Jump ahead of your competition Time kills deals. Since 78% of new business goes to the company that responds first,",
    "What is AiSDR’s promised response time to new business inquiries?": "that responds first, AiSDR’s <10 minute time to respond puts your business in pole position for a new deal. AiSDR's automated inbox warm-up protects you from burning your domains and setting back your email outreach by 4-6 weeks. Send hyper-personalized",
    "What percentage of prospects expect personalization, according to AiSDR?": "Send hyper-personalized emails and texts 71% of prospects expect personalization. AiSDR’s virtual SDR tailors every message with the help of lead data and the prospect’s 3 most recent LinkedIn posts so that messages are relevant and resonate. Converse",
    "How does AiSDR ensure its messages don’t sound like AI?": "and resonate. Converse with leads naturally (without sounding like AI) AiSDR builds best practices from 50+ successful SDR sales leaders, as well as your writing style and tone, into your AI persona. This ensures that not only do your texts and emails get"
}

In [30]:
question = "How does AiSDR ensure its messages don’t sound like AI?"
question_embedding = all_mpnet_base_v2.encode(question, convert_to_numpy=True)
retrieved_chunks = await semantic_retrieval(question_embedding)

print("Retrieved chunks:")
for chunk in retrieved_chunks:
    print(chunk)

Retrieved chunks:
on the market, you’ll be confident that every word AiSDR says is in line with your tone and brand voice, and has your human touch. Our GTM Engineers are on stand-by 24/7 to tweak the AI for you, up the volume of meetings booked, and answer any questions
with leads AiSDR trains the AI individually for each client so you can ditch cookie-cutter sequences and speak with leads naturally Best AI messages on the market With AiSDR, you can confidently send emails and texts in your tone and brand voice (without
AiSDR also adds leads to your HubSpot, works with leads you’ve generated, and helps you get more to fuel your sales process. Our AI adjusts to their buying intent level and guides your leads to the next step in their sales cycle without overselling or
and resonate. Converse with leads naturally (without sounding like AI) AiSDR builds best practices from 50+ successful SDR sales leaders, as well as your writing style and tone, into your AI persona. This ensures that not

In [37]:
async def run_test(question_ground_truth_pairs, chunks):
  list_of_arr = []
  list_of_recall_at_10 = []
  list_of_recall_at_5 = []

  for question in question_ground_truth_pairs.keys():
    question_embedding = all_mpnet_base_v2.encode(question)
    question_embedding = np.array(question_embedding)

    retrieved_chunks = await semantic_retrieval(question_embedding, top_k=10)
    retrieved_ids = [chunks.index(chunk) for chunk in retrieved_chunks]

    ground_truth_chunk = question_ground_truth_pairs[question]
    #
    ground_truth_id = chunks.index(ground_truth_chunk)

    recall_at_10 = recall_at_k(retrieved_ids, [ground_truth_id], k=10)
    recall_at_5 = recall_at_k(retrieved_ids, [ground_truth_id], k=5)
    arr = reciprocal_rank(retrieved_ids, [ground_truth_id])

    list_of_arr.append(arr)
    list_of_recall_at_10.append(recall_at_10)
    list_of_recall_at_5.append(recall_at_5)

  mean_arr = get_mean_metric(list_of_arr)
  mean_recall_at_10 = get_mean_metric(list_of_recall_at_10)
  mean_recall_at_5 = get_mean_metric(list_of_recall_at_5)

  print("MRR: ",mean_arr)
  print("Mean Recall at 10: ", mean_recall_at_10)
  print("Mean Recall at 5: ", mean_recall_at_5)

In [38]:
print("Baseline metrics:")

await run_test(question_ground_truth_pairs, combined_pages_chunks)

Baseline metrics:
MRR:  0.3756944444444444
Mean Recall at 10:  0.6666666666666666
Mean Recall at 5:  0.5833333333333334


## Test with hybrid search

In [40]:
async def hybrid_retrieval_test(question_ground_truth_pairs, chunks):
  list_of_arr = []
  list_of_recall_at_10 = []
  list_of_recall_at_5 = []

  for question in question_ground_truth_pairs.keys():
    question_embedding = all_mpnet_base_v2.encode(question)
    question_embedding = np.array(question_embedding)

    retrieved_chunks = await hybrid_retrieval(question, all_mpnet_base_v2, top_k=10)
    retrieved_ids = [chunks.index(chunk) for chunk in retrieved_chunks]

    ground_truth_chunk = question_ground_truth_pairs[question]
    #
    ground_truth_id = chunks.index(ground_truth_chunk)

    recall_at_10 = recall_at_k(retrieved_ids, [ground_truth_id], k=10)
    recall_at_5 = recall_at_k(retrieved_ids, [ground_truth_id], k=5)
    arr = reciprocal_rank(retrieved_ids, [ground_truth_id])

    list_of_arr.append(arr)
    list_of_recall_at_10.append(recall_at_10)
    list_of_recall_at_5.append(recall_at_5)

  mean_arr = get_mean_metric(list_of_arr)
  mean_recall_at_10 = get_mean_metric(list_of_recall_at_10)
  mean_recall_at_5 = get_mean_metric(list_of_recall_at_5)

  print("MRR: ",mean_arr)
  print("Mean Recall at 10: ", mean_recall_at_10)
  print("Mean Recall at 5: ", mean_recall_at_5)

In [41]:
print("Hybrid search metrics:")

await hybrid_retrieval_test(question_ground_truth_pairs, combined_pages_chunks)

Hybrid search metrics:
MRR:  0.2264880952380952
Mean Recall at 10:  0.7083333333333334
Mean Recall at 5:  0.5416666666666666


In [None]:
async def hybrid_retrieval_test_with_rrf(question_ground_truth_pairs, chunks):
  list_of_arr = []
  list_of_recall_at_10 = []
  list_of_recall_at_5 = []

  for question in question_ground_truth_pairs.keys():
    question_embedding = all_mpnet_base_v2.encode(question)
    question_embedding = np.array(question_embedding)

    # retrieved_chunks = await hybrid_retrieval(question, all_mpnet_base_v2, top_k=10)
    semantically_retrieved_chunks = await semantic_retrieval(question_embedding, top_k=10)
    keyword_retrieved_chunks = await key_word_retrieval(question, top_k=10)    
    #print('Before RRF:', retrieved_chunks)
    
    retrieved_chunks = rrf_fusion([semantically_retrieved_chunks, keyword_retrieved_chunks])
    
    #print('After RRF:', retrieved_chunks)
    
    retrieved_ids = [chunks.index(chunk) for chunk in retrieved_chunks]

    ground_truth_chunk = question_ground_truth_pairs[question]
    #
    ground_truth_id = chunks.index(ground_truth_chunk)

    recall_at_10 = recall_at_k(retrieved_ids, [ground_truth_id], k=10)
    recall_at_5 = recall_at_k(retrieved_ids, [ground_truth_id], k=5)
    arr = reciprocal_rank(retrieved_ids, [ground_truth_id])

    list_of_arr.append(arr)
    list_of_recall_at_10.append(recall_at_10)
    list_of_recall_at_5.append(recall_at_5)

  mean_arr = get_mean_metric(list_of_arr)
  mean_recall_at_10 = get_mean_metric(list_of_recall_at_10)
  mean_recall_at_5 = get_mean_metric(list_of_recall_at_5)

  print("MRR: ",mean_arr)
  print("Mean Recall at 10: ", mean_recall_at_10)
  print("Mean Recall at 5: ", mean_recall_at_5)

In [65]:
print("Hybrid search with RFF metrics:")

await hybrid_retrieval_test_with_rrf(question_ground_truth_pairs, combined_pages_chunks)

Hybrid search with RFF metrics:
MRR:  0.48055555555555546
Mean Recall at 10:  0.7083333333333334
Mean Recall at 5:  0.625


We see a improvement using hybrid retrieval with RFF reranking
MRR: 0.38 -> 0.48 (26% improvement)
Recall@10: 0.67 -> 0.7 (3% improvement)
Recall@5: 0.58 -> 0.625 (7% improvement)

Let's run the experiment to test if the query expansion produces better results

In [69]:
async def query_expansion_experiment(question_ground_truth_pairs, chunks):
  list_of_arr = []
  list_of_recall_at_10 = []
  list_of_recall_at_5 = []
  
  thread = 'Hi Andrii, AiSDR is a sales engagement platform that uses AI to automate the process of reaching out to potential customers. It helps sales teams to find leads, engage with them, and ultimately close deals more efficiently. The platform offers features such as personalized email outreach, lead scoring, and analytics to track the performance of sales campaigns. AiSDR aims to streamline the sales process and improve conversion rates by leveraging AI technology.'

  for question in question_ground_truth_pairs.keys():
    query_expansion_results = query_expansion(question, thread)
    all_queries = [question, *query_expansion_results]
    
    list_of_lists_of_chunks = []
    
    for query in all_queries:
      question_embedding = all_mpnet_base_v2.encode(query)
      retrieved_chunks = await semantic_retrieval(question_embedding, top_k=10)
      keyword_retrieved_chunks = await key_word_retrieval(query, top_k=10)
      
      list_of_lists_of_chunks.append(retrieved_chunks)
      list_of_lists_of_chunks.append(keyword_retrieved_chunks)
      
      
    
    retrieved_chunks = rrf_fusion(list_of_lists_of_chunks)

    retrieved_ids = [chunks.index(chunk) for chunk in retrieved_chunks]

    ground_truth_chunk = question_ground_truth_pairs[question]
    #
    ground_truth_id = chunks.index(ground_truth_chunk)

    recall_at_10 = recall_at_k(retrieved_ids, [ground_truth_id], k=10)
    recall_at_5 = recall_at_k(retrieved_ids, [ground_truth_id], k=5)
    arr = reciprocal_rank(retrieved_ids, [ground_truth_id])

    list_of_arr.append(arr)
    list_of_recall_at_10.append(recall_at_10)
    list_of_recall_at_5.append(recall_at_5)

  mean_arr = get_mean_metric(list_of_arr)
  mean_recall_at_10 = get_mean_metric(list_of_recall_at_10)
  mean_recall_at_5 = get_mean_metric(list_of_recall_at_5)

  print("MRR: ",mean_arr)
  print("Mean Recall at 10: ", mean_recall_at_10)
  print("Mean Recall at 5: ", mean_recall_at_5)

In [70]:
print("Metrics with query expansion:")
await query_expansion_experiment(question_ground_truth_pairs, combined_pages_chunks)

Metrics with query expansion:
MRR:  0.4201651936026936
Mean Recall at 10:  0.6666666666666666
Mean Recall at 5:  0.4583333333333333


Even though the metrics dropped a bit, we filled the context with some related info that might be not the direct hit, but makes the generated info more full

In [76]:
async def cohere_reranking_experiment(question_ground_truth_pairs, chunks):
  list_of_arr = []
  list_of_recall_at_10 = []
  list_of_recall_at_5 = []
  
  thread = 'Hi Andrii, AiSDR is a sales engagement platform that uses AI to automate the process of reaching out to potential customers. It helps sales teams to find leads, engage with them, and ultimately close deals more efficiently. The platform offers features such as personalized email outreach, lead scoring, and analytics to track the performance of sales campaigns. AiSDR aims to streamline the sales process and improve conversion rates by leveraging AI technology.'

  for question in question_ground_truth_pairs.keys():
    query_expansion_results = query_expansion(question, thread)
    all_queries = [question, *query_expansion_results]
    
    list_of_lists_of_chunks = []
    
    for query in all_queries:
      question_embedding = all_mpnet_base_v2.encode(query)
      retrieved_chunks = await semantic_retrieval(question_embedding, top_k=10)
      keyword_retrieved_chunks = await key_word_retrieval(query, top_k=10)
      
      list_of_lists_of_chunks.append(retrieved_chunks)
      list_of_lists_of_chunks.append(keyword_retrieved_chunks)
      
    retrieved_chunks = rrf_fusion(list_of_lists_of_chunks)
    
    retrieved_chunks = await rerank_with_cohere(question, retrieved_chunks)

    retrieved_ids = [chunks.index(chunk) for chunk in retrieved_chunks]

    ground_truth_chunk = question_ground_truth_pairs[question]
    #
    ground_truth_id = chunks.index(ground_truth_chunk)

    recall_at_10 = recall_at_k(retrieved_ids, [ground_truth_id], k=10)
    recall_at_5 = recall_at_k(retrieved_ids, [ground_truth_id], k=5)
    arr = reciprocal_rank(retrieved_ids, [ground_truth_id])

    list_of_arr.append(arr)
    list_of_recall_at_10.append(recall_at_10)
    list_of_recall_at_5.append(recall_at_5)

  mean_arr = get_mean_metric(list_of_arr)
  mean_recall_at_10 = get_mean_metric(list_of_recall_at_10)
  mean_recall_at_5 = get_mean_metric(list_of_recall_at_5)

  print("MRR: ",mean_arr)
  print("Mean Recall at 10: ", mean_recall_at_10)
  print("Mean Recall at 5: ", mean_recall_at_5)

In [79]:
print("Metric with Cohere reranking:")
await cohere_reranking_experiment(question_ground_truth_pairs, combined_pages_chunks)

Metric with Cohere reranking:
MRR:  0.505324074074074
Mean Recall at 10:  0.7083333333333334
Mean Recall at 5:  0.5833333333333334


Additional reranking with Cohere gave the best metrics, compensating the noise produced by the query expansion