In [None]:
# Install required libraries
!pip install python-terrier
!pip install transformers
!pip install gdown
!pip install rank-bm25
!pip install nltk
!pip install tqdm

Collecting python-terrier
  Downloading python_terrier-0.13.0-py3-none-any.whl.metadata (11 kB)
Collecting ir-datasets>=0.3.2 (from python-terrier)
  Downloading ir_datasets-0.5.9-py3-none-any.whl.metadata (12 kB)
Collecting wget (from python-terrier)
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyjnius>=1.4.2 (from python-terrier)
  Downloading pyjnius-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting ir-measures>=0.3.1 (from python-terrier)
  Downloading ir_measures-0.3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting pytrec-eval-terrier>=0.5.3 (from python-terrier)
  Downloading pytrec_eval_terrier-0.5.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (777 bytes)
Collecting dill (from python-terrier)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting chest (from python-terrier)
  Downloading chest-0.2.3.tar.gz (9.6 kB)
  Preparing metadata (setup.py)

In [None]:
# Download the dataset
!gdown 1HhgXzyEpsZNcenU9XhJuOYyDUKEzUse4
!unzip pir_data.zip

Downloading...
From (original): https://drive.google.com/uc?id=1HhgXzyEpsZNcenU9XhJuOYyDUKEzUse4
From (redirected): https://drive.google.com/uc?id=1HhgXzyEpsZNcenU9XhJuOYyDUKEzUse4&confirm=t&uuid=671bbf61-58d5-4199-b02a-db4f82fa62c6
To: /content/pir_data.zip
100% 3.30G/3.30G [00:45<00:00, 72.6MB/s]
Archive:  pir_data.zip
   creating: PIR_data/
  inflating: PIR_data/tags.csv       
  inflating: PIR_data/questions_with_answer.csv  
  inflating: PIR_data/questions.csv  
  inflating: PIR_data/comments.csv   
  inflating: PIR_data/users.csv      
  inflating: PIR_data/answers.csv    
   creating: PIR_data/answer_retrieval/
   creating: PIR_data/answer_retrieval/val/
  inflating: PIR_data/answer_retrieval/val/subset_data.jsonl  
  inflating: PIR_data/answer_retrieval/val/qrels.json  
   creating: PIR_data/answer_retrieval/train/
  inflating: PIR_data/answer_retrieval/train/subset_data.jsonl  
  inflating: PIR_data/answer_retrieval/train/qrels.json  
   creating: PIR_data/answer_retrieval/tes

In [None]:
# Import libraries
import json
import pandas as pd
import numpy as np
from tqdm import tqdm
from rank_bm25 import BM25Okapi
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import re
from sklearn.metrics import precision_score, recall_score, ndcg_score

In [None]:
# Initialize PyTerrier
import pyterrier as pt

# pt.started() / pt.init() are deprecated in recent PyTerrier releases; the pt.java
# namespace is the supported way to force Java to start early.
if not pt.java.started():
    pt.java.init()

  if not pt.started():


terrier-assemblies 5.11 jar-with-dependencies not found, downloading to /root/.pyterrier...
Done
terrier-python-helper 0.0.8 jar not found, downloading to /root/.pyterrier...
Done


Java started and loaded: pyterrier.java, pyterrier.terrier.java [version=5.11 (build: craig.macdonald 2025-01-13 21:29), helper_version=0.0.8]
java is now started automatically with default settings. To force initialisation early, run:
pt.java.init() # optional, forces java initialisation
  pt.init()


In [None]:
# Download required NLTK resources
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')  # For stopword removal
# English stopwords held as a set; read later by extract_keywords when filtering AboutMe tokens.
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# Load the validation questions and expose them under the column names PyTerrier expects
# (question id -> qid, question title -> query); user_id is carried along unchanged.
val_queries = (
    pd.read_json("PIR_data/answer_retrieval/val/subset_data.jsonl", lines=True)
    .loc[:, ['id', 'title', 'user_id']]
    .rename(columns={'id': 'qid', 'title': 'query'})
)

In [None]:
# Load relevance judgments (qrels): the JSON keys become qid, the values become docno,
# and every listed pair is labelled as relevant (label = 1).
val_qrels = (
    pd.read_json("PIR_data/answer_retrieval/val/qrels.json", orient='index')
    .reset_index()
    .set_axis(['qid', 'docno'], axis=1)
    .assign(label=1)
)

In [None]:
# Load subset_answers.json: the JSON keys become docno, the values become the answer text.
subset_answers = (
    pd.read_json("PIR_data/answer_retrieval/subset_answers.json", orient='index')
    .reset_index()
    .set_axis(['docno', 'answer'], axis=1)
)

In [None]:
# Inspect the data: print a heading followed by the first rows of each loaded frame
for heading, frame in (
    ("\nValidation Data:", val_queries),
    ("\nValidation Qrels:", val_qrels),
    ("\nSubset Answers:", subset_answers),
):
    print(heading)
    print(frame.head())


Validation Data:
               qid                                              query  user_id
0  academia_143743      on answering a question that no one has asked  1582241
1  academia_148899  how much domain expertise and network does a s...   935589
2      anime_56513  does overhaul need to touch with his hands to ...    59256
3      anime_59459          why did kanon reincarnate in another race    59256
4     apple_408963      how do i disallow screen sharing for messages   331923

Validation Qrels:
               qid            docno  label
0  academia_143743  academia_143753      1
1  academia_148899  academia_148936      1
2      anime_56513      anime_56712      1
3      anime_59459      anime_59463      1
4     apple_408963     apple_408965      1

Subset Answers:
          docno                                             answer
0  writers_2010  tldrif youre going to do present tense do it f...
1  writers_2018  your writing style is streamofconsciousness wh...
2  writers_20

### Preprocessing
We used the regex library to clean the dataset by removing excess whitespace, lowercasing text, and eliminating special characters. Other preprocessing methods were avoided due to undesired output.

In [None]:
def clean_text(text):
  """Normalise raw text for indexing and querying.

  Lowercases the text, deletes every character that is not an ASCII letter,
  digit or whitespace (this also removes backslashes, so no separate pass is
  needed), collapses whitespace runs to a single space and trims the ends.

  Args:
    text: The value to clean. Missing values (None or float NaN) yield an empty
      string; any other non-string value is converted with str() first.

  Returns:
    The cleaned string.
  """
  if not isinstance(text, str):
    # Missing cells reach .apply() as None/NaN and would otherwise raise on .lower().
    if text is None or (isinstance(text, float) and text != text):
      return ""
    text = str(text)
  text = text.lower()
  # Punctuation is deleted, not replaced by a space, so "stream-of-consciousness"
  # becomes "streamofconsciousness".
  text = re.sub(r"[^a-zA-Z0-9\s]", "", text)
  text = re.sub(r"\s+", " ", text)
  return text.strip()

In [None]:
# Replace the raw query / answer text with its cleaned form
val_queries = val_queries.assign(query=val_queries['query'].map(clean_text))
subset_answers = subset_answers.assign(answer=subset_answers['answer'].map(clean_text))

In [None]:
# Display the preprocessed data: a heading followed by the first rows of each frame
for heading, frame in (
    ("\nPreprocessed Validation Data:", val_queries),
    ("\nPreprocessed Subset Answers:", subset_answers),
):
    print(heading)
    print(frame.head())


Preprocessed Validation Data:
               qid                                              query  user_id
0  academia_143743      on answering a question that no one has asked  1582241
1  academia_148899  how much domain expertise and network does a s...   935589
2      anime_56513  does overhaul need to touch with his hands to ...    59256
3      anime_59459          why did kanon reincarnate in another race    59256
4     apple_408963      how do i disallow screen sharing for messages   331923

Preprocessed Subset Answers:
          docno                                             answer
0  writers_2010  tldrif youre going to do present tense do it f...
1  writers_2018  your writing style is streamofconsciousness wh...
2  writers_2023  place emphasis on uncomfortable things dependi...
3  writers_2026  the answer to this depends a lot on what youre...
4  writers_2095  short answer read a book on writing stand up c...


### Indexing


In [None]:
import os
from pyterrier.measures import *

pqa_index_path = './se-pqa'
# data.properties is the file whose presence marks an index that was already built
pqa_properties_path = pqa_index_path + '/data.properties'

if os.path.exists(pqa_properties_path):
  # Reuse the index from a previous run
  index_ref = pt.IndexRef.of(pqa_properties_path)
else:
  # Build the index from the cleaned answers, one record per answer.
  # NOTE(review): docno metadata length is left at the indexer default; confirm no docno exceeds it.
  indexer = pt.index.IterDictIndexer(pqa_index_path, overwrite=True, fields=['docno', 'answer'], text_attrs=['answer'])
  index_ref = indexer.index(subset_answers.to_dict(orient='records'))

index = pt.IndexFactory.of(index_ref)

07:56:08.262 [ForkJoinPool-1-worker-3] WARN org.terrier.structures.indexing.Indexer -- Indexed 2 empty documents


In [None]:
print(index.getCollectionStatistics().toString())

Number of documents: 9398
Number of terms: 99151
Number of postings: 717355
Number of fields: 1
Number of tokens: 1065591
Field names: [answer]
Positions:   false



# Phase I: Baseline Retrieval and Neural Reranking

### Baseline Retrieval
BM25 was employed as the primary retrieval model to fetch an initial set of relevant documents.

In [None]:
# Initialize BM25
# pt.BatchRetrieve is deprecated in recent PyTerrier releases; pt.terrier.Retriever is its
# replacement and takes the same arguments.
bm25 = pt.terrier.Retriever(index, wmodel="BM25")

  bm25 = pt.BatchRetrieve(index, wmodel="BM25")


### Neural Reranking
 A cross-encoder model was used to refine and reorder the retrieved results for improved accuracy.

 The purpose of this function is to re-score these documents based on how well they match the query using a pre-trained neural network model called a cross-encoder.

In [None]:
# Neural Reranking (Cross-Encoder)
_CROSS_ENCODER_NAME = 'cross-encoder/ms-marco-MiniLM-L-6-v2'
# model name -> (tokenizer, model); filled on first use so the weights are loaded only once
_CROSS_ENCODER_CACHE = {}


def _load_cross_encoder(model_name=_CROSS_ENCODER_NAME):
    """Return the (tokenizer, model) pair for model_name, loading it on first use only."""
    if model_name not in _CROSS_ENCODER_CACHE:
        _CROSS_ENCODER_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSequenceClassification.from_pretrained(model_name),
        )
    return _CROSS_ENCODER_CACHE[model_name]


def rerank_with_cross_encoder(query, documents, batch_size=16):
    """Score each document against the query with the MS MARCO MiniLM cross-encoder.

    Args:
        query: Query text.
        documents: List of document texts to score.
        batch_size: Number of (query, document) pairs sent through the model at once.

    Returns:
        A list of floats (the raw model logits), one per document and in the same
        order as documents. An empty input returns an empty list without touching
        the model.
    """
    if len(documents) == 0:
        return []

    # Previously the tokenizer and model were re-created on every call, which
    # dominated the run time because this function is called once per query.
    tokenizer, model = _load_cross_encoder()

    scores = []
    for start in range(0, len(documents), batch_size):
        batch_documents = documents[start:start + batch_size]
        inputs = tokenizer(
            [query] * len(batch_documents),
            batch_documents,
            padding=True,
            truncation=True,
            return_tensors="pt",
            max_length=512,
        )
        # Inference only: skip gradient bookkeeping
        with torch.no_grad():
            scores.extend(model(**inputs).logits.flatten().tolist())

    return scores

In [None]:
def neural_reranker(retrieved):
    """Re-score retrieved answers with the cross-encoder.

    Args:
        retrieved: DataFrame with at least the columns 'qid', 'query' and 'docno'
            (one row per retrieved document).

    Returns:
        DataFrame with columns ['qid', 'docno', 'score', 'query'], where score is
        the cross-encoder output. Documents whose answer text is missing or blank
        get a score of 0. An empty input returns an empty frame with the same columns.
    """
    output_columns = ['qid', 'docno', 'score', 'query']
    if len(retrieved) == 0:
        # pd.concat([]) raises, so handle "nothing retrieved" explicitly
        return pd.DataFrame(columns=output_columns)

    # One dictionary lookup per document instead of scanning subset_answers for every row.
    # Assumes docno values are unique in subset_answers (last one wins otherwise).
    answer_by_docno = dict(zip(subset_answers['docno'], subset_answers['answer']))

    reranked_frames = []
    # sort=False keeps queries in the order they arrive in
    for qid, group in retrieved.groupby('qid', sort=False):
        query = group['query'].iloc[0]
        docnos = group['docno'].tolist()
        texts = [answer_by_docno.get(docno, "") for docno in docnos]

        # Only non-blank answers are sent to the model; the rest keep the default score
        scorable = [pos for pos, text in enumerate(texts) if isinstance(text, str) and text.strip()]
        scores = [0.0] * len(docnos)
        if scorable:
            # Score all candidates of a query in one call so they are batched together
            model_scores = rerank_with_cross_encoder(query, [texts[pos] for pos in scorable])
            for pos, model_score in zip(scorable, model_scores):
                scores[pos] = model_score

        reranked_frames.append(
            pd.DataFrame({'qid': qid, 'docno': docnos, 'score': scores, 'query': query})
        )

    return pd.concat(reranked_frames, ignore_index=True)

In [None]:
# Two-stage system: the top 20 BM25 candidates per query are re-scored by the cross-encoder
pipeline = (bm25 % 20) >> pt.apply.generic(neural_reranker)

# Inputs for the PyTerrier experiment
queries = val_queries[['qid', 'query']]
qrels = val_qrels[['qid', 'docno', 'label']]

# Compare plain BM25 against BM25 followed by neural reranking
systems = [bm25, pipeline]
system_names = ['BM25', 'BM25 + Neural Reranker']
metrics = ['map', 'ndcg', 'P_10', 'recall']

experiment = pt.Experiment(systems, queries, qrels, eval_metrics=metrics, names=system_names)

# Display the experiment results
print("\nExperiment Results:")
print(experiment)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]


Experiment Results:
                     name       map      ndcg      P_10       R@5      R@10  \
0                    BM25  0.614908  0.681505  0.078571  0.693878  0.785714   
1  BM25 + Neural Reranker  0.751172  0.770690  0.081633  0.785714  0.816327   

       R@15      R@20      R@30     R@100     R@200     R@500    R@1000  
0  0.806122  0.836735  0.836735  0.897959  0.918367  0.938776  0.948980  
1  0.826531  0.836735  0.836735  0.836735  0.836735  0.836735  0.836735  


In [None]:
# Print baseline and reranked scores
query = val_queries.iloc[0]['query']
baseline_results = bm25.search(query)
print("Baseline Results:")
print(baseline_results.head())

# Rerank the top documents
top_docs = baseline_results['docno'].tolist()[:10]
# Fetch the answers in BM25 rank order. Filtering with .isin() returns rows in the storage
# order of subset_answers, which paired each printed docno with another document's score.
answers_by_docno = subset_answers.drop_duplicates('docno').set_index('docno')['answer']
top_answers = answers_by_docno.loc[top_docs].tolist()
reranked_scores = rerank_with_cross_encoder(query, top_answers)

print("\nReranked Scores:")
for docno, score in zip(top_docs, reranked_scores):
    print(f"Document: {docno}, Score: {score}")

Baseline Results:
  qid  docid             docno  rank      score  \
0   1    284  workplace_102543     0  16.004154   
1   1   2513  philosophy_23588     1  14.058121   
2   1    471       travel_9601     2  13.907111   
3   1   8336    buddhism_10876     3  13.775562   
4   1    316  workplace_181826     4  13.450300   

                                           query  
0  on answering a question that no one has asked  
1  on answering a question that no one has asked  
2  on answering a question that no one has asked  
3  on answering a question that no one has asked  
4  on answering a question that no one has asked  

Reranked Scores:
Document: workplace_102543, Score: 4.587680816650391
Document: philosophy_23588, Score: 0.32981041073799133
Document: travel_9601, Score: -6.2400712966918945
Document: buddhism_10876, Score: -1.0265294313430786
Document: workplace_181826, Score: -2.820413589477539
Document: academia_13612, Score: -3.0258612632751465
Document: buddhism_64, Score: -2.





*   The highest reranked score is for workplace_102543, which was also the top-ranked document by BM25.
*   Many documents have negative scores. The cross-encoder outputs raw, unnormalized logits, so a negative score means the reranker considers the document unlikely to be relevant to the query.





## Phase II: Query Expansion and Personalization

In [None]:
# Load CSV files (users and tags)
users = pd.read_csv("PIR_data/users.csv")
print("Users Data:\n")
print(users.head())

# Read tags.csv with the tolerant Python engine, skipping lines that cannot be parsed
try:
    tags = pd.read_csv(
        "PIR_data/tags.csv",
        delimiter=",",
        encoding="utf-8",
        on_bad_lines="skip",
        engine="python"
    )
    print("File read successfully!\n")
    print("Tags Data:\n")
    print(tags.head())
except pd.errors.ParserError as e:
    print(f"Error reading CSV: {e}")
    # Keep the name defined so later cells do not fail with NameError;
    # personalize_query treats tags_data=None as "no tags available".
    tags = None

Users Data:

   Id  Reputation             CreationDate   DisplayName  \
0  -1           1  2010-11-18T19:05:26.543     Community   
1   1         101  2010-11-18T20:21:51.560  Geoff Dalgas   
2   2         101  2010-11-18T20:24:29.687  Jarrod Dixon   
3   3         101  2010-11-18T20:24:35.210        Emmett   
4   4         101  2010-11-18T20:25:14.693           Jin   

                                             AboutMe  Views  UpVotes  \
0  <p>Hi, I'm not really a person.</p>\n<p>I'm a ...    151     5590   
1  <p>Dev #2 who helped create Stack Overflow cur...    807        0   
2  <p>Former <a href="http://blog.stackoverflow.c...      6        5   
3  <p>co-founder of <a href="https://airtable.com...      5        1   
4  <p>I used to design stuff for Stack Exchange. ...     10        1   

   DownVotes  AccountId  
0       4155       -1.0  
1          0        2.0  
2          0        3.0  
3          0     1998.0  
4          0    21721.0  
File read successfully!

Tags Data:



### Query Expansion
Utilizing a pretrained model to enhance user queries with additional relevant terms based on context.

In [None]:
from transformers import pipeline

# Initialize the query expansion pipeline
# Created once at module level and reused by every expand_query call.
query_expansion_pipeline = pipeline("text2text-generation", model="google/flan-t5-base")

def expand_query(query, user_id=None, user_data=None):
    """Ask the text2text model to expand a query, optionally conditioned on a user profile.

    Args:
        query: Query text to expand.
        user_id: Optional user identifier, matched against user_data["Id"].
        user_data: Optional DataFrame with "Id" and "AboutMe" columns.

    Returns:
        The text generated by query_expansion_pipeline. When user_id is given but
        no matching row exists in user_data, a warning is printed and the original
        query is returned unchanged.
    """
    prompt = f"Expand the query '{query}'"

    if user_id is not None and user_data is not None:
        about_me = user_data.loc[user_data["Id"] == user_id, "AboutMe"]
        if about_me.empty:
            print(f"Warning: No user info found for user ID {user_id}. Using original query.")
            return query  # Fallback to original query

        user_info = about_me.iloc[0]
        if isinstance(user_info, str):
            # AboutMe holds HTML; keep only the visible text so markup does not leak into the prompt
            user_info = " ".join(re.sub(r"<[^>]+>", " ", user_info).split())
        else:
            user_info = ""  # NaN / missing profile

        # Only mention interests when there is something to mention; an empty profile
        # previously produced a prompt ending in a dangling "interests: ".
        if user_info:
            prompt = f"Expand the query '{query}' for a user with interests: {user_info}"

    return query_expansion_pipeline(prompt)[0]["generated_text"]

# Example usage
original_query = val_queries.iloc[0]['query']
expanded_query = expand_query(original_query)
print(f"Original Query: {original_query}")
print(f"Expanded Query: {expanded_query}")

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cpu


Original Query: on answering a question that no one has asked
Expanded Query: On answering a question that no one has asked, I'm asking you to answer a


### Personalization
Integrated user data from the 'About Me' column and the tags dataframe to refine search results.

In [None]:
# Preprocess the AboutMe field to extract keywords
def extract_keywords(text):
    """Extract lowercase keyword tokens from a user's AboutMe HTML.

    HTML tags are stripped, character entities are decoded, the text is lowercased
    and split into alphanumeric tokens, and English stopwords are dropped.

    Args:
        text: AboutMe value; anything that is not a string (NaN, None, numbers)
            yields an empty list.

    Returns:
        List of keyword strings in order of appearance (duplicates are kept).
    """
    if not isinstance(text, str):
        return []

    import html  # local import: only needed here, for entity decoding

    # Remove HTML tags. The earlier version only normalised whitespace, so every
    # word glued to a tag or punctuation mark ("<p>hi,", "person.</p>") failed the
    # isalnum() test and was silently discarded.
    text = re.sub(r"<[^>]+>", " ", text)
    text = html.unescape(text).lower()

    # Runs of letters/digits (Unicode-aware, underscore excluded) are the tokens
    tokens = re.findall(r"[^\W_]+", text)
    return [word for word in tokens if word not in stop_words]

# Add a column for user keywords
users['keywords'] = users['AboutMe'].apply(extract_keywords)

def personalize_query(query, user_id, user_data, tags_data, max_tags=None):
    """Personalizes a query based on user data and tags.

    Every tag whose name contains one of the user's keywords (case-insensitive
    substring match) is appended to the query.

    Args:
        query: Original query text.
        user_id: Identifier matched against user_data["Id"].
        user_data: DataFrame with "Id" and "keywords" columns, where keywords
            holds a list of strings per user.
        tags_data: DataFrame with a "TagName" column, or None to disable tag expansion.
        max_tags: Optional cap on the number of appended tags. None (the default)
            appends every match.

    Returns:
        The query followed by the matched tag names, separated by spaces. The
        query is returned unchanged when the user is unknown (a warning is
        printed), when no tags are available, or when nothing matches.
    """
    user_info = user_data[user_data["Id"] == user_id]
    if user_info.empty:
        print(f"Warning: No user info found for ID {user_id}. Returning original query.")
        return query  # Return the original query if no user info is found

    user_keywords = user_info["keywords"].iloc[0]
    if not isinstance(user_keywords, list):
        user_keywords = []

    if tags_data is None or not user_keywords:
        return query

    tag_names = tags_data['TagName']
    # A dict keeps first-seen order while removing duplicates. list(set(...)) gave
    # a different tag order, and therefore a different query, from run to run.
    matched_tags = {}
    for keyword in user_keywords:
        # regex=False: keywords are plain text and must not be read as patterns
        is_match = tag_names.str.contains(keyword, na=False, case=False, regex=False)
        matched_tags.update(dict.fromkeys(tag_names[is_match]))

    user_tags = list(matched_tags)
    if max_tags is not None:
        user_tags = user_tags[:max_tags]

    if not user_tags:
        return query  # avoid returning the query with a trailing space

    return query + " " + " ".join(user_tags)

# Example usage
user_id_to_expand = users['Id'][0]
personalized_query = personalize_query(original_query, user_id_to_expand, users, tags)
print(f"Original Query: {original_query}")
print(f"Personalized Query: {personalized_query}")

Original Query: on answering a question that no one has asked
Personalized Query: on answering a question that no one has asked targeting community-aquarium cold-weapons Water heating includes all methods to raise the temperature of a particular water resource.For questions about space-heating please use heating old-west harold-bloom pokemon-zeta-omicron old-2021 david-gerrold Salt is an indie game available on Steam. The game was developed by Lavaboots Studios. Salt is an open world singleplayer game, with RPG and mainly survival elements. In the game you spawn in a procedurally generated world and try to survive and explore islands around you with your boat to get new items. dive-sites pokemon-go poker-night-at-the-inventory the-book-of-lost-things matthew-arnold motion-processor pokemon-tcg-online tf2-soldier opening-hours Day watch is a novel from Russian authors Sergey Lukyanenko and Vladimir Vasilyev. It is part of the Night watch cycle (it has its own tag) which is mostly writte

### Recommender System
Used TF-IDF and cosine similarity to generate recommender scores. The recommender scores were computed by transforming the subset of answers into TF-IDF vectors and measuring similarity with user queries. The system retrieved top relevant documents and combined these scores with retrieval rankings.

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Initialize TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit TF-IDF on the subset_answers
# Row i of tfidf_matrix corresponds to the i-th row of subset_answers['answer'].
tfidf_matrix = tfidf_vectorizer.fit_transform(subset_answers['answer'])

def get_recommender_scores(query, user_id, docnos):
    """Generates recommender scores based on TF-IDF and cosine similarity.

    Args:
        query: Query text, vectorised with the fitted tfidf_vectorizer.
        user_id: Accepted for interface compatibility; not used by the scoring.
        docnos: Sequence of document ids to score.

    Returns:
        1-D numpy array of cosine similarities, one per docno found in
        subset_answers and in the same order as docnos. Unknown docnos are
        skipped; if none are known, an empty array is returned.
    """
    # Convert docnos to row positions of tfidf_matrix, keeping the caller's order.
    # The .isin() filter used before returned rows in storage order, so scores did
    # not line up with docnos, and it used index labels as matrix positions.
    positions = pd.Index(subset_answers['docno']).get_indexer_for(list(docnos))
    positions = positions[positions >= 0]  # -1 marks a docno that is not in subset_answers
    if len(positions) == 0:
        return np.array([])

    query_vector = tfidf_vectorizer.transform([query])
    doc_vectors = tfidf_matrix[positions]
    return cosine_similarity(query_vector, doc_vectors).flatten()

def combine_scores(retrieval_scores, recommender_scores, alpha=0.5):
    """Blend retrieval and recommender scores linearly.

    alpha weights the retrieval scores and (1 - alpha) weights the recommender
    scores; the weighted sum is returned.
    """
    retrieval_part = alpha * retrieval_scores
    recommender_part = (1 - alpha) * recommender_scores
    return retrieval_part + recommender_part

# Example usage
top_docs = bm25.search(original_query)['docno'].tolist()[:10]
recommender_scores = get_recommender_scores(original_query, user_id_to_expand, top_docs)
print(f"Recommender Scores: {recommender_scores}")

Recommender Scores: [0.28153109 0.15871698 0.11530065 0.08697088 0.0704283  0.06515638
 0.13694983 0.08617288 0.22408871 0.01028938]


### Score Integration
Combined BM25 retrieval scores, neural reranking, and recommender system-generated scores using a weighted combination approach to personalize results effectively.



In [None]:
def combined_reranker(retrieved):
    """Blend each row's incoming score with its TF-IDF recommender score.

    Args:
        retrieved: DataFrame with columns 'qid', 'query', 'docno' and 'score'
            (the score of the previous stage). A 'user_id' column is used when
            present; otherwise the first user in users is passed on.

    Returns:
        DataFrame with columns ['qid', 'docno', 'score']. Rows whose answer text
        is missing or blank get a score of 0; rows without a recommender score
        keep their incoming score.
    """
    # One dictionary lookup per row instead of scanning subset_answers each time
    answer_by_docno = dict(zip(subset_answers['docno'], subset_answers['answer']))
    has_user_column = 'user_id' in retrieved.columns
    fallback_user_id = users['Id'][0]

    results = []
    for _, row in tqdm(retrieved.iterrows(), total=len(retrieved), desc="Combining Scores", miniters=10000):
        qid = row['qid']
        docno = row['docno']
        user_id = row['user_id'] if has_user_column else fallback_user_id

        # Use this row's own score. Looking the score up by docno across the whole
        # frame returned the first match, which belongs to a different query
        # whenever the same answer is retrieved for several queries.
        retrieval_score = row['score']

        answer = answer_by_docno.get(docno, "")
        if not isinstance(answer, str) or not answer.strip():
            combined_score = 0
        else:
            recommender_scores = get_recommender_scores(row['query'], user_id, [docno])
            if len(recommender_scores) > 0:
                combined_score = combine_scores(retrieval_score, recommender_scores[0])
            else:
                combined_score = retrieval_score

        results.append([qid, docno, combined_score])

    return pd.DataFrame(results, columns=['qid', 'docno', 'score'])

# Define the pipeline: top-20 BM25 candidates -> cross-encoder scores -> blend with TF-IDF scores
pipeline_final = (
    bm25 % 20
    >> pt.apply.generic(neural_reranker)     # Apply neural reranking
    >> pt.apply.generic(combined_reranker)   # Apply combined reranking
)

# Prepare queries and qrels for PyTerrier Experiment
queries = val_queries[['qid', 'query', 'user_id']]
qrels = val_qrels[['qid', 'docno', 'label']]

# System labels used in the results table. The final label lists only the stages that
# pipeline_final actually contains: expand_query and personalize_query are not part of it,
# so the earlier "+ Query Expansion + Personalization" label misreported what was evaluated.
BASELINE_NAME = 'BM25 Baseline'
FINAL_NAME = 'BM25 + Neural Reranker + Recommender'

# Run the experiment with progress tracking
print("Running experiment...")
experiment_final = pt.Experiment(
    [bm25, pipeline_final],  # Systems to compare
    queries,           # Queries
    qrels,             # Qrels
    eval_metrics=['map', 'ndcg', 'P_10'],  # Evaluation metrics
    names=[BASELINE_NAME, FINAL_NAME]  # System names
)

# Display the experiment results
print("\nFinal Experiment Results:")
print(experiment_final)

# Detailed comparison: final system minus baseline, per metric
scores_by_system = experiment_final.set_index('name')
improvement_map = scores_by_system.loc[FINAL_NAME, 'map'] - scores_by_system.loc[BASELINE_NAME, 'map']
improvement_ndcg = scores_by_system.loc[FINAL_NAME, 'ndcg'] - scores_by_system.loc[BASELINE_NAME, 'ndcg']
improvement_p10 = scores_by_system.loc[FINAL_NAME, 'P_10'] - scores_by_system.loc[BASELINE_NAME, 'P_10']

print(f"\nMAP Improvement: {improvement_map}")
print(f"nDCG Improvement: {improvement_ndcg}")
print(f"P@10 Improvement: {improvement_p10}")


Running experiment...


Combining Scores: 100%|██████████| 2062/2062 [00:16<00:00, 124.87it/s]



Final Experiment Results:
                                                name       map      ndcg  \
0                                      BM25 Baseline  0.614908  0.681505   
1  BM25 + Neural Reranker + Recommender + Query E...  0.675381  0.711680   

       P_10  
0  0.078571  
1  0.078571  

MAP Improvement: 0.06047265699016979
nDCG Improvement: 0.03017504865049403
P@10 Improvement: 0.0
