In [1]:
import pandas as pd
import pickle
from rank_bm25 import BM25Okapi
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from sentence_transformers import SentenceTransformer, util
import torch
from nltk.tokenize import word_tokenize
import nltk
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [2]:
df = pd.read_csv('lcr_input_final.csv')
abstracts = list(set(df['abstract']))

In [3]:
nltk.download('punkt')

# Prepare the BM25 model
tokenized_abstracts = [word_tokenize(str(abstract)) for abstract in abstracts]
bm25 = BM25Okapi(tokenized_abstracts)


scincl = SentenceTransformer('/Users/borankahraman/ITU/lcr/lcr_yeni/fine-tuned-scincl-2')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/borankahraman/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
No sentence-transformers model found with name /Users/borankahraman/ITU/lcr/lcr_yeni/fine-tuned-scincl-2. Creating a new one with MEAN pooling.


In [4]:
# Query and rerank function
def query_and_rerank(query, bm25, scincl_model, top_k=10):
    # Tokenize the query
    tokenized_query = query.split()
    
    # Get BM25 top_k results
    bm25_scores = bm25.get_scores(tokenized_query)
    top_indices = bm25_scores.argsort()[-top_k:][::-1]
    top_abstracts = [abstracts[i] for i in top_indices]

    # Encode the query and abstracts
    query_embedding = scincl_model.encode(query, convert_to_tensor=True)
    abstract_embeddings = scincl_model.encode(top_abstracts, convert_to_tensor=True)
    
    # Compute cosine similarities
    cosine_scores = util.pytorch_cos_sim(query_embedding, abstract_embeddings)[0]
    
    # Sort by similarity scores
    sorted_indices = torch.argsort(cosine_scores, descending=True)
    
    # Collect the top 10 similar abstracts
    reranked_top_abstracts = [top_abstracts[i] for i in sorted_indices[:10]]

    return reranked_top_abstracts

# Example query
query = "The political system of a country defines the set of formal legal institutions that constitute a government or a state and establishes the distribution of power and resources among its citizens and government officials."

# Get the most suitable abstract for the query
most_suitable_abstract = query_and_rerank(query, bm25, scincl)


In [5]:
df = pd.read_csv('eval_dataset.csv')

In [6]:
context_l = df['context'].tolist()
eval_abstract = df['abstract'].tolist()

In [7]:
tp_3 = 0
tp_5 = 0
tp_10 = 0

In [8]:
# Evaluate the model by calculating precision@1, precision@5, and precision@10

for i in range(len(context_l)):
    most_suitable_abstracts = query_and_rerank(context_l[i], bm25, scincl)


    if type(eval_abstract[i]) != str:
        continue

    if eval_abstract[i] in most_suitable_abstracts[:3]:
        tp_3 += 1
    
    if eval_abstract[i] in most_suitable_abstracts[:5]:
        tp_5 += 1

    if eval_abstract[i] in most_suitable_abstracts:
        tp_10 += 1

query_count = len(context_l)

print(f"Precision@3: {tp_3/query_count}")
print(f"Precision@5: {tp_5/query_count}")
print(f"Precision@10: {tp_10/query_count}")

Precision@3: 0.05451713395638629
Precision@5: 0.11682242990654206
Precision@10: 0.2071651090342679


In [9]:
# Calculate MRR

mrr_3 = 0
mrr_5 = 0
mrr_10 = 0

for i in range(len(context_l)):
    most_suitable_abstracts = query_and_rerank(context_l[i], bm25, scincl)

    if type(eval_abstract[i]) != str:
        continue

    if eval_abstract[i] in most_suitable_abstracts:
        mrr_10 += 1/(most_suitable_abstracts.index(eval_abstract[i]) + 1) 

    if eval_abstract[i] in most_suitable_abstracts[:5]:
        mrr_5 += 1/(most_suitable_abstracts.index(eval_abstract[i]) + 1) 
    
    if eval_abstract[i] in most_suitable_abstracts[:3]:
        mrr_3 += 1/(most_suitable_abstracts.index(eval_abstract[i]) + 1) 
        

print(f"MRR@3: {mrr_3/query_count}")
print(f"MRR@5: {mrr_5/query_count}")
print(f"MRR@10: {mrr_10/query_count}")

MRR@3: 0.035565939771547254
MRR@5: 0.05005192107995845
MRR@10: 0.06262671215942245


In [10]:
# Calculate NDCG@10
# Define a function to calculate DCG@K

ndcg_values_3 = []
ndcg_values_5 = []
ndcg_values_10 = []


def dcg_at_k(relevances, k):
    relevances = np.asfarray(relevances)[:k]
    if relevances.size:
        return np.sum(relevances / np.log2(np.arange(2, relevances.size + 2)))
    return 0

# Define a function to calculate NDCG@K
def ndcg_at_k(relevances, k):
    dcg_max = dcg_at_k(sorted(relevances, reverse=True), k)  # Calculate IDCG@K
    if not dcg_max:
        return 0.0
    return dcg_at_k(relevances, k) / dcg_max

for i in range(len(context_l)):
    most_suitable_abstracts = query_and_rerank(context_l[i], bm25, scincl)
    relevant = [0 for i in range(10)]

    if type(eval_abstract[i]) != str:
        continue

    if eval_abstract[i] in most_suitable_abstracts:
        index = most_suitable_abstracts.index(eval_abstract[i])
        relevant[index] = 1

        ndcg = ndcg_at_k(relevant, 10)
        ndcg_values_10.append(ndcg)
    
    if eval_abstract[i] in most_suitable_abstracts[:5]:
        index = most_suitable_abstracts.index(eval_abstract[i])
        relevant[index] = 1

        ndcg = ndcg_at_k(relevant, 5)
        ndcg_values_5.append(ndcg)
    
    if eval_abstract[i] in most_suitable_abstracts[:3]:
        index = most_suitable_abstracts.index(eval_abstract[i])
        relevant[index] = 1

        ndcg = ndcg_at_k(relevant, 3)
        ndcg_values_3.append(ndcg)

print(f"NDCG@3: {sum(ndcg_values_3)/query_count}")
print(f"NDCG@5: {sum(ndcg_values_5)/query_count}")
print(f"NDCG@10: {sum(ndcg_values_10)/query_count}")

NDCG@3: 0.04040533845683183
NDCG@5: 0.06628311074840784
NDCG@10: 0.09602397468168575
