# Extrinsic Evaluation
Post - Filter approach:

- we use the sensitivity labels from our intrinsic evaluation 
  
- in the post filter approach we rank the documents according to the  coordinate ascent algorithm optimizing towards normalized
Discounted Cumulative Gain (nDCG)

- for that we use predefined functions from https://github.com/rueycheng/CoordinateAscent/blob/master as the implementation was not mentioned in the paper

-> the general workflow will be the same as in extrinsic_LogisticRegression

In [2]:
import pandas as pd
bert_results = pd.read_csv("sensitivity_predictions_comparison.csv")

In [3]:
judged_df = pd.read_csv("./data/judged.txt", sep="\t", header=None,
                        names=["Query", "Document-UI", "Document-Index", "Relevance1", "Relevance2", "Relevance3"])
# Verify the loaded DataFrame
print(judged_df.head())

   Query  Document-UI  Document-Index Relevance1 Relevance2 Relevance3
0      1     87097544           40626          d        NaN          d
1      1     87153566           11852          n        NaN          n
2      1     87157536           12693          d        NaN        NaN
3      1     87157537           12694          d        NaN        NaN
4      1     87184723           15450          n        NaN        NaN


get the query documents

In [4]:
def parse_queries(file_path):
    """
    Parses query files with the format:
    .I <Query ID>
    .B <Background>
    .W <Query Text>
    """
    query_list = []
    current_query_id = None
    current_query_text = None
    
    with open(file_path, "r") as file:
        for line in file:
            line = line.strip()
            if line.startswith(".I"):
                if current_query_id is not None and current_query_text is not None:
                    query_list.append({"Query": current_query_id, "query_text": current_query_text})
                current_query_id = int(line.split()[1])  # Extract Query ID
                current_query_text = None  # Reset query text
            elif line.startswith(".W"):
                current_query_text = ""  # Initialize query text
            elif current_query_text is not None:
                current_query_text += " " + line  # Append to query text
        
        # Append the last query
        if current_query_id is not None and current_query_text is not None:
            query_list.append({"Query": current_query_id, "query_text": current_query_text.strip()})
    
    return pd.DataFrame(query_list)

# Parse queries from files
queries1 = parse_queries("querys/Queries1.txt")
queries2 = parse_queries("querys/Queries2.txt")

# Combine the queries into a single DataFrame
queries_df = pd.concat([queries1, queries2], ignore_index=True)

# Verify the parsed queries
print(queries_df.head())


   Query                                         query_text
0      1   Are there adverse effects on lipids when prog...
1      2   pathophysiology and treatment of disseminated...
2      3   anticardiolipin and lupus anticoagulants, pat...
3      4                    reviews on subdurals in elderly
4      5   effectiveness of etidronate in treating hyper...


assign relevance labels from the judged.txt

In [5]:
def compute_relevance(row):
    # Count how many of the relevance columns contain 'd' (relevant)
    return sum(1 for val in [row["Relevance1"], row["Relevance2"], row["Relevance3"]] if val == "d")

# Add a total relevance score to judged_df
judged_df["Relevance_total"] = judged_df.apply(compute_relevance, axis=1)

# Keep only the necessary columns
judged_df_cleaned = judged_df[["Query", "Document-Index", "Relevance_total"]].rename(
    columns={"Document-Index": "sequential identifier"}
)

# Verify the processed DataFrame
print(judged_df_cleaned.head())

   Query  sequential identifier  Relevance_total
0      1                  40626                2
1      1                  11852                0
2      1                  12693                1
3      1                  12694                1
4      1                  15450                0


In [6]:
bert_with_relevance = pd.merge(bert_results, judged_df_cleaned, on="sequential identifier", how="left")

# Verify the merged DataFrame
print(bert_with_relevance.head())

   sequential identifier                                     title_abstract  \
0                    126  Prospective study of liver function in childre...   
1                    154  Postpartum thyroiditis--an underdiagnosed dise...   
2                    223  Primary renal actinomycosis in the presence of...   
3                    283  Clinical course of breast cancer patients with...   
4                    300  Cardiac abnormalities in patients with diffuse...   

   actual_sensitivity  predicted_sensitivity  Query  Relevance_total  
0                   0                      0     36                0  
1                   1                      1     76                0  
2                   1                      1      8                0  
3                   0                      0     22                1  
4                   0                      0     40                0  


In [7]:
bert_full = pd.merge(bert_with_relevance, queries_df, on="Query", how="left")

# Verify the final merged DataFrame
print(bert_full.head())

   sequential identifier                                     title_abstract  \
0                    126  Prospective study of liver function in childre...   
1                    126  Prospective study of liver function in childre...   
2                    154  Postpartum thyroiditis--an underdiagnosed dise...   
3                    154  Postpartum thyroiditis--an underdiagnosed dise...   
4                    223  Primary renal actinomycosis in the presence of...   

   actual_sensitivity  predicted_sensitivity  Query  Relevance_total  \
0                   0                      0     36                0   
1                   0                      0     36                0   
2                   1                      1     76                0   
3                   1                      1     76                0   
4                   1                      1      8                0   

                                          query_text  
0   CAN DILANTIN or PHENOBARBITAL CAU

# Post - Filter approach
- calculate bm25 and proximity count accordingly for the analys

In [8]:
from rank_bm25 import BM25Okapi

# Tokenize documents and queries
documents = [doc.split() for doc in bert_full["title_abstract"]]
queries = [query.split() for query in bert_full["query_text"]]

# Initialize BM25 model
bm25 = BM25Okapi(documents)

# Compute BM25 scores
bert_full["bm25_score"] = [bm25.get_scores(query)[i] for i, query in enumerate(queries)]

KeyboardInterrupt: 

In [9]:
def proximity_count(query, doc, window=8):
    """
    Count how many query terms appear together within a specified window size in the document.
    """
    terms = query.split()
    doc_terms = doc.split()
    count = 0
    for i in range(len(doc_terms) - window + 1):
        window_terms = doc_terms[i:i + window]
        if all(term in window_terms for term in terms):
            count += 1
    return count

bert_full["proximity_count"] = bert_full.apply(
    lambda row: proximity_count(row["query_text"], row["title_abstract"]),
    axis=1
)