In [None]:
import pandas as pd
import re
import ast
import math

def extract_alias_from_textbook_name(name: str) -> str | None:
    if not isinstance(name, str):
        return None
    m = re.search(r'\("([^"]+)"\)', name)
    return m.group(1).strip() if m else None

def parse_ground_truth_ranges(gt_str: str) -> list[tuple[int,int]]:
    """Extract every ``[a, b]`` integer pair from a ground-truth string.

    Pairs are returned as ``(a, b)`` tuples in order of appearance.
    Only brackets holding exactly two comma-separated non-negative
    integers are recognised; anything else (including a bracket with a
    single number) is ignored. Non-string input yields an empty list.
    """
    if not isinstance(gt_str, str):
        return []
    pair_pattern = r'\[\s*(\d+)\s*,\s*(\d+)\s*\]'
    found = []
    for match in re.finditer(pair_pattern, gt_str):
        start, end = match.groups()
        found.append((int(start), int(end)))
    return found

def range_to_pages(ranges: list[tuple[int,int]]) -> set[int]:
    """Expand inclusive page ranges into the set of pages they cover.

    Each pair is treated as an inclusive interval; a pair given in
    descending order is expanded the same way as its ascending form.
    e.g. [(89,91), (97,99)] -> {89,90,91,97,98,99}
    """
    expanded = set()
    for first, second in ranges:
        low, high = min(first, second), max(first, second)
        expanded.update(range(low, high + 1))
    return expanded

def parse_top5_string(s: str) -> list[tuple[str,int]]:
    """Parse a top-5 result string into ranked ``(alias, page)`` pairs.

    If the text has the form ``t5 = ( ... )``, only the part inside the
    outer parentheses is scanned; otherwise the whole stripped string is.
    Every ``alias-(n, n, ...)`` group contributes one pair per number, in
    order of appearance, and the flattened list is cut to five entries.
    Non-string input yields an empty list.
    """
    if not isinstance(s, str):
        return []

    text = s.strip()
    wrapped = re.search(r't5\s*=\s*\((.*)\)\s*$', text)
    if wrapped:
        text = wrapped.group(1)

    ranked = []
    for alias, nums in re.findall(r'([A-Za-z0-9_]+)\s*-\s*\(\s*([0-9,\s]+)\s*\)', text):
        label = alias.strip()
        ranked.extend((label, int(number)) for number in re.findall(r'\d+', nums))
    # Keep only the five highest-ranked entries.
    return ranked[:5]

def metrics_at_k(retrieved_pairs: list[tuple[str,int]], gt_alias: str, gt_pages: set[int], k: int = 5):
    """Compute rank-based retrieval metrics over the first *k* results.

    A result is a hit when its alias equals *gt_alias* and its page is in
    *gt_pages*. Each relevant page is credited at most once: a repeated
    occurrence of an already-credited page counts as a miss, so recall
    and average precision can never exceed 1.0.

    Args:
        retrieved_pairs: Ranked ``(alias, page)`` results, best first.
        gt_alias: Alias of the ground-truth textbook.
        gt_pages: Set of relevant page numbers in that textbook.
        k: Cut-off rank; also the precision denominator.

    Returns:
        Dict with keys ``Prec5``, ``Recall5``, ``F15``, ``AvgPrec`` and
        ``RR`` (the key names are fixed regardless of *k*).
    """
    credited = set()
    rel = []
    for alias, page in retrieved_pairs[:k]:
        is_hit = alias == gt_alias and page in gt_pages and page not in credited
        if is_hit:
            credited.add(page)
        rel.append(1 if is_hit else 0)
    hits = sum(rel)

    # Precision@k: denominator is k even when fewer than k results exist.
    prec = hits / max(k, 1)

    # Recall@k (denominator = total number of relevant pages, at least 1
    # so an empty ground truth gives 0 instead of dividing by zero).
    denom = max(len(gt_pages), 1)
    rec = hits / denom

    # F1@k
    f1 = 0.0 if (prec + rec) == 0 else 2 * prec * rec / (prec + rec)

    # Average Precision and Reciprocal Rank in a single pass over ranks.
    running_hits = 0
    ap_sum = 0.0
    rr = 0.0
    for rank, flag in enumerate(rel, start=1):
        if flag:
            running_hits += 1
            ap_sum += running_hits / rank
            if running_hits == 1:
                rr = 1.0 / rank  # rank of the first hit
    ap = ap_sum / denom  # normalised by total relevant pages

    return dict(Prec5=prec, Recall5=rec, F15=f1, AvgPrec=ap, RR=rr)

# Evaluate every query in the benchmark sheet and print the mean of each
# retrieval metric per similarity measure.
EXCEL_PATH = "Vector Discovery Benchmark 75.xlsx"

df = pd.read_excel(EXCEL_PATH, sheet_name="Sheet1")

COL_TEXTBOOK = "Textbook Name"
COL_GT = "Ground Truth: [] means all pages in sequence"
COL_COS = "Cosine Distance top 5"
COL_DOT = "Dot product top 5"
COL_EUC = "Euclidean distance top 5"
COL_MAN = "Manhattan distance top 5"

# (final-table header, per-query column prefix, spreadsheet column)
SIMILARITIES = [
    ("Cosine", "Cosine", COL_COS),
    ("Dot", "Dot", COL_DOT),
    ("Euclidean", "Euclidean", COL_EUC),
    ("Manh", "Manhattan", COL_MAN),
]
# (final-table label, per-query column suffix, key in the metrics_at_k result)
METRICS = [
    ("Prec@5", "Prec@5", "Prec5"),
    ("Recall@5", "Recall@5", "Recall5"),
    ("F1@5", "F1@5", "F15"),
    ("Avg Prec", "AvgPrec", "AvgPrec"),
    ("Recip Rank", "RR", "RR"),
]

# List to store the results for each query
rows = []
for _, row in df.iterrows():
    gt_alias = extract_alias_from_textbook_name(row.get(COL_TEXTBOOK, ""))
    gt_ranges = parse_ground_truth_ranges(row.get(COL_GT, ""))
    gt_pages = range_to_pages(gt_ranges)

    record = {"Query ID": row.get("Query ID")}
    for _header, prefix, source_col in SIMILARITIES:
        # str() turns an empty (NaN) cell into "nan", which parses to no pairs.
        pairs = parse_top5_string(str(row.get(source_col, "")))
        scores = metrics_at_k(pairs, gt_alias, gt_pages, k=5)
        for _label, suffix, key in METRICS:
            record[f"{prefix}_{suffix}"] = scores[key]
    rows.append(record)

# Create a DataFrame from the rows list
per_query = pd.DataFrame(rows)

# Calculate averages over all queries
avg_row = {"Query ID": "AVERAGE"}
for col in per_query.columns:
    if col != "Query ID":
        avg_row[col] = per_query[col].mean()

# Append the average row to the DataFrame
summary = pd.concat([per_query, pd.DataFrame([avg_row])], ignore_index=True)

# Create a final summary table. The averages are read straight from
# avg_row rather than looked up again in `summary` by Query ID, which
# would fail if a real query were itself named "AVERAGE".
final_columns = {"Metric": [label for label, _suffix, _key in METRICS]}
for header, prefix, _source_col in SIMILARITIES:
    final_columns[header] = [
        avg_row[f"{prefix}_{suffix}"] for _label, suffix, _key in METRICS
    ]
final_summary = pd.DataFrame(final_columns)

# Print the final values
print(final_summary)


       Metric    Cosine       Dot  Euclidean      Manh
0      Prec@5  0.514667  0.496000   0.234667  0.418667
1    Recall@5  0.281228  0.272390   0.125755  0.218421
2        F1@5  0.353943  0.341985   0.160339  0.284768
3    Avg Prec  0.238683  0.233784   0.098797  0.184127
4  Recip Rank  0.801111  0.787778   0.418222  0.738889


In [None]:
#multi page queries
import pandas as pd
import re
import ast
import math


def extract_alias_from_textbook_name(name: str) -> str | None:
    """
    Textbook Name looks like:  Artificial Intelligence a Modern Approach ("aima")
    Return the alias inside the quotes -> 'aima'
    """
    if not isinstance(name, str):
        return None
    m = re.search(r'\("([^"]+)"\)', name)
    return m.group(1).strip() if m else None

def parse_ground_truth_ranges(gt_str: str) -> list[tuple[int,int]]:
    """Extract every ``[a, b]`` integer pair from a ground-truth string.

    Pairs are returned as ``(a, b)`` tuples in order of appearance.
    Only brackets holding exactly two comma-separated non-negative
    integers are recognised; anything else (including a bracket with a
    single number) is ignored. Non-string input yields an empty list.
    """
    if not isinstance(gt_str, str):
        return []
    pair_pattern = r'\[\s*(\d+)\s*,\s*(\d+)\s*\]'
    found = []
    for match in re.finditer(pair_pattern, gt_str):
        start, end = match.groups()
        found.append((int(start), int(end)))
    return found

def range_to_pages(ranges: list[tuple[int,int]]) -> set[int]:
    """Expand inclusive page ranges into the set of pages they cover.

    Each pair is treated as an inclusive interval; a pair given in
    descending order is expanded the same way as its ascending form.
    """
    expanded = set()
    for first, second in ranges:
        low, high = min(first, second), max(first, second)
        expanded.update(range(low, high + 1))
    return expanded

def parse_top5_string(s: str) -> list[tuple[str,int]]:
    """Parse a top-5 result string into ranked ``(alias, page)`` pairs.

    If the text has the form ``t5 = ( ... )``, only the part inside the
    outer parentheses is scanned; otherwise the whole stripped string is.
    Every ``alias-(n, n, ...)`` group contributes one pair per number, in
    order of appearance, and the flattened list is cut to five entries.
    Non-string input yields an empty list.
    """
    if not isinstance(s, str):
        return []

    text = s.strip()
    wrapped = re.search(r't5\s*=\s*\((.*)\)\s*$', text)
    if wrapped:
        text = wrapped.group(1)

    ranked = []
    for alias, nums in re.findall(r'([A-Za-z0-9_]+)\s*-\s*\(\s*([0-9,\s]+)\s*\)', text):
        label = alias.strip()
        ranked.extend((label, int(number)) for number in re.findall(r'\d+', nums))
    # Keep only the five highest-ranked entries.
    return ranked[:5]


def metrics_at_k(retrieved_pairs: list[tuple[str,int]], gt_alias: str, gt_pages: set[int], k: int = 5):
    """Compute rank-based retrieval metrics over the first *k* results.

    A result is a hit when its alias equals *gt_alias* and its page is in
    *gt_pages*. Each relevant page is credited at most once: a repeated
    occurrence of an already-credited page counts as a miss, so recall
    and average precision can never exceed 1.0.

    Args:
        retrieved_pairs: Ranked ``(alias, page)`` results, best first.
        gt_alias: Alias of the ground-truth textbook.
        gt_pages: Set of relevant page numbers in that textbook.
        k: Cut-off rank; also the precision denominator.

    Returns:
        Dict with keys ``Prec5``, ``Recall5``, ``F15``, ``AvgPrec`` and
        ``RR`` (the key names are fixed regardless of *k*).
    """
    credited = set()
    rel = []
    for alias, page in retrieved_pairs[:k]:
        is_hit = alias == gt_alias and page in gt_pages and page not in credited
        if is_hit:
            credited.add(page)
        rel.append(1 if is_hit else 0)
    hits = sum(rel)

    # Precision@k: denominator is k even when fewer than k results exist.
    prec = hits / max(k, 1)

    # Recall@k (denominator = total number of relevant pages, at least 1
    # so an empty ground truth gives 0 instead of dividing by zero).
    denom = max(len(gt_pages), 1)
    rec = hits / denom

    # F1@k
    f1 = 0.0 if (prec + rec) == 0 else 2 * prec * rec / (prec + rec)

    # Average Precision and Reciprocal Rank in a single pass over ranks.
    running_hits = 0
    ap_sum = 0.0
    rr = 0.0
    for rank, flag in enumerate(rel, start=1):
        if flag:
            running_hits += 1
            ap_sum += running_hits / rank
            if running_hits == 1:
                rr = 1.0 / rank  # rank of the first hit
    ap = ap_sum / denom  # normalised by total relevant pages

    return dict(Prec5=prec, Recall5=rec, F15=f1, AvgPrec=ap, RR=rr)


# Evaluate only the queries whose ID starts with "MP-" and print the mean
# of each retrieval metric per similarity measure.
EXCEL_PATH = "Vector Discovery Benchmark 75.xlsx"
df = pd.read_excel(EXCEL_PATH, sheet_name="Sheet1")
# Define the column names in the dataset
COL_TEXTBOOK = "Textbook Name"
COL_GT = "Ground Truth: [] means all pages in sequence"
COL_COS = "Cosine Distance top 5"
COL_DOT = "Dot product top 5"
COL_EUC = "Euclidean distance top 5"
COL_MAN = "Manhattan distance top 5"

# (final-table header, per-query column prefix, spreadsheet column)
SIMILARITIES = [
    ("Cosine", "Cosine", COL_COS),
    ("Dot", "Dot", COL_DOT),
    ("Euclidean", "Euclidean", COL_EUC),
    ("Manh", "Manhattan", COL_MAN),
]
# (final-table label, per-query column suffix, key in the metrics_at_k result)
METRICS = [
    ("Prec@5", "Prec@5", "Prec5"),
    ("Recall@5", "Recall@5", "Recall5"),
    ("F1@5", "F1@5", "F15"),
    ("Avg Prec", "AvgPrec", "AvgPrec"),
    ("Recip Rank", "RR", "RR"),
]

# na=False: a missing Query ID would otherwise put NaN in the mask, and
# boolean indexing with NaN raises instead of skipping the row.
df_filtered = df[df["Query ID"].str.startswith("MP-", na=False)]

rows = []
for _, row in df_filtered.iterrows():
    gt_alias = extract_alias_from_textbook_name(row.get(COL_TEXTBOOK, ""))
    gt_ranges = parse_ground_truth_ranges(row.get(COL_GT, ""))
    gt_pages = range_to_pages(gt_ranges)

    record = {"Query ID": row.get("Query ID")}
    for _header, prefix, source_col in SIMILARITIES:
        # str() turns an empty (NaN) cell into "nan", which parses to no pairs.
        pairs = parse_top5_string(str(row.get(source_col, "")))
        scores = metrics_at_k(pairs, gt_alias, gt_pages, k=5)
        for _label, suffix, key in METRICS:
            record[f"{prefix}_{suffix}"] = scores[key]
    rows.append(record)

per_query = pd.DataFrame(rows)

avg_row = {"Query ID": "AVERAGE"}
for col in per_query.columns:
    if col != "Query ID":
        avg_row[col] = per_query[col].mean()

# Append the average row to the DataFrame
summary = pd.concat([per_query, pd.DataFrame([avg_row])], ignore_index=True)

# Create a final summary table. The averages are read straight from
# avg_row rather than looked up again in `summary` by Query ID.
final_columns = {"Metric": [label for label, _suffix, _key in METRICS]}
for header, prefix, _source_col in SIMILARITIES:
    final_columns[header] = [
        avg_row[f"{prefix}_{suffix}"] for _label, suffix, _key in METRICS
    ]
final_summary_multipage = pd.DataFrame(final_columns)

# Print the final values
print(final_summary_multipage)


       Metric    Cosine       Dot  Euclidean      Manh
0      Prec@5  0.457143  0.442857   0.157143  0.342857
1    Recall@5  0.239042  0.231106   0.069706  0.176786
2        F1@5  0.306394  0.296190   0.093250  0.232598
3    Avg Prec  0.176450  0.169156   0.037479  0.138313
4  Recip Rank  0.690476  0.654762   0.246429  0.571429


In [None]:
#conceptual queries
import pandas as pd
import re
import ast
import math


def extract_alias_from_textbook_name(name: str) -> str | None:

    if not isinstance(name, str):
        return None
    m = re.search(r'\("([^"]+)"\)', name)
    return m.group(1).strip() if m else None

def parse_ground_truth_ranges(gt_str: str) -> list[tuple[int,int]]:
    """Extract every ``[a, b]`` integer pair from a ground-truth string.

    Pairs are returned as ``(a, b)`` tuples in order of appearance.
    Only brackets holding exactly two comma-separated non-negative
    integers are recognised; anything else (including a bracket with a
    single number) is ignored. Non-string input yields an empty list.
    """
    if not isinstance(gt_str, str):
        return []
    pair_pattern = r'\[\s*(\d+)\s*,\s*(\d+)\s*\]'
    found = []
    for match in re.finditer(pair_pattern, gt_str):
        start, end = match.groups()
        found.append((int(start), int(end)))
    return found

def range_to_pages(ranges: list[tuple[int,int]]) -> set[int]:
    """Expand inclusive page ranges into the set of pages they cover.

    Each pair is treated as an inclusive interval; a pair given in
    descending order is expanded the same way as its ascending form.
    """
    expanded = set()
    for first, second in ranges:
        low, high = min(first, second), max(first, second)
        expanded.update(range(low, high + 1))
    return expanded

def parse_top5_string(s: str) -> list[tuple[str,int]]:
    """Parse a top-5 result string into ranked ``(alias, page)`` pairs.

    If the text has the form ``t5 = ( ... )``, only the part inside the
    outer parentheses is scanned; otherwise the whole stripped string is.
    Every ``alias-(n, n, ...)`` group contributes one pair per number, in
    order of appearance, and the flattened list is cut to five entries.
    Non-string input yields an empty list.
    """
    if not isinstance(s, str):
        return []

    text = s.strip()
    wrapped = re.search(r't5\s*=\s*\((.*)\)\s*$', text)
    if wrapped:
        text = wrapped.group(1)

    ranked = []
    for alias, nums in re.findall(r'([A-Za-z0-9_]+)\s*-\s*\(\s*([0-9,\s]+)\s*\)', text):
        label = alias.strip()
        ranked.extend((label, int(number)) for number in re.findall(r'\d+', nums))
    # Keep only the five highest-ranked entries.
    return ranked[:5]


def metrics_at_k(retrieved_pairs: list[tuple[str,int]], gt_alias: str, gt_pages: set[int], k: int = 5):
    """Compute rank-based retrieval metrics over the first *k* results.

    A result is a hit when its alias equals *gt_alias* and its page is in
    *gt_pages*. Each relevant page is credited at most once: a repeated
    occurrence of an already-credited page counts as a miss, so recall
    and average precision can never exceed 1.0.

    Args:
        retrieved_pairs: Ranked ``(alias, page)`` results, best first.
        gt_alias: Alias of the ground-truth textbook.
        gt_pages: Set of relevant page numbers in that textbook.
        k: Cut-off rank; also the precision denominator.

    Returns:
        Dict with keys ``Prec5``, ``Recall5``, ``F15``, ``AvgPrec`` and
        ``RR`` (the key names are fixed regardless of *k*).
    """
    credited = set()
    rel = []
    for alias, page in retrieved_pairs[:k]:
        is_hit = alias == gt_alias and page in gt_pages and page not in credited
        if is_hit:
            credited.add(page)
        rel.append(1 if is_hit else 0)
    hits = sum(rel)

    # Precision@k: denominator is k even when fewer than k results exist.
    prec = hits / max(k, 1)

    # Recall@k (denominator = total number of relevant pages, at least 1
    # so an empty ground truth gives 0 instead of dividing by zero).
    denom = max(len(gt_pages), 1)
    rec = hits / denom

    # F1@k
    f1 = 0.0 if (prec + rec) == 0 else 2 * prec * rec / (prec + rec)

    # Average Precision and Reciprocal Rank in a single pass over ranks.
    running_hits = 0
    ap_sum = 0.0
    rr = 0.0
    for rank, flag in enumerate(rel, start=1):
        if flag:
            running_hits += 1
            ap_sum += running_hits / rank
            if running_hits == 1:
                rr = 1.0 / rank  # rank of the first hit
    ap = ap_sum / denom  # normalised by total relevant pages

    return dict(Prec5=prec, Recall5=rec, F15=f1, AvgPrec=ap, RR=rr)


# Evaluate only the queries whose ID starts with "CP-" and print the mean
# of each retrieval metric per similarity measure.
EXCEL_PATH = "Vector Discovery Benchmark 75.xlsx"
# Read the Excel file into a DataFrame
df = pd.read_excel(EXCEL_PATH, sheet_name="Sheet1")
COL_TEXTBOOK = "Textbook Name"
COL_GT = "Ground Truth: [] means all pages in sequence"
COL_COS = "Cosine Distance top 5"
COL_DOT = "Dot product top 5"
COL_EUC = "Euclidean distance top 5"
COL_MAN = "Manhattan distance top 5"

# (final-table header, per-query column prefix, spreadsheet column)
SIMILARITIES = [
    ("Cosine", "Cosine", COL_COS),
    ("Dot", "Dot", COL_DOT),
    ("Euclidean", "Euclidean", COL_EUC),
    ("Manh", "Manhattan", COL_MAN),
]
# (final-table label, per-query column suffix, key in the metrics_at_k result)
METRICS = [
    ("Prec@5", "Prec@5", "Prec5"),
    ("Recall@5", "Recall@5", "Recall5"),
    ("F1@5", "F1@5", "F15"),
    ("Avg Prec", "AvgPrec", "AvgPrec"),
    ("Recip Rank", "RR", "RR"),
]

# na=False: a missing Query ID would otherwise put NaN in the mask, and
# boolean indexing with NaN raises instead of skipping the row.
df_filtered = df[df["Query ID"].str.startswith("CP-", na=False)]

rows = []
for _, row in df_filtered.iterrows():
    gt_alias = extract_alias_from_textbook_name(row.get(COL_TEXTBOOK, ""))
    gt_ranges = parse_ground_truth_ranges(row.get(COL_GT, ""))
    gt_pages = range_to_pages(gt_ranges)

    record = {"Query ID": row.get("Query ID")}
    for _header, prefix, source_col in SIMILARITIES:
        # str() turns an empty (NaN) cell into "nan", which parses to no pairs.
        pairs = parse_top5_string(str(row.get(source_col, "")))
        scores = metrics_at_k(pairs, gt_alias, gt_pages, k=5)
        for _label, suffix, key in METRICS:
            record[f"{prefix}_{suffix}"] = scores[key]
    rows.append(record)

per_query = pd.DataFrame(rows)

avg_row = {"Query ID": "AVERAGE"}
for col in per_query.columns:
    if col != "Query ID":
        avg_row[col] = per_query[col].mean()

# Append the average row to the DataFrame
summary = pd.concat([per_query, pd.DataFrame([avg_row])], ignore_index=True)

# Create a final summary table. The averages are read straight from
# avg_row rather than looked up again in `summary` by Query ID.
final_columns = {"Metric": [label for label, _suffix, _key in METRICS]}
for header, prefix, _source_col in SIMILARITIES:
    final_columns[header] = [
        avg_row[f"{prefix}_{suffix}"] for _label, suffix, _key in METRICS
    ]
final_summary_conceptual = pd.DataFrame(final_columns)

# Print the final values
print(final_summary_conceptual)


       Metric    Cosine       Dot  Euclidean      Manh
0      Prec@5  0.461538  0.400000   0.184615  0.323077
1    Recall@5  0.226597  0.198625   0.087891  0.157056
2        F1@5  0.303449  0.264988   0.118895  0.210958
3    Avg Prec  0.182949  0.164806   0.059937  0.126731
4  Recip Rank  0.692308  0.653846   0.358974  0.673077
