# Retrieval Results Analysis

The goal of this notebook is to investigate the quality of the retrieval when using RAG and the Datamodels re-ranking.  
To analyse this, we will look at how each method ranks the golden documents for 50 test samples, and at how the rankings differ between situations where the methods perform similarly and situations where they perform differently.  
Finally, we will take a closer look at the special scenarios where the baseline performed well but neither of the methods achieved the expected answer.

## Setup

In [1]:
import polars as pl
import seaborn as sns
import matplotlib.pyplot as plt
import json
import os
import torch

In [2]:
## Load the wiki passage table (columns shown in the preview: text, title)
wiki_path = "../../data/wiki_dump2018_nq_open/processed/wiki.feather"
wiki = pl.read_ipc(wiki_path)
wiki.head()

text,title
str,str
"""Aaron Aaron ( or ; ""Ahärôn"") i…","""Aaron"""
"""God at Sinai granted Aaron the…","""Aaron"""
"""his rod turn into a snake. The…","""Aaron"""
"""however, Aaron and Hur remaine…","""Aaron"""
"""Aaron and his sons to the prie…","""Aaron"""


In [33]:
## Read the 50 test samples and attach a positional "idx" column
test_df = pl.read_ipc("previews/50_test.feather")
test_df = test_df.with_row_index("idx")
test_df.head()

idx,example_id,question,answers,text,idx_gold_in_corpus
u32,i64,str,list[str],str,i64
0,-5266947057771511513,"""who was the first woman to win…","[""Judith Cynthia Aline Keppel""]","""Judith Cynthia Aline Keppel (b…",21033303
1,-7256500086799415081,"""who carried florida by 537 vot…","[""George W. Bush"", ""Bush""]","""United States presidential ele…",20988218
2,7104554727863075462,"""who's the original singer of h…","[""Kris Kristofferson""]","""'' Help Me Make It Through The…",20972472
3,-5172636572480122331,"""when did the subway open in ne…","[""October 27 , 1904"", ""1904""]","""The New York City Subway is a …",20995376
4,-7121343730302515106,"""when did the passion of the ch…","[""2004"", ""February 25 , 2004""]","""The Passion of the Christ ( al…",21033680


In [4]:
## Example of a gold passage: 21033680 is the idx_gold_in_corpus of the last
## sample shown in the test_df preview, used here as a row index into wiki
wiki[21033680]

text,title
str,str
"""The Passion of the Christ ( al…","""The Passion of the Christ"""


In [45]:
#### Load generation results
PREFIX_PATH = "previews"

# Maps the preview number found in a result file name to the preview's name.
previews = {
    "0": "preview_50_L2",
    "1": "preview_50_IP",
    "2": "new_prompt_preview_50_cosine",
    "3": "preview_50_L2_k8",
    "4": "new_prompt_preview_50_L2"
}

dfs = []

for f in os.listdir("previews_results"):

    # Skip anything that is not a result table (e.g. .ipynb_checkpoints,
    # .DS_Store); such entries would otherwise crash the lookup/read below.
    if not f.endswith(".feather"):
        continue

    if f.endswith("baseline.feather"):
        # Baseline results are not tied to a preview, so no number is parsed.
        retrieval = "baseline"
        preview_type = "baseline"

    else:
        # The second "_"-separated token of the file name is the key into
        # `previews`; an unknown number raises KeyError.
        _preview_num = f.split("_")[1]
        retrieval = "datamodels" if f.endswith("datamodels.feather") else "rag"
        preview_type = previews[_preview_num]

    # `preview_type` (not `type`) so the builtin stays usable in later cells.
    _df = pl.read_ipc(os.path.join("previews_results", f)).with_columns(
        pl.lit(retrieval).alias("retrieval"),
        pl.lit(preview_type).alias("type"),
    )
    dfs.append(_df)

# `dfs` keeps the baseline tables; `results` only keeps the rag/datamodels rows.
results = pl.concat(dfs).filter(pl.col("type") != "baseline")

In [46]:
# Display the concatenated results table (baseline rows already filtered out)
results

idx,mean,max,metric,retrieval,type
i64,f64,f64,str,str,str
0,0.0,0.0,"""rouge_l""","""rag""","""new_prompt_preview_50_L2"""
1,0.8,1.0,"""rouge_l""","""rag""","""new_prompt_preview_50_L2"""
2,0.0,0.0,"""rouge_l""","""rag""","""new_prompt_preview_50_L2"""
3,0.066667,0.333333,"""rouge_l""","""rag""","""new_prompt_preview_50_L2"""
4,0.057143,0.285714,"""rouge_l""","""rag""","""new_prompt_preview_50_L2"""
…,…,…,…,…,…
45,0.0,0.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8"""
46,0.0,0.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8"""
47,0.0,0.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8"""
48,1.0,1.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8"""


In [47]:
# Attach to every result row the corpus index of its gold passage ("gold").
# test_df's "idx" comes from with_row_index (displayed as u32) while results'
# "idx" is displayed as i64, so the right key is cast to the left key's dtype
# instead of relying on implicit casting of mismatched join keys.
_gold = test_df.select(
    pl.col("idx").cast(results.schema["idx"]),
    pl.col("idx_gold_in_corpus").alias("gold"),
)
results = results.join(_gold, on="idx", how="left")

In [48]:
# Number of retrieved documents kept per sample. Previews not listed in
# TOP_K_BY_PREVIEW keep the first DEFAULT_TOP_K entries.
DEFAULT_TOP_K = 4
TOP_K_BY_PREVIEW = {"preview_50_L2_k8": 8}

retrievals = {
    "idx": [],
    "retrieval": [],
    "type": [],
    "docs": []
}


def _append_retrievals(path, retrieval, preview, top_k):
    """Append one row per key of the JSON file at `path` to `retrievals`.

    The file is read as a mapping whose keys are converted with int() and
    whose values are truncated to their first `top_k` entries (presumably the
    ranked document indexes of each test sample - confirm against the writer).
    """
    # `with` guarantees the handle is closed even if parsing fails.
    with open(path, "r") as fp:
        docs = json.load(fp)

    for _key, _docs in docs.items():
        retrievals["idx"].append(int(_key))
        retrievals["retrieval"].append(retrieval)
        retrievals["type"].append(preview)
        retrievals["docs"].append(_docs[:top_k])


# `preview_dir` (not `dir`) so the builtin stays usable in later cells.
for preview_dir in os.listdir("previews"):
    if preview_dir not in previews.values():
        continue

    print(preview_dir)
    retrieval_dir = os.path.join("previews", preview_dir, "retrieval")
    top_k = TOP_K_BY_PREVIEW.get(preview_dir, DEFAULT_TOP_K)

    # RAG retrieval: always read from the fixed file name.
    _append_retrievals(
        os.path.join(retrieval_dir, "rag_retrieval_indexes.json"),
        "rag",
        preview_dir,
        top_k,
    )

    # Every other "*retrieval_indexes.json" file is labelled "datamodels".
    for f in os.listdir(retrieval_dir):
        if f.endswith("retrieval_indexes.json") and not f.startswith("rag"):
            # os.listdir returns bare names, so the file must be opened
            # relative to retrieval_dir, not the working directory.
            _append_retrievals(
                os.path.join(retrieval_dir, f),
                "datamodels",
                preview_dir,
                top_k,
            )

df_retrievals = pl.DataFrame(retrievals)
# Left join: result rows without a matching retrieval keep a null "docs".
results = results.join(df_retrievals, on=["idx", "retrieval", "type"], how="left")

preview_50_IP
preview_50_L2
preview_50_L2_k8
new_prompt_preview_50_cosine
new_prompt_preview_50_L2


In [None]:
# Inspect the rows of the preview_50_L2_k8 preview (the one whose retrieved
# docs are truncated to 8 entries instead of 4) to check the joined "docs"
results.filter(pl.col("type") == "preview_50_L2_k8")

idx,mean,max,metric,retrieval,type,gold,docs
i64,f64,f64,str,str,str,i64,list[i64]
0,0.666667,0.666667,"""rouge_l""","""datamodels""","""preview_50_L2_k8""",21033303,
1,0.8,1.0,"""rouge_l""","""datamodels""","""preview_50_L2_k8""",20988218,
2,0.2,1.0,"""rouge_l""","""datamodels""","""preview_50_L2_k8""",20972472,
3,1.0,1.0,"""rouge_l""","""datamodels""","""preview_50_L2_k8""",20995376,
4,0.8,1.0,"""rouge_l""","""datamodels""","""preview_50_L2_k8""",21033680,
…,…,…,…,…,…,…,…
45,0.0,0.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8""",20999656,"[15127581, 16988707, … 3117532]"
46,0.0,0.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8""",21034797,"[16185470, 17861892, … 13842891]"
47,0.0,0.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8""",21033869,"[20073387, 446950, … 15594583]"
48,1.0,1.0,"""squad_v2_best_exact""","""rag""","""preview_50_L2_k8""",21034377,"[9800132, 5350486, … 3075070]"
