In [1]:
import pandas as pd
from datetime import datetime
from textwrap import shorten
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from bson.objectid import ObjectId
from dotenv import load_dotenv
import os
import time
from ast import literal_eval

from LS_AMG_RAG import utils
from tqdm.notebook import tqdm

# Load environment variables from .env file
load_dotenv()

# The connection string is read from the environment rather than hard-coded.
uri = os.getenv("MULTIHOP_RAG_URI")
if not uri:
    # With no host given, MongoClient falls back to its default (localhost),
    # which would mask a missing or misnamed variable behind an unrelated
    # connection failure. Fail fast with an actionable message instead.
    raise RuntimeError(
        "MULTIHOP_RAG_URI is not set; add it to your environment or .env file."
    )

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # Best-effort check: report the failure but let the cell finish so the
    # handles below are still defined.
    print(e)

# Handles used by the rest of the notebook.
db = client['multihop-rag']
corpus = db['corpus']
metadata = db['metadata']

Pinged your deployment. You successfully connected to MongoDB!


In [None]:
# Look up a single corpus document by its `_id`.
corpus.find_one({'_id': ObjectId('6610e0ead4e973cea8c7f04c')})

In [2]:
# Read the query set from the working directory, then preview the first rows.
queries = pd.read_csv(filepath_or_buffer="multi_hop_rag_queries.csv")
queries.head(n=5)

Unnamed: 0,query,question_type,answer,evidence_list
0,Who is the individual associated with the cryp...,inference_query,Sam Bankman-Fried,"[{'author': 'Elizabeth Lopatto', 'category': '..."
1,Which individual is implicated in both inflati...,inference_query,Donald Trump,"[{'author': 'Michael R. Sisak, The Associated ..."
2,Who is the figure associated with generative A...,inference_query,Sam Altman,"[{'author': ""Matt O'Brien, The Associated Pres..."
3,Do the TechCrunch article on software companie...,comparison_query,Yes,"[{'author': 'Christine Hall', 'category': 'tec..."
4,Which online betting platform provides a welco...,inference_query,Caesars Sportsbook,"[{'author': 'CBS Sports Staff', 'category': 's..."


In [3]:
# Show every field of the row labelled 0. `evidence_list` is passed through
# literal_eval so it prints as parsed Python objects rather than raw text.
print(f"Query: {queries.at[0, 'query']}")
print(f"Question Type: {queries.at[0, 'question_type']}")
print(f"Answer: {queries.at[0, 'answer']}")
print(f"Evidence List: {literal_eval(queries.at[0, 'evidence_list'])}")

Query: Who is the individual associated with the cryptocurrency industry facing a criminal trial on fraud and conspiracy charges, as reported by both The Verge and TechCrunch, and is accused by prosecutors of committing fraud for personal gain?
Question Type: inference_query
Answer: Sam Bankman-Fried
Evidence List: [{'author': 'Elizabeth Lopatto', 'category': 'technology', 'fact': 'Before his fall, Bankman-Fried made himself out to be the Good Boy of crypto — the trustworthy face of a sometimes-shady industry.', 'published_at': '2023-09-28T12:00:00+00:00', 'source': 'The Verge', 'title': 'The FTX trial is bigger than Sam Bankman-Fried', 'url': 'https://www.theverge.com/2023/9/28/23893269/ftx-sam-bankman-fried-trial-evidence-crypto'}, {'author': 'Jacquelyn Melinek', 'category': 'technology', 'fact': 'The highly anticipated criminal trial for Sam Bankman-Fried, former CEO of bankrupt crypto exchange FTX, started Tuesday to determine whether he’s guilty of seven counts of fraud and conspi

In [34]:
# Keep only the rows whose answer is exactly this string.
queries.loc[queries['answer'].eq('Sam Bankman-Fried')]

Unnamed: 0,query,question_type,answer,evidence_list
0,Who is the individual associated with the cryp...,inference_query,Sam Bankman-Fried,"[{'author': 'Elizabeth Lopatto', 'category': '..."
5,Who is the individual alleged to have built a ...,inference_query,Sam Bankman-Fried,"[{'author': 'Ben Weiss', 'category': 'business..."
61,"Which individual, associated with both the beg...",inference_query,Sam Bankman-Fried,"[{'author': 'Jacquelyn Melinek', 'category': '..."
64,Who is the individual facing a criminal trial ...,inference_query,Sam Bankman-Fried,"[{'author': 'Jacquelyn Melinek', 'category': '..."
78,Who is the individual associated with FTX whos...,inference_query,Sam Bankman-Fried,"[{'author': 'Ben Weiss', 'category': 'business..."
...,...,...,...,...
2504,Which individual is at the center of legal pro...,inference_query,Sam Bankman-Fried,"[{'author': 'Ben Weiss', 'category': 'business..."
2539,Who is the individual implicated in instructin...,inference_query,Sam Bankman-Fried,"[{'author': 'Morgan Little', 'category': 'tech..."
2549,"Who is the individual who, after Judge Lewis K...",inference_query,Sam Bankman-Fried,"[{'author': 'Elizabeth Lopatto', 'category': '..."
2551,Who is the individual associated with using FT...,inference_query,Sam Bankman-Fried,"[{'author': 'Elizabeth Lopatto', 'category': '..."


In [46]:
# Per-column summary statistics for the loaded queries frame.
queries.describe()

Unnamed: 0,query,question_type,answer,evidence_list
count,2556,2556,2556,2556
unique,2556,4,108,1398
top,Who is the individual associated with the cryp...,comparison_query,Yes,[]
freq,1,856,782,301
