In [10]:
import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete

#########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
# import nest_asyncio
# nest_asyncio.apply()
#########

WORKING_DIR = r"C:\Users\Terry_Xu\Desktop\LightRAG_MultiHopRAG"


if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=gpt_4o_mini_complete  # Use gpt_4o_mini_complete LLM model
)

INFO:lightrag:Logger initialized for working directory: C:\Users\Terry_Xu\Desktop\LightRAG_MultiHopRAG
INFO:lightrag:Load KV json_doc_status_storage with 0 data
INFO:lightrag:Load KV llm_response_cache with 0 data


INFO:lightrag:Load KV full_docs with 1 data
INFO:lightrag:Load KV text_chunks with 1266 data
INFO:lightrag:Loaded graph from C:\Users\Terry_Xu\Desktop\LightRAG_MultiHopRAG\graph_chunk_entity_relation.graphml with 19481 nodes, 11123 edges
INFO:nano-vectordb:Load (19237, 1536) data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': 'C:\\Users\\Terry_Xu\\Desktop\\LightRAG_MultiHopRAG\\vdb_entities.json'} 19237 data
INFO:nano-vectordb:Load (11123, 1536) data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': 'C:\\Users\\Terry_Xu\\Desktop\\LightRAG_MultiHopRAG\\vdb_relationships.json'} 11123 data
INFO:nano-vectordb:Load (1266, 1536) data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': 'C:\\Users\\Terry_Xu\\Desktop\\LightRAG_MultiHopRAG\\vdb_chunks.json'} 1266 data
INFO:lightrag:Loaded document status storage with 1 records


In [11]:
import nest_asyncio
nest_asyncio.apply()
import tiktoken
def count_tokens(text: str, model: str = "gpt-4") -> int:
    """
    Count the number of tokens in a text string using tiktoken
    
    Args:
        text (str): The text to count tokens for
        model (str): The model to use for tokenization (default: "gpt-4")
        
    Returns:
        int: Number of tokens in the text
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Fallback to cl100k_base encoding if model not found
        encoding = tiktoken.get_encoding("cl100k_base")
        
    tokens = encoding.encode(text)
    return len(tokens)

In [12]:
context,retrieval_context=rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid",with_retrieval_context=True))
print(context)
print(count_tokens(retrieval_context))


INFO:lightrag:kw_prompt result:
INFO:lightrag:Using hybrid mode for query processing


{
  "high_level_keywords": ["Top themes", "Story analysis", "Literary themes"],
  "low_level_keywords": ["Character development", "Plot summary", "Conflict", "Setting", "Symbolism"]
}


INFO:lightrag:Local query uses 15 entites, 4 relations, 1 text units
INFO:lightrag:Global query uses 19 entites, 15 relations, 1 text units


Based on the provided entities and relationships, several prominent themes emerge that might define the various narratives referenced, especially in relation to specific works:

1. **Cultural Experiences**: The interaction between cultures is notably represented in *Venba*, where themes revolve around the immigrant experience and the significance of food in bridging cultures. Similarly, *Humanity* incorporates aspects of societal interactions in a creative gaming format, showcasing how experiences shape narratives.

2. **Systemic Issues**: In *Killers of the Flower Moon*, themes of systemic racism and white supremacy are critically explored. These themes shed light on historic injustices and the continual repercussions faced by marginalized communities, highlighting the importance of historical context in contemporary discussions about race relations.

3. **Identity and Independence**: The film *Poor Things* engages with themes of identity, particularly through a reanimated woman’s jou

In [3]:
import pandas as pd
musique_df=pd.read_parquet(r"D:\github\TGRAG_eval\MultiHop-RAG\MultiHopRAG_175_sampled.parquet")
musique_df.head()


Unnamed: 0,query,answer,question_type
1266,Did Polygon recommend Nintendo Switch games be...,Yes,temporal_query
1749,After The Independent - Life and Style's repor...,Yes,temporal_query
2050,Who is the individual facing a criminal trial ...,Sam Bankman-Fried,inference_query
393,"Who is the individual that, despite not being ...",Sam Bankman-Fried,inference_query
1544,Who is the individual that persuaded Adam Yedi...,Sam Bankman-Fried,inference_query


In [13]:
import time
from tqdm import tqdm

# Initialize lists to store retrieved contexts and responses
retrieved_contexts = []
responses = []
processing_time = []
retrieval_tokens = []

# Iterate over the first 175 samples of the musique_data dataframe
for index in tqdm(range(len(musique_df)), desc="Processing samples"):
    question = musique_df.iloc[index]['query']  # Get the question from the dataframe
    start_time = time.time()  # Start timing

    # Get the answer from TGRAG_search
    response, retrieved_context = rag.query(question, param=QueryParam(mode="hybrid",with_retrieval_context=True))

    # Calculate processing time
    elapsed_time = time.time() - start_time

    # Append the retrieved context and response to the lists
    retrieved_contexts.append(retrieved_context)
    responses.append(response)
    processing_time.append(elapsed_time)
    retrieval_tokens.append(count_tokens(retrieved_context))

# Add the retrieved contexts and responses to the musique_data dataframe
musique_df['retrieved_context'] = retrieved_contexts
musique_df['response'] = responses
musique_df['processing_time'] = processing_time
musique_df['retrieval_tokens'] = retrieval_tokens


In [5]:
output_file_path = r"C:\Users\Terry_Xu\Desktop\LightRAG_MultiHopRAG\output\LightRAG_MultiHopRAG_175sample_responses2.parquet"
musique_df.to_parquet(output_file_path, index=False)


In [6]:
average_processing_time = musique_df['processing_time'].mean()
display(f"Average processing time: {average_processing_time:.2f} seconds")



'Average processing time: 6.85 seconds'

In [7]:
from openai import OpenAI
from tqdm import tqdm  # Ensure tqdm is imported

# Initialize OpenAI client
client = OpenAI()

def get_gpt4_response(answer, response):
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant. Please evaluate if the response matches the reference answer."},
                {"role": "user", "content": f"Instructions\nYou will receive a ground truth answer (referred to as Answer) and a model-generated answer (referred to as Response). Your task is to compare the two and determine whether they align.\n\nNote: The ground truth answer may sometimes be embedded within the model-generated answer. You need to carefully analyze and discern whether they align.\nYour Output:\nIf the two answers align, respond with yes.\nIf they do not align, respond with no.\nIf you are very uncertain, respond with unclear.\nYour response should first include yes, no, or unclear, followed by an explanation.\n\nExample 1\nAnswer: Houston Rockets\nResponse: The basketball player who was drafted 18th overall in 2001 is Jason Collins, who was selected by the Houston Rockets.\nExpeted output: yes\n\nExample 2\nAnswer: no\nResponse: Yes, both Variety and The Advocate are LGBT-interest magazines. The Advocate is explicitly identified as an American LGBT-interest magazine, while Variety, although primarily known for its coverage of the entertainment industry, also addresses topics relevant to the LGBT community.\n Expected output: no\n\nInput Data Format\nGround Truth Answer: {answer}\nModel Generated Answer: {response}\n\nExpected Output\nyes, no, or unclear\nAn explanation of your choice.\n\nOutput:"}
            ],
            temperature=0,
            max_tokens=1000
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error getting GPT-4 response: {e}")
        return str(e)

# Wrap the loop with tqdm to monitor progress
for idx, row in tqdm(musique_df.iterrows(), total=len(musique_df), desc="Processing evaluations"):
    
    # Get evaluation from GPT-4
    evaluation = get_gpt4_response(row['answer'], row['response'])
    
    # Update dataframe with evaluation
    musique_df.at[idx, 'gpt4_evaluation'] = evaluation
    


Processing evaluations:  73%|███████▎  | 128/175 [05:59<01:27,  1.86s/it]INFO:openai._base_client:Retrying request to /chat/completions in 0.421238 seconds
Processing evaluations: 100%|██████████| 175/175 [07:36<00:00,  2.61s/it]


In [8]:
output_file_path = r"C:\Users\Terry_Xu\Desktop\LightRAG_MultiHopRAG\output\LightRAG_MultiHopRAG_175sample_responses_with_gpt4_evaluation2.parquet"
musique_df.to_parquet(output_file_path, index=False)

In [9]:
musique_df['evaluation_result'] = musique_df['gpt4_evaluation'].apply(lambda x: 1 if 'yes' in x.lower() else 0)

# Calculate the percentage of 1s
percentage_yes = (musique_df['evaluation_result'].sum() / len(musique_df)) * 100

print(f"Percentage of evaluations starting with 'yes': {percentage_yes:.2f}%")

Percentage of evaluations starting with 'yes': 81.14%


In [10]:
musique_df['retrieval_tokens'].describe()

count      175.000000
mean      8920.731429
std       1289.194911
min       5717.000000
25%       8231.500000
50%       9134.000000
75%       9896.500000
max      12006.000000
Name: retrieval_tokens, dtype: float64