In [1]:
import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete

#########
# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
# import nest_asyncio
# nest_asyncio.apply()
#########

WORKING_DIR = r"C:\Users\Terry_Xu\Desktop\LightRAG_HotpotQA"


if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)

rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=gpt_4o_mini_complete  # Use gpt_4o_mini_complete LLM model
)

  from .autonotebook import tqdm as notebook_tqdm
INFO:lightrag:Load KV json_doc_status_storage with 0 data
INFO:lightrag:Load KV llm_response_cache with 0 data
INFO:lightrag:Load KV full_docs with 1 data
INFO:lightrag:Load KV text_chunks with 1729 data
INFO:lightrag:Loaded graph from C:\Users\Terry_Xu\Desktop\LightRAG_HotpotQA\graph_chunk_entity_relation.graphml with 37799 nodes, 15142 edges
INFO:nano-vectordb:Load (36668, 1536) data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': 'C:\\Users\\Terry_Xu\\Desktop\\LightRAG_HotpotQA\\vdb_entities.json'} 36668 data
INFO:nano-vectordb:Load (15142, 1536) data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': 'C:\\Users\\Terry_Xu\\Desktop\\LightRAG_HotpotQA\\vdb_relationships.json'} 15142 data
INFO:nano-vectordb:Load (1729, 1536) data
INFO:nano-vectordb:Init {'embedding_dim': 1536, 'metric': 'cosine', 'storage_file': 'C:\\Users\\Terry_Xu\\Desktop\\LightRAG_HotpotQA\\vdb_chu

In [2]:
import nest_asyncio
nest_asyncio.apply()
import tiktoken
def count_tokens(text: str, model: str = "gpt-4") -> int:
    """
    Count the number of tokens in a text string using tiktoken
    
    Args:
        text (str): The text to count tokens for
        model (str): The model to use for tokenization (default: "gpt-4")
        
    Returns:
        int: Number of tokens in the text
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # Fallback to cl100k_base encoding if model not found
        encoding = tiktoken.get_encoding("cl100k_base")
        
    tokens = encoding.encode(text)
    return len(tokens)

In [3]:
# context,retrieval_context=rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid",with_retrieval_context=True))
# print(context)
# print(count_tokens(retrieval_context))


INFO:lightrag:Local query uses 15 entites, 2 relations, 1 text units
INFO:openai._base_client:Retrying request to /embeddings in 0.464971 seconds
INFO:lightrag:Global query uses 30 entites, 15 relations, 1 text units


The story of "Maggie, A Girl of the Streets" by Stephen Crane revolves around several prominent themes that are central to its narrative. Here are the key themes present in the story:

1. **Poverty and Social Realism**: One of the most significant themes is the impact of poverty on individuals and families. The narrative depicts the harsh realities faced by the characters living in the Bowery, highlighting how economic conditions can lead to desperation and tragic outcomes.

2. **Isolation and Loneliness**: Maggie experiences profound loneliness stemming from her impoverished upbringing. The theme of solitude is intertwined with her relationships, or lack thereof, reflecting how her environment contributes to her isolation.

3. **Gender and Femininity**: The story explores the challenges and societal expectations placed on women. Maggie's struggles in a male-dominated world illustrate the limited options available to women, particularly those from impoverished backgrounds.

4. **The In

In [4]:
import pandas as pd
musique_df=pd.read_parquet(r"D:\github\TGRAG_eval\HotpotQA\hotpotqa_question_answer_type_200.parquet")
musique_df.head()


Unnamed: 0,question,answer,type
1116,Out to Win is an American documentary film tha...,Houston Rockets,bridge
1368,Are both Variety and The Advocate LGBT-interes...,no,comparison
422,"Who is this American rapper, songwriter, recor...",Lil' Kim,bridge
413,In which year was this American country music ...,2000,bridge
451,What is the nationality of the actor who starr...,Scottish,bridge


In [None]:
import time
from tqdm import tqdm

# Initialize lists to store retrieved contexts and responses
retrieved_contexts = []
responses = []
processing_time = []
retrieval_tokens = []

# Iterate over the first 175 samples of the musique_data dataframe
for index in tqdm(range(len(musique_df)), desc="Processing samples"):
    question = musique_df.iloc[index]['question']  # Get the question from the dataframe
    start_time = time.time()  # Start timing

    # Get the answer from TGRAG_search
    response, retrieved_context = rag.query(question, param=QueryParam(mode="hybrid",with_retrieval_context=True))

    # Calculate processing time
    elapsed_time = time.time() - start_time

    # Append the retrieved context and response to the lists
    retrieved_contexts.append(retrieved_context)
    responses.append(response)
    processing_time.append(elapsed_time)
    retrieval_tokens.append(count_tokens(retrieved_context))

# Add the retrieved contexts and responses to the musique_data dataframe
musique_df['retrieved_context'] = retrieved_contexts
musique_df['response'] = responses
musique_df['processing_time'] = processing_time
musique_df['retrieval_tokens'] = retrieval_tokens


In [7]:
output_file_path = r"C:\Users\Terry_Xu\Desktop\LightRAG_HotpotQA\output\LightRAG_hotpotqa_200sample_responses2.parquet"
musique_df.to_parquet(output_file_path, index=False)


In [8]:
average_processing_time = musique_df['processing_time'].mean()
print(f"Average processing time: {average_processing_time:.2f} seconds")

Average processing time: 5.58 seconds


In [9]:
from openai import OpenAI
from tqdm import tqdm  # Ensure tqdm is imported

# Initialize OpenAI client
client = OpenAI()

def get_gpt4_response(answer, response):
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant. Please evaluate if the response matches the reference answer."},
                {"role": "user", "content": f"Instructions\nYou will receive a ground truth answer (referred to as Answer) and a model-generated answer (referred to as Response). Your task is to compare the two and determine whether they align.\n\nNote: The ground truth answer may sometimes be embedded within the model-generated answer. You need to carefully analyze and discern whether they align.\nYour Output:\nIf the two answers align, respond with yes.\nIf they do not align, respond with no.\nIf you are very uncertain, respond with unclear.\nYour response should first include yes, no, or unclear, followed by an explanation.\n\nExample 1\nAnswer: Houston Rockets\nResponse: The basketball player who was drafted 18th overall in 2001 is Jason Collins, who was selected by the Houston Rockets.\nExpeted output: yes\n\nExample 2\nAnswer: no\nResponse: Yes, both Variety and The Advocate are LGBT-interest magazines. The Advocate is explicitly identified as an American LGBT-interest magazine, while Variety, although primarily known for its coverage of the entertainment industry, also addresses topics relevant to the LGBT community.\n Expected output: no\n\nInput Data Format\nGround Truth Answer: {answer}\nModel Generated Answer: {response}\n\nExpected Output\nyes, no, or unclear\nAn explanation of your choice.\n\nOutput:"}
            ],
            temperature=0,
            max_tokens=1000
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error getting GPT-4 response: {e}")
        return str(e)

# Wrap the loop with tqdm to monitor progress
for idx, row in tqdm(musique_df.iterrows(), total=len(musique_df), desc="Processing evaluations"):
    
    # Get evaluation from GPT-4
    evaluation = get_gpt4_response(row['answer'], row['response'])
    
    # Update dataframe with evaluation
    musique_df.at[idx, 'gpt4_evaluation'] = evaluation
    


Processing evaluations: 100%|██████████| 200/200 [07:58<00:00,  2.39s/it]


In [10]:
output_file_path = r"C:\Users\Terry_Xu\Desktop\LightRAG_HotpotQA\output\LightRAG_hotpotqa_200sample_responses_with_gpt4_evaluation2.parquet"
musique_df.to_parquet(output_file_path, index=False)
musique_df['evaluation_result'] = musique_df['gpt4_evaluation'].apply(lambda x: 1 if 'yes' in x.lower() else 0)

# Calculate the percentage of 1s
percentage_yes = (musique_df['evaluation_result'].sum() / len(musique_df)) * 100

print(f"Percentage of evaluations starting with 'yes': {percentage_yes:.2f}%")

Percentage of evaluations starting with 'yes': 79.00%


In [11]:
musique_df['retrieval_tokens'].describe()

count      200.000000
mean      7176.735000
std        572.546006
min       5390.000000
25%       6945.750000
50%       7197.500000
75%       7495.500000
max      10087.000000
Name: retrieval_tokens, dtype: float64