In [None]:
import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete

#########
# Required when running in a Jupyter notebook to handle the async nature of rag.insert()
import nest_asyncio
nest_asyncio.apply()

# Set your OpenAI API key (placeholder value; replace it before calling the API)
os.environ["OPENAI_API_KEY"] = "Insert API Key"

#########

## Graph Visualization

### - Version 1

In [4]:
import networkx as nx
from pyvis.network import Network

# Load the GraphML file
G = nx.read_graphml('./llama_3_quant_cs/graph_chunk_entity_relation.graphml')

# Create a Pyvis network
net = Network(notebook=True, height='750px', width='100%')

# Color map for nodes; 'default' is the fallback for categories not listed here
color_map = {
    'type1': 'darkblue',
    'type2': 'blue',
    'type3': 'green',
    'default': 'gray'
}

# Add nodes with color and size customization
for node_id, attrs in G.nodes(data=True):
    category = attrs.get('category', 'type1')
    color = color_map.get(category, color_map['default'])
    degree = G.degree(node_id)
    net.add_node(
        node_id,
        label=attrs.get('label', node_id),
        color=color,
        # Cube-root scaling of the degree. The exponent needs parentheses:
        # `degree**1/3` parses as `(degree**1)/3`, i.e. plain division by three.
        size=degree ** (1 / 3) + 15
    )

# Add edges with customization: edges with weight >= 5 are drawn thicker
for source, target, edge_data in G.edges(data=True):
    weight = edge_data.get('weight', 1)
    width = 2 if weight < 5 else 5
    net.add_edge(source, target, width=width)

# Set physics options for better layout
net.set_options("""
var options = {
  "physics": {
    "barnesHut": {
      "gravitationalConstant": -20000,
      "centralGravity": 0.3,
      "springLength": 95
    }
  },
  "edges": {
    "smooth": {
      "type": "cubicBezier",
      "forceDirection": "none"
    }
  }
}
""")

# Save and display the network
net.show('knowledge_graph.html')


knowledge_graph.html


### - Version 2

In [1]:
import networkx as nx
from pyvis.network import Network

# Read the stored knowledge graph from its GraphML export
G = nx.read_graphml('./gpt_book/graph_chunk_entity_relation.graphml')

# Build a notebook-mode Pyvis network and copy the NetworkX graph into it unchanged
net = Network(notebook=True)
net.from_nx(G)

# Write the interactive HTML page and show it
net.show('knowledge_graph.html')

knowledge_graph.html


## Auth to HuggingFace

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
token = "<Insert Token>"  # Replace with your actual token

# The tokenizer and the model are fetched with the same authentication and
# remote-code options, so the keyword arguments are shared.
load_kwargs = {"use_auth_token": token, "trust_remote_code": True}
tokenizer = AutoTokenizer.from_pretrained(model_name, **load_kwargs)
model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)

print("Successfully loaded the model.")

In [None]:
import openai
# Show which version of the openai package is installed in this kernel.
print(f"OpenAI library version: {openai.__version__}")

In [None]:
import requests
import os

## Process Input Document Files and Generate Knowledge Graphs

In [None]:
import torch
import os
import requests
import json
import time
import re
import psutil
import GPUtil
import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
# Placeholder OpenAI API key; replace it with a real key before running the cell.
os.environ["OPENAI_API_KEY"] = "<insert api>"


#########
# Required when running in a Jupyter notebook to handle the async nature of rag.insert()
import nest_asyncio
nest_asyncio.apply()

# Determine the device to use ("cuda" when torch reports an available GPU, else "cpu")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# List of BASE values to represent multiple datasets; each one maps to ./<BASE>.jsonl
BASE_LIST = ["cs", "legal", "mix", "agriculture"]

# Set your Hugging Face token
HF_TOKEN = "<HF_TOKEN>"  # Replace with your actual token

# Hugging Face helpers for LightRAG.
# NOTE(review): the ingestion loop in this cell builds its graph with
# gpt_4o_mini_complete, so these names are imported but not called here.
from lightrag.llm import hf_model_complete, hf_embedding
from transformers import AutoModel, AutoTokenizer
from lightrag.utils import EmbeddingFunc

# Model information (written into the insertion log)
MODEL_NAME = 'hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4'
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

# Define the function to preprocess the text
def preprocess_text(file_path, output_path, max_lines=100):
    """Write a cleaned copy of the first lines of ``file_path`` to ``output_path``.

    Only the first ``max_lines`` lines are processed. Each one has ``_<digits>``
    artifacts (such as ``_100``) removed together with a newline that directly
    follows them, is stripped of surrounding whitespace, and is written back
    followed by a single newline.

    Args:
        file_path: Path of the UTF-8 text file to read.
        output_path: Path of the UTF-8 text file to create or overwrite.
        max_lines: Maximum number of input lines to process.
    """
    artifact_pattern = re.compile(r'_\d+\n?')
    newline_run_pattern = re.compile(r'\n{3,}')

    with open(file_path, "r", encoding="utf-8") as infile, open(output_path, "w", encoding="utf-8") as outfile:
        for line_index, raw_line in enumerate(infile):
            # Stop early so large inputs stay quick to process
            if line_index >= max_lines:
                break

            without_artifacts = artifact_pattern.sub('', raw_line)
            # Kept from the original pipeline: collapse runs of three or more newlines
            collapsed = newline_run_pattern.sub('\n\n', without_artifacts)
            outfile.write(collapsed.strip() + "\n")


def clean_text(text):
    """Normalize newlines in a text field before it is inserted into the graph.

    Escaped newlines (a backslash followed by ``n``) become real newlines, any
    run of two or more newlines is collapsed to exactly two, and surrounding
    whitespace is stripped.

    Args:
        text: The string to clean. ``None`` (for example a JSON ``null``
            field) is treated as empty text.

    Returns:
        The cleaned string; ``""`` when ``text`` is ``None``.
    """
    if text is None:
        return ""

    # Replace escaped newlines (\\n) with actual newlines
    cleaned_text = text.replace('\\n', '\n')
    # Replace multiple newlines with a maximum of two. A second pass over
    # escaped newlines is unnecessary: none are left after the replace above.
    cleaned_text = re.sub(r'\n{2,}', '\n\n', cleaned_text)
    # Strip leading/trailing whitespace
    return cleaned_text.strip()

# Load tokenizer and embedding model, and move the model to GPU
#tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
#embedding_model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME).to(device)

# Iterate over each base in the list
# Upper bound on the number of documents ingested per dataset; each document is
# one line of the JSONL input file.
MAX_DOCUMENTS = 100

# Iterate over each base in the list
for BASE in BASE_LIST:
    # Define paths dynamically using BASE
    WORKING_DIR = f"./gpt_{BASE}"
    LOG_DIR = os.path.join(WORKING_DIR, "logs")
    LOG_FILE = os.path.join(LOG_DIR, f"insertion_log_gpt_{BASE}.txt")
    FILE = f"./{BASE}.jsonl"
    CLEANED_FILE = f"./cleaned_{BASE}.jsonl"

    # Check if the working directory exists, if not, create it
    if not os.path.exists(WORKING_DIR):
        os.makedirs(WORKING_DIR)
        print(f"Created working directory: {WORKING_DIR}")
    else:
        print(f"Working directory already exists: {WORKING_DIR}")

    # Preprocess the text to remove artifacts and keep only the first lines for testing purposes
    try:
        preprocess_text(FILE, CLEANED_FILE, max_lines=MAX_DOCUMENTS)
    except FileNotFoundError:
        print(f"File {FILE} not found. Skipping to the next BASE.")
        continue

    # Initialize LightRAG; the graph in this working directory is built with gpt_4o_mini_complete
    rag_gpt = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=gpt_4o_mini_complete
    )

    # Start time measurement before processing the file
    start_time = time.time()
    count = 0  # Number of documents inserted so far

    # Read the cleaned JSON lines file written by preprocess_text above
    try:
        with open(CLEANED_FILE, encoding="utf-8") as f:
            for line in f:
                # Stop after MAX_DOCUMENTS documents for faster processing
                if count >= MAX_DOCUMENTS:
                    break

                # Blank lines carry no document and would fail JSON parsing
                line = line.strip()
                if not line:
                    continue

                # Parse the JSON line
                data = json.loads(line)

                # Extract and clean the fields; a missing or null field becomes empty text
                input_text = clean_text(data.get("input") or "")
                context = clean_text(data.get("context") or "")

                # Construct the text to be inserted into the knowledge graph
                combined_text = f"Input: {input_text}\nContext: {context}"

                # Insert into LightRAG
                rag_gpt.insert(combined_text)

                # Increment the document counter
                count += 1
                print("Successfully read a full line")
                print(count)
    except FileNotFoundError:
        print(f"File {CLEANED_FILE} not found. Skipping to the next BASE.")
        continue

    # End time measurement after processing all lines
    end_time = time.time()

    # Calculate the elapsed time
    elapsed_time = end_time - start_time

    # Collect CPU and Memory usage stats
    cpu_usage = psutil.cpu_percent(interval=1)  # CPU usage percentage
    memory_info = psutil.virtual_memory()  # Memory stats
    memory_usage = memory_info.percent  # Memory usage percentage

    # Collect GPU usage stats (if applicable)
    gpu_status = "N/A"
    #if device == "cuda":
    #    gpus = GPUtil.getGPUs()
    #    if gpus:
    #        gpu = gpus[0]  # Assuming one GPU
    #        gpu_status = f"GPU ID: {gpu.id}, GPU Load: {gpu.load * 100:.2f}%, VRAM Used: {gpu.memoryUsed}MB / {gpu.memoryTotal}MB, Temperature: {gpu.temperature}°C"

    # Prepare log information.
    # NOTE(review): MODEL_NAME / EMBEDDING_MODEL_NAME / HF_TOKEN describe the Hugging Face
    # setup, while this loop inserts with gpt_4o_mini_complete - confirm what the log should report.
    log_data = f"""
        ==== Data Insertion Log ====
        Date: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}
        Base: {BASE}
        Device Used: {device}
        Working Directory: {WORKING_DIR}
        Model Name: {MODEL_NAME}
        Embedding Model Name: {EMBEDDING_MODEL_NAME}
        Hugging Face Token Used: {'Provided' if HF_TOKEN else 'Not Provided'}
        Total Time Taken for Insertion: {elapsed_time:.2f} seconds
        Total Number of Entries Processed: {count}
        CPU Usage: {cpu_usage:.2f}%
        Memory Usage: {memory_usage:.2f}%
        ===========================
        """

    # Print log data to console
    print(log_data)

    # The log directory is not created anywhere else; without it the append
    # below fails with FileNotFoundError.
    os.makedirs(LOG_DIR, exist_ok=True)

    # Write log data to a file
    with open(LOG_FILE, "a") as log_file:
        log_file.write(log_data)


In [None]:
import json
import os

def log_query_response(base, query, response, log_dir="./query_logs"):
    """Store ``response`` under ``query`` in the JSON log of one domain.

    The log lives at ``<log_dir>/<base>_responses.json`` and holds a single
    JSON object mapping each query to its latest response. An existing entry
    for the same query is overwritten.

    Args:
        base: Domain name used to build the log file name.
        query: The query text, used as the key.
        response: The value stored for the query; must be JSON serializable.
        log_dir: Directory holding the log files; created when missing.
    """
    log_file = os.path.join(log_dir, f"{base}_responses.json")

    # Create the log directory on first use
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Start from whatever was logged before, or from an empty mapping
    logged_data = {}
    if os.path.exists(log_file):
        with open(log_file, "r") as existing:
            logged_data = json.load(existing)

    logged_data[query] = response

    # Rewrite the whole file with the updated mapping
    with open(log_file, "w") as target:
        json.dump(logged_data, target, indent=4)

    print(f"Response for {base} logged successfully.")

In [None]:
import json
import os
from lightrag import LightRAG, QueryParam
import torch
import os
import json
import time
import re
import psutil
#import GPUtil  # For GPU usage stats
import os
from lightrag import LightRAG, QueryParam
from lightrag.llm import gpt_4o_mini_complete, gpt_4o_complete
#from sentence_transformers import SentenceTransformer

# Define the list of knowledge graph domains
BASE_LIST = ["cs", "legal", "mix", "agriculture"]

# Function to query the knowledge graph
def query_knowledge_graph(rag_model, query, graph_name):
    """Run ``query`` against a LightRAG instance and return its response.

    Args:
        rag_model: The LightRAG instance bound to the knowledge graph to query.
        query: The question text.
        graph_name: Label of the knowledge graph, used only in the progress message.

    Returns:
        Whatever ``rag_model.query`` returns for the question.
    """
    print(f"Querying with knowledge graph: {graph_name}")

    # LightRAG.query takes the question text as its first argument; QueryParam
    # only carries retrieval options and has no `query` field.
    return rag_model.query(query, param=QueryParam())

# Function to log responses
def log_query_response(base, query, model_name, response, log_dir="./query_logs"):
    """Store one model's ``response`` to ``query`` in the JSON log of a domain.

    The log lives at ``<log_dir>/<base>_responses.json`` and maps each query to
    a mapping of model name to response, so answers from several models to the
    same query sit side by side.

    Args:
        base: Domain name used to build the log file name.
        query: The query text, used as the outer key.
        model_name: Name of the model that produced the response (inner key).
        response: The value stored; must be JSON serializable.
        log_dir: Directory holding the log files; created when missing.
    """
    log_file = os.path.join(log_dir, f"{base}_responses.json")

    # Create the log directory on first use
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Start from whatever was logged before, or from an empty mapping
    logged_data = {}
    if os.path.exists(log_file):
        with open(log_file, "r") as existing:
            logged_data = json.load(existing)

    # Create the per-query mapping on first sight, then record this model's answer
    logged_data.setdefault(query, {})[model_name] = response

    # Rewrite the whole file with the updated mapping
    with open(log_file, "w") as target:
        json.dump(logged_data, target, indent=4)

    print(f"Response for {base} query '{query}' logged successfully.")

# Define queries and user profiles
# Persona associated with each domain key.
# NOTE(review): `users` is not referenced by the other code visible in this notebook.
users = {
    "agriculture": "Agricultural Researcher",
    "cs": "Data Scientist",
    "legal": "Corporate Legal Advisor",
    "mix": "Literary Scholar"
}

# Evaluation questions per domain. The keys are the same four names used in
# BASE_LIST; "legal" has five questions, the other domains have six.
queries = {
    "agriculture": [
        "What are the best practices for hive management?",
        "What are the most common challenges faced by beekeepers when managing hive health?",
        "How do crop production practices influence bee populations and hive productivity?",
        "What methods are most effective for disease prevention in hive management?",
        "What role does honey extraction and straining play in ensuring product quality and sustainability?",
        "How can agricultural practices be adapted to mitigate the impact of climate change on pollination?"
    ],
    "cs": [
        "Explain recommendation systems in machine learning.",
        "What are the key features of Spark Streaming that make it suitable for real-time analytics?",
        "How do classification algorithms differ when applied to recommendation systems versus real-time analytics?",
        "What challenges arise when processing large-scale data sets for recommendation systems, and how are they addressed?",
        "How can Spark's architecture optimize the handling of streaming data in real-time scenarios?",
        "What are the trade-offs between batch processing and real-time analytics in data science applications?"
    ],
    "legal": [
        "What is the primary purpose of a Restructuring Support Agreement, and how is it typically negotiated?",
        "What legal considerations are critical during corporate governance restructuring?",
        "How does regulatory compliance vary across industries during restructuring?",
        "What are the potential risks and mitigations in legal agreements related to restructuring?",
        "How do financial sector regulations influence corporate restructuring processes?"
    ],
    "mix": [
        "What is the historical impact of skepticism in philosophy?",
        "What is the main objection Mary has to the poem 'The Witch of Atlas,' and how does it reflect her literary perspective?",
        "How do the philosophical themes in 'The Witch of Atlas' relate to broader historical trends in literature?",
        "What cultural influences are evident in the poem 'The Witch of Atlas,' and how do they shape its interpretation?",
        "How do objections to specific works contribute to literary debates on artistic merit and intention?",
        "What role does biographical context play in interpreting objections to 'The Witch of Atlas'?"
    ]
}


# Answering function per knowledge-graph prefix. The key selects the working
# directory (./<key>_<BASE>); the value is the LLM function handed to LightRAG.
# NOTE(review): both entries currently use gpt_4o_mini_complete, so the
# "llama_3_quant" graphs are also queried through GPT - confirm this is intended.
models = {
    "llama_3_quant": gpt_4o_mini_complete,
    "gpt": gpt_4o_mini_complete # GPT answering function
}

# Iterate over each domain and each model
for BASE in BASE_LIST:
    for model_name, model_func in models.items():
        # Define working directory for the current knowledge graph
        WORKING_DIR = f"./{model_name}_{BASE}"

        # Initialize the LightRAG model with the corresponding knowledge graph
        #embedding_func = get_embedding_function(WORKING_DIR)
        rag_model = LightRAG(working_dir=WORKING_DIR,
                             llm_model_func=model_func)

        # Perform each query for the domain
        for query in queries[BASE]:
            response = query_knowledge_graph(rag_model, query, BASE)
            print(f"Query: {query}\nResponse ({model_name}): {response}\n")

            # Log the response under this model's name in ./query_logs/<BASE>_responses.json
            log_query_response(BASE, query, model_name, response)


In [None]:
import requests
import os

# Read the OpenAI credential from the environment and stop early when it is absent or empty
api_key = os.environ.get("OPENAI_API_KEY")

if api_key is None or api_key == "":
    raise ValueError("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")

# Function to call the API for evaluation
def evaluate_responses(query, answer1, answer2):
    """Ask the OpenAI chat completions API to compare two answers to one question.

    The request uses the "gpt-4o" model and the module-level ``api_key`` for
    authorization. The prompt asks for grades, a winner and an explanation per
    criterion (Comprehensiveness, Diversity, Empowerment) in a JSON layout.

    Args:
        query: The question both answers respond to.
        answer1: The first answer.
        answer2: The second answer.

    Returns:
        The message content of the first choice in the API response, as
        returned by the API (not parsed).

    Raises:
        ValueError: If the API responds with a status code other than 200.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }

    # Construct evaluation prompt
    instruction_prompt = """
    You are an expert tasked with evaluating two answers to the same question based on three criteria: Comprehensiveness, Diversity, and Empowerment.
    For each criterion, give a grade (1 to 5), choose the better answer, and explain why. Then, select an overall winner based on these criteria.
    """
    input_prompt = f"""
    Here is the question: {query}

    Here are the two answers:
    Answer 1: {answer1}
    Answer 2: {answer2}

    Evaluate both answers using the criteria and provide detailed explanations for each.
    Output your evaluation in the following JSON format:
    {{
      "Comprehensiveness": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Explanation]", "Grades": [1, 2]}},
      "Diversity": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Explanation]", "Grades": [1, 2]}},
      "Empowerment": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Explanation]", "Grades": [1, 2]}},
      "Overall Winner": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Summary]", "Grades": [1, 2]}}
    }}
    """

    # API request payload
    data = {
        "model": "gpt-4o",
        "messages": [
            {"role": "system", "content": instruction_prompt},
            {"role": "user", "content": input_prompt}
        ],
    }

    response = requests.post('https://api.openai.com/v1/chat/completions', headers=headers, json=data)

    # Check response
    if response.status_code == 200:
        evaluation = response.json()['choices'][0]['message']['content']
        return evaluation
    else:
        raise ValueError(f"API request failed with status code {response.status_code}: {response.text}")

In [None]:
import networkx as nx

# Function to calculate graph metrics
def calculate_graph_metrics(graph_path):
    """Load a GraphML file and compute basic size and connectivity metrics.

    Args:
        graph_path: Path to the GraphML file.

    Returns:
        A dict with ``num_nodes``, ``num_edges``, ``avg_degree`` (sum of node
        degrees divided by the node count, ``0.0`` for an empty graph) and
        ``connected_components`` (weakly connected components when the file
        describes a directed graph).
    """
    # Load the graph
    G = nx.read_graphml(graph_path)

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()

    # An empty graph has no degrees to average; avoid dividing by zero
    if num_nodes:
        avg_degree = sum(degree for _, degree in G.degree()) / num_nodes
    else:
        avg_degree = 0.0

    # number_connected_components is not implemented for directed graphs
    if G.is_directed():
        connected_components = nx.number_weakly_connected_components(G)
    else:
        connected_components = nx.number_connected_components(G)

    # Return metrics
    return {
        "num_nodes": num_nodes,
        "num_edges": num_edges,
        "avg_degree": avg_degree,
        "connected_components": connected_components
    }
BASE_LIST = ["cs", "legal", "mix", "agriculture"]
# Compare graphs for each domain; entries are keyed "<model>_<BASE>"
graph_metrics = {}
for BASE in BASE_LIST:
    for model in ["llama_3_quant", "gpt"]:
        graph_path = f"./{model}_{BASE}/graph_chunk_entity_relation.graphml"
        try:
            metrics = calculate_graph_metrics(graph_path)
            graph_metrics[f"{model}_{BASE}"] = metrics
            print(f"Metrics for {model}_{BASE}: {metrics}")
        except FileNotFoundError:
            print(f"Graph file not found for {model}_{BASE}")

# Example output visualization
import pandas as pd
import matplotlib.pyplot as plt

# Create a DataFrame from the metrics dictionary (one row per model/domain pair)
metrics_df = pd.DataFrame.from_dict(graph_metrics, orient='index')
metrics_df.index.name = 'Model_Domain'

# Display metrics as a table
print(metrics_df)

# Visualize the metrics. When no graph file was found the frame is empty and
# plotting it would raise, so the chart is skipped in that case.
if metrics_df.empty:
    print("No graph metrics were collected; skipping the plot.")
else:
    metrics_df.plot(kind='bar', figsize=(12, 6), subplots=True, layout=(2, 2), title="Graph Metrics")
    plt.tight_layout()
    plt.show()


In [None]:
import requests
import os

# Read the OpenAI credential from the environment and stop early when it is absent or empty
api_key = os.environ.get("OPENAI_API_KEY")

if api_key is None or api_key == "":
    raise ValueError("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")

# Function to call the API for evaluation
def evaluate_responses(query, answer1, answer2):
    """Have "gpt-4o" judge two answers to the same question.

    Sends one chat completions request authorized with the module-level
    ``api_key``. The system message sets the grading task and the user message
    carries the question, both answers and the requested JSON layout.

    Args:
        query: The question both answers respond to.
        answer1: The first answer.
        answer2: The second answer.

    Returns:
        The message content of the first choice in the API response, unparsed.

    Raises:
        ValueError: If the API responds with a status code other than 200.
    """
    request_headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}',
    }

    # Grading task for the system role
    system_message = """
    You are an expert tasked with evaluating two answers to the same question based on three criteria: Comprehensiveness, Diversity, and Empowerment.
    For each criterion, give a grade (1 to 5), choose the better answer, and explain why. Then, select an overall winner based on these criteria.
    """
    # Question, candidate answers and output layout for the user role
    user_message = f"""
    Here is the question: {query}

    Here are the two answers:
    Answer 1: {answer1}
    Answer 2: {answer2}

    Evaluate both answers using the criteria and provide detailed explanations for each.
    Output your evaluation in the following JSON format:
    {{
      "Comprehensiveness": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Explanation]", "Grades": [1, 2]}},
      "Diversity": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Explanation]", "Grades": [1, 2]}},
      "Empowerment": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Explanation]", "Grades": [1, 2]}},
      "Overall Winner": {{"Winner": "[Answer 1 or Answer 2]", "Explanation": "[Summary]", "Grades": [1, 2]}}
    }}
    """

    payload = {
        "model": "gpt-4o",
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message}
        ],
    }

    reply = requests.post('https://api.openai.com/v1/chat/completions', headers=request_headers, json=payload)

    # Anything but HTTP 200 is reported to the caller as an error
    if reply.status_code != 200:
        raise ValueError(f"API request failed with status code {reply.status_code}: {reply.text}")

    return reply.json()['choices'][0]['message']['content']


In [None]:
import os
import pandas as pd
from lightrag import LightRAG, QueryParam
# The LLM functions used by this cell are imported here so it does not depend on earlier cells
from lightrag.llm import gpt_4o_mini_complete, hf_model_complete
# EmbeddingFunc is defined in lightrag.utils, not in the package root
from lightrag.utils import EmbeddingFunc
from transformers import AutoTokenizer, AutoModel

# Queries to be used for each knowledge graph domain
# Evaluation questions per knowledge-graph domain. Each key is a domain name
# that is combined with a model prefix to locate the working directory;
# "legal" has five questions, the other domains have six.
queries = {
    "agriculture": [
        "What are the best practices for hive management?",
        "What are the most common challenges faced by beekeepers when managing hive health?",
        "How do crop production practices influence bee populations and hive productivity?",
        "What methods are most effective for disease prevention in hive management?",
        "What role does honey extraction and straining play in ensuring product quality and sustainability?",
        "How can agricultural practices be adapted to mitigate the impact of climate change on pollination?"
    ],
    "cs": [
        "Explain recommendation systems in machine learning.",
        "What are the key features of Spark Streaming that make it suitable for real-time analytics?",
        "How do classification algorithms differ when applied to recommendation systems versus real-time analytics?",
        "What challenges arise when processing large-scale data sets for recommendation systems, and how are they addressed?",
        "How can Spark's architecture optimize the handling of streaming data in real-time scenarios?",
        "What are the trade-offs between batch processing and real-time analytics in data science applications?"
    ],
    "legal": [
        "What is the primary purpose of a Restructuring Support Agreement, and how is it typically negotiated?",
        "What legal considerations are critical during corporate governance restructuring?",
        "How does regulatory compliance vary across industries during restructuring?",
        "What are the potential risks and mitigations in legal agreements related to restructuring?",
        "How do financial sector regulations influence corporate restructuring processes?"
    ],
    "mix": [
        "What is the historical impact of skepticism in philosophy?",
        "What is the main objection Mary has to the poem 'The Witch of Atlas,' and how does it reflect her literary perspective?",
        "How do the philosophical themes in 'The Witch of Atlas' relate to broader historical trends in literature?",
        "What cultural influences are evident in the poem 'The Witch of Atlas,' and how do they shape its interpretation?",
        "How do objections to specific works contribute to literary debates on artistic merit and intention?",
        "What role does biographical context play in interpreting objections to 'The Witch of Atlas'?"
    ]
}

# Models to be used
from lightrag.llm import gpt_4o_mini_complete, hf_embedding, hf_model_complete

# Per-model LightRAG settings, keyed by the prefix of the working directory.
models = {
    "llama_3_quant": {
        "llm_model_func": hf_model_complete,
        "llm_model_name": 'hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4',
        "embedding_func": EmbeddingFunc(
            embedding_dim=384,
            max_token_size=5000,
            # hf_embedding turns the texts into embedding vectors using the given
            # tokenizer and model. Calling the bare AutoModel (as before) returns
            # the raw model output object instead of embeddings.
            func=lambda texts: hf_embedding(
                texts,
                tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
                embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
            )
        )
    },
    "gpt": {
        "llm_model_func": gpt_4o_mini_complete,
    }
}

# Function to initialize LightRAG model for a given domain and model
def initialize_model(model_name, domain):
    """Create the LightRAG instance for one model and one domain.

    The working directory is ``./<model_name>_<domain>``. The settings come
    from the module-level ``models`` mapping; only the "llama_3_quant" entry
    contributes a model name and an embedding function in addition to the LLM
    function.

    Args:
        model_name: Key into ``models`` ("llama_3_quant" or "gpt").
        domain: Domain name appended to the working directory.

    Returns:
        The configured LightRAG instance.
    """
    working_dir = f"./{model_name}_{domain}"
    model_details = models[model_name]

    # Arguments common to every model
    rag_kwargs = {
        "working_dir": working_dir,
        "llm_model_func": model_details["llm_model_func"],
    }

    # Llama additionally needs its model name and an embedding function
    if model_name == "llama_3_quant":
        rag_kwargs["llm_model_name"] = model_details["llm_model_name"]
        rag_kwargs["embedding_func"] = model_details["embedding_func"]

    return LightRAG(**rag_kwargs)

# DataFrame to store the results
# Column order of the comparison table
columns = ["domain", "query", "answer_gpt", "answer_llama"]

# One record per (domain, query). The DataFrame is built once after the loop:
# DataFrame.append no longer exists in pandas 2.x.
response_records = []

# Iterate through each domain and query both models
for base, query_list in queries.items():
    # Initialize models for the given domain
    model_gpt = initialize_model("gpt", base)
    model_llama = initialize_model("llama_3_quant", base)

    # Perform each query for the current models and domain
    for query in query_list:
        # Query GPT model: the question text goes first, QueryParam only carries options
        response_gpt = model_gpt.query(query, param=QueryParam())

        # Llama querying is disabled for now; the column is filled in later
        response_llama = ""
        #response_llama = model_llama.query(query, param=QueryParam())

        response_records.append({
            "domain": base,
            "query": query,
            "answer_gpt": response_gpt,
            "answer_llama": response_llama
        })

# DataFrame to store the results
responses_df = pd.DataFrame(response_records, columns=columns)

# Save the DataFrame to a CSV file for easy comparison
output_file = "./responses_comparison.csv"
responses_df.to_csv(output_file, index=False)

print(f"All queries executed and responses saved successfully to {output_file}.")


## Iterate over list of domains for GPT Model and log responses

In [None]:
import os
import pandas as pd
from lightrag import LightRAG, QueryParam

# Queries to be used for each knowledge graph domain
# Evaluation questions per knowledge-graph domain. Each key is a domain name
# used to locate the ./gpt_<domain> working directory; "legal" has five
# questions, the other domains have six.
queries = {
    "agriculture": [
        "What are the best practices for hive management?",
        "What are the most common challenges faced by beekeepers when managing hive health?",
        "How do crop production practices influence bee populations and hive productivity?",
        "What methods are most effective for disease prevention in hive management?",
        "What role does honey extraction and straining play in ensuring product quality and sustainability?",
        "How can agricultural practices be adapted to mitigate the impact of climate change on pollination?"
    ],
    "cs": [
        "Explain recommendation systems in machine learning.",
        "What are the key features of Spark Streaming that make it suitable for real-time analytics?",
        "How do classification algorithms differ when applied to recommendation systems versus real-time analytics?",
        "What challenges arise when processing large-scale data sets for recommendation systems, and how are they addressed?",
        "How can Spark's architecture optimize the handling of streaming data in real-time scenarios?",
        "What are the trade-offs between batch processing and real-time analytics in data science applications?"
    ],
    "legal": [
        "What is the primary purpose of a Restructuring Support Agreement, and how is it typically negotiated?",
        "What legal considerations are critical during corporate governance restructuring?",
        "How does regulatory compliance vary across industries during restructuring?",
        "What are the potential risks and mitigations in legal agreements related to restructuring?",
        "How do financial sector regulations influence corporate restructuring processes?"
    ],
    "mix": [
        "What is the historical impact of skepticism in philosophy?",
        "What is the main objection Mary has to the poem 'The Witch of Atlas,' and how does it reflect her literary perspective?",
        "How do the philosophical themes in 'The Witch of Atlas' relate to broader historical trends in literature?",
        "What cultural influences are evident in the poem 'The Witch of Atlas,' and how do they shape its interpretation?",
        "How do objections to specific works contribute to literary debates on artistic merit and intention?",
        "What role does biographical context play in interpreting objections to 'The Witch of Atlas'?"
    ]
}

# Models to be used
gpt_model = dict(llm_model_func=gpt_4o_mini_complete)


def initialize_gpt_model(domain):
    """Create the LightRAG instance bound to the GPT-built graph of ``domain``.

    Args:
        domain: Domain name; the working directory is ``./gpt_<domain>``.

    Returns:
        A LightRAG instance using the LLM function stored in ``gpt_model``.
    """
    return LightRAG(working_dir=f"./gpt_{domain}", llm_model_func=gpt_model["llm_model_func"])

# DataFrame to store the results
# Column order of the comparison table
columns = ["domain", "query", "answer_gpt", "answer_llama"]

# One record per (domain, query). Building the DataFrame once after the loop
# avoids re-copying the whole table on every iteration.
response_records = []

# Iterate through each domain and query only the GPT model
for base, query_list in queries.items():
    # Initialize GPT model for the given domain
    model_gpt = initialize_gpt_model(base)

    # Perform each query for the current model and domain
    for query in query_list:
        # Query GPT model
        response_gpt = model_gpt.query(query=query)

        response_records.append({
            "domain": base,
            "query": query,
            "answer_gpt": response_gpt,
            "answer_llama": ""  # Leave Llama responses empty for now
        })

# DataFrame to store the results
responses_df = pd.DataFrame(response_records, columns=columns)

# Save the DataFrame to a CSV file for easy comparison
output_file = "./responses_comparison.csv"
responses_df.to_csv(output_file, index=False)

print(f"All queries executed and responses saved successfully to {output_file}.")


## Iterate over list of domains for Quantized Model and log responses

In [None]:
import os
import pandas as pd
from lightrag import LightRAG, QueryParam
# The LLM function used by this cell is imported here so it does not depend on earlier cells
from lightrag.llm import hf_model_complete
# EmbeddingFunc is defined in lightrag.utils, not in the package root
from lightrag.utils import EmbeddingFunc
from transformers import AutoTokenizer, AutoModel

# Load the DataFrame generated previously
input_file = "./responses_comparison.csv"

responses_df = pd.read_csv(input_file)

from lightrag.llm import hf_embedding, hf_model_complete

# Model to be used for Llama
llama_model = {
    "llm_model_func": hf_model_complete,
    "llm_model_name": 'hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4',
    "embedding_func": EmbeddingFunc(
        embedding_dim=384,
        max_token_size=5000,
        # hf_embedding turns the texts into embedding vectors using the given
        # tokenizer and model. Calling the bare AutoModel (as before) returns
        # the raw model output object instead of embeddings.
        func=lambda texts: hf_embedding(
            texts,
            tokenizer=AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2"),
            embed_model=AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
        )
    )
}

# Build the LightRAG instance that serves a single domain with the Llama settings
def initialize_llama_model(domain):
    """Create a LightRAG object for ``domain`` using the ``llama_model`` settings.

    Args:
        domain: Domain name; used as the suffix of the working directory
            (``./llama_3_quant_<domain>``).

    Returns:
        The LightRAG instance configured with the completion function, model
        name and embedding function stored in ``llama_model``.
    """
    lightrag_kwargs = {"working_dir": f"./llama_3_quant_{domain}"}
    for setting in ("llm_model_func", "llm_model_name", "embedding_func"):
        lightrag_kwargs[setting] = llama_model[setting]
    return LightRAG(**lightrag_kwargs)

# A column that is entirely empty in the CSV is parsed by read_csv as float64
# (all NaN). Cast it to object so string answers can be stored without relying
# on pandas' deprecated silent upcasting of incompatible values.
responses_df['answer_llama'] = responses_df['answer_llama'].astype(object)

# Build each domain's model once and reuse it for every query of that domain,
# instead of constructing a new LightRAG instance per row.
llama_models_by_domain = {}

# Iterate over the DataFrame and fill in the Llama responses
for index, row in responses_df.iterrows():
    # Only rows without an answer yet are queried, so the cell can be re-run
    if pd.isna(row['answer_llama']) or row['answer_llama'] == "":
        domain = row['domain']
        if domain not in llama_models_by_domain:
            llama_models_by_domain[domain] = initialize_llama_model(domain)
        model_llama = llama_models_by_domain[domain]

        # Query Llama model
        response_llama = model_llama.query(query=row['query'])

        # Update the DataFrame with the Llama response
        responses_df.at[index, 'answer_llama'] = response_llama

# Save the updated DataFrame to the CSV file
responses_df.to_csv(input_file, index=False)

print(f"All Llama queries executed and responses updated successfully in {input_file}.")


## Evaluation of generated document quality with GPT

In [None]:
import pandas as pd
import json
import openai

# Load the CSV file containing the responses
# NOTE(review): the earlier cells write and update "./responses_comparison.csv",
# while this cell reads "./responses_comparison_new.csv" — confirm that this file
# is produced elsewhere and that the different name is intentional.
responses_df = pd.read_csv("./responses_comparison_new.csv")

# Log to save the evaluations: the evaluation loop appends one dict per
# successfully parsed result (keys "domain", "query", "evaluation")
evaluation_log = []

# Function to generate the evaluation prompt
def generate_evaluation_prompt(query, answer1, answer2):
    """Build the evaluator prompt that compares two answers to one question.

    The prompt names three criteria (Comprehensiveness, Diversity, Empowerment)
    and asks for a JSON object with a winner, explanation and two grades per
    criterion plus an overall winner.

    Args:
        query: The question both answers respond to.
        answer1: Text inserted as "Answer 1".
        answer2: Text inserted as "Answer 2".

    Returns:
        The full prompt as a single string.
    """
    # The wording previously said "four criteria" / "our criteria" although only
    # three are defined; both sentences now state the actual count.
    return f"""
    Evaluation Instruction Prompt

    You are an expert tasked with evaluating two answers to the same question based on three criteria: Comprehensiveness, Diversity, and Empowerment.

    Goal:
    You will evaluate two answers to the same question based on three criteria: Comprehensiveness, Diversity, and Empowerment.

    Comprehensiveness: How much detail does the answer provide to cover all aspects and details of the question?
    Diversity: How varied and rich is the answer in providing different perspectives and insights on the question?
    Empowerment: How well does the answer help the reader understand and make informed judgments about the topic?

    For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories and also grade each answer.

    Evaluation Input Prompt

    Here is the question: {query}

    Here are the two answers: Answer 1: {answer1}; Answer 2: {answer2}

    Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.

    Output your evaluation in the following JSON format:

    {{
      "Comprehensiveness": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Provide explanation here]", "Grade1": X, "Grade2": Y }},
      "Diversity": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Provide explanation here]", "Grade1": X, "Grade2": Y }},
      "Empowerment": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Provide explanation here]", "Grade1": X, "Grade2": Y }},
      "Overall Winner": {{ "Winner": "[Answer 1 or Answer 2]", "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]", "Grade1": X, "Grade2": Y }}
    }}
    """

# Function to call the evaluation API; supports both the legacy (<1.0) and the
# current (>=1.0) openai Python SDK
def gpt_evaluate(prompt):
    """Send ``prompt`` to the chat-completions endpoint and return the reply text.

    Args:
        prompt: User message content (the evaluation prompt).

    Returns:
        The content of the first choice, or ``None`` if the call raised; the
        error is printed rather than propagated so a batch run can continue.
    """
    request = dict(
        model="gpt-4",  # or "gpt-3.5-turbo"
        messages=[
            {"role": "system", "content": "You are an expert evaluator."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=1000,  # Increased max_tokens for a detailed response
        temperature=0.7  # Adjust as per your needs for more control or creativity
    )
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed the module-level ChatCompletion class; requests
            # go through a client object and responses are attribute-based.
            response = openai.OpenAI().chat.completions.create(**request)
            return response.choices[0].message.content
        # Legacy SDK (<1.0): dict-style response
        response = openai.ChatCompletion.create(**request)
        return response['choices'][0]['message']['content']
    except Exception as e:
        print(f"Error during OpenAI API call: {str(e)}")
        return None

# Score every stored (GPT answer, Llama answer) pair with the evaluator model
for index, row in responses_df.iterrows():
    query, answer_gpt, answer_llama = row['query'], row['answer_gpt'], row['answer_llama']

    # Answer 1 in the prompt is the GPT answer, Answer 2 the Llama answer
    evaluation_prompt = generate_evaluation_prompt(query, answer_gpt, answer_llama)
    evaluation_result = gpt_evaluate(evaluation_prompt)

    # gpt_evaluate returns None when the API call failed; such rows are skipped
    if evaluation_result is not None:
        try:
            # The evaluator is asked to reply with a JSON document
            evaluation_data = json.loads(evaluation_result)
        except json.JSONDecodeError:
            print(f"Error decoding JSON for query at index {index}: {evaluation_result}")
        else:
            # Only successfully parsed evaluations reach the log
            log_entry = {
                "domain": row['domain'],
                "query": query,
                "evaluation": evaluation_data
            }
            evaluation_log.append(log_entry)

# Convert the evaluation log into a structured DataFrame for summary
summary_columns = ["domain", "query", "gpt_grade", "llama_grade", "overall_winner"]
evaluation_summary = []
for entry in evaluation_log:
    evaluation = entry['evaluation']

    # The evaluation is free-form model output parsed from JSON, so the
    # "Overall Winner" section or any of its keys may be missing or malformed.
    # Record None for those fields instead of aborting the whole summary.
    overall = evaluation.get('Overall Winner') if isinstance(evaluation, dict) else None
    if not isinstance(overall, dict):
        overall = {}

    # Grade1 belongs to Answer 1 (GPT), Grade2 to Answer 2 (Llama)
    evaluation_summary.append({
        "domain": entry['domain'],
        "query": entry['query'],
        "gpt_grade": overall.get('Grade1'),
        "llama_grade": overall.get('Grade2'),
        "overall_winner": overall.get('Winner')
    })

# Create DataFrame from evaluation summary (columns= keeps the header when empty)
summary_df = pd.DataFrame(evaluation_summary, columns=summary_columns)

# Save the evaluation summary to a CSV file
summary_output_file = "./evaluation_summary.csv"
summary_df.to_csv(summary_output_file, index=False)

# Save detailed log to a JSON file
detailed_log_file = "./evaluation_log.json"
with open(detailed_log_file, "w") as f:
    json.dump(evaluation_log, f, indent=4)

# The original statement was cut off mid f-string, which is a syntax error
print(f"Evaluation summary saved to {summary_output_file}. Detailed log saved to {detailed_log_file}.")


In [None]:
# Example for comparison: hand-written placeholder answers keyed by domain
responses = {
    "agriculture": {
        "llama_3": "Response from llama_3 for agriculture",
        "gpt": "Response from gpt for agriculture"
    },
    # Add other domain responses...
}

# NOTE(review): evaluate_responses is not defined in this part of the notebook, and
# queries[domain] is iterated as a list of queries by the GPT cell — confirm both
# before running this example.
for domain, answers in responses.items():
    # Llama output is passed as the first answer here, GPT output as the second
    query, answer1, answer2 = queries[domain], answers["llama_3"], answers["gpt"]

    print(f"Evaluating for domain: {domain}")
    evaluation = evaluate_responses(query, answer1, answer2)
    print(f"Evaluation Results:\n{evaluation}")
