In [None]:
# Install autogen-agentchat
!pip install autogen-agentchat~=0.2
!pip install ratelimit
!pip install tiktoken
# Import libraries
import os
import json
import tiktoken
from autogen import ConversableAgent
from ratelimit import limits, sleep_and_retry
from concurrent.futures import ThreadPoolExecutor

# Set up OpenAI API key.
# setdefault keeps a key that is already exported in the environment instead of
# clobbering it with the placeholder; the placeholder is only used when no key
# is set at all.
os.environ.setdefault("OPENAI_API_KEY", "place_holder")  # Replace with your actual API key

# QPS and concurrency limits
MAX_QPS = 2  # maximum number of requests per second
MAX_CONCURRENT_REQUESTS = 2  # maximum number of concurrent requests

# Token limits for different models (used as the chunk size when splitting documents)
DEFAULT_TOKEN_LIMIT = 8192  # default token limit (8k)
LLAMA_TOKEN_LIMIT = 20480   # token limit used for Llama models (20k)
OPENAI_TOKEN_LIMIT = 20480  # token limit used for OpenAI models (20k)


# Load data from file
# NOTE(review): TRAIN_FILE_100 and DEV_FILE_100 currently point at the same
# "nq-dev" sample file — confirm whether a separate train sample was intended.
TRAIN_FILE_100 = 'data/v1.0-simplified_nq-dev-all_sample100_seed42.jsonl'
DEV_FILE_100 = 'data/v1.0-simplified_nq-dev-all_sample100_seed42.jsonl'
SINGLE_ENTRY_FILE= 'data/first_entry_sample.jsonl'

# Configure LLM provider
LLM_PROVIDER = "ollama"
OLLAMA_API_BASE = "http://localhost:11434/v1"
# Token budget matching the selected provider.
MAX_TOKENS = LLAMA_TOKEN_LIMIT if LLM_PROVIDER == "ollama" else OPENAI_TOKEN_LIMIT


In [None]:
def get_llm_config():
    """Return the ``llm_config`` dict for the provider selected by ``LLM_PROVIDER``.

    Returns:
        dict: ``{"config_list": [entry]}`` where ``entry`` holds the model name,
        API key, sampling temperature and (for Ollama) the base URL.

    Raises:
        ValueError: if ``LLM_PROVIDER`` is neither ``"ollama"`` nor ``"openai"``.
    """
    def _as_config(entry):
        # Every provider exposes exactly one endpoint entry.
        return {"config_list": [entry]}

    ollama_entry = {
        "model": "llama3.2:latest",
        "api_key": "ollama",
        "base_url": OLLAMA_API_BASE,
        "temperature": 0.7,
    }
    openai_entry = {
        "model": "gpt-4o",
        "api_key": os.environ.get("OPENAI_API_KEY"),
        "temperature": 0.7,
    }
    providers = {
        "ollama": _as_config(ollama_entry),
        "openai": _as_config(openai_entry),
    }

    selected = providers.get(LLM_PROVIDER)
    if selected is None:
        raise ValueError(f"Unsupported LLM provider: {LLM_PROVIDER}")
    return selected


In [None]:
# Data Loading
# Define functions to read and parse JSONL files
def read_first_lines(file_path, num_lines=5):
    """Print up to ``num_lines`` leading lines of a text file, whitespace-stripped.

    Args:
        file_path: path of the file to preview.
        num_lines: maximum number of lines to print; fewer are printed when the
            file is shorter (no padding with blank lines).
    """
    # Read as UTF-8, matching parse_jsonl / list_to_jsonl, instead of relying on
    # the platform default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for idx, line in enumerate(file):
            if idx >= num_lines:
                break
            print(line.strip())

def parse_jsonl(file_path, num_lines=None):
    """Parse a JSON Lines file into a list of Python objects.

    Blank / whitespace-only lines (e.g. a trailing empty line) are skipped
    instead of being handed to ``json.loads``.

    Args:
        file_path: path of the UTF-8 encoded ``.jsonl`` file.
        num_lines: maximum number of records to return; ``None`` reads the
            whole file, zero or a negative value yields an empty list.

    Returns:
        list: the decoded records, in file order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            if num_lines is not None and len(records) >= num_lines:
                break
            payload = line.strip()
            if not payload:
                # Tolerate empty separator/trailing lines.
                continue
            records.append(json.loads(payload))
    return records

def list_to_jsonl(data_list, output_file):
    """Write each element of ``data_list`` as one JSON line to ``output_file``.

    The file is (over)written as UTF-8 and non-ASCII characters are kept
    verbatim rather than escaped.
    """
    with open(output_file, 'w', encoding='utf-8') as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + '\n'
            for record in data_list
        )

# Define helper functions for processing NQ data
def get_nq_tokens(simplified_nq_example):
    """Split the example's ``document_text`` on single spaces and return the tokens.

    Raises:
        ValueError: if the example has no ``document_text`` field.
    """
    if "document_text" in simplified_nq_example:
        return simplified_nq_example["document_text"].split(" ")
    raise ValueError("`get_nq_tokens` should be called on a simplified NQ example that contains the `document_text` field.")

def get_short_answers(nq_example):
    """Collect the text of every short answer across all annotations.

    Each short answer is the span ``[start_token, end_token)`` of the
    document's space-separated tokens, re-joined with single spaces.
    Annotations with no short answers contribute nothing.
    """
    tokens = get_nq_tokens(nq_example)
    return [
        ' '.join(tokens[span['start_token']:span['end_token']])
        for annotation in nq_example['annotations']
        if annotation['short_answers']
        for span in annotation['short_answers']
    ]

def strip_end_punctuation(text):
    """Trim surrounding whitespace, then drop any trailing run of ``.!?,;:)"`` and whitespace."""
    trailing_marks = '.!?,;:)"'
    trimmed = text.strip()
    # Walk back from the end past every punctuation mark or whitespace character.
    cut = len(trimmed)
    while cut and (trimmed[cut - 1] in trailing_marks or trimmed[cut - 1].isspace()):
        cut -= 1
    return trimmed[:cut]

# Load at most 100 records from each sample file, plus the single-entry sample.
# NOTE(review): TRAIN_FILE_100 and DEV_FILE_100 are assigned the same path in
# the config cell, so json_top100 and json_top100_dev hold identical data —
# confirm this is intended.
json_top100 = parse_jsonl(TRAIN_FILE_100, num_lines=100)
json_top100_dev = parse_jsonl(DEV_FILE_100, num_lines=100)
json_single_entry = parse_jsonl(SINGLE_ENTRY_FILE, num_lines=1)

In [None]:
# Inspect the raw record(s) loaded from SINGLE_ENTRY_FILE.
print(json_single_entry)

In [None]:
# Records that process_data() is run on below (currently the single-entry sample).
target_Data_List = json_single_entry

# Implement RAG by defining a retrieval function
# def retrieve_relevant_data_with_answers(question, data_list, top_k=5):
#     # For simplicity, retrieve entries where the question text contains keywords from the input question
#     question_keywords = set(question.lower().split())
#     relevance_scores = []
#     for item in data_list:
#         item_keywords = set(item['question_text'].lower().split())
#         common_keywords = question_keywords.intersection(item_keywords)
#         score = len(common_keywords)
#         relevance_scores.append((score, item))
#     # Sort based on relevance score
#     relevance_scores.sort(key=lambda x: x[0], reverse=True)
#     # Get top_k relevant data
#     retrieved_data = [item for score, item in relevance_scores if score > 0][:top_k]
#     return retrieved_data

def process_data(data_list):
    """Reduce each raw record to the fields used downstream.

    Args:
        data_list: iterable of records with ``question_text``,
            ``document_text`` and ``example_id`` keys.

    Returns:
        list[dict]: one dict per record holding those three fields plus
        ``item_keywords``, the set of lower-cased whitespace-separated words
        of the question.
    """
    def _slim(record):
        question_text = record['question_text']
        return {
            'question_text': question_text,
            'document_text': record['document_text'],
            'example_id': record['example_id'],
            'item_keywords': set(question_text.lower().split()),
        }

    return [_slim(record) for record in data_list]


processed_data = process_data(target_Data_List)
# Pick the first processed record as the working example and expose its
# question and document text under their own names.
single_data = processed_data[0]
single_question = single_data['question_text']
single_retrieved_document = single_data['document_text']


In [None]:
# Create ExtractContentAgent
def create_extract_content_agent():
    """Build the ``ConversableAgent`` that extracts question-relevant passages.

    The agent is named ``ExtractContentAgent``. Its system prompt asks for
    word-for-word excerpts of the supplied context and a JSON-like reply with a
    single ``Relevant_Context`` key. It uses the config returned by
    ``get_llm_config()`` and is created with ``human_input_mode="NEVER"``.
    """
    return ConversableAgent(
        name="ExtractContentAgent",
        system_message="""
#Role:
You are an expert text extractor that identifies exact passages from a given context that directly answer or relate to a specific question.

# Instructions

	1.	**Extract Exact Text**: Provide the **exact subsequences** from the context that may **contain or directly answer the question**. The extracted text must be **word-for-word from the original context** without any changes.
	2.	**No Alterations**: Do not paraphrase, summarize, or add any additional information. Avoid introducing personal opinions or external knowledge.
	3.	**Multiple Passages**: If multiple parts of the context are relevant, include all relevant excerpts.
	4.	**Step-by-Step Analysis**: Carefully analyze the context step-by-step to identify all passages that are relevant to the question.
	5.	**Formatting**: Your output must follow the following format.
# Format
## Input Format

[Question]  Question Text
[Context] Document Text

## Output Format
{
    "Relevant_Context": "Relevant Context Place Holder"
}

# Examples
## Example 1
**Input:**
[Question] What is the capital city of Japan?
[Context] Tokyo is the capital city of Japan. It is one of the largest cities in the world.

**Output:**
{
    "Relevant_Context": "Tokyo is the capital city of Japan."
}

## Example 2
**Input:**
[Question] Which element has the atomic number 6?
[Context] Carbon has the atomic number 6 and is essential to all known life forms.

**Output:**
{
    "Relevant_Context": "Carbon has the atomic number 6 and is essential to all known life forms."
}
### Example 3

**Input:**
[Question] Who painted the Mona Lisa?
[Context] Leonardo da Vinci was a Renaissance artist known for masterpieces such as the Mona Lisa and The Last Supper.

**Output:**
{
    "Relevant_Context": "Leonardo da Vinci was a Renaissance artist known for masterpieces such as the Mona Lisa and The Last Supper."
}
""",
        llm_config=get_llm_config(),
        human_input_mode="NEVER",
    )


# Create JudgerAgent
def create_judger_agent():
    """Build the ``ConversableAgent`` that ranks extracted passages.

    The agent is named ``JudgerAgent``; its prompt asks for the top 3 most
    relevant passages (or the single best one) under a ``Top_Passages`` key.
    It uses ``get_llm_config()`` and never requests human input.
    """
    judger_prompt = """You are a judging agent who evaluates the relevance of extracted passages to the given question. 
Your task is to:
1. Assess each extracted passage and rate its relevance to the question.
2. Choose the top 3 most relevant passages.
3. If only one passage is clearly the best match, return that one passage.

Output Format:
- [Top_Passages] (or the single best passage if applicable).
{
    "Top_Passages": Top_Passages place holder
}
"""
    agent_settings = dict(
        name="JudgerAgent",
        system_message=judger_prompt,
        llm_config=get_llm_config(),
        human_input_mode="NEVER",
    )
    return ConversableAgent(**agent_settings)

In [None]:
def extract_content(agent, chunk, question):
    """Ask ``agent`` to extract the parts of ``chunk`` relevant to ``question``.

    The question and chunk are sent as a single user message in the
    ``[Question] ... / [Context] ...`` layout; the agent's reply is returned
    unmodified.
    """
    user_turn = {
        "content": f"[Question] {question}\n[Context] {chunk}",
        "role": "user",
    }
    return agent.generate_reply(messages=[user_turn])

def judge_relevance(agent, extracted_contents, question):
    """Ask ``agent`` to rank ``extracted_contents`` by relevance to ``question``.

    The prompt is echoed to stdout before being sent as a single user message;
    the agent's reply is returned unmodified.
    """
    prompt = (
        f"Evaluate the relevance of the following passages to the question: {question}\n"
        f"Passages: {extracted_contents}"
    )
    print("input_message: ", prompt)
    user_turn = {"content": prompt, "role": "user"}
    return agent.generate_reply(messages=[user_turn])

# def split_document(document, chunk_size):
#     tokens = document.split()
#     return [' '.join(tokens[i:i + chunk_size]) for i in range(0, len(tokens), chunk_size)]

# split documents into chunks by token limit
def split_document(document, model_name="gpt-4", max_tokens=DEFAULT_TOKEN_LIMIT):
    """Split ``document`` into consecutive chunks of at most ``max_tokens`` tokens.

    Args:
        document: text to split.
        model_name: model whose tokenizer (via ``get_tokenizer``) is used to
            count tokens.
        max_tokens: maximum number of tokens per chunk; must be positive.

    Returns:
        list[str]: decoded chunk texts in document order; empty when the
        document encodes to no tokens.

    Raises:
        ValueError: if ``max_tokens`` is not positive (a non-positive size
            would never advance through the token list).
    """
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens!r}")

    tokenizer = get_tokenizer(model_name)
    tokens = tokenizer.encode(document)

    # NOTE(review): chunks are cut on token boundaries, so a chunk edge may fall
    # inside a word — confirm this is acceptable for the extraction prompt.
    return [
        tokenizer.decode(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]

@sleep_and_retry
@limits(calls=MAX_QPS, period=1)
def extract_content_with_rate_limit(agent, chunk, question):
    """Rate-limited wrapper around ``extract_content``.

    Arguments and return value are forwarded unchanged. Throttling comes only
    from the decorators: ``limits`` is configured with ``calls=MAX_QPS`` and
    ``period=1``, and ``sleep_and_retry`` wraps the result.
    NOTE(review): per the ``ratelimit`` package this presumably caps calls at
    MAX_QPS per second and sleeps (rather than raising) when the cap is hit —
    confirm against the library documentation.
    """
    return extract_content(agent, chunk, question)

# Get the tiktoken tokenizer for an OpenAI model
def get_tokenizer(model_name="gpt-4"):
    """Return the tiktoken encoding for ``model_name``, with a safe fallback.

    Args:
        model_name: model name to look up with ``tiktoken.encoding_for_model``.

    Returns:
        The encoding for the model, or the ``cl100k_base`` encoding when the
        lookup fails (for example, a model name tiktoken does not know).
    """
    try:
        return tiktoken.encoding_for_model(model_name)
    except Exception as e:
        print(f"Error loading tokenizer for model {model_name}: {e}")
        # Fall back to the encoding used by GPT-3.5/GPT-4. get_encoding expects
        # an *encoding* name; passing a model name such as "gpt-3.5-turbo"
        # raises, which made the fallback itself fail.
        return tiktoken.get_encoding("cl100k_base")

In [None]:
# Inspect the processed record that will be passed to single_document_discussion().
print(single_data)

In [None]:
def _parse_top_passages(reply):
    """Normalise a judge reply (str, dict or None) into a list of passage strings."""
    # A chat-message style dict carries its text under "content".
    if isinstance(reply, dict) and "Top_Passages" not in reply:
        reply = reply.get("content")
    if reply is None:
        return []

    if not isinstance(reply, dict):
        text = str(reply).strip()
        if not text:
            return []
        # The prompt asks for a JSON object; try the outermost {...} span and
        # fall back to the raw text when it is not valid JSON.
        parsed = None
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            try:
                parsed = json.loads(text[start:end + 1])
            except json.JSONDecodeError:
                parsed = None
        if not isinstance(parsed, dict) or "Top_Passages" not in parsed:
            return [text]
        reply = parsed

    passages = reply["Top_Passages"]
    if passages is None:
        return []
    if isinstance(passages, (list, tuple)):
        return [str(passage) for passage in passages]
    return [str(passages)]


def single_document_discussion(data, max_turns: int = 3, model_name: str = "gpt-4o", max_tokens: int = DEFAULT_TOKEN_LIMIT):
    """Extract and rank the passages of one document that answer its question.

    Steps: split the document into token-bounded chunks, run one extraction
    agent per chunk (concurrently, rate limited), then ask the judger agent to
    pick the most relevant passages, print them and return them.

    Args:
        data: record with ``document_text`` and ``question_text`` keys.
        max_turns: currently unused; kept for interface compatibility.
        model_name: model whose tokenizer is used to split the document.
        max_tokens: maximum number of tokens per chunk.

    Returns:
        list[str]: the passages selected by the judger (empty when nothing
        could be extracted or the judger gave no reply).
    """
    document = data["document_text"]
    question = data["question_text"]

    # Step 1: split the document into chunks that fit the token budget.
    chunks = split_document(document, model_name=model_name, max_tokens=max_tokens)

    # Step 2: one extraction agent per chunk, executed with bounded concurrency.
    extract_agents = [create_extract_content_agent() for _ in chunks]

    extracted_contents = []
    with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as executor:
        futures = [
            executor.submit(extract_content_with_rate_limit, agent, chunk, question)
            for agent, chunk in zip(extract_agents, chunks)
        ]

        # Collect in submission order so passages stay in document order.
        for future in futures:
            try:
                result = future.result()
            except Exception as e:
                # Best effort: a failed chunk must not abort the whole document.
                print(f"Error during extraction: {e}")
                continue
            if result:
                extracted_contents.append(result)

    if not extracted_contents:
        # Nothing to judge; avoid asking the judger about an empty passage list.
        print("No passages were extracted from the document.")
        return []

    # Step 3: use JudgerAgent to rank the extracted passages.
    judger_agent = create_judger_agent()
    judge_reply = judge_relevance(judger_agent, extracted_contents, question)
    top_passages = _parse_top_passages(judge_reply)

    # Step 4: output answer
    if len(top_passages) == 1:
        print("Best Matched Passage:", top_passages[0])
    else:
        print("Top 3 Relevant Passages:")
        for passage in top_passages:
            print("\n\n\n-", passage)

    return top_passages

In [None]:
# Use the provider-aware MAX_TOKENS budget from the config cell instead of
# hard-coding the OpenAI limit, so the chunk size follows LLM_PROVIDER.
result = single_document_discussion(single_data, max_turns=3, model_name="gpt-4o", max_tokens=MAX_TOKENS)
print(result)