In [9]:
# Load variables from a local .env file into the process environment
# (a later cell reads OPENAI_API_KEY via os.getenv).
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from llama_index.retrievers import BM25Retriever
from llama_index.llms import OpenAI
from llama_index import (
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    SimpleDirectoryReader,
)

# Tunables for this cell, named so they are easy to find and change.
CHUNK_SIZE = 256  # chunk_size passed to ServiceContext.from_defaults
RETRIEVER_TOP_K = 10  # similarity_top_k requested from each retriever

# Load every document found under ../data/.
documents = SimpleDirectoryReader("../data/").load_data()

# Split the documents into nodes with the service context's node parser.
llm = OpenAI(model="gpt-3.5-turbo")
service_context = ServiceContext.from_defaults(llm=llm, chunk_size=CHUNK_SIZE)
nodes = service_context.node_parser.get_nodes_from_documents(documents)

# Register the nodes in the docstore of a fresh storage context.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

# Build the vector index over the same nodes.
index = VectorStoreIndex(
    nodes,
    service_context=service_context,
    storage_context=storage_context,
)

# Two retrievers over the same node set: embedding similarity and BM25.
vector_retriever = index.as_retriever(similarity_top_k=RETRIEVER_TOP_K)
bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=RETRIEVER_TOP_K,
)

from llama_index.retrievers import BaseRetriever
class HybridRetriever(BaseRetriever):
    """Retriever that merges the results of a BM25 and a vector retriever.

    Both wrapped retrievers are queried and their results are concatenated
    (BM25 results first). Nodes that appear in both result lists are kept
    only once, identified by ``node.node_id``.
    """

    def __init__(self, vector_retriever, bm25_retriever):
        """Store the two retrievers to combine.

        Args:
            vector_retriever: retriever whose ``retrieve`` results are appended
                after the BM25 results.
            bm25_retriever: retriever whose ``retrieve`` results come first.
        """
        self.vector_retriever = vector_retriever
        self.bm25_retriever = bm25_retriever
        # Run the base-class initialiser so that any state BaseRetriever sets
        # up for itself (e.g. a callback manager in newer llama_index
        # releases) exists before retrieve() is called.
        super().__init__()

    def _retrieve(self, query, **kwargs):
        """Query both retrievers and return the de-duplicated union.

        Args:
            query: the query object forwarded unchanged to both retrievers.
            **kwargs: extra keyword arguments forwarded to both ``retrieve`` calls.

        Returns:
            A list of the retrieved results, BM25 hits first, where each
            ``node_id`` appears at most once (first occurrence wins).
        """
        bm25_nodes = self.bm25_retriever.retrieve(query, **kwargs)
        vector_nodes = self.vector_retriever.retrieve(query, **kwargs)

        # Combine the two lists, keeping only the first result per node_id.
        all_nodes = []
        node_ids = set()
        for n in bm25_nodes + vector_nodes:
            if n.node.node_id not in node_ids:
                all_nodes.append(n)
                node_ids.add(n.node.node_id)
        return all_nodes


In [4]:
from llama_index.indices.postprocessor import SentenceTransformerRerank
from llama_index import QueryBundle
from llama_index.schema import NodeWithScore
from typing import List


# Reranker applied to the hybrid retrieval results; keeps the top 4 nodes.
reranker = SentenceTransformerRerank(top_n=4, model="BAAI/bge-reranker-base")


def query_engine(query: str) -> List[str]:
    """Retrieve passages for ``query`` and return the reranked texts.

    The query is sent through a ``HybridRetriever`` built from the
    module-level ``vector_retriever`` and ``bm25_retriever``; the retrieved
    nodes are then passed to the module-level ``reranker``.

    Args:
        query: the question to look up, as plain text.

    Returns:
        The ``text`` of each node returned by the reranker, in the order the
        reranker returned them.
    """
    hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)
    # Named `candidates` so the module-level `nodes` list is not shadowed.
    candidates = hybrid_retriever.retrieve(query)
    reranked_nodes = reranker.postprocess_nodes(
        candidates,
        query_bundle=QueryBundle(query),
    )

    return [node.text for node in reranked_nodes]

In [5]:
# testing engine: smoke-test query_engine on a sample question and display the returned texts
query_engine("What is the introduction?") 

['Preprint. Under review.\nOriginal Prompt LongLLMLingua\nCompressed PromptInstruction: Answer the question based \non the given passages. Only give me the \nanswer and do not output any other \nwords. The following are given \npassages.\nDocument 1: Alberic  III of Dammartin\nAlberic  III of Dammartin  (Aubry de \nDammartin ) (c.\u20091138 – 19 September \n1200) was a French count and son of \nAlberic  II, Count of Dammartin , and \nClé mence  de Bar, daughter of Reginald \nI, Count of Bar…\nDocument 2:\n…\nDocument N: Pope Agapetus II\nPope Agapetus II (died 8 November 955) \nwas the bishop of Rome and ruler of the \nPapal States from 10 May 946 to his \ndeath.',
 ':ivalent What is to hold the lens the the star:\nWhy toason\na for behavior , or that the accepted of:ivalent of Perg What religion What country you the\nWhat does V:: Where I a goodboard for:: buyies on the the the: areter cookiespped with\ncres: theoe thated ofasticitations , as ‘ the rules to “: the three What do for an

In [6]:
import os
from autogen.agentchat import AssistantAgent

# Single model endpoint shared by both configs; the key is read from the environment.
config_list = [
    dict(model="gpt-3.5-turbo", api_key=os.getenv("OPENAI_API_KEY")),
]

# Schema of the one argument accepted by the `query_engine` function.
_query_parameters = dict(
    type="object",
    properties=dict(
        query=dict(
            type="string",
            description="Use a detailed plain text sentence command as the input to the tool.",
        ),
    ),
    required=["query"],
)

# Function spec that advertises `query_engine` to the model.
_query_engine_spec = dict(
    name="query_engine",
    description="An engine that answers queries on an scientific paper",
    parameters=_query_parameters,
)

# LLM settings that include the `query_engine` function spec.
query_llm_config = dict(
    functions=[_query_engine_spec],
    config_list=config_list,
    request_timeout=120,
    seed=1,
    temperature=0.5,
)

# LLM settings without any function spec.
llm_config = dict(
    config_list=config_list,
    request_timeout=120,
    seed=2,
    temperature=0.5,
)

# Titles of the papers discussed in this episode; interpolated into the prompt below.
paper_names = [
    'LongLLMLingua: ACCELERATING AND ENHANCING LLMS IN LONG CONTEXT SCENARIOS VIA PROMPT COMPRESSION'
]

# Persona prompt for the guest: answer from query_engine results, briefly and in simple terms.
interviewee_system_message = f"You are on a podcast show called 'Paper of the Day', being interviewed by the interviewer. To access your brain on the knowledge of the scientific paper, you should call the query_engine function to help you understand the paper. The papers you are talking about today are: {paper_names}. Your should always use the information from the query engine in your reply. Remember your interviewer and your audience has no domain knowledge so you need to keep the response simple and easy to digest. Keep your answer short, and more conversational."

# The guest agent uses the config that includes the query_engine function spec.
interviewee = AssistantAgent(
    name="interviewee",
    max_consecutive_auto_reply=10,
    llm_config=query_llm_config,
    system_message=interviewee_system_message,
)


# The host only asks questions and, per its prompt, knows nothing about the
# paper, so it uses the plain `llm_config` (no query_engine function spec)
# rather than `query_llm_config`. This also puts the otherwise unused
# `llm_config` to work.
# NOTE(review): the interviewer must still have `query_engine` in its
# registered function map so it can execute the interviewee's function calls —
# confirm against the installed autogen version.
interviewer = AssistantAgent(
    name="interviewer",
    system_message="You are the interviewer of today's show 'Paper of the Day', you will interview the interviewee on the scientific paper of the day. Your objective is to interview the interviewee to help the audience understand the content of the paper better. You do not know anything about the paper, make sure to keep asking questions in each response.",
    llm_config=llm_config,
    max_consecutive_auto_reply=10,
)

# Register the functions: give each agent its own function map that resolves
# the "query_engine" name to the Python callable.
for agent in (interviewee, interviewer):
    agent.register_function(function_map={"query_engine": query_engine})


In [7]:
# Start the interview: the interviewee sends the opening message to the interviewer.
# NOTE(review): the recorded output of this cell ends with an InvalidRequestError —
# the accumulated messages (4877 tokens) exceeded the model's 4097-token context
# limit. The conversation needs a larger-context model or shorter/fewer retrieved
# passages to run to completion.
interviewee.initiate_chat(
    interviewer,
    message="Thank you for having me on the show, what do you want to know about?",
)

[33minterviewee[0m (to interviewer):

Thank you for having me on the show, what do you want to know about?

--------------------------------------------------------------------------------
[33minterviewer[0m (to interviewee):

Thank you for being here! Today, I would like to discuss a scientific paper. Could you please provide me with the title or a brief summary of the paper you would like to discuss?

--------------------------------------------------------------------------------
[33minterviewee[0m (to interviewer):

Sure! The paper I would like to discuss is titled "LongLLMLingua: ACCELERATING AND ENHANCING LLMS IN LONG CONTEXT SCENARIOS VIA PROMPT COMPRESSION". This paper focuses on improving the performance of language models in long context scenarios by using a technique called prompt compression.

--------------------------------------------------------------------------------
[33minterviewer[0m (to interviewee):

That sounds like an interesting paper! To start our disc

InvalidRequestError: This model's maximum context length is 4097 tokens. However, your messages resulted in 4877 tokens (4823 in the messages, 54 in the functions). Please reduce the length of the messages or functions.