# 使用AutoGen、LangChain、RAG以及函数调用构建超级对话系统

## 1、安装环境及所需要的包

In [None]:
!pip install langchain , "pyautogen[retrievechat]" , PyPDF2 , faiss-gpu

## 2、导入相关包

In [None]:
import autogen
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter

## 3、配置AutoGen和API密钥

In [None]:
# AutoGen reads its model endpoints from a list of config dicts (config_list).
import os

# Single source of truth for the OpenAI API key; the PDF-embedding cell below
# references the name `openai_api`, so it must exist as a variable.
# The key is taken from the OPENAI_API_KEY environment variable when set;
# otherwise the original placeholder is kept and must be replaced by a real key.
openai_api = os.environ.get("OPENAI_API_KEY") or "openai_api"

config_list = [
    {
        "model": "gpt-4-1106-preview",
        "api_key": openai_api,
    },
]

llm_config_proxy = {
    "seed": 42,  # change the seed for different trials
    "temperature": 0,
    "config_list": config_list,
    # NOTE(review): the assistant config uses the key "timeout" instead of
    # "request_timeout" -- confirm which one the installed pyautogen expects.
    "request_timeout": 600,
}

## 4、读取PDF文件
- 上传一个PDF文件并进行处理，使用PyPDF2读取PDF文件；
- 使用langchain中的text splitter将文本分割成chunk；
- 使用OpenAIEmbeddings将文本chunk转换为embedding向量，然后使用FAISS将这些向量存储在向量数据库中；
- FAISS负责为这些向量建立索引。然后，这些向量可以用于各种应用，如相似性搜索。

In [None]:
# Read the PDF and concatenate the text of all pages into one string.
# Each page is extracted exactly once; pages that yield no text are skipped.
reader = PdfReader('/content/openchat.pdf')
page_texts = (page.extract_text() for page in reader.pages)
corpus = ''.join(text for text in page_texts if text)

# Split the text into chunks of 1000 characters with a 200-character overlap
# between consecutive chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_text(corpus)

# Embed the chunks and index them in a FAISS vector store.
# The key is taken from config_list, which is defined in the configuration
# cell, rather than from a separate variable that may not exist.
embeddings = OpenAIEmbeddings(openai_api_key=config_list[0]["api_key"])
vectors = FAISS.from_texts(chunks, embeddings)

## 5、会话检索
- 使用Langchain的ConversationalRetrievalChain对用户的Prompt进行相似性搜索；
- ConversationBufferMemory是一个简单的内存缓冲区，用于存储会话的历史记录。

In [None]:
# Conversational retrieval chain: an OpenAI LLM answers questions using chunks
# retrieved from the FAISS store, with the chat history kept in memory.
qa = ConversationalRetrievalChain.from_llm(
    # Pass the same API key that the embeddings use, so the chain does not
    # silently depend on the OPENAI_API_KEY environment variable being set.
    OpenAI(temperature=0, openai_api_key=config_list[0]["api_key"]),
    vectors.as_retriever(),  # use the FAISS vector store as the retriever
    memory=ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    ),  # buffer memory holding the conversation history
)

## 6、指定Assistant代理配置
AutoGen Agent支持对OpenAI模型的函数调用，但我们需要使用以下代码段指定函数：

In [None]:
# LLM configuration for the assistant agent, including the schema of the one
# function the model is allowed to call.
llm_config_assistant = {
    # Lower-case "seed", matching llm_config_proxy; the capitalised "Seed"
    # is a different dictionary key and is not the seed setting.
    "seed": 42,
    "temperature": 0,
    "functions": [
        {
            "name": "answer_PDF_question",
            "description": "Answer any PDF related questions",
            # JSON-schema description of the function's arguments.
            "parameters": {
                "type": "object",
                "properties": {
                    "question": {
                        "type": "string",
                        "description": "The question to ask in relation to PDF",
                    },
                },
                "required": ["question"],
            },
        },
    ],
    "config_list": config_list,
    "timeout": 120,
}

## 7、配置Assistant Agent
创建一个名为“assistant”的具有特定配置的自动化助理代理。我们使用该assistant阅读PDF并生成准确的答案。

In [None]:
# Assistant agent: answers using llm_config_assistant, which also declares the
# callable function schema. The system message is sent to the model verbatim.
assistant = autogen.AssistantAgent(
    name="assistant",
    system_message="""
    You are a helpful assistant, Answer the question                               
    based on the context. Keep the answer accurate.                               
    Respond "Unsure about answer" if not sure about                               
    the answer.
    """,
    llm_config=llm_config_assistant,
)

## 8、配置UserProxy代理
User Proxy代理包括一个独特的功能：function_map参数。此参数用于将函数调用的配置与实际函数本身链接起来，确保无缝集成和操作。

In [None]:
def answer_PDF_question(question: str) -> str:
    """Answer a question about the PDF via the conversational retrieval chain.

    This is the Python function behind the "answer_PDF_question" entry in
    ``function_map``; without it the cell fails with a NameError.

    Args:
        question: The question to ask in relation to the PDF.

    Returns:
        The text stored under the chain's "answer" output key.
    """
    response = qa({"question": question})
    return response["answer"]


# User proxy agent: runs without human input and executes the function calls
# requested by the assistant by looking them up in function_map.
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    human_input_mode="NEVER",
    max_consecutive_auto_reply=10,
    code_execution_config={"work_dir": "coding"},
    # Maps the function name declared in the assistant's config to the callable.
    function_map={
        "answer_PDF_question": answer_PDF_question,
    },
)

一旦设置了代理，该脚本就会启动用户和聊天机器人之间的对话。这是通过调用user_proxy对象上的initiate_chat方法来完成的。initiate_chat方法需要两个参数：充当聊天机器人的assistant实例和描述任务的文本消息。

In [None]:
# Start the conversation: the user proxy sends the task description below to
# the assistant agent. The message text is passed through unchanged.
user_proxy.initiate_chat(
    recipient=assistant,
    message="""
    Write a Openchat word blog post titled why openchat better than GPT3 that uses the exact keyword OpenChat 
    at least once every 100 words. The blog post should include an introduction, main body, and conclusion. 
    The conclusion should invite readers to leave a comment. The main body should be split into at least 4 
    different subsections.
    """,
)