In [1]:
!pip install langchain
!pip install openai
!pip install pypdf
!pip install faiss-cpu
!pip install cohere
!pip install tiktoken
!pip install langchainhub
!pip install unstructured_pytesseract
!pip install unstructured_inference
!pip install pinecone-client

Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/f9/a7/b298d2cfa3bb36b5c6edad82fe5681e53014d53850909e22214b2892b7eb/langchain-0.0.336-py3-none-any.whl.metadata
  Downloading langchain-0.0.336-py3-none-any.whl.metadata (16 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Obtaining dependency information for jsonpatch<2.0,>=1.33 from https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl.metadata
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langsmith<0.1.0,>=0.0.63 (from langchain)
  Obtaining dependency information for langsmith<0.1.0,>=0.0.63 from https://files.pythonhosted.org/packages/15/9e/e3153e6c94b73ff52ab7feed40801e15ed5a7c1e7f43422703469b4b42e6/langsmith-0.0.64-py3-none-any.whl.metadata
  Downloading langsmith-0.0.64-py3-none-any.whl.metadata (10 kB)
Downloading langchain-0.0.336-py3-non

In [3]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains import RetrievalQA
from langchain.agents import initialize_agent, Tool
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.agents.agent_types import AgentType
from langchain.memory import ConversationBufferMemory
from langchain.prompts import MessagesPlaceholder
from langchain.chat_models import ChatOpenAI

import pinecone
import os

In [21]:
# Register the service credentials / settings as process environment variables.
# Each value below is a placeholder string equal to the name of the secret it
# stands for; substitute real values before running the cells that follow.
os.environ.update(
    {
        "PINECONE_KEY": "PINECONE_API_KEY",
        "OPENAI_API_KEY": "OPENAI_API_KEY",
        "SERPAPI_API_KEY": "SERPAPI_API_KEY",
        "PINECONE_INDEX_NAME": "PINECONE_INDEX_NAME",
    }
)

In [5]:
# Embedding client handed to Load_Vector (from the tools cell) so the existing
# Pinecone index can be wrapped as a vector store.
# NOTE(review): presumably authenticates with the OPENAI_API_KEY placed in
# os.environ earlier — confirm.
embeddings = OpenAIEmbeddings()

In [9]:
# Chat model shared by the retrieval QA chain (Load_Vector) and the agent;
# the model name is fixed to "gpt-4-0613" and temperature is pinned to 0.
llm = ChatOpenAI(model_name="gpt-4-0613", temperature=0)

In [13]:
# Conversation buffer whose memory_key matches the "memory" variable name used
# by the MessagesPlaceholder in agent_kwargs.
# NOTE(review): no visible cell passes this object to initialize_agent, so it
# appears to be unused by the agent — confirm whether that is intended.
memory = ConversationBufferMemory(memory_key="memory", return_messages=True)

In [14]:
# Extra options handed to initialize_agent through its `agent_kwargs` argument:
# a MessagesPlaceholder bound to the "memory" variable and a custom prompt prefix.
# NOTE(review): `extra_prompt_messages` may only be honoured by some agent
# types — confirm it has an effect for the agent type used in
# get_initialized_agent.
agent_kwargs = dict(
    extra_prompt_messages=[MessagesPlaceholder(variable_name="memory")],
    prefix="You are an expert in corporate analysis. You have access to the following tools:",
)

In [10]:
def Load_Vector(api_key, index_name, llm, embeddings, environment="gcp-starter"):
    """Build a RetrievalQA chain backed by an existing Pinecone index.

    Args:
        api_key: Pinecone API key used to initialise the client.
        index_name: Name of an already-existing Pinecone index.
        llm: Language model that the QA chain uses to answer.
        embeddings: Embedding object passed to the Pinecone vector store.
        environment: Pinecone environment name. Defaults to "gcp-starter",
            the value that was previously hard-coded.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type`` with
        ``chain_type="stuff"`` and the index's retriever.

    Raises:
        ValueError: If ``api_key`` or ``index_name`` is empty/None, e.g.
            because the corresponding environment variable is not set and
            ``os.getenv`` returned None.
    """
    # Fail early with a clear message instead of an opaque client-side error.
    if not api_key:
        raise ValueError("Load_Vector: api_key is missing (is the Pinecone key set?)")
    if not index_name:
        raise ValueError("Load_Vector: index_name is missing (is the index name set?)")

    pinecone.init(api_key=api_key, environment=environment)

    # The vector store is looked up by name; the previously created
    # `pinecone.Index(index_name)` handle was never used and has been removed.
    vectorstore = Pinecone.from_existing_index(index_name, embeddings)

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
    )

In [12]:
# The agent's tool belt: a single tool that forwards its input to the `run`
# method of the Pinecone-backed RetrievalQA chain built by Load_Vector.
# The Pinecone key and index name are read from the process environment.
tools = [
    Tool(
        description="""
        Useful for when you need to answer questions about vector database.
        """,
        func=Load_Vector(
            api_key=os.environ.get("PINECONE_KEY"),
            index_name=os.environ.get("PINECONE_INDEX_NAME"),
            llm=llm,
            embeddings=embeddings,
        ).run,
        name="Corporate Analysis",
    )
]

In [15]:
def get_initialized_agent(llm, tools, agent_kwargs):
    """Create a zero-shot ReAct agent over ``tools`` driven by ``llm``.

    Args:
        llm: Language model passed to ``initialize_agent``.
        tools: Tools the agent is allowed to call.
        agent_kwargs: Extra keyword arguments forwarded to
            ``initialize_agent`` as ``agent_kwargs``.

    Returns:
        The object returned by ``initialize_agent`` (created with
        ``verbose=True``); the notebook calls ``.run`` on it.

    Raises:
        RuntimeError: If ``initialize_agent`` raises. The message keeps the
            "Failed to initialize agent: ..." prefix and the original error
            is attached as ``__cause__`` so the full traceback is preserved.
    """
    try:
        return initialize_agent(
            tools,
            llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
            agent_kwargs=agent_kwargs,
        )
    except Exception as e:
        # Chain explicitly so the root cause is not lost behind the wrapper.
        raise RuntimeError("Failed to initialize agent: " + str(e)) from e

In [None]:
# Build the agent and run a single query through it; "QUESTION" appears to be
# a placeholder for the actual prompt text.
get_initialized_agent(llm, tools, agent_kwargs).run("QUESTION")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m이 질문에 대답하려면 원익큐엔씨에 대한 정보를 찾아야 한다.
Action: Corporate Analysis
Action Input: 원익큐엔씨[0m