In [3]:
from pydantic import BaseModel, Field, PrivateAttr
from typing import Optional, Type, Dict, List
from tqdm import tqdm

from langchain.chat_models import ChatOpenAI
from langchain.tools import BaseTool, StructuredTool, Tool
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TextSplitter
from langchain.vectorstores import FAISS, Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain.agents import initialize_agent, AgentType
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun, CallbackManagerForToolRun
from pypdf import PdfReader
import json

import os.path as osp, os

In [4]:
# Argument schema handed to each per-article Tool via `args_schema`.
# NOTE(review): documented with `#` comments rather than a docstring because
# pydantic may surface a model docstring in the generated schema — confirm
# before converting this to a docstring.
class DocumentInput(BaseModel):
    # The question to ask about the article. Field() is called with no
    # arguments, so no default value is supplied for it.
    question: str = Field()
    
class DocumentInfoTool(BaseTool):
    """Tool that returns the stored metadata record for one article.

    The records are read once, when the tool is constructed, from
    ``docs/metadata.json`` and kept in a private attribute so that they are
    not treated as a model field. Lookups are keyed by the article's file
    name (for example ``Bawden_2001``).
    """

    # Annotated so the overrides are recognised as typed fields rather than
    # bare class attributes.
    name: str = "document_info"
    description: str = "Useful for when you need to find the author and publishing date of an article. The 'query' variable should be the filename of the article, formatted as Lastname_year; for example, Bawden_2001"
    _doc_metadata: Dict = PrivateAttr()

    def __init__(self):
        """Initialise the tool and load the metadata file into memory.

        Raises:
            FileNotFoundError: if ``docs/metadata.json`` does not exist.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        # Zero-argument super() follows the full MRO. The previous
        # super(BaseTool, self) form started the lookup *after* BaseTool and
        # would skip any initialisation BaseTool itself defines.
        super().__init__()
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(osp.join('docs', 'metadata.json'), encoding='utf-8') as f:
            self._doc_metadata = json.load(f)

    def _run(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> Dict:
        """Return the metadata record for ``query``.

        Args:
            query: Article file name, formatted as ``Lastname_year``.
            run_manager: Optional callback manager; unused here.

        Returns:
            The stored record when the key is known. Otherwise a dict with an
            ``error`` message and the list of known keys, so that the calling
            agent can correct itself instead of the run aborting on KeyError.
        """
        # Try the exact key first, then a whitespace-trimmed variant, since
        # the key is produced by the language model.
        for key in (query, query.strip()):
            if key in self._doc_metadata:
                return self._doc_metadata[key]
        return {
            "error": f"No metadata found for '{query}'.",
            "known_articles": sorted(self._doc_metadata),
        }

    async def _arun(self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None) -> Dict:
        """Async entry point; not supported by this tool.

        Declared ``async`` so that awaiting it behaves like any other
        coroutine-based tool and raises at await time.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("document_info does not support async")

In [5]:
def preprocess_doc(
        path: str,
        splitter_type: Type[TextSplitter] = RecursiveCharacterTextSplitter,
        **splitter_kwargs
    ) -> List["Document"]:
    """Load a PDF and split it into chunks with the requested splitter.

    Args:
        path: Location of the PDF file to load.
        splitter_type: Text splitter *class* (not an instance) to instantiate.
        **splitter_kwargs: Keyword arguments forwarded to ``splitter_type``,
            e.g. ``chunk_size``, ``chunk_overlap``, ``separators``.

    Returns:
        The list of document chunks produced by the splitter.
    """
    splitter = splitter_type(**splitter_kwargs)
    loader = PyPDFLoader(path)
    # Use load() rather than load_and_split(): the latter already chunks the
    # pages with a default splitter, so the configured splitter would run on
    # pre-chunked text and produce boundaries that do not reflect
    # splitter_kwargs alone.
    pages = loader.load()
    return splitter.split_documents(pages)

# Chat model shared by the retrieval chains and the agent; temperature is 0.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

In [10]:
# Discover the PDFs in the docs folder.
# - Match on the real extension (case-insensitive) instead of the last three
#   characters, which also accepted names such as "notpdf".
# - Sort, because os.listdir() returns entries in arbitrary order and the
#   order here determines the order of the agent's tools.
pdfs = sorted(
    osp.join('docs', doc)
    for doc in os.listdir('docs')
    if osp.splitext(doc)[1].lower() == '.pdf'
)

# One record per PDF: "name" is the file name without directory or extension
# (derived with os.path so it is correct on any platform), "path" is the
# location passed to the loader.
files = [
    {
        "name": osp.splitext(osp.basename(pdf))[0],
        "path": pdf
    }
    for pdf in pdfs
]

# Tool list for the agent: the metadata lookup tool, then one
# retrieval-QA tool per article.
tools = [DocumentInfoTool()]

# The embedding object does not depend on the file being processed, so it is
# built once here instead of once per loop iteration.
embedding = HuggingFaceEmbeddings()

for file in tqdm(files):
    # Each article gets its own on-disk vector store, keyed by file name.
    persist_directory = osp.join('docs', 'vectors', file["name"])
    if not osp.exists(persist_directory):
        # First run for this article: chunk, embed and persist.
        docs = preprocess_doc(
            file["path"],
            chunk_size=1000,
            chunk_overlap=150,
            separators = ["\n\n", "\n", " ", ""]
        )
        db = Chroma.from_documents(
            documents=docs,
            embedding=embedding,
            persist_directory=persist_directory
        )
        db.persist()
    else:
        # The directory already exists: reopen the stored vectors.
        db = Chroma(
            persist_directory = persist_directory,
            embedding_function=embedding
        )
    retriever = db.as_retriever()
    tools.append(Tool(
        args_schema=DocumentInput,
        name=file["name"],
        description=f"Used to answer questions about the contents of the article that corresponds to {file['name']}",
        func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),
    ))

100%|█████████████████████████████████████████████████████████████████████████████████████| 4/4 [03:59<00:00, 59.95s/it]


In [11]:
# Combine the tool list and the chat model into an OpenAI-functions agent.
# Alternative agent type: AgentType.ZERO_SHOT_REACT_DESCRIPTION
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)

In [12]:
# agent({"input": "Who are the authors of the two articles?"})

In [13]:
#agent({"input": "Do both Edward Swenson and Garth Bawden speak about Moche social change? If so, do they seem to be in agreement regarding the causal factors of that change?"})
# Inside the triple-quoted prompt a trailing backslash joins the next line
# with no separator, so a space must precede it; without one the first
# sentence reached the model as "speaksabout".
agent({"input": """
First, go through all of the articles you can find and check whether the author speaks \
about Moche social change. \

Second, if the author does both speak about social change, observe what they think \
caused such change. Make note of this.

Third, take your observations of how each author addresses Moche social change, and try
to organize them into groups based on the similarities of their analyses. For example,
if two authors both indicate that invasive forces caused the changes, then they might be \
grouped together, while an author that claims creative experimentation caused the changes \
should be put in a separate group.

"""})



[1m> Entering new  chain...[0m
[32;1m[1;3m
Invoking: `Bawden_2001` with `{'question': 'Does the author speak about Moche social change?'}`


[0m[33;1m[1;3m{'query': 'Does the author speak about Moche social change?', 'result': 'Yes, the author does speak about Moche social change. They discuss how Moche Valley leaders attempted to combine traditional features with new pan-Andean features in order to construct an effective ideological system. They also mention the extreme social alienation inflicted upon the non-elite population due to the transformation to urban life, which led to social stratification and economic deprivation. The author also discusses how commoners manipulated the new social context to establish their group identity in the face of these changes.'}[0m[32;1m[1;3m
Invoking: `Billman_2010` with `{'question': 'Does the author speak about Moche social change?'}`


[0m[38;5;200m[1;3m{'query': 'Does the author speak about Moche social change?', 'result': 'Yes,

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

[32;1m[1;3m
Invoking: `Dillehay_2001` with `{'question': 'Does the author speak about Moche social change?'}`


[0m

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

[36;1m[1;3m{'query': 'Does the author speak about Moche social change?', 'result': 'Yes, the author discusses social change in the late Moche culture. They mention that there were ideological changes and political conflict during this time, and that both elites and the wider subject population were affected by periods of widespread change. The responses of the people to stress and their interactions with ruling elites played a role in shaping the course of late Moche history.'}[0m

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

[32;1m[1;3m
Invoking: `Swenson_2007` with `{'question': 'Does the author speak about Moche social change?'}`


[0m

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

[33;1m[1;3m{'query': 'Does the author speak about Moche social change?', 'result': 'Yes, the author does speak about Moche social change. They discuss how environmental disruptions, such as droughts and dune encroachments, led to social and economic dislocation. They also mention the reconfiguration of Late Moche power relations and economic structures in specific northern valleys. The author argues that political factors played an equal or more important role than climate change in the reorganization of Moche polities.'}[0m

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method..
Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 2.0 seconds as it raised RateLimitError: Rate limit reached for default-gpt-3.5-turbo in organization org-Kn91yyzjZe8CpSTwoI7RID3Z on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit ht

[32;1m[1;3mBased on my analysis of the articles, all of the authors do speak about Moche social change. Here are their views on what caused this change:

- Bawden (2001): Bawden discusses how Moche Valley leaders attempted to combine traditional features with new pan-Andean features in order to construct an effective ideological system. They also mention the extreme social alienation inflicted upon the non-elite population due to the transformation to urban life, which led to social stratification and economic deprivation. Bawden also discusses how commoners manipulated the new social context to establish their group identity in the face of these changes.

- Billman (2010): Billman mentions that social change in Moche culture involved the transformation of rituals, politics, death, and daily life during the Middle Moche phases. This transformation required the blending of old and new beliefs and the mobilization of labor and goods on a large scale. The provision of resources for ruli

{'input': '\nFirst, go through all of the articles you can find and check whether the author speaksabout Moche social change. Make sure to take note of their views on what caused \\ \nthis change. \n\nSecond, if the author does both speak about social change, observe what they think caused such change. Make note of this.\n\nThird, take your observations of how each author addresses Moche social change, and try\nto organize them into groups based on the similarities of their analyses. For example,\nif two authors both indicate that invasive forces caused the changes, then they might be grouped together, while an author that claims creative experimentation caused the changes should be put in a separate group.\n\n',
 'output': "Based on my analysis of the articles, all of the authors do speak about Moche social change. Here are their views on what caused this change:\n\n- Bawden (2001): Bawden discusses how Moche Valley leaders attempted to combine traditional features with new pan-Andean