In [None]:
from uuid import uuid4
from tqdm.auto import tqdm
import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI credentials used by the
# embedding and chat models further down — confirm.
load_dotenv()

# Load document

In [None]:
# Read the pre-processed paper text and split it into pages.
# The encoding is pinned because the text contains non-ASCII punctuation
# (typographic apostrophes) and the platform default is not always UTF-8.
# NOTE(review): assumes the file was written as UTF-8 — confirm against
# data_preparation.ipynb.
with open(os.path.join('data', 'generative_agent.txt'), 'r', encoding='utf-8') as f:
    texts = f.read()

# Pages are delimited by a line holding four dashes with a space on each side.
pages = texts.split('\n ---- \n')



# Load data into chunks
* Chunks are separated by `\n ---- \n` in the file `generative_agent.txt`.
* See the data pre-processing steps in `data_preparation.ipynb`.

In [None]:
# One record per non-empty page. `position` is the index within `pages`, so an
# empty page that gets skipped leaves a gap in the chunk / page numbering.
chunks = [
    {
        'id': str(uuid4()),
        'page_content': text,
        'chunk': position,
        'page_num': position + 1,
    }
    for position, text in enumerate(tqdm(pages))
    if text
]

100%|██████████| 23/23 [00:00<00:00, 78557.81it/s]


In [None]:
from langchain_community.embeddings import OpenAIEmbeddings

# Embedding client built with its default settings.
embeddings_model = OpenAIEmbeddings()

# Embeds the raw `pages` list (not the filtered `chunks` records); the result is
# later passed alongside `pages`, so it is expected to hold one vector per page
# in the same order.
embeddings = embeddings_model.embed_documents(pages)

In [69]:
import chromadb

# Connects to a Chroma server at localhost:8000.
chroma_client = chromadb.HttpClient(host='localhost', port=8000)

chroma_collection_name = "test"

# get_or_create_collection replaces the former try/bare-except fallback, which
# swallowed every exception type (including KeyboardInterrupt) before retrying
# with get_collection.
collection = chroma_client.get_or_create_collection(name=chroma_collection_name)

# upsert instead of add keeps this cell idempotent: ids are derived from the
# page position, so a re-run overwrites the same records rather than colliding
# with the ones written by an earlier run.
collection.upsert(
    documents=pages,
    metadatas=[{"source": "generative_agent.txt"} for _ in pages],
    ids=[f'page {i}' for i in range(len(pages))],
    embeddings=embeddings
)


In [121]:
# Sanity check: number of records in the collection vs. number of pages read.
collection.count(), len(pages)

(23, 23)

In [107]:
from langchain_community.vectorstores import Chroma

# LangChain wrapper around the "test" collection, reusing the existing Chroma
# client and the embedding model that produced the stored vectors.
chroma_collection_name = "test"
vectorstore = Chroma(
    client=chroma_client,
    collection_name=chroma_collection_name,
    embedding_function=embeddings_model,
)
# Example similarity search, left disabled:
# results = vectorstore.similarity_search(query="We required that our evaluators be in the U.S., fluent in English," \
#                                                 "and older than 18 years old. They were paid at the rate of $15.00" \
#                                                 "per hour [ 86], and provided consent by agreeing to a consent form" \
#                                                 "that was approved by our institution’s IRB. We recruited 100 evalu-" \
#                                                 "ators from Prolific, an online platform for recruiting study partic-" \
#                                                 "ipants [ 82], whose participation lasted around 30 minutes")

In [114]:
from langchain_community.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# Prompt template
from langchain_core.prompts import PromptTemplate

# Instructions for the answering step. The continuation lines are indented
# inside the triple-quoted string, so that leading whitespace is part of the
# prompt text. {context} and {question} are the two template placeholders.
template = """Use the following pieces of context to answer the question at the end.
            If you don't know the answer, just say that you don't know, don't try to make up an answer.
            Use three sentences maximum and keep the answer as detailed as possible.

            {context}

            Question: {question}

            Helpful Answer:"""

# Build the prompt object from the template string above.
prompt = PromptTemplate.from_template(template)

# RAG Chain
# Identifier of the fine-tuned chat model that writes the final answer.
fine_tuned_model = 'ft:gpt-3.5-turbo-1106::8dJxkf8w'

# The chat model and the retriever are independent of each other.
llm = ChatOpenAI(
    model_name=fine_tuned_model,
    temperature=0.9,
)
retriever = vectorstore.as_retriever()


def format_docs(docs):
    """Join the text of the given documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by one blank
        line; an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# Inputs for the prompt: the retrieved documents flattened to text as
# "context", and the incoming query passed through unchanged as "question".
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> chat model -> plain string
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

rag_chain.invoke("What is a generative agent?")

'A generative agent is an agent that can engage in interactions with other agents and react to changes in the environment. It produces behavior by synthesizing and retrieving relevant information to condition language model output. They make important inferences and maintain long-term coherence.  Without these mechanisms, large language models can output behavior, but the resulting agents may not react based on the agent’s past experiences, may not make important inferences, and may not maintain long-term coherence.  Challenges with long-term planning and coherence remain [ 18]\neven with today’s most performant models such as GPT-4.'

In [115]:
# Same question as the previous cell, printed so that newlines render instead
# of showing up escaped in the string repr. The model is sampled with
# temperature=0.9, so the wording differs from run to run.
print(rag_chain.invoke("What is a generative agent?"))

A generative agent is an agent that draws on generative models to simulate believable human behavior and produces believable simulacra of both individual and emergent group behavior, such as in the example of starting with only a single user-specified notion that one agent wants to throw a Valentine’s Day party and creating emergent social behaviors, such as spreading invitations to the party over the next two days, making new acquaintances, asking each other out on dates to the party, and coordinating to show up for the party together at the right time.


In [118]:
# Compound question: asks for a summary, the paper's weaknesses and its authors
# in a single query.
print(rag_chain.invoke("Please summarize the generative agent paper. What is it flaws? Who is the authors?"))

The paper introduces generative agents, interactive computational agents that simulate human behavior. The authors demonstrated the potential of generative agents by manifesting them as non-player characters in a Sims-style game world and simulating their lives in it. It evaluation of the generative agents’ behavior was limited to a relatively short timescale, and future research should aim to observe their behavior over an extended period to gain a more comprehensive understanding of their capabilities and limitations. The authors of the paper are Joon Sung Park, Joseph C. O’Brien, Carrie J. Cai, Meredith Ringel Morris, Percy Liang, and Michael S. Bernstein.
