In [1]:
import os 
os.environ["OPENAI_API_KEY"] = ""
import openai 

In [2]:
client = openai.OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [60]:
from openai.types.chat.chat_completion import ChatCompletion


def make_request(content: str) -> ChatCompletion:
    response = None 
    try:
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content,
                }
            ],
            model="gpt-3.5-turbo",
        )
    except openai.APIError as e:
        #Handle API error here, e.g. retry or log
        print(f"OpenAI API returned an API Error: {e}")
    except openai.APIConnectionError as e:
        #Handle connection error here
        print(f"Failed to connect to OpenAI API: {e}")
    except openai.RateLimitError as e:
        #Handle rate limit error (we recommend using exponential backoff)
        print(f"OpenAI API request exceeded rate limit: {e}")
    return response

In [12]:
import glob 


files = glob.glob("knowledge/*.txt")
documents = []
for doc_id, file in enumerate(files):
    with open(file, "r", encoding="utf8") as f:
        data = f.read()
    documents.append({
        "doc_id": doc_id,
        "text": data
    })

In [14]:
print(documents[0]["text"])

ABSTRACT
Believable proxies of human behavior can empower interactive applications ranging from immersive environments to rehearsal spaces for interpersonal communication to prototyping tools. In this paper, we introduce generative agents: computational software agents that simulate believable human behavior. Generative agents wake up, cook breakfast, and head to work; artists paint, while authors write; they form opinions, notice each other, and initiate conversations; they remember and reflect on days past as they plan the next day. To enable generative agents, we describe an architecture that extends a large language model to store a complete record of the agent’s experiences using natural language, synthesize those memories over time into higher-level reflections, and retrieve them dynamically to plan behavior. We instantiate generative agents to populate an interactive sandbox environment inspired by The Sims, where end users can interact with a small town of twenty-five agents us

In [74]:
prompt = """Given the following paragraph which is extracted from a conference paper
### PARAGRAPH
{paragraph}

### INSTRUCTION
Assume that you are a college Professor preparing for an example
With the given paragraph. Generate ONLY {num_ques} questions and also provide the correct answers. 
NOTE:
- You must ask about the content of the paragraph and the answer should be extracted from the paragraph too 
- The questions must cover most content of the paragraph 

The output MUST be a list of Python dictionary where each record has the following structure: {{\"question\": str, \"answer\": str}}"""
# prompt = prompt.format(num_ques=2, paragraph=documents[0]["text"])
print(prompt)

Given the following paragraph which is extracted from a conference paper
### PARAGRAPH
{paragraph}

### INSTRUCTION
Assume that you are a college Professor preparing for an example
With the given paragraph. Generate ONLY {num_ques} questions and also provide the correct answers. 
NOTE:
- You must ask about the content of the paragraph and the answer should be extracted from the paragraph too 
- The questions must cover most content of the paragraph 

The output MUST be a list of Python dictionary where each record has the following structure: {{"question": str, "answer": str}}


In [58]:
result = make_request(content=prompt)

In [59]:
import pprint
import json 
pprint.pprint(json.loads(result.choices[0].message.content))

[{'answer': 'Planning is essential for agents because it ensures that their '
            'sequence of actions is coherent and believable, allowing them to '
            'behave consistently over time.',
  'question': 'Why is planning essential for agents?'},
 {'answer': 'The approach described starts by creating a plan that outlines '
            "the day's agenda in broad strokes. The plan is then recursively "
            'decomposed to create finer-grained actions by breaking it down '
            'into hour-long chunks of actions and then further decomposing '
            'those into 5-15 minute chunks.',
  'question': 'How are plans created in the approach described?'}]


In [37]:
documents.sort(key=lambda item: len(item["text"]), reverse=True)

In [75]:
from tqdm import tqdm 
import time 

num_questions = [3]*22 + [2]*17

for data, num_ques in tqdm(zip(documents, num_questions), desc="Generating dataset"):
    content = prompt.format(num_ques=num_ques, paragraph=data["text"])
    response = make_request(content=content)
    # data["response"] = response
    data["response_2"] = response
    try:
        record = json.loads(response.choices[0].message.content)
    except Exception as e:
        print("Cannot parse response")

    time.sleep(20)

Generating dataset: 39it [14:40, 22.57s/it]


In [81]:
train_dataset = list()
count = 0 
for data in documents:
    content = data["response_2"].choices[0].message.content
    try:
        record = json.loads(content)
        train_dataset.extend(record)
    except Exception as e:
        c = prompt.format(num_ques=num_ques, paragraph=data["text"])
        # print(e, content)
        print(data)
        count += 1


In [82]:
# system_prompt = """You are a helpful assistant that can answer any question from human about generative agents."""
# train_ds = []
# for data in train_dataset:
#     train_ds.append({
#         "message": [
#             {"role": "system", "content": system_prompt},
#             {"role": "user", "content": data["question"]},
#             {"role": "assistant", "content": data["answer"]}
#         ]
#     })

# train_ds

[{'message': [{'role': 'system',
    'content': 'You are a helpful assistant that can answer any question from human about generative agents.'},
   {'role': 'user',
    'content': 'What is the challenge in generating plausible behavior for agents?'},
   {'role': 'assistant',
    'content': 'Agents need to plan over a longer time horizon to ensure coherence and believability of their actions.'}]},
 {'message': [{'role': 'system',
    'content': 'You are a helpful assistant that can answer any question from human about generative agents.'},
   {'role': 'user',
    'content': 'How does planning help in ensuring coherent behavior for agents?'},
   {'role': 'assistant',
    'content': 'Planning describes a future sequence of actions for the agent, helping to keep their behavior consistent over time.'}]},
 {'message': [{'role': 'system',
    'content': 'You are a helpful assistant that can answer any question from human about generative agents.'},
   {'role': 'user',
    'content': 'How does

In [91]:
system_prompt = """You are a helpful assistant that can answer any question from human about generative agents. Please answer the below question\n\nQuestion: """
train_ds = []
for data in train_dataset:
    train_ds.append({
        "prompt": system_prompt + data["question"],
        "completion": "Answer: " + data["answer"]
    })

# print(train_ds[0]["completion"])

In [92]:
import jsonlines 

with jsonlines.open('train_data.jsonl', mode='w') as writer:
    writer.write_all(train_ds)

In [3]:
create_data_response = client.files.create(
    file=open("train.jsonl", "rb"),
    purpose="fine-tune"
)

In [4]:
create_data_response

FileObject(id='file-0CxdyuVeKNDWsS8zn55mAaRx', bytes=41990, created_at=1705163932, filename='train.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [5]:
from openai import OpenAI
client = OpenAI()

job_create_response = client.fine_tuning.jobs.create(
  training_file=create_data_response.id, 
  model="gpt-3.5-turbo-1106"
)

In [14]:
job_create_response

FineTuningJob(id='ftjob-IAJoOjpkNLJgs10oahva1tVG', created_at=1705163939, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-3.5-turbo-1106', object='fine_tuning.job', organization_id='org-G0vV5e5D0CmwKAucrz5lftdN', result_files=[], status='validating_files', trained_tokens=None, training_file='file-0CxdyuVeKNDWsS8zn55mAaRx', validation_file=None)

In [6]:
client.fine_tuning.jobs.list(limit=10)

SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-IAJoOjpkNLJgs10oahva1tVG', created_at=1705163939, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-1106', object='fine_tuning.job', organization_id='org-G0vV5e5D0CmwKAucrz5lftdN', result_files=[], status='running', trained_tokens=None, training_file='file-0CxdyuVeKNDWsS8zn55mAaRx', validation_file=None), FineTuningJob(id='ftjob-xo2KX2gnAVMAbeDpU0qlRF2l', created_at=1704651598, error=None, fine_tuned_model='ft:gpt-3.5-turbo-1106:aitomatic-inc:hiep:8eSCYpGj', finished_at=1704652217, hyperparameters=Hyperparameters(n_epochs=3, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-1106', object='fine_tuning.job', organization_id='org-G0vV5e5D0CmwKAucrz5lftdN', result_files=['file-yBvqPlsELQutqMJVa9DcM6ex'], status='succeeded', trained_tokens=433695, training_file='file-t60VnFlcONZck4vSi1eSuW65', validation_file

In [16]:
import json 

with open("train_log.json", "w", encoding="utf8") as jsf:
    json.dump(
        {
            "content": {
                "data_creation_response": create_data_response.model_dump(),
                "job_creation_response": job_create_response.model_dump()
            }
        },
        jsf 
    )

# create_data_response.model_dump_json()

In [13]:
create_data_response.model_dump()

{'id': 'file-0CxdyuVeKNDWsS8zn55mAaRx',
 'bytes': 41990,
 'created_at': 1705163932,
 'filename': 'train.jsonl',
 'object': 'file',
 'purpose': 'fine-tune',
 'status': 'processed',
 'status_details': None}

In [22]:
response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "How can agents perform inter communication ?",
        }
    ],
    model="ft:gpt-3.5-turbo-1106:aitomatic-inc:hiep:8eSCYpGj",
)

In [23]:
print(response.choices[0].message.content)

Agents can perform intercommunication through various means including:

1. Email: Sending messages, updates, and documents to other agents within the organization.

2. Messaging apps: Using apps like Slack, Microsoft Teams, or WhatsApp to have real-time communication and collaboration.

3. Phone calls: Having direct conversations with other agents to exchange information and discuss important matters.

4. Video conferencing: Using platforms like Zoom or Skype to hold virtual meetings and discussions with other agents.

5. In-person meetings: When possible, meeting face-to-face to have in-depth discussions and brainstorming sessions.

6. Internal social media platforms: Utilizing internal social networks for sharing ideas, updates, and connecting with colleagues.

7. File sharing: Using cloud-based services like Google Drive or Dropbox to share documents and files with other agents. 

Effective intercommunication is essential for agents to collaborate efficiently and work towards common