In [59]:
pip install --upgrade openai

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [84]:
from openai import OpenAI
from dotenv import load_dotenv
import os

# Load the environment variables from the .env file
load_dotenv()

# Get the API key from the environment variable
api_key = os.getenv("OPENAI_API_KEY")

# Initialize the OpenAI client with the API key
client = OpenAI(api_key=api_key)


Step 1: Create an assistant 

In [61]:
# The code snippet creates a new "assistant" named "Math Tutor" 
# by calling the create method on the assistants object in the beta 
# section of the client, setting its instructions to "You are a personal 
# math tutor. Write and run code to answer math questions," assigning it 
# the tool "code_interpreter," and using the model "gpt-3.5-turbo-16k."

assistant = client.beta.assistants.create(
    name = "Math Tutor",
    instructions = "You are a personal math tutor. Write and run code to answer math questions",
    tools = [{"type": "code_interpreter"}],
    model = "gpt-3.5-turbo-1106"
)

Step 2: Create a thread

In [62]:
# The code snippet creates a new "thread" by calling the 
# create method on the threads object in the beta section 
# of the client, and then prints the details of the created thread.

thread = client.beta.threads.create()
print(thread)


Thread(id='thread_AirZE4XHZvoXBYc3XP6YUIK0', created_at=1720059696, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


Step 3: Add a message to a thread 

In [63]:
# The code snippet creates a new "message" within a 
# specific "thread" by calling the create method, using 
# the thread ID, assigning the role as "user," and setting 
# the content to "Solve this problem 3x + 11 = 14."

messages = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Solve this problem 3x + 11 = 14"
    )

Step 4: Run the assistant

In [64]:
# The code snippet creates a new "run" within a 
# specific "thread" by calling the create method, 
# using the thread ID and assistant ID to set it up.

run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

In [65]:
# The code prints the details of the newly created "run."

print(run)

Run(id='run_Ibcg0gwlhBE03vPLSThFOXPF', assistant_id='asst_6n6Kugctiu1gr7fQA4bxwotx', cancelled_at=None, completed_at=None, created_at=1720059697, expires_at=1720060297, failed_at=None, incomplete_details=None, instructions='You are a personal math tutor. Write and run code to answer math questions', last_error=None, max_completion_tokens=None, max_prompt_tokens=None, metadata={}, model='gpt-3.5-turbo-1106', object='thread.run', parallel_tool_calls=True, required_action=None, response_format='auto', started_at=None, status='queued', thread_id='thread_AirZE4XHZvoXBYc3XP6YUIK0', tool_choice='auto', tools=[CodeInterpreterTool(type='code_interpreter')], truncation_strategy=TruncationStrategy(type='auto', last_messages=None), usage=None, temperature=1.0, top_p=1.0, tool_resources={})


Step 5: Display the assistant's response

In [66]:
# The code snippet gets details of an existing 
# "run" within a specific "thread" by calling the 
# retrieve method, using the thread ID and run ID to find it.

run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
)

In [67]:
messages = client.beta.threads.messages.list(
    thread_id= thread.id
)

In [68]:
# The code snippet prints each message's role and content in 
# reverse order from the messages.data list.

for message in reversed(messages.data):
    print(message.role + ": " + message.content[0].text.value)

user: Solve this problem 3x + 11 = 14


Upload Files to OpenAI

In [69]:
file_path = "memgpt-paper.pdf"

with open(file_path, "rb") as file:
    uploaded_file = client.files.create(
        file=file,
        purpose='assistants'
    )


In [70]:
print(file)

<_io.BufferedReader name='memgpt-paper.pdf'>


Create the Assistant

In [71]:
# Step 2: Upload files and add them to a Vector Store
# https://platform.openai.com/docs/assistants/tools/file-search/step-2-upload-files-and-add-them-to-a-vector-store

vector_store = client.beta.vector_stores.create(name="memgpt_research")
 
# Ready the files for upload to OpenAI
file_paths = ["memgpt-paper.pdf"]
file_streams = [open(path, "rb") for path in file_paths]
 
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion.
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=file_streams
)
 
# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)

completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)


Create a thread

In [76]:
thread = client.beta.threads.create()
print(thread)

Thread(id='thread_iOl0FgCmwLmErLw8WZSOOA78', created_at=1720193793, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


Step 6: Create the message

In [79]:
# Step 5: Create a message within the created thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="How does memgpt allow LLMs to have unlimited context length?"
)

Run the assistant

In [80]:
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

Step 5: Run the assistants response

In [81]:
run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
)

In [82]:
messages = client.beta.threads.messages.list(
    thread_id= thread.id
)

In [83]:
for message in reversed(messages.data):
    print(message.role + ": " + message.content[0].text.value)

user: How does memgpt allow LLMs to have unlimited context length?
assistant: The MemGPT (Memory-Augmented GPT) architecture allows Language Models (LMs) to have unlimited context length by leveraging a memory-augmented mechanism. In traditional transformer-based LMs like GPT, the context length is limited by the fixed window of attention, which means that each token can only attend to a fixed number of tokens in the input sequence. This limits the ability of the model to capture long-range dependencies and context.

MemGPT overcomes this limitation by introducing a memory-augmented mechanism that enables the model to store and retrieve information from an external memory, effectively extending the context length beyond the fixed window of attention. This allows the model to access and utilize information from a much larger context, thereby enabling it to understand and generate longer and more coherent sequences of text.

The memory-augmented mechanism in MemGPT typically involves int