In [None]:
from embed import embed

In [None]:
import os
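# Set the OPENAI_API_KEY environment variable before creating the client (uncomment the line below, or export it in your shell)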
# os.environ['OPENAI_API_KEY']='your openai key'

In [1]:
from openai import OpenAI
import numpy as np

# Shared API client used by generate(). No key is passed explicitly here;
# presumably it is picked up from the OPENAI_API_KEY environment variable.
client = OpenAI()

# Running total of tokens reported by the API; generate() adds to it on every call.
tokens_used = 0

# Chat model used for every completion request (also used to select the tokenizer).
model_name = "gpt-4-1106-preview"

def generate(message, max_tokens, temperature=0.9):
    """Send ``message`` to the chat model and return the text of its reply.

    Args:
        message: User prompt; sent after a fixed system prompt.
        max_tokens: Upper bound on the number of tokens in the completion.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The content of the first choice in the response.

    Side effects:
        Adds the request's total token usage to the module-level
        ``tokens_used`` counter.
    """
    global tokens_used

    system_prompt = "You are a helpful, respectful and honest assistant. If you don't know the answer to a question, please don't share false information."
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]

    completion = client.chat.completions.create(
        model=model_name,
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Keep the running usage total up to date for the cost read-out.
    tokens_used += completion.usage.total_tokens

    first_choice = completion.choices[0]
    return first_choice.message.content

ModuleNotFoundError: No module named 'openai'

In [None]:
# Ask the model to invent a detailed backstory for the fictional "flubby cube".
message = "Generate a detailed description of a fictional item. This item is called a flubby cube. Design how a flubby cube is made, what it can do, why do people use it, its market value, how popular it is, do people like it? Do people hate it? "
# NOTE: despite the name, this variable holds the generated *item* description.
person_description = generate(message, 1024)
# Bare expression so the notebook displays the generated text as the cell output.
person_description

In [None]:
def get_question(knowledge, max_tokens=32, temperature=0.9):
    """Ask the model for one brief, third-person question about ``knowledge``.

    Args:
        knowledge: Text describing what is currently known.
        max_tokens: Completion token limit passed to ``generate``.
        temperature: Sampling temperature passed to ``generate``.

    Returns:
        Whatever ``generate`` returns for the filled-in prompt.
    """
    template = """
You have a piece of knowledge:
{knowledge}
Ask any question in third person that will help you learn more. You can ask about anything, be as creative as you want. The question should be as brief as possible. (question only)

Question:
""".strip()
    filled_prompt = template.format(knowledge=knowledge)
    return generate(filled_prompt, max_tokens, temperature)

def get_answer(question, knowledge, max_tokens=32, temperature=0.9):
    """Have the model answer ``question`` from ``knowledge`` in very few words.

    Args:
        question: The question to answer; it is quoted inside the prompt.
        knowledge: Background text placed at the start of the prompt.
        max_tokens: Completion token limit passed to ``generate``.
        temperature: Sampling temperature passed to ``generate``.

    Returns:
        Whatever ``generate`` returns for the filled-in prompt.
    """
    template = """
{knowledge}.
Answer the question "{question}" using at little words as you can (10 words max).
Answer:
""".strip()
    filled_prompt = template.format(knowledge=knowledge, question=question)
    return generate(filled_prompt, max_tokens, temperature)


def expand_knowledge(question, answer, knowledge, max_tokens=512, temperature=0.9):
    """Merge a new question/answer pair into the existing ``knowledge`` text.

    The model is instructed to condense both pieces into one short paragraph
    using strictly the supplied information.

    Args:
        question: The question that was asked.
        answer: The answer received for that question.
        knowledge: The knowledge text held before this answer.
        max_tokens: Completion token limit passed to ``generate``.
        temperature: Sampling temperature passed to ``generate``.

    Returns:
        Whatever ``generate`` returns for the filled-in prompt.
    """
    template = """
You have a piece of knowledge:
{knowledge}
Now you also get the answer to the question "{question}":
{answer}
Condense both pieces of knowledge into a short paragraph. Do not guess or make up any information. Use strictly the information you have been given.

Your new knowledge:
""".strip()
    filled_prompt = template.format(
        knowledge=knowledge,
        question=question,
        answer=answer,
    )
    return generate(filled_prompt, max_tokens, temperature)

def condense_knowledge(knowledge, max_tokens=256, temperature=0.9):
    """Ask the model to summarize ``knowledge`` into a short paragraph.

    Args:
        knowledge: The text to summarize.
        max_tokens: Completion token limit passed to ``generate``.
        temperature: Sampling temperature passed to ``generate``.

    Returns:
        Whatever ``generate`` returns for the filled-in prompt.
    """
    template = """
Summarize the following piece of knowledge into a short paragraph:
{knowledge}
""".strip()
    filled_prompt = template.format(knowledge=knowledge)
    return generate(filled_prompt, max_tokens, temperature)

In [None]:
import tiktoken

# Tokenizer looked up from the chat model's name, so counts are measured
# with the encoding tiktoken associates with that model.
encoding = tiktoken.encoding_for_model(model_name)

def get_token_count(text):
    """Return the number of tokens ``text`` encodes to under ``encoding``."""
    token_ids = encoding.encode(text)
    return len(token_ids)

In [None]:
def find_least_similar(vectors, new_vectors,):
    """Return the index of the candidate least similar to the existing vectors.

    Every candidate in ``new_vectors`` gets a total penalty: the sum, over each
    vector in ``vectors``, of ``-1 / (log(d) - 1e-6)`` where ``d`` is the dot
    product of the pair. For ``d`` in (0, 1] this penalty rises from ~0
    (unrelated) to 1e6 (identical), so near-duplicates are punished heavily.
    The candidate with the smallest total wins; ties go to the earliest one.

    The formula is only meaningful for ``d`` in (0, 1] -- it presumably expects
    unit-length embeddings (TODO confirm against ``embed``). Out-of-range dot
    products are handled explicitly instead of producing NaN or a sign flip:

    * ``d <= 0`` contributes 0 (treated as maximally dissimilar). Previously a
      negative ``d`` made the score NaN, so that candidate could never win.
    * ``d > 1`` is clamped to 1, so it is penalised like an exact duplicate
      rather than being rewarded with a negative penalty.

    Args:
        vectors: Iterable of already-selected embedding vectors (may be empty).
        new_vectors: Iterable of candidate embedding vectors.

    Returns:
        Index into ``new_vectors`` of the chosen candidate, or -1 when
        ``new_vectors`` is empty.
    """
    epsilon = 1e-6  # keeps the denominator non-zero when d == 1 (log(1) == 0)
    best_index = -1
    # An infinite sentinel: a finite one could be exceeded, since every
    # identical pair alone contributes 1e6 to the total.
    best_score = float("inf")

    for index, candidate in enumerate(new_vectors):
        total = 0.0
        for seen in vectors:
            similarity = np.dot(seen, candidate)
            if similarity <= 0:
                # log() is undefined here; count the pair as fully dissimilar.
                continue
            total += -1.0 / (np.log(min(similarity, 1.0)) - epsilon)
        if total < best_score:
            best_score = total
            best_index = index
    return best_index

In [None]:
# Starting knowledge for the two roles: the explorer only knows the item's
# name, while the mastermind holds the full generated description.
explorer_knowledge = "There's an item in from of you. It's a flubby cube. What can you do with it?"
mastermind_knowledge = person_description

# Tunable settings for the exploration loop.
ITER = 50                           # number of question/answer rounds
QUESTION_CANDIDATES = 5             # questions sampled per round (after the first)
QUESTION_MAX_LENGTH = 32            # token cap for each generated question
QUESTION_TEMPERATURE = 0.95
ANSWER_MAX_LENGTH = 64              # token cap for each answer
KNOWLEDGE_MAX_LENGTH = 512          # token cap for the merged knowledge paragraph
KNOWLEDGE_SUMMARY_MAX_LENGTH = 256  # token cap when the knowledge is condensed

questions = []
question_embeddings = []
answers = []

for i in range(ITER):
    if len(questions) == 0:
        # Nothing to diversify against yet, so a single question is enough.
        question = get_question(explorer_knowledge, QUESTION_MAX_LENGTH, QUESTION_TEMPERATURE)
        embedding = embed(question)
    else:
        # Sample several questions and keep the one least similar to all
        # questions asked so far.
        question_candidates = [get_question(
            explorer_knowledge, QUESTION_MAX_LENGTH, QUESTION_TEMPERATURE) for _ in range(QUESTION_CANDIDATES)]
        new_question_embeddings = [embed(candidate) for candidate in question_candidates]

        least_similar_index = find_least_similar(question_embeddings, new_question_embeddings)
        question = question_candidates[least_similar_index]
        embedding = new_question_embeddings[least_similar_index]

    questions.append(question)
    question_embeddings.append(embedding)

    # The limits below come from the settings above instead of repeating
    # literal numbers that could drift out of sync with them.
    answer = get_answer(question, mastermind_knowledge, ANSWER_MAX_LENGTH)
    answers.append(answer)
    new_explorer_knowledge = expand_knowledge(question, answer, explorer_knowledge, KNOWLEDGE_MAX_LENGTH)

    # The expansion is itself capped at KNOWLEDGE_MAX_LENGTH tokens, so a
    # strict ">" would practically never fire; reaching the cap suggests the
    # paragraph was cut off, so condense it in that case.
    if get_token_count(new_explorer_knowledge) >= KNOWLEDGE_MAX_LENGTH:
        new_explorer_knowledge = condense_knowledge(new_explorer_knowledge, KNOWLEDGE_SUMMARY_MAX_LENGTH)

    print("Iteration", i)
    print("Question:", question)
    print("Answer:", answer)
    print("New knowledge:", new_explorer_knowledge)
    # Rough estimate that prices every token at $0.01 per 1K tokens.
    print(f"Cost So Far:{tokens_used / 1000 / 100:.2f}$")

    explorer_knowledge = new_explorer_knowledge

In [None]:
import json

# Bundle the collected questions and answers into one record.
data = dict(questions=questions, answers=answers)

# Serialize first, then write the text out in a single call.
with open('data_cube.json', 'w') as f:
    f.write(json.dumps(data, indent=2))