In [1]:
import os
import json
import openai
import time

import components.data_utils as data_utils

In [4]:
dataset = "cifar10"
prompt_type = "important"

In [5]:
openai.api_key = '' # please add key here.
# or load from path.
# openai.api_key = open(os.path.join(os.path.expanduser("~"), ".openai_api_key"), "r").read()[:-1]

In [6]:
prompts = {
    "important" : "List the most important features for recognizing something as a \"goldfish\":\n\n-bright orange color\n-a small, round body\n-a long, flowing tail\n-a small mouth\n-orange fins\n\nList the most important features for recognizing something as a \"beerglass\":\n\n-a tall, cylindrical shape\n-clear or translucent color\n-opening at the top\n-a sturdy base\n-a handle\n\nList the most important features for recognizing something as a \"{}\":",
    "superclass" : "Give superclasses for the word \"tench\":\n\n-fish\n-vertebrate\n-animal\n\nGive superclasses for the word \"beer glass\":\n\n-glass\n-container\n-object\n\nGive superclasses for the word \"{}\":",
    "around" : "List the things most commonly seen around a \"tench\":\n\n- a pond\n-fish\n-a net\n-a rod\n-a reel\n-a hook\n-bait\n\nList the things most commonly seen around a \"beer glass\":\n\n- beer\n-a bar\n-a coaster\n-a napkin\n-a straw\n-a lime\n-a person\n\nList the things most commonly seen around a \"{}\":"
}

base_prompt = prompts[prompt_type]

In [5]:
base_prompt

'List the most important features for recognizing something as a "goldfish":\n\n-bright orange color\n-a small, round body\n-a long, flowing tail\n-a small mouth\n-orange fins\n\nList the most important features for recognizing something as a "beerglass":\n\n-a tall, cylindrical shape\n-clear or translucent color\n-opening at the top\n-a sturdy base\n-a handle\n\nList the most important features for recognizing something as a "{}":'

In [7]:
cls_file = data_utils.LABEL_FILES[dataset]
with open(cls_file, "r") as f:
    classes = f.read().split("\n")

In [8]:
classes

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [8]:
MAX_TOKEN_PER_MIN = 40
INTERVAL = 60.0 / MAX_TOKEN_PER_MIN
feature_dict = {}

for i, label in enumerate(classes):
    feature_dict[label] = set()
    print("\n", i, label)
    for _ in range(2):
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an assistant that provides visual descriptions of objects. Use only adjectives and nouns in your description. Ensure each description is unique, short, and direct. Do not use qualifiers like 'typically', 'generally', or similar words."},
                {"role": "user", "content": base_prompt.format(label)}],
            temperature=0.7,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0
        )
        #clean up responses
        features = response["choices"][0]["message"]["content"]
        features = features.split("\n-")
        features = [feat.replace("\n", "") for feat in features]
        features = [feat.strip() for feat in features]
        features = [feat for feat in features if len(feat)>0]
        features = [feat[1:] if feat[0] == '-' else feat for feat in features]
        features = set(features)
        feature_dict[label].update(features)
    time.sleep(INTERVAL)
    feature_dict[label] = sorted(list(feature_dict[label]) + [label])


 0 airplane


In [20]:
json_object = json.dumps(feature_dict, indent=4)
with open("data/concept_sets/gpt4_init/gpt4_{}_{}.json".format(dataset, prompt_type), "w") as outfile:
    outfile.write(json_object)

### Test GPT3 & GPT4

In [34]:
response = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are an assistant that provides visual descriptions of objects. Use only adjectives and nouns in your description. Ensure each description is unique, short, and direct. Do not use qualifiers like 'typically', 'generally', or similar words."},
        {"role": "user", "content": prompts["around"].format('ship')}],
    temperature=0.7,
    max_tokens=256,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0
)
#clean up responses
features = response["choices"][0]["message"]["content"]
features = features.split("\n-")
features = [feat.replace("\n", "") for feat in features]
features = [feat.strip() for feat in features]
features = [feat for feat in features if len(feat)>0]
features = set(features)
features

{'- water', 'anchor', 'cargo', 'dock', 'lifebuoy', 'rope', 'sailors'}

In [33]:

response = openai.Completion.create(
        model="text-davinci-002",
        prompt=prompts["around"].format('ship'),
        temperature=0.7,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
#clean up responses
features = response["choices"][0]["text"]
features = features.split("\n-")
features = [feat.replace("\n", "") for feat in features]
features = [feat.strip() for feat in features]
features = [feat for feat in features if len(feat)>0]
features = set(features)
features

{'a boat', 'a captain', 'a crew', 'a dock', 'a flag', 'a mast', 'the sea'}