In [116]:
import csv, os, json, random, sys, textwrap, random, re

import openai

In [117]:
# Directory under the user's home where this notebook keeps its persistent files.
DATA_DIR = os.path.expanduser('~/exocortex_data')
# CSV file inside DATA_DIR holding the per-topic counts (columns: topic, count).
TOPIC_COUNTS_PATH = os.path.join(DATA_DIR, 'topic_counts.csv')

def init_data_dir():
  """Create DATA_DIR (including missing parent directories) if it is absent.

  Uses ``exist_ok=True`` instead of a separate existence check, so there is no
  window between "check" and "create" in which another process could create
  the directory and make ``makedirs`` fail.

  Raises:
    FileExistsError: if something that is not a directory already exists at
      DATA_DIR.
  """
  os.makedirs(DATA_DIR, exist_ok=True)
# Run at cell execution so later cells that read or write files under DATA_DIR can rely on it existing.
init_data_dir()

In [118]:
def load_topic_counts(path=None):
  """Read the persisted topic counts from a CSV file.

  The file is expected to have a header row followed by ``topic,count`` rows.

  Args:
    path: CSV file to read. Defaults to TOPIC_COUNTS_PATH.

  Returns:
    A dict mapping topic (str) to count (int), in file order. An empty dict is
    returned when the file does not exist.

  Raises:
    ValueError: if a count cell cannot be parsed as an integer.
  """
  if path is None:
    path = TOPIC_COUNTS_PATH
  if not os.path.exists(path):
    return {}

  topic_to_count = {}
  # newline='' hands newline handling to the csv module, so quoted fields and
  # '\r\n' row terminators are parsed the same way on every platform.
  with open(path, newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; tolerates a completely empty file
    for row in reader:
      # csv.reader yields [] for blank lines; skip those (and truncated rows)
      # rather than failing with IndexError.
      if len(row) < 2:
        continue
      topic_to_count[row[0]] = int(row[1])
  return topic_to_count

# Display the counts currently stored on disk (an empty dict if the file does not exist yet).
load_topic_counts()

{'exocortex': 2,
 'deep learning': 2,
 'artificial intelligence': 1,
 'machine learning': 1,
 'neural networks': 1,
 'data mining': 1,
 'predictive modeling': 1,
 'statistical modeling': 1,
 'regression analysis': 1,
 'linear regression': 1,
 'coefficients': 1,
 'correlation': 1,
 'association': 1,
 'big data': 1,
 'predictive analytics': 1,
 'statistical analysis': 1,
 'time series analysis': 1,
 'time series': 1,
 'time series data': 1,
 'temporal data': 1,
 'event data': 1,
 'streaming data': 1}

In [119]:
def weighted_choice(choices):
  """Pick one key of `choices` at random, with probability proportional to its weight.

  Args:
    choices: dict mapping choice -> numeric weight,
      e.g. {'choice1': .7, 'choice2': .5}. Entries with a non-positive weight
      are never selected.

  Returns:
    One of the keys, or None when `choices` is empty or the weights do not sum
    to a positive number.
  """
  if not choices:
    return None
  total = sum(choices.values())
  if total <= 0:
    return None

  rand = random.uniform(0, total)
  upto = 0
  last_positive = None
  for choice, weight in choices.items():
    if weight <= 0:
      continue
    upto += weight
    if upto > rand:
      return choice
    last_positive = choice
  # random.uniform may return its upper endpoint, and float rounding can leave
  # the running sum slightly below `total`; in either case fall back to the last
  # eligible choice instead of implicitly returning None.
  return last_positive

# Quick sanity check: with weights 2:1, 'a' should be printed roughly twice as often as 'b'.
for _ in range(10):
  print(weighted_choice({'a': 2, 'b': 1}))

a
a
a
a
a
a
b
a
a
a


In [120]:
def request_topic(topic):
  """Ask the completion model for words related to `topic` and log the exchange.

  Builds a one-shot prompt (a worked 'artificial intelligence' example followed
  by the same question for `topic`), sends it through `openai.Completion.create`,
  and appends a (prompt, response, temperature) row to prompt_history.csv in
  DATA_DIR.

  Args:
    topic: the topic to ask about; inserted verbatim into the prompt.

  Returns:
    The text of the first completion choice, with surrounding whitespace
    stripped.
  """
  # Dedent the template first and substitute the topic afterwards, so the
  # topic's own content can never change how much indentation gets removed.
  prompt_template = textwrap.dedent("""
  Here are some important words related to 'artificial intelligence':
  1. machine learning
  2. deep learning
  3. neural networks
  4. robotics
  5. natural language processing

  Here are some important words related to '{topic}':
  """)
  prompt_str = prompt_template.format(topic=topic)

  print('sending...')
  temperature = 0.7
  # NOTE(review): this is the module-level Completion interface with the
  # "text-davinci-002" model; confirm the installed openai package and the
  # account still support both.
  response = openai.Completion.create(
    model="text-davinci-002",
    prompt=prompt_str,
    temperature=temperature,
    max_tokens=1024,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0
  )

  response_str = response.choices[0]['text'].strip()
  print('got response')

  history_path = os.path.join(DATA_DIR, 'prompt_history.csv')
  # newline='' is required for csv writers: the prompt and response contain
  # embedded newlines, which must be written untranslated inside quoted fields.
  with open(history_path, 'a', newline='') as f:
    csv.writer(f).writerow([prompt_str, response_str, temperature])

  return response_str
    
# Smoke test: sends one real request and appends a row to prompt_history.csv.
# NOTE(review): this cell runs before dream() assigns openai.api_key from ~/open_ai.json, so on a
# fresh kernel it presumably relies on the key being configured some other way — confirm.
request_topic(topic='exocortex')

sending...
got response


'1. artificial general intelligence\n2. artificial superintelligence\n3. cognitive architectures\n4. mind uploading'

In [123]:
def write_topic_counts(topic_to_count, path=None):
  """Persist topic counts as CSV, highest count first.

  As a safety net against clobbering the file with near-empty data, nothing is
  written when `topic_to_count` is falsy or has fewer than two entries; a
  message is printed instead.

  Args:
    topic_to_count: dict mapping topic -> count.
    path: CSV file to (over)write. Defaults to TOPIC_COUNTS_PATH.
  """
  if not topic_to_count or len(topic_to_count) < 2:
    print('topic_to_count is empty, not writing')
    return
  if path is None:
    path = TOPIC_COUNTS_PATH

  # Sort before opening so a failure here cannot leave a truncated file behind.
  # sorted() is stable, so topics with equal counts keep their dict order.
  rows = sorted(topic_to_count.items(), key=lambda t: t[1], reverse=True)

  # newline='' stops the platform from translating the csv module's own row
  # terminators (which otherwise yields blank rows between records on Windows).
  with open(path, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['topic', 'count'])
    writer.writerows(rows)

In [124]:
def _first_new_topic(response_str, topic_to_count):
  """Return the first numbered-list topic in response_str that is not yet in topic_to_count, else None."""
  for line in response_str.splitlines():
    # Raw string: '\.' in a plain string literal is an invalid escape sequence.
    if not re.match(r'^[0-9]+\. ', line):
      print(f"line doesn't match expected format, ignoring: {line}")
      continue
    # Strip so trailing whitespace cannot create near-duplicate topics.
    topic = line.split(' ', 1)[1].strip()
    if topic and topic not in topic_to_count:
      return topic
  return None


def dream(num_iterations=10, seed_topic='exocortex'):
  """Repeatedly request related topics, following the first unseen one each time.

  Each iteration picks a topic, increments its count, asks the model for
  related words via request_topic, and then persists the counts with
  write_topic_counts. The next topic is the first word in the response that
  has not been counted yet; when there is none, a known topic is drawn with
  weighted_choice (weights = counts).

  The API key is read from the 'api_key' field of ~/open_ai.json.

  Args:
    num_iterations: number of requests to make.
    seed_topic: topic to start from when no topic can be chosen from the
      stored counts.
  """
  with open(os.path.expanduser('~/open_ai.json')) as f:
    openai.api_key = json.load(f).get('api_key')

  topic_to_count = load_topic_counts()
  next_topic = None

  for _ in range(num_iterations):
    if not next_topic:
      # weighted_choice may yield None (e.g. nothing to choose from); fall back
      # to the seed rather than using None as a topic.
      chosen = weighted_choice(topic_to_count) if topic_to_count else None
      next_topic = chosen or seed_topic

    print(f'next_topic: {next_topic}')

    topic_to_count[next_topic] = topic_to_count.get(next_topic, 0) + 1
    response_str = request_topic(next_topic)

    next_topic = _first_new_topic(response_str, topic_to_count)

    # Persist after every request so progress survives an interrupted run.
    write_topic_counts(topic_to_count)
# Runs the full loop: makes real API requests and rewrites the topic counts file after each one.
dream()

next_topic: predictive analytics
sending...
got response
next_topic: statistical learning
sending...
got response
next_topic: regression
sending...
got response
next_topic: pattern recognition
sending...
got response
next_topic: feature extraction
sending...
got response
next_topic: feature selection
sending...
got response
next_topic: feature engineering
sending...
got response
next_topic: data preprocessing
sending...
got response
next_topic: cleaning
sending...
got response
next_topic: dirt
sending...
got response
