# Notebook Details
Notebook to generate the Reddit Small Dataset. Similar to that of the python script.

In [5]:
%pip install convokit tqdm nlpaug transformers clean-text nltk tensorflow openai



Note: you may need to restart the kernel to use updated packages.


# Human-typed dataset

In [22]:
from convokit import Corpus, download
corpus = Corpus(filename=download("reddit-corpus-small"))

Downloading reddit-corpus-small to /home/ajunray/.convokit/downloads/reddit-corpus-small
Downloading reddit-corpus-small from http://zissou.infosci.cornell.edu/convokit/datasets/subreddit-corpus/reddit-corpus-small.corpus.zip (37.9MB)... 

KeyboardInterrupt: 

In [None]:
corpus.print_summary_stats()

In [13]:
def save(uttr_current, file_name):
  with open(file_name, "w+") as f:
    for utt in uttr_current:
        f.write(str(utt) + "\n------\n")

In [None]:
utterances = []

for utt in corpus.iter_utterances():
    utterances.append(utt.text)
save(utterances, "reddit-corpus-small.txt")

## Pruning

In [None]:
# use of clean-text to strip various un-needed information
# https://pypi.org/project/clean-text/

from cleantext import clean
import tqdm
import re

emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)

for utt_idx in tqdm.tqdm(range(len(utterances))):
  utterances[utt_idx] = clean(utterances[utt_idx],
      fix_unicode=True,
      to_ascii=True,
      lower=True,
      no_urls=True,
      no_emails=True,
      no_phone_numbers=True,
      no_currency_symbols=True,
      replace_with_url="<URL>",
      replace_with_email="<EMAIL>",
      replace_with_phone_number="<PHONE>",
      replace_with_currency_symbol="<CUR>",
      lang="en"
  )
  utterances[utt_idx] = emoji_pattern.sub(r'', utterances[utt_idx]) # no emoji

In [None]:
# Remove utterances < 20 characters and > 300 characters
print(len(utterances))
utterances = [utt for utt in utterances if len(utt) >= 3000]
print(len(utterances))

In [None]:
# Remove repeated utterances
print(len(utterances))
utterances = list(set(utterances))
print(len(utterances))

In [None]:
save(utterances, "post-pruned.txt")

Tokenization using nltk

In [None]:
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')
tokenized_human = []
utterances_human = utterances

for utt_idx in tqdm.tqdm(range(len(utterances))):
  tokenized_human.append(' '.join(word_tokenize(utterances[utt_idx])))

In [None]:
save(tokenized_human, "tokenized.txt")

# Load Files

In [7]:
utterances_human = []
with open("post-pruned.txt", "r") as f:
	lines = f.readlines()
	lines = '\n'.join(lines)
	utterances_human = lines.split('\n------\n')
print(len(utterances_human))

194315


# AI-generated dataset

Using GPT-2 from huggingface, we generate separate sub-datasets representing:
- Create
- Reply
In a 50% - 50% ratio

In [11]:
import tqdm
import openai
import random
import os

openai.api_key="API_KEY"
os.environ['AZURE_OPENAI_ENDPOINT']="API_ENDPOINT"
openai.api_type = "azure"
openai.api_version = "API_DATE"

utterances_ai_reply = []

# generate replies
msgs = []
for utterance_human_idx in tqdm.tqdm(range(2000)):
  # try-except as a cheeky way to continue generation if API call does not work
  while True:
    try:
      # 0-shot reply generation
      txt = f"""Generate a reply to this Reddit Post:
{utterances_human[int(random.random() * len(utterances_human))]}
"""
      # generate
      res = openai.chat.completions.create(
          model= "MODEL_NAME",
          messages=[
              {
                  "role": "user",
                  "content": txt
              }
          ],
          temperature=0.4,
          max_tokens=256,
          presence_penalty=-0.5
      )
      res = res.choices[0].message.content

      # prevent too many posts created that are actually just the AI aplogising
      if res == None or "I'm sorry, but I" in res:
        raise Exception("I'm sorry")

      utterances_ai_reply.append(res)
      break
    except Exception as e:
      print(f"Exception: {e}")


  0%|          | 8/2000 [00:09<40:07,  1.21s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


  1%|          | 14/2000 [00:23<1:06:44,  2.02s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


  4%|▎         | 71/2000 [01:45<42:08,  1.31s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


  5%|▌         | 100/2000 [02:35<1:00:33,  1.91s/it]

Exception: 
Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


  7%|▋         | 147/2000 [03:45<41:33,  1.35s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


  8%|▊         | 154/2000 [03:55<36:55,  1.20s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 10%|█         | 202/2000 [05:11<46:21,  1.55s/it]  

Exception: 


 11%|█         | 223/2000 [05:47<55:18,  1.87s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 14%|█▍        | 287/2000 [07:30<39:26,  1.38s/it]  

Exception: 


 14%|█▍        | 289/2000 [07:34<43:44,  1.53s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 14%|█▍        | 290/2000 [07:38<1:02:33,  2.20s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 16%|█▌        | 320/2000 [08:22<39:54,  1.43s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 19%|█▉        | 386/2000 [10:14<1:09:08,  2.57s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 21%|██        | 417/2000 [11:06<33:12,  1.26s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 21%|██        | 423/2000 [11:19<53:07,  2.02s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 25%|██▍       | 495/2000 [13:12<38:13,  1.52s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 25%|██▌       | 508/2000 [13:32<35:42,  1.44s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 26%|██▌       | 519/2000 [13:48<35:56,  1.46s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 27%|██▋       | 546/2000 [14:37<55:33,  2.29s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 28%|██▊       | 559/2000 [14:59<32:04,  1.34s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 29%|██▊       | 573/2000 [15:26<37:56,  1.60s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 29%|██▉       | 582/2000 [15:39<37:03,  1.57s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 32%|███▏      | 646/2000 [17:10<28:55,  1.28s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 33%|███▎      | 666/2000 [17:41<29:00,  1.30s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 34%|███▍      | 679/2000 [18:00<27:39,  1.26s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 35%|███▍      | 695/2000 [18:30<42:15,  1.94s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 35%|███▌      | 709/2000 [18:49<25:57,  1.21s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 38%|███▊      | 753/2000 [20:01<35:28,  1.71s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 40%|███▉      | 791/2000 [21:08<34:56,  1.73s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 41%|████      | 816/2000 [21:58<39:09,  1.98s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 42%|████▏     | 843/2000 [22:47<26:11,  1.36s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 44%|████▎     | 871/2000 [23:37<42:23,  2.25s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 45%|████▌     | 905/2000 [24:38<26:22,  1.45s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 51%|█████     | 1017/2000 [27:38<20:09,  1.23s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}
Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 52%|█████▏    | 1048/2000 [28:44<34:31,  2.18s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 53%|█████▎    | 1057/2000 [28:59<22:55,  1.46s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 55%|█████▍    | 1096/2000 [29:58<19:29,  1.29s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 55%|█████▌    | 1103/2000 [30:08<23:33,  1.58s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 55%|█████▌    | 1108/2000 [30:16<21:41,  1.46s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 56%|█████▌    | 1114/2000 [30:27<22:30,  1.52s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 56%|█████▌    | 1121/2000 [30:44<38:23,  2.62s/it]

Exception: 


 60%|█████▉    | 1198/2000 [32:56<22:11,  1.66s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 60%|██████    | 1202/2000 [33:02<17:20,  1.30s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 60%|██████    | 1205/2000 [33:09<23:04,  1.74s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 60%|██████    | 1208/2000 [33:15<24:54,  1.89s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 61%|██████▏   | 1226/2000 [33:53<29:44,  2.31s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 61%|██████▏   | 1227/2000 [33:55<27:35,  2.14s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 63%|██████▎   | 1251/2000 [34:35<18:10,  1.46s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 63%|██████▎   | 1253/2000 [34:38<18:26,  1.48s/it]

Exception: 


 64%|██████▍   | 1283/2000 [35:24<16:34,  1.39s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 65%|██████▍   | 1291/2000 [35:43<28:26,  2.41s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 65%|██████▍   | 1292/2000 [35:45<26:50,  2.27s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 65%|██████▌   | 1301/2000 [35:58<18:32,  1.59s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 65%|██████▌   | 1305/2000 [36:06<22:02,  1.90s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 67%|██████▋   | 1337/2000 [36:55<14:38,  1.33s/it]

Exception: 


 67%|██████▋   | 1344/2000 [37:07<18:49,  1.72s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}
Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 67%|██████▋   | 1346/2000 [37:14<27:32,  2.53s/it]

Exception: 


 68%|██████▊   | 1355/2000 [37:28<14:57,  1.39s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 75%|███████▍  | 1491/2000 [41:24<16:34,  1.95s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 76%|███████▌  | 1513/2000 [42:04<14:34,  1.79s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 78%|███████▊  | 1569/2000 [43:35<10:20,  1.44s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 79%|███████▉  | 1580/2000 [43:55<13:50,  1.98s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 82%|████████▏ | 1648/2000 [45:45<07:55,  1.35s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 85%|████████▍ | 1697/2000 [47:01<09:01,  1.79s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 85%|████████▌ | 1702/2000 [47:10<09:07,  1.84s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 88%|████████▊ | 1764/2000 [49:53<1:12:27, 18.42s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 90%|████████▉ | 1796/2000 [50:52<07:11,  2.11s/it]  

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 91%|█████████ | 1816/2000 [51:24<03:50,  1.25s/it]

Exception: 


 93%|█████████▎| 1858/2000 [52:31<03:45,  1.59s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 94%|█████████▎| 1874/2000 [52:59<03:51,  1.84s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 95%|█████████▍| 1899/2000 [53:41<02:48,  1.67s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 95%|█████████▌| 1904/2000 [53:51<02:39,  1.66s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 95%|█████████▌| 1905/2000 [53:53<02:44,  1.73s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 97%|█████████▋| 1935/2000 [54:42<01:59,  1.84s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 97%|█████████▋| 1937/2000 [54:47<02:08,  2.04s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}
Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 98%|█████████▊| 1956/2000 [55:21<00:54,  1.24s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


 99%|█████████▉| 1988/2000 [56:11<00:17,  1.44s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


100%|█████████▉| 1999/2000 [56:32<00:02,  2.35s/it]

Exception: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400}}


100%|██████████| 2000/2000 [56:35<00:00,  1.70s/it]


In [14]:
save(utterances_ai_reply, "ai_reply.txt")

In [None]:
import tqdm
import random
import openai
from openai import OpenAI
from typing import cast

def add_utterance(txt, utterance):
  return f"""
  ### START POST ###
  {utterances_human[int(random.random() * len(utterances_human))]}
  ### END POST ###
  """ + txt

client = OpenAI(api_key="API_KEY")
utterances_ai_create = []

# generate creates
for utterance_human_idx in tqdm.tqdm(range(2000)):
  # 3-shot creation of a new Reddit Post based on other Reddit Posts
  txt = """
### START POST ###
"""

  for i in range(3):
    txt = add_utterance(txt, utterances_human[int(random.random() * len(utterances_human))])

  # try-except as a cheeky way to continue generation if API call does not work
  while True:
    try:
      # generate
      res = client.chat.completions.create(
          model= "gpt-35-turbo-16k",
          messages=[
              {
                  "role": "user",
                  "content": txt
              }
          ],
          temperature=0.6,
          max_tokens=256,
          presence_penalty=0.5
      )
      res = res.choices[0].message.content

      # prevent too many posts created that are actually just the AI aplogising
      if res == None or "I'm sorry, but I" in res:
        raise Exception()

      utterances_ai_create.append(res)
      break
    except:
      continue

In [None]:
save(utterances_ai_create, "ai_create.txt")