In [None]:
# !python3.8 -m pip install openai

In [None]:
import openai
import pandas as pd
from openai.error import RateLimitError, ServiceUnavailableError, APIError
from tqdm.notebook import tqdm
import time
import sys
import logging

In [None]:
# File logger for the request/response audit trail kept in 'api_requests.log'.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
    '%(asctime)s: %(message)s'
)
# logging.getLogger returns the same logger object on every call, so re-running
# this cell used to attach one more FileHandler each time and every record was
# then written once per attached handler. Reuse the handler if it is already there.
file_handler = next(
    (
        h for h in logger.handlers
        if isinstance(h, logging.FileHandler)
        and h.baseFilename.endswith('api_requests.log')
    ),
    None,
)
if file_handler is None:
    # Explicit utf-8: the logged prompts are free-form user text, and the
    # default (locale) encoding may not be able to represent all of it.
    file_handler = logging.FileHandler('api_requests.log', encoding='utf-8')
    logger.addHandler(file_handler)
file_handler.setFormatter(formatter)

In [None]:
# Write a header record naming the four pipe-separated fields that every
# request record in the log file carries.
log_header_columns = ('sent_text', 'responsed_text', 'tokens_spent', 'total_tokens_spent')
logger.info("{} | {} | {} | {}".format(*log_header_columns))

In [None]:
# Read the OpenAI API key from a local file so it never appears in the notebook.
with open('token.txt') as f:
    # strip(): a text file normally ends with a newline, and read() returns it
    # as part of the key, which then no longer matches the real credential.
    openai.api_key = f.read().strip()

In [None]:
# Read the OpenAI organization id from a local file.
with open('organisation_key.txt') as f:
    # strip(): drop the trailing newline (and any padding) that read() returns
    # together with the id.
    openai.organization = f.read().strip()

In [None]:
# openai.Engine.list()

In [None]:
# Load the tab-separated prompts, then keep only rows whose `content` is
# present and is the first occurrence of that text; renumber the index 0..n-1.
dataset = (
    pd.read_csv('cleaned_prompts.tsv', sep='\t')
    .loc[lambda frame: frame.content.notna()]
    .loc[lambda frame: ~frame.content.duplicated()]
    .reset_index(drop=True)
)

In [None]:
def generate_description(prompt: str) -> str:
    """Build the few-shot completion prompt for one image-generation prompt.

    Despite its name, this function does not produce a description itself: it
    returns the text that is sent to the completion model, consisting of the
    task instructions, a list of worked ``Prompt:``/``Answer:`` examples and
    finally the given prompt followed by an open ``Answer:`` cue.

    Every example uses the same ``Answer:`` label as the final cue (three of
    them were previously misspelled ``Asnwer:``), so the pattern the model is
    asked to continue is consistent.

    Args:
        prompt: Raw prompt text to be summarised; inserted verbatim.

    Returns:
        The complete prompt string, ending in ``"Prompt:<prompt>\\nAnswer:"``.
    """
    return """
You will be given prompt that was used for image generation in Stable Diffusion discord channel. These prompts contain a lot of keywords like "high resoultion", "4k", "3d rendering", etc which do not describe some instance on the picture. You have to write an image description based on this prompt. It should be only description of general object on the picture without any extended details, just description of an instance on picture. If you can’t distinguish general object (e.g., there is only concept or incomprehensible text in the prompt or just random characters like "vnj vdf asdo 12"), you have to answer “_no_object”. If the prompt already contains image description, just return it. You need to only do this thing and return image description without trying to write anything else.
Examples:

Prompt:city, detailed, 8 k, ultra - detailed,
Answer:a city

Prompt:city night photo like in 1 9 9 1 year very realistic _ high detailed
Answer:a night city

Prompt:a poster of fifa world cup 2022
Answer:a poster of fifa world cup 2022

Prompt:moon by romero britto and picasso
Answer:a moon

Prompt:a photorealistic image of a cat, trending on artstation, cinematic lightning
Answer:a cat

Prompt:a dog
Answer:a dog

Prompt:surprise me
Answer:_no_object

Prompt:big city, at night, bright moon, renaissance portrait, higt realist, gradient colors, buildings with bright lights, people walking, bright colors
Answer:a big city at night with bright moon, buildings with bright lights, people walking

Prompt:com
Answer:_no_object

Prompt:lions, savanna, 38 mm film, photo
Answer:lions in savanna

Prompt:portrait of a black man, highly detailed, hyper realistic, cinematic, 8k, afro culture, love, peace, soul, resilient, dreams, snakes
Answer:a black man

Prompt:{}
Answer:""".format(prompt)

In [None]:
# Running total of the `total_tokens` usage reported by the API; the request
# loop adds each response's usage to it. Re-running this cell resets it to 0.
total_tokens = 0

In [None]:
# Maps each processed prompt text to the completion text returned for it.
# Re-running this cell discards everything collected so far.
prompts_to_descr = {}

In [None]:
# Price used for the running cost estimate, in USD per 1000 tokens.
USD_PER_1K_TOKENS = 0.02
# Retry pacing (seconds): start at the original 1 s pause and double after
# every consecutive failure, never waiting longer than the cap.
RETRY_INITIAL_DELAY = 1.0
RETRY_MAX_DELAY = 60.0

for string in tqdm(dataset.content.values):
    # Already answered (e.g. the cell is being re-run after an interruption):
    # skip it so the same prompt is not requested twice.
    if string in prompts_to_descr:
        continue

    # Retry the same prompt until the request succeeds. Only the API call is
    # inside the try block, so the handler cannot mask errors from the
    # bookkeeping below.
    delay = RETRY_INITIAL_DELAY
    while True:
        try:
            response = openai.Completion.create(
                model="text-davinci-003",
                prompt=generate_description(string),
                temperature=0.5,
                max_tokens=2300
            )
            break
        except (RateLimitError, ServiceUnavailableError, APIError) as err:
            # Previously these failures were retried silently every second,
            # forever; report them so a failure that never clears is visible,
            # and back off instead of hammering the service.
            print(
                "request failed ({}: {}), retrying in {:.0f}s".format(
                    type(err).__name__, err, delay
                ),
                file=sys.stderr,
            )
            time.sleep(delay)
            delay = min(delay * 2, RETRY_MAX_DELAY)

    # strip(): the stored text is later compared for exact equality with
    # '_no_object', which surrounding spaces/newlines would defeat.
    image_description = response['choices'][0]['text'].strip()
    tokens = response['usage']['total_tokens']
    total_tokens += tokens
    prompts_to_descr[string] = image_description
    logger.info("{} | {} | {} | {}".format(string, image_description, str(tokens), str(total_tokens)))
    print("total tokens: {}, money spent: ${}".format(total_tokens, total_tokens/1000*USD_PER_1K_TOKENS))

In [None]:
# Turn the prompt -> description mapping into two parallel columns (a dict
# yields keys and values in the same order), save them as CSV and display the
# resulting frame as the cell output.
prompts = list(prompts_to_descr.keys())
image_descriptions = list(prompts_to_descr.values())
res_df = pd.DataFrame({'prompt': prompts, 'image_description': image_descriptions})
res_df.to_csv('model_answers.csv', index=False)
res_df

In [None]:
# The output file is named 'only_with_description', but the previous filter
# (== '_no_object') kept exactly the prompts for which the model reported no
# object. Keep the rows that do carry a description instead.
# The text is stripped before comparing so that an answer padded with
# spaces/newlines is still recognised as '_no_object'.
has_description = res_df.image_description.str.strip() != '_no_object'
res_df[has_description].to_csv('only_with_description.csv', index=False)