In [27]:
import hashlib
import json
import os

from opengpt.config import Config
from opengpt.prompt_utils import add_to_prompt_database

In [3]:
# Load the settings from the example dataset-creation YAML config; the cells
# below read `config.path.prompt_db` as the location of the prompt database.
config = Config(yaml_path='../configs/example_config_for_detaset_creation.yaml')

## How to add prompts into the prompt database

Here we will add three prompts to the prompt database; the same template can be used to add any prompt.

Each prompt requires three fields:

- `text` - The text of the prompt that will be sent to OpenAI (ChatGPT, GPT-4, or any other model that can be used to create a dataset). The prompt needs to have at least two placeholders: first, `context` - the content of a document that will be used to generate question/answer pairs, a conversation, or anything else; and second, `language` - the language the output should be written in.

- `description` - A short description of what this prompt is for and how to use it.

- `parser` - The parser to be used to parse the output from a Teacher (e.g. OpenAI, Google).

Notes:
- When using the `csv_*` parsers the separator ";" has to be used.

In [None]:
# Check what is already in the prompt DB, use existing prompts or add new ones.
# Nothing is printed when the prompt DB file does not exist yet.
if os.path.exists(config.path.prompt_db):
    # Open the file in a context manager so the handle is closed as soon as
    # the JSON has been loaded (a bare `json.load(open(...))` leaves it open).
    with open(config.path.prompt_db) as prompt_db_file:
        db = json.load(prompt_db_file)
    # Each entry is a dict with 'description', 'hash', 'parser' and 'text'.
    for prompt in db:
        print('Description: ', prompt['description'])
        print('Hash: ', prompt['hash'])
        print('Parser: ', prompt['parser'])
        print('Text: ', prompt['text'])
        # Visual separator between two prompts.
        print("*"*100)
        print()

In [11]:
# Prompt 1: a Patient / AI-Assistant conversation generated from a context
# document. Placeholders used in the text: {context} and {language}.
text = '''Given the following context: {context}

----------------
Create a conversation between a Patient concerned about the symptoms and disorders presented above and an AI-Assistant. The conversation should abide by the following rules and instructions:
1. The output should be in {language}.
2. All questions and answers have to be strictly based on the context provided above.
3. The questions should be diverse and cover different aspects of the context provided above.
4. When generating the conversation please always use the English placeholders "Patient" for the Patient and "AI-Assistant" for the AI-Assistant, irrelevant of the language.
5. The conversation should always be started by the Patient.
6. The patient should start the conversation with a "Hi,".
7. The AI-Assistant should be helpful, and supportive and try to not scare the patient. The answers have be to detailed and self-contained.'''
# Short human-readable summary stored next to the prompt in the database.
description = '''This will generate a conversation between a Patient and an AI assistant in the specified languages. The only argument for this prompt is the {language} argument, if not provided it will default to English.'''
# Name of the parser to be used on the output returned for this prompt.
parser = 'medical_conversation_parser'

In [12]:
# Store the prompt defined in the previous cell in the prompt DB located at
# `config.path.prompt_db`; the returned database is kept in `db`.
# force_replace=False presumably leaves an already stored prompt untouched
# - confirm against opengpt.prompt_utils.
db = add_to_prompt_database(
    text,
    description,
    parser,
    config.path.prompt_db,
    force_replace=False,
)



In [13]:
# Prompt 2: question/answer pairs generated from a context document and
# returned as CSV. Placeholders used in the text: {context}, {quantity} and
# {language}. The requested CSV header uses ";" as the separator.
text = '''Given the following context: {context}

----------------
Create {quantity} detailed question-answer pairs from the context above, the questions are asked by a curious user and the answers are by a helpful AI-Assistant. The question-answer pairs should abide by the following rules and instructions:
1. The output should be in {language}.
2. The output should be in CSV format, with the following header: ID;Question;Answer
3. All question/answer pairs have to be strictly based on the context provided above and be self-contained and independent.
4. The questions should be diverse and cover different aspects of the context provided above.
5. The answers should be long, extensive, detailed, informative, helpful and self-contained.'''
# Short human-readable summary stored next to the prompt in the database.
# NOTE(review): "qustion" is a typo for "question"; it is left as-is here
# because this string is stored in the prompt DB.
description = 'A general purpose prompt creating qustion answer pairs. This prompt takes two arguments {quantity} or how many q/a pairs to generate and {language}'
# Name of the parser to be used on the output returned for this prompt.
parser = 'csv_qa_parser'

In [14]:
# Store the question/answer prompt defined in the previous cell in the prompt
# DB located at `config.path.prompt_db`; the returned database is kept in `db`.
# force_replace=False presumably leaves an already stored prompt untouched
# - confirm against opengpt.prompt_utils.
db = add_to_prompt_database(
    text,
    description,
    parser,
    config.path.prompt_db,
    force_replace=False,
)



In [15]:
# Prompt 3: a list of medical instruction tasks with Task / Instruction /
# Input / Output fields. Placeholders used in the text: {quantity} and
# {language}; this prompt has no {context} placeholder.
text = '''You are asked to come up with a set of {quantity} diverse task instructions in the field of medicine and healthcare. These task instructions will be given to a Medical GPT model and we will evaluate the Medical GPT model for completing the instructions.

Here are the requirements:
1. Try not to repeat the verb for each instruction to maximize diversity.
2. The language used for the instruction also should be diverse. For example, you should combine questions with imperative instructions.
3. The type of instructions should be diverse. The list should include diverse kinds of tasks like step-by-step reasoning, multiple-choice-questions, open-ended generation, classification, editing, complex medical questions, simple medical questions, etc.
4. A GPT language model should be able to complete the instruction. For example, do not ask the assistant to create any visual or audio output. For another example, do not ask the assistant to wake you up at 5pm or set a reminder because it cannot perform any action.
5. The instructions should be in {language}.
6. The instructions should be 1 to 4 sentences long. Either an imperative sentence or a question is permitted.
7. You should generate an appropriate input to the instruction. The input field should contain a specific example provided for the instruction. It should involve realistic data and should not contain simple placeholders. The input should provide substantial content to make the instruction challenging but should ideally not exceed 300 words.
8. Not all instructions require input. For example, when an instruction asks about some general information, "What is diabetes", it is not necessary to provide a specific context. In this case, we simply put "<noinput>" in the input field.
9. The output should be an appropriate response to the instruction and the input. It should ideally not exceed 400 words.
10. All generated output should use the metric system for measurements and UK names for medications, substances, drugs and everything else.

List of {quantity} tasks (every task has the following fields: Task:, Instruction:, Input:, Output:):'''
# Short human-readable summary stored next to the prompt in the database.
description = '''Generates high complexity various medical instruction-tasks'''
# NOTE(review): no parser is set for this prompt although the notes above list
# `parser` as a required field - confirm that a None parser is supported.
parser = None

In [16]:
# Store the instruction-task prompt defined in the previous cell in the prompt
# DB located at `config.path.prompt_db`; the returned database is kept in `db`.
# force_replace=False presumably leaves an already stored prompt untouched
# - confirm against opengpt.prompt_utils.
db = add_to_prompt_database(
    text,
    description,
    parser,
    config.path.prompt_db,
    force_replace=False,
)



In [25]:
# Dump every prompt stored in `db`: one labelled line per field, followed by
# a row of asterisks and an empty line as a separator between prompts.
for prompt in db:
    for label, key in (
        ('Description: ', 'description'),
        ('Hash: ', 'hash'),
        ('Parser: ', 'parser'),
        ('Text: ', 'text'),
    ):
        print(label, prompt[key])
    print("*" * 100)
    print()

Description:  This will generate a conversation between a Patient and an AI assistant in the specified languages. The only argument for this prompt is the {language} argument, if not provided it will default to English.
Hash:  f4df95ec69
Parser:  medical_conversation_parser
Text:  Given the following context: {context}

----------------
Create a conversation between a Patient concerned about the symptoms and disorders presented above and an AI-Assistant. The conversation should abide by the following rules and instructions:
1. The output should be in {language}.
2. All questions and answers have to be strictly based on the context provided above.
3. The questions should be diverse and cover different aspects of the context provided above.
4. When generating the conversation please always use the English placeholders "Patient" for the Patient and "AI-Assistant" for the AI-Assistant, irrelevant of the language.
5. The conversation should always be started by the Patient.
6. The patient s