# CHI 2024 Course - LLM Prompt Templates

*Goal:* Create a dataset of prompt templates based on the anatomy of prompts surveyed.

![Anatomy of prompt](./docs/prompt_anatomy.png)

In [7]:
import re
import random
import pandas as pd

In [8]:
# Prompt templates building blocks

roles = [
    'Act as {{role}} with {{years_of_experience}} years of experience.', # Flattery component
    'Consider the perspective of a {{role}}.',
    'Pretend that you are {{role}}.',
    'You have an IQ of {{iq_score}}.' # Flattery component
]

dos = [
    'Answer the following question: {{question}}.',
    'Classify by {{class}}.', # sentiment
    'Contrast {{content_a}} with {{content_b}}.',
    'Convert {{code}} into {{programming_language}}.',
    'Create {{content}}.', # text, code, poem, executive action plan, website with interactive charts, outline for...
    'Criticize {{content}}.',
    'Document and explain {{code}}.',
    'Explain {{topic}}.',
    'Extract {{info}} from {{content}}.', # names, dates, location
    'Find correlations in {{content}}.',
    'Give me the {{number}} key takeaways from {{content}}.',
    'Give me constructive feedback about {{content}}.',
    'Give me key similarities between {{topics_list}}.',
    'Get me the most significant events about {{topic}}.',
    'Improve {{content}}.',
    'Predict {{machine_learning_task}}.', # churn, conversion, 
    'Present arguments for/against {{topic}}.',
    'Proof-read {{content}}.',
    'Reverse prompt engineer {{content}}.',
    'Rewrite {{content}}.',
    'Summarize {{content}}.',
    'Translate into {{language}}.',
    'What are questions I can ask about {{topic}}?',
    'What is the excel formula for {{content}}?',
    'What is wrong with {{topic}}?',
    'When I {{condition}}, {{sub_prompt}}.'
]

contexts = [ # Priming
    'Consider the following context {{context}}.', 
    'I have a {{file_type}}.',
    'I will provide input content according to the following format {{input_format}}.', # triple quotes, etc.
    'My {{content}} starts with {{content_sample}}.' # code, json file, csv file
]

contents = [
    'For this content: {{data}}.', # documents, json file, job posting
    'Here is a chunk of data: {{data_sample}}.',
    'Consider the following examples. Label: {{label}}. Content: {{data_point}}.',
    'Consider the following examples. Q: {{question}}. A: {{answer}}.'    
]

donts = [
    'Consider that I will license this code under {{license}} license.', # responsible
    'Do not make up things.', # responsible
    'Do not provide explanations.',
    'Do not write a {{programming_language}} code.',
    'Ignore previous instructions and {{malicious_prompt}}.',  # used in hacking
    'Remove {{attributes}}.', # Negative prompts    
    'Respect copyrights of training data.', # responsible 
    'Use only the provided content and do cite the passage(s) of the content used.' # responsible
]

outputs = [
    'Add a column for {{topic}}.',
    'Annotate output citing the content provided.',    
    'Assign a probability of success.',
    'Bold changes performed.',
    'Bold keywords.',
    'By {{author}}.',
    'Create {{number}} versions.',
    'Extract keywords.',
    'Format output as ASCII art.',
    'Format output as a {{programming_language}} code.'    
    'Format output as a {{data_structure}}.', # graphviz graph, visual, tabular, shopping list, bullet-list, 
    'Format output as a {{file_type}} file.', # SVG, JSON, CSV
    'Format output as a {{presentation_type}}',# stand-up routine, tedtalk, lecture
    'Format output in a {{sentiment}} tone.', # upbeat, friendly
    'Format output to a {{audience}}.', # 4th grader, executives, scientists
    'Group by {{topic}}.',
    'In less than {{number_of_words}} words.',
    'In the style of {{style}}.',
    'Optmize output for {{software}}.', # excel spreadsheet, rtf document, svg image
    'Respond with verbatim {{reference}}.', # news article clippings, scientific papers, 
    'Sort by {{attribute}}.', # chronologically, price, alphabetically
    'Sound appealing to {{audience}}.',
    'Step by step.', # explainable
    'Turn into a fill in the blank {{software}} document.'
]

assessments = [
    'Ask me for details if needed.',
    'Answer yes if you understand.',
    'My first request is {{sub_prompt}}.',
    'Otherwise, simply write: Insufficient information.'
]

iterations = [
    'Continue from step {{iteration}}.',
    'Follow the instructions provided.', # used in hacking    
    'Now {{sub_prompt}}.',
    'Remove {{percentage}}% of the lowest rated {{topic}}.',
    'Stick to the character.' # used in hacking
]

In [9]:
# Prompt templates
prompts = []


# TODO: review whether placeholders were renamed before generating
# Placeholders need to be present in the building blocks

for role in roles:
    for do in dos:
        for context in contexts:
            for content in contents:
                for dont in donts:  
                    # License not needed when no code is asked
                    if( dont == 'Consider that I will license this code under {{license}} license.' and len( re.findall( 'code', do, flags = re.IGNORECASE ) ) == 0 ):
                        dont = ''  
                    # Do not write a {{programming_language}} not needed when there is no code asked
                    elif( dont == 'Do not write a {{programming_language}} code.' and len( re.findall( 'code', do, flags = re.IGNORECASE ) ) == 0 ):
                        dont = ''
                    # Respect training data not needed when there is no ML task
                    elif( dont == 'Respect copyrights of training data.' and len( re.findall( 'classify|predict|machine_learning_task', do, flags = re.IGNORECASE ) ) == 0 ):
                        dont = ''
                    # Do not provide explanations not needed when explain is requested
                    elif( dont == 'Do not provide explanations.' and len( re.findall( 'explain', do, flags = re.IGNORECASE ) ) > 0 ):
                        dont = ''
                    
                    for output in outputs:
                        # Bold changes performed not needed when no change is asked
                        if( output == 'Bold changes performed.' and len( re.findall( 'convert|document|feedback|improve|proof-read|rewrite', do, flags = re.IGNORECASE ) ) == 0 ):
                            output = ''
                        # Assign a probability of success not needed when no task with probability-related task is asked
                        elif( output == 'Assign a probability of success.' and len( re.findall( 'classify|predict|machine_learning_task', do, flags = re.IGNORECASE ) ) == 0 ):
                            output = ''
                        # Adding a column not needed when there is no reference to table
                        elif( output == 'Add a column for {{topic}}.' and len( re.findall( 'table', do, flags = re.IGNORECASE ) ) == 0 ):
                            output = ''
                        # Annotate not needed when there is no data provided as input
                        elif( output == 'Annotate output citing the content provided.' and len( re.findall( 'data|data_sample', content, flags = re.IGNORECASE ) ) == 0 ):
                            output = ''
                        # Respond with verbatim reference not needed when there is no data provided as input
                        elif( output == 'Respond with verbatim {{reference}}.' and len( re.findall( 'data|data_sample', content, flags = re.IGNORECASE ) ) == 0 ):
                            output = ''
                            
                        for assessment in assessments: 
                            # Answering insufficient information not needded when there is no question asked
                            if( assessment == 'Otherwise, simply write: Insufficient information.' and len( re.findall( 'question', do, flags = re.IGNORECASE ) ) == 0 ):
                                assessment = ''
                            
                            for iteration in iterations:
                                # Stick to the character not needed then there is no {{role}} specified
                                if( iteration == 'Stick to the character.' and len( re.findall( 'role', role, flags = re.IGNORECASE ) ) == 0 ):
                                    iteration = ''
                                # Remove {{percentage}}% of the lowest rated... not needed when there is no machine_learning_task
                                elif( iteration == 'Remove {{percentage}}% of the lowest rated {{topic}}.' and len( re.findall( 'machine_learning_task', do, flags = re.IGNORECASE ) ) == 0 ):
                                    iteration = ''
                                # Now sub_prompt not needed when there is sub_prompt in the assessment
                                elif( iteration == 'Now {{sub_prompt}}.' and len( re.findall( 'sub_prompt', assessment, flags = re.IGNORECASE ) ) > 0 ):
                                    iteration = ''
                                    
                                prompt = ' '.join( [role, do, context, content, dont, output, assessment, iteration ] )
                                prompt = re.sub( '\s+', ' ', prompt )
                                prompts.append( prompt )


In [10]:
# Statements part of the prompt templates

print( 'Role:\t\t{}'.format( len(roles) ) )
print( 'DOs:\t\t{}'.format( len(dos) ) )
print( 'Context:\t{}'.format( len(contexts) ) )
print( 'Input content:\t{}'.format( len(contents) ) )
print( 'DONTs:\t\t{}'.format( len(donts) ) )
print( 'Output:\t\t{}'.format( len(outputs) ) )
print( 'Assessment:\t{}'.format( len(assessments) ) )
print( 'Iterating:\t{}'.format( len(iterations) ) )
print( 'Total:\t\t{}'.format( len(prompts) ) )

Role:		4
DOs:		26
Context:	4
Input content:	4
DONTs:		8
Output:		23
Assessment:	4
Iterating:	5
Total:		6123520


In [11]:
# Content for the readme file on git repo

# Placeholders

placeholders = {}

for role in roles:
    keys = re.findall("{{([^}]+)}}", role)
    for k in keys:
        placeholders[ k ] = True

for do in dos:
    keys = re.findall("{{([^}]+)}}", do)
    for k in keys:
        placeholders[ k ] = True

for context in contexts:
    keys = re.findall("{{([^}]+)}}", context)
    for k in keys:
        placeholders[ k ] = True

for content in contents:
    keys = re.findall("{{([^}]+)}}", content)
    for k in keys:
        placeholders[ k ] = True

for dont in donts:
    keys = re.findall("{{([^}]+)}}", dont)
    for k in keys:
        placeholders[ k ] = True

for output in outputs:
    keys = re.findall("{{([^}]+)}}", output)
    for k in keys:
        placeholders[ k ] = True

for assessmen in assessments:
    keys = re.findall("{{([^}]+)}}", assessmen)
    for k in keys:
        placeholders[ k ] = True

for iteration in iterations:
    keys = re.findall("{{([^}]+)}}", iteration)
    for k in keys:
        placeholders[ k ] = True

print( 'Current placehorders are:\n' )
for k, v in placeholders.items():
    print( "- " + k )

print( '\n' )

# Sampling the dataset to update the readme file on git repo

print( 'Sample:\n' )

for i in random.sample( range( len( prompts ) ), 50):
    print( prompts[i] + '\n' )


Current placehorders are:

- role
- years_of_experience
- iq_score
- question
- class
- content_a
- content_b
- code
- programming_language
- content
- topic
- info
- number
- topics_list
- machine_learning_task
- language
- condition
- sub_prompt
- context
- file_type
- input_format
- content_sample
- data
- data_sample
- label
- data_point
- answer
- license
- malicious_prompt
- attributes
- author
- data_structure
- presentation_type
- sentiment
- audience
- number_of_words
- style
- software
- reference
- attribute
- iteration
- percentage


Sample:

Pretend that you are {{role}}. Criticize {{content}}. My {{content}} starts with {{content_sample}}. For this content: {{data}}. Remove {{attributes}}. Format output as a {{file_type}} file. Answer yes if you understand. Follow the instructions provided.

Pretend that you are {{role}}. Present arguments for/against {{topic}}. Consider the following context {{context}}. Consider the following examples. Label: {{label}}. Content: {{data_

In [None]:
df = pd.DataFrame( data = prompts, columns = ['Prompt template'] ) 
df.to_csv( 'prompt_templates.txt', index = False )