Prompt Markup Language Parser.
- PromptML is a simple markup language.
- It inserts control strings into ordinary text, wrapped in `[]` (e.g. `[mask]`, `[sep]`).
- A control string can carry options, separated from its name by `|` (e.g. `[title|upper]`, `[text|lower]`).
- Multiple control options are separated by `,` (e.g. `[title|upper,rmpunt]`).
`[cls]A [mask] news : [sent_0|lower,fix][sep|+]`
- String: `cls`, Control Options: `{}`
- String: `A`, Control Options: `None`
- String: `mask`, Control Options: `{}`
- String: `news :`, Control Options: `None`
- String: `sent_0`, Control Options: `{lower, fix}`
- String: `sep`, Control Options: `{+}`
- String:
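`[cls]\\[ Topic : [mask] \\][sent_0][sep|+]` (the escaped brackets `\\[` and `\\]` are kept as literal text rather than opening a control string)
- String: `cls`, Control Options: `{}`
- String: `[ Topic :`, Control Options: `None`
- String: `mask`, Control Options: `{}`
- String: `]`, Control Options: `None`
- String: `sent_0`, Control Options: `{}`
- String: `sep`, Control Options: `{+}`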
- String:
from promptml import PromptTemplate
from datasets import load_dataset
from transformers import AutoTokenizer
def main():
    """Show how to render a PromptML template on one record and on a dataset.

    Loads the ``bert-base-uncased`` fast tokenizer, builds a
    ``PromptTemplate`` from a PromptML string, and calls ``render`` twice:
    once on a single ``{"text": ...}`` dict and once on the ``imdb`` dataset.
    """
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=True)

    # NOTE(review): the `limit` option presumably marks the field that is
    # shortened to respect `max_length` -- confirm against the promptml docs.
    template = PromptTemplate("[cls]A [mask] news : [text|limit][sep]", tokenizer)

    # Single record: the key "text" matches the `[text|...]` control string.
    res = template.render({"text": "hello world"}, max_length=20)

    # Whole dataset: the same template is applied to the loaded `imdb` data.
    imdb = load_dataset("imdb")
    imdb = template.render(imdb, max_length=128)
# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()