In [1]:
from probity.probing.datasets.templated import TemplatedDataset
from probity.probing.datasets.tokenized import TokenizedProbingDataset
from transformers import AutoTokenizer

# Word lists for the movie-sentiment template, keyed by sentiment class.
adjectives = {
    "positive": ["incredible", "amazing", "fantastic"],
    "negative": ["terrible", "awful", "horrible"],
}
verbs = {
    "positive": ["loved", "enjoyed", "adored"],
    "negative": ["hated", "disliked", "detested"],
}

# Build the templated dataset through the library's factory method.
movie_dataset = TemplatedDataset.from_movie_sentiment_template(adjectives=adjectives, verbs=verbs)

# Turn it into a probing dataset: labels are read from the "sentiment" metadata
# field and translated to integers via the map below; positions are requested
# automatically.
# NOTE(review): the output saved with this cell ends in
# `ValueError: Text does not match template`, and the printed regex still
# contains an escaped literal `\{VERB\}`. Presumably the automatic position
# finding only substitutes the marker variable -- confirm against the installed
# probity version before relying on this cell.
sentiment_label_map = {"positive": 1, "negative": 0}
probing_dataset = movie_dataset.to_probing_dataset(
    label_from_metadata="sentiment",
    label_map=sentiment_label_map,
    auto_add_positions=True,
)

# Tokenize the probing dataset with the "gpt2" tokenizer.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenized_dataset = TokenizedProbingDataset.from_probing_dataset(dataset=probing_dataset, tokenizer=tokenizer)



Template: I thought this movie was {ADJ}, I {VERB} it.
Marker: {ADJ}
Regex: I\ thought\ this\ movie\ was\ (.*?),\ I\ \{VERB\}\ it\.
Text: I thought this movie was incredible, I loved it.


ValueError: Text does not match template: I thought this movie was incredible, I loved it.

In [None]:
# Or create a custom template
from probity.probing.datasets.templated import Template, TemplateVariable

# One TemplateVariable per placeholder used in the template string below.
subject = TemplateVariable(
    name="SUBJECT", values=["The cat", "The dog"], metadata={"entity_type": "animal"}
)
action = TemplateVariable(
    name="ACTION", values=["jumped over", "ran under"], metadata={"movement_type": "locomotion"}
)
object_var = TemplateVariable(
    name="OBJECT", values=["the fence", "the table"], metadata={"object_type": "barrier"}
)

# Assemble the template; the dict keys match the placeholder names in the string.
placeholder_variables = {"SUBJECT": subject, "ACTION": action, "OBJECT": object_var}
template = Template(
    template="{SUBJECT} {ACTION} {OBJECT}.",
    variables=placeholder_variables,
    metadata={"task": "entity_movement"},
)

# Wrap the single template in a dataset.
custom_dataset = TemplatedDataset(templates=[template])

# Convert to a probing dataset with automatic positions.
# NOTE(review): this rebinds `probing_dataset`, replacing the value assigned in
# the movie-sentiment cell above.
probing_dataset = custom_dataset.to_probing_dataset(auto_add_positions=True)