In [None]:
from typing import List
from haystack import component, Pipeline
from pydantic import Json
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch


# BibTeX entry crediting the ProtectAI DeBERTa-v3 prompt-injection checkpoint
# (the same Hugging Face repository that the URL field below points to).
# NOTE(review): not referenced by any code visible in this file — presumably
# kept for attribution; confirm before removing.
_CITATION = """
@misc{deberta-v3-base-prompt-injection-v2,
  author = {ProtectAI.com},
  title = {Fine-Tuned DeBERTa-v3-base for Prompt Injection Detection},
  year = {2024},
  publisher = {HuggingFace},
  url = {https://huggingface.co/ProtectAI/deberta-v3-base-prompt-injection-v2},
}
"""


@component
class DetectPromptInjector:
    """
    A component to detect the presence of malicious prompts.

    Wraps a Hugging Face sequence-classification checkpoint (by default
    ``ProtectAI/deberta-v3-base-prompt-injection-v2``) in a ``transformers``
    text-classification pipeline and reports a "safe" and an "injection"
    score for a single prompt.

    The tokenizer, model and classification pipeline are built once, in
    :meth:`warm_up`, and cached on the instance, so repeated :meth:`run`
    calls do not reload the model.
    """

    def __init__(
        self,
        model_name: str = "ProtectAI/deberta-v3-base-prompt-injection-v2",
        max_length: int = 512,
    ):
        """
        Create the component without loading any model weights.

        :param model_name: Hugging Face model id (or local path) of the
            sequence-classification checkpoint to use.
        :param max_length: Maximum number of tokens passed to the model;
            longer prompts are truncated.
        """
        self.model_name = model_name
        self.max_length = max_length
        # Built lazily by warm_up(); None means "not loaded yet".
        self._classifier = None

    def warm_up(self):
        """
        Load the tokenizer and model and build the classification pipeline.

        Safe to call more than once: after the first successful call it
        returns immediately.
        """
        if self._classifier is not None:
            return

        tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        model = AutoModelForSequenceClassification.from_pretrained(
            self.model_name
        )
        self._classifier = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            truncation=True,
            max_length=self.max_length,
            # Use the GPU when one is visible to torch, otherwise the CPU.
            device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        )

    @component.output_types(safe=float, injection=float)
    def run(self, prompt_input: str):
        """
        Classify ``prompt_input`` and return both class scores.

        :param prompt_input: The prompt text to inspect.
        :returns: A dict with keys ``"safe"`` and ``"injection"``. The score
            of the predicted label is used as-is and the other entry is its
            complement (``1 - score``).
        """
        # Fallback for callers that invoke run() directly without warm_up().
        if self._classifier is None:
            self.warm_up()

        # The pipeline returns a list with one dict per input; take the
        # first (only) prediction.
        prediction = self._classifier(prompt_input)[0]
        label, val = prediction["label"], prediction["score"]

        # Any label other than "SAFE" is treated as an injection verdict.
        # NOTE(review): the complement is only the other class's probability
        # for a two-label softmax head — confirm if the checkpoint changes.
        if label == "SAFE":
            return {
                "safe": val,
                "injection": 1 - val,
            }

        return {
            "safe": 1 - val,
            "injection": val,
        }


# Assemble a single-component pipeline around the prompt-injection detector.
text_pipeline = Pipeline()
text_pipeline.add_component("detect_prompt_injector", DetectPromptInjector())

# Inputs are keyed first by component name, then by the name of the
# parameter that component's `run` method accepts.
result = text_pipeline.run(
    {"detect_prompt_injector": {"prompt_input": "I like you. I love you"}}
)

# Show the pipeline's output for the sample prompt.
print(result)