In [12]:
import pandas as pd
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from dotenv import dotenv_values
import sys
sys.path.insert(0,'/workspaces/RAG_secure_code_generation/src')
from utils.utils import load_yaml, init_argument_parser, sanitize_output, fill_default_parameters
from langchain.prompts import (
    ChatPromptTemplate, PromptTemplate
)
from utils.openai_utils import is_openai_model, build_chat_model
from langchain.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
import random
import numpy as np
from functools import partial
from typing import List
from langchain.embeddings import OpenAIEmbeddings
from langchain.output_parsers import CommaSeparatedListOutputParser


from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders import WebBaseLoader
from utils.custom_grobid_parser import CustomGrobidParser
from langchain.docstore.document import Document
from langchain_core.embeddings import Embeddings
import bs4
from langchain_core.runnables import RunnablePassthrough
from utils.rag_utils import build_scientific_papers_loader, build_documents_retriever, format_docs, build_web_page_loader
from utils.openai_utils import is_openai_model, build_chat_model
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import (
    SystemMessagePromptTemplate,
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
)
from pydantic.v1 import Field ,BaseModel, validator

In [13]:
seed = 156
np.random.seed(seed)
random.seed(seed)

In [14]:
model_name = "gpt-3.5-turbo-0613"


In [15]:
template_file = "../data/templates/create_synthetic_dataset.yaml"
task_file = "../data/tasks/detect_xss_simple_prompt.txt"
parameters_file = "../data/prompt_parameters/syn_xss_100.yaml"

In [16]:
env = dotenv_values()

In [17]:
openai_key = env['OPENAI_API_KEY']
model = ChatOpenAI(temperature=0, openai_api_key=openai_key, model=model_name)
embeddings = OpenAIEmbeddings(api_key = openai_key)

In [18]:
template = load_yaml(template_file)
    # load parameters
prompt_parameters = load_yaml(parameters_file)

#read txt containing the task
with open(task_file) as f:
    prompt_parameters["input"] = f.read()
prompt_parameters = fill_default_parameters(prompt_parameters, template["default_parameters"])
use_openai_api = is_openai_model(model_name)
openai_key = env['OPENAI_API_KEY']
model = ChatOpenAI(temperature=0, openai_api_key=openai_key, model=model_name)
embeddings = OpenAIEmbeddings(api_key = openai_key)

In [19]:
class XSS_row(BaseModel):
    Payloads: str = Field(description="a string representing an http get request with payload")
    Class: str = Field(description="a string representing the class of the http get request, it is Malicious if the http get request contains an xss attack, otherwise it is Benign")

In [20]:
output_parser = PydanticOutputParser(pydantic_object=XSS_row)
format_instructions = output_parser.get_format_instructions()
format_instructions

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"Payloads": {"title": "Payloads", "description": "a string representing an http get request with payload", "type": "string"}, "Class": {"title": "Class", "description": "a string representing the class of the http get request, it is Malicious if the http get request contains an xss attack, otherwise it is Benign", "type": "string"}}, "required": ["Payloads", "Class"]}\n```'

In [21]:
prompt = ChatPromptTemplate(
    messages=[
        SystemMessagePromptTemplate.from_template(template['input']),
        HumanMessagePromptTemplate.from_template("{input}")  
    ],
    input_variables=["input"],
    partial_variables={"format_instructions": format_instructions}
)

In [22]:
prompt_simple = "Generate a synthetic dataset of 5 http get request containing an XSS (Malicious) and 5 http get request not containing an XSS (Benign)."
prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template("{input}")  
    ],
    input_variables=["input"],
    partial_variables={"format_instructions": format_instructions}
)

In [23]:
chain = prompt | model | output_parser



In [24]:
print(prompt.format(input=prompt_simple))


Human: Generate a synthetic dataset of 5 http get request containing an XSS (Malicious) and 5 http get request not containing an XSS (Benign).


In [25]:
response = chain.invoke(prompt_parameters)

OutputParserException: Invalid json output: import re

# Check for common XSS patterns
xss_patterns = [
    r"<script.*?>.*?</script>",
    r"<.*?on\w+=.*?>",
    r"javascript:.*?;",
    r"alert\(.*?\)",
    r"document\.cookie",
    r"eval\(.*?\)",
    r"window\.location",
    r"window\.open",
    r"window\.document",
    r"window\.history",
    r"window\.navigator",
    r"window\.screen",
    r"window\.frames",
    r"window\.parent",
    r"window\.top",
    r"window\.self",
    r"window\.opener",
    r"window\.frameElement",
    r"window\.defaultStatus",
    r"window\.status",
    r"window\.name",
    r"window\.innerHeight",
    r"window\.innerWidth",
    r"window\.outerHeight",
    r"window\.outerWidth",
    r"window\.screenX",
    r"window\.screenY",
    r"window\.pageXOffset",
    r"window\.pageYOffset",
    r"window\.scrollX",
    r"window\.scrollY",
    r"window\.scrollTo",
    r"window\.scrollBy",
    r"window\.moveTo",
    r"window\.moveBy",
    r"window\.resizeTo",
    r"window\.resizeBy",
    r"window\.openDatabase",
    r"window\.indexedDB",
    r"window\.sessionStorage",
    r"window\.localStorage",
    r"window\.globalStorage",
    r"window\.openDialog",
    r"window\.showModalDialog",
    r"window\.alert",
    r"window\.confirm",
    r"window\.prompt",
    r"window\.print",
    r"window\.find",
    r"window\.open",
    r"window\.close",
    r"window\.postMessage",
    r"window\.blur",
    r"window\.focus",
    r"window\.scroll",
    r"window\.stop",
    r"window\.setInterval",
    r"window\.setTimeout",
    r"window\.clearInterval",
    r"window\.clearTimeout",
    r"window\.requestAnimationFrame",
    r"window\.cancelAnimationFrame",
    r"window\.getComputedStyle",
    r"window\.getSelection",
    r"window\.matchMedia",
    r"window\.requestFileSystem",
    r"window\.resolveLocalFileSystemURL",
    r"window\.openDatabaseSync",
    r"window\.crypto",
    r"window\.speechSynthesis",
    r"window\.webkitNotifications",
    r"window\.webkitRequestAnimationFrame",
    r"window\.webkitCancelAnimationFrame",
    r"window\.webkitCancelRequestAnimationFrame",
    r"window\.webkitIndexedDB",
    r"window\.webkitStorageInfo",
    r"window\.webkitRequestFileSystem",
    r"window\.webkitResolveLocalFileSystemURL",
    r"window\.webkitRequestFileSystemSync",
    r"window\.webkitResolveLocalFileSystemSyncURL",
    r"window\.webkitSpeechSynthesis",
    r"window\.webkitURL",
    r"window\.webkitRTCPeerConnection",
    r"window\.webkitMediaStream",
    r"window\.webkitRTCIceCandidate",
    r"window\.webkitRTCSessionDescription",
    r"window\.webkitRTCPeerConnectionIceEvent",
    r"window\.webkitRTCPeerConnectionIceCandidateEvent",
    r"window\.webkitMediaStreamEvent",
    r"window\.webkitSpeechRecognition",
    r"window\.webkitSpeechGrammarList",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError",
    r"window\.webkitSpeechRecognitionEvent",
    r"window\.webkitSpeechRecognitionResult",
    r"window\.webkitSpeechRecognitionAlternative",
    r"window\.webkitSpeechRecognitionError

In [None]:
response

AIMessage(content='Dataset:\n\nMalicious examples:\n1. http_get_request = "<script>alert(\'XSS\')</script>"\n2. http_get_request = "<img src=x onerror=alert(\'XSS\')>"\n3. http_get_request = "<a href=\'javascript:alert(`XSS`)\'>Click me</a>"\n4. http_get_request = "<img src=\'https://example.com/image.jpg\' onerror=\'javascript:alert(`XSS`)\'>"\n5. http_get_request = "<svg onload=\'javascript:alert(`XSS`)\'></svg>"\n\nBenign examples:\n1. http_get_request = "https://example.com"\n2. http_get_request = "https://example.com/page"\n3. http_get_request = "https://example.com/?param=value"\n4. http_get_request = "https://example.com/?param1=value1&param2=value2"\n5. http_get_request = "https://example.com/page?param=value"')