In [1]:
from langchain.chat_models import ChatOpenAI
from langchain import HuggingFaceHub
from langchain.prompts import (
    ChatPromptTemplate,
)
from langchain_core.output_parsers import StrOutputParser

In [2]:
import dotenv

# Parse the project's .env file into a mapping of KEY -> value.
# The loader is called through the module so that the imported function is not
# overwritten by its own result; the mapping keeps the name `dotenv_values`
# so anything that reads it keeps working.
dotenv_values = dotenv.dotenv_values()

# API credentials; a missing entry raises KeyError here, before any model is built.
hf_key = dotenv_values['HUGGINGFACEHUB_API_TOKEN']
openai_key = dotenv_values['OPENAI_API_KEY']

In [3]:
# Chat model used for code completion; temperature=0 keeps sampling randomness
# to a minimum so repeated runs are as comparable as possible.
model = ChatOpenAI(
    model="gpt-4-1106-preview",
    temperature=0,
    openai_api_key=openai_key,
)
# Display the resolved model name as a sanity check.
model.model_name


'gpt-4-1106-preview'

In [4]:
# System prompt for the code-completion assistant.
# `{max_lines}` is a placeholder that is filled in when the prompt is formatted;
# the trailing fenced example shows the model the exact reply format that the
# output sanitizer later relies on (a ```python fenced block).
template = """The user will provide the initial part of the function (function name, parameters with types, return type and a comment describing the purpose of the function, with some optional example).
You are a coding assistant that writes some python code to complete the user's function. Be sure that the code is syntactically correct and that it returns the correct type. The length of the code should be less than {max_lines} lines.

Return only python code in Markdown format, e.g.:

```python
....
```"""

In [5]:
# Two-message chat prompt: the system message carries the instructions
# (with the `{max_lines}` placeholder), the human message carries the
# function stub to complete (the `{input}` placeholder).
prompt_messages = [
    ("system", template),
    ("human", "{input}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)


In [6]:
prompt

ChatPromptTemplate(input_variables=['input', 'max_lines'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['max_lines'], template="The user will provide the initial part of the function (function name, parameters with types, return type and a comment describing the purpose of the function, with some optional example.\nYou are a coding assistant that writes some python code to complete the user's function. Be sure that the code is syntactically correct and that it returns the correct type. The lenght of the code should be less than {max_lines} lines.\n\nReturn only python code in Markdown format, e.g.:\n\n```python\n....\n```")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])

In [6]:
def _sanitize_output(text: str):
    _, after = text.split("```python")
    return after.split("```")[0]

In [7]:
# Pipeline, left to right: fill in the prompt, call the chat model,
# reduce the reply to a plain string, then strip the Markdown code fence.
chain = (
    prompt
    | model
    | StrOutputParser()
    | _sanitize_output
)

In [8]:
# Function stub (signature plus docstring) that is sent as the human message
# for the model to complete.
# NOTE(review): the name `input` shadows the Python built-in of the same name;
# it is left unchanged because the cell that invokes the chain reads it.
input = '''def detect_xss(http_get_request: str)->bool: 
""" Check if in the given http_get_request there is an XSS exploit, considering also the possible evasions that an attacker can perform.""" 
'''

In [9]:
prompt

ChatPromptTemplate(input_variables=['input', 'max_lines'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['max_lines'], template="The user will provide the initial part of the function (function name, parameters with types, return type and a comment describing the purpose of the function, with some optional example.\nYou are a coding assistant that writes some python code to complete the user's function. Be sure that the code is syntactically correct and that it returns the correct type. The lenght of the code should be less than {max_lines} lines.\n\nReturn only python code in Markdown format, e.g.:\n\n```python\n....\n```")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])

In [10]:
# Values for the two prompt placeholders. 99999 is a very large bound, so the
# "less than {max_lines} lines" constraint is effectively not binding here.
chain_inputs = {"max_lines": 99999, "input": input}
response = chain.invoke(chain_inputs)

In [11]:
# print() rather than a bare expression: the generated code is a multi-line
# string, and print renders its newlines instead of showing the escaped repr.
print(response)


import re
from html import unescape

def detect_xss(http_get_request: str) -> bool:
    """
    Check if in the given http_get_request there is an XSS exploit, considering also the possible evasions that an attacker can perform.
    
    Args:
    http_get_request (str): The HTTP GET request string to be checked for XSS.

    Returns:
    bool: True if an XSS exploit is detected, False otherwise.
    """
    
    # Unescape the request to handle HTML entity evasion
    http_get_request = unescape(http_get_request)
    
    # Patterns to detect XSS
    # These patterns are basic and may not cover all possible XSS vectors
    # It's recommended to use a comprehensive library for production systems
    xss_patterns = [
        r'<script.*?>.*?</script.*?>',  # Basic <script> tag injection
        r'javascript:[^"]*',            # JavaScript URI
        r'("|\')\s*on\w+\s*=',          # Event handlers
        r'(<[^>]+)\s+style\s*=\s*[^>]*?expression\([^>]*?\)',  # CSS expression
        