In [4]:
from pydantic import BaseModel, ValidationError
import re
import json

# Define the Pydantic model based on the expected JSON structure
class AttackLog(BaseModel):
    """Schema for one JSON object embedded in the LLM's reply.

    All five fields are declared as plain ``str`` with no default, so each
    key must be present in the parsed JSON for the model to be constructed.
    """
    # Field names mirror the keys of the sample object in the example cell below.
    category: str        # e.g. "Warning" in the sample input
    type: str            # e.g. "Attempted Directory Traversal" in the sample input
    origin: str          # e.g. "192.168.1.1 using Postman" in the sample input
    relevant_log: str    # the log line the finding refers to (per the sample input)
    recommendation: str  # free-text advice (per the sample input)

def extract_json_objects(text):
    """Extract every top-level JSON object embedded in free-form text.

    The text is scanned once, character by character. An opening brace seen
    outside of any object marks the start of a candidate; the matching closing
    brace (nesting is counted) marks its end. Braces that occur inside a
    double-quoted JSON string are ignored, so a value such as ``"a } b"`` does
    not terminate the object early.

    Args:
        text: Arbitrary text, e.g. an LLM reply, that may contain zero or more
            JSON objects surrounded by prose.

    Returns:
        A list of ``AttackLog`` instances, one per balanced ``{...}`` span, in
        the order they appear. A span that is never closed is ignored.

    Raises:
        Exception: If a balanced span is not valid JSON or does not validate
            against ``AttackLog``. The original error is attached as the cause.
    """
    json_objects = []
    depth = 0            # current brace nesting level; 0 means "outside any object"
    start_index = 0      # index of the '{' that opened the current top-level object
    in_string = False    # True while inside a double-quoted string within an object
    escaped = False      # True when the previous character inside a string was a backslash

    for i, char in enumerate(text):
        if in_string:
            # Inside a string only an unescaped quote is significant; braces are data.
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
            continue

        if char == '"':
            # Quotes in the surrounding prose (depth 0) must not start a string.
            if depth:
                in_string = True
        elif char == '{':
            if depth == 0:
                start_index = i
            depth += 1
        elif char == '}' and depth:
            depth -= 1
            if depth == 0:
                # Balanced span found: parse it and validate against the model.
                substring = text[start_index:i + 1]
                try:
                    obj = json.loads(substring)
                    json_objects.append(AttackLog(**obj))
                except (json.JSONDecodeError, ValidationError) as e:
                    raise Exception(f"Error parsing or validating JSON: {e}") from e

    return json_objects

# Example usage
# Sample reply: free-form prose with a single JSON object embedded in it.
input_text = """
Some text comes from the LLM and then in between a json object
{"category": "Warning", "type": "Attempted Directory Traversal", "origin": "192.168.1.1 using Postman", "relevant_log": "192.168.1.1 - - [16/Apr/2024:09:25:06 +0000] 'GET /env HTTP/1.1' 404 22 '-' 'PostmanRuntime/7.26.10' '-'", "recommendation": "Implement strict access control and input validation."}
maybe some more text, followed by another json object
...
"""

# Extract the embedded objects and pretty-print each one. Any exception raised
# while extracting or printing is caught and shown as a plain message.
try:
    json_objects = extract_json_objects(input_text)
    for obj in json_objects:
        # Convert the model to a dict so json.dumps can serialise it with indentation.
        print(json.dumps(obj.dict(), indent=4))
except Exception as e:
    print(e)

{
    "type": "Attempted Directory Traversal",
    "origin": "192.168.1.1 using Postman",
    "relevant_log": "192.168.1.1 - - [16/Apr/2024:09:25:06 +0000] 'GET /env HTTP/1.1' 404 22 '-' 'PostmanRuntime/7.26.10' '-'",
    "recommendation": "Implement strict access control and input validation."
}


In [8]:
from typing import List
from pydantic import BaseModel

# Category labels. Nothing in the visible cells reads this list (the models
# below type `category` as a plain str), so it is presumably used elsewhere in
# the code -- TODO confirm. Note that the name matches the stdlib `enum` module.
enum = ["Info", "Warn", "Critical"]

class ResponseItem(BaseModel):
    """A single finding to be rendered in the Markdown report.

    Every field is a required ``str``; no value restrictions are declared here.
    """
    category: str        # e.g. "Info" or "Critical" in the example below
    type: str            # e.g. "Error" or "Failure" in the example below
    origin: str          # e.g. "System Check" in the example below
    relevant_log: str    # log text associated with the finding
    recommendation: str  # suggested follow-up action

class ResponseItems(BaseModel):
    """Container passed to ``format_to_markdown``: findings plus an error count."""
    parsing_errors: int        # count printed on the "Parsing Errors" line of the report
    items: List[ResponseItem]  # findings, rendered in list order

def format_to_markdown(response_items: ResponseItems) -> str:
    """Render the findings as a Markdown report.

    The report starts with an ``*Analysis:*`` heading, followed by one
    five-line section per item (category, type, origin, relevant log,
    recommendation) and ends with the parsing-error count.

    Args:
        response_items: Object exposing ``items`` (an iterable of findings
            with the five string attributes above) and ``parsing_errors``.

    Returns:
        The complete Markdown text. Lines inside a section end with two
        spaces before the newline, which Markdown renders as a line break.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = ["*Analysis:*\n"]

    for item in response_items.items:
        parts.append(
            f"**Category:** {item.category}  \n"
            f"**Type:** {item.type}  \n"
            f"**Origin:** {item.origin}  \n"
            f"**Relevant Log:** {item.relevant_log}  \n"
            f"**Recommendation:** {item.recommendation}\n\n"
        )

    parts.append(f"*Parsing Errors:* {response_items.parsing_errors}\n\n")
    return "".join(parts)

# Example usage
# Build a small report by hand: two findings and a parsing-error count of 2.
response_items = ResponseItems(
    parsing_errors=2,
    items=[
        ResponseItem(
            category="Info",
            type="Error",
            origin="System Check",
            relevant_log="Failed to access database",
            recommendation="Check database connection settings."
        ),
        ResponseItem(
            category="Critical",
            type="Failure",
            origin="User Module",
            relevant_log="User authentication failed",
            recommendation="Review authentication protocol."
        )
    ]
)

# Render the report and show the raw Markdown source.
markdown_text = format_to_markdown(response_items)
print(markdown_text)


*Anaylis:*
**Category:** Info  
**Type:** Error  
**Origin:** System Check  
**Relevant Log:** Failed to access database  
**Recommendation:** Check database connection settings.

**Category:** Critical  
**Type:** Failure  
**Origin:** User Module  
**Relevant Log:** User authentication failed  
**Recommendation:** Review authentication protocol.

*Parsing Errors:* 2




In [9]:
def format_to_markdown(response_items: ResponseItems) -> str:
    """Render the findings as Markdown, prefixing each one with a category emoji.

    The parsing-error count comes first, then a ``*Details:*`` heading, then
    one section per item. Categories without an entry in the emoji table get
    a generic information marker.

    Args:
        response_items: Object exposing ``parsing_errors`` and ``items``.

    Returns:
        The complete Markdown text.
    """
    # Marker placed in front of each item's category line.
    category_markers = {"Info": "✅", "Warn": "⚠️", "Critical": "🔥"}
    fallback_marker = "ℹ️"  # used when the category has no entry above

    chunks = [
        f"*Parsing Errors:* {response_items.parsing_errors}\n\n",
        "*Details:*\n",
    ]

    for entry in response_items.items:
        marker = category_markers.get(entry.category, fallback_marker)
        chunks.extend((
            f"{marker} **Category:** {entry.category}  \n",
            f"**Type:** {entry.type}  \n",
            f"**Origin:** {entry.origin}  \n",
            f"**Relevant Log:** {entry.relevant_log}  \n",
            f"**Recommendation:** {entry.recommendation}\n\n",
        ))

    return "".join(chunks)

# Example usage
# Same hand-built sample as before: two findings and a parsing-error count of 2.
response_items = ResponseItems(
    parsing_errors=2,
    items=[
        ResponseItem(
            category="Info",
            type="Error",
            origin="System Check",
            relevant_log="Failed to access database",
            recommendation="Check database connection settings."
        ),
        ResponseItem(
            category="Critical",
            type="Failure",
            origin="User Module",
            relevant_log="User authentication failed",
            recommendation="Review authentication protocol."
        )
    ]
)

# Render with the emoji-aware formatter defined in this cell and print the raw Markdown.
markdown_text = format_to_markdown(response_items)
print(markdown_text)


*Parsing Errors:* 2

*Details:*
✅ **Category:** Info  
**Type:** Error  
**Origin:** System Check  
**Relevant Log:** Failed to access database  
**Recommendation:** Check database connection settings.

🔥 **Category:** Critical  
**Type:** Failure  
**Origin:** User Module  
**Relevant Log:** User authentication failed  
**Recommendation:** Review authentication protocol.


