In [1]:
from snortparser import Parser
import json

In [2]:
def strip_word_boundary(pcre: str, log: bool = True) -> str:
    """Drop every word-boundary token (`\\b`) from a PCRE.

    Args:
        pcre: The PCRE string to rewrite.
        log: When True, print the pattern before and after the rewrite.

    Returns:
        The pattern with all `\\b` occurrences removed, or `pcre` itself
        when it contains none.
    """
    pieces = pcre.split("\\b")
    if len(pieces) == 1:
        # No separator was found, so there is nothing to strip
        return pcre

    # Gluing the pieces back together leaves out every `\b` occurrence
    new_pcre = "".join(pieces)
    if log:
        print("Word boundary found '\\b', removing it.")
        print(f"OLD: {pcre}")
        print(f"NEW: {new_pcre}\n")
    return new_pcre

def strip_negative_lookahead(pcre: str, log: bool = True) -> str:
    """Turn every negative lookahead `(?!...)` in a PCRE into a plain group `(...)`.

    Only a `?!` that directly follows an opening bracket is dropped; the
    brackets are kept so the enclosed sub-pattern is still grouped. A `?!`
    anywhere else in the pattern (for instance an optional item followed by
    a literal `!`) is left untouched.

    Note that the former assertion body becomes an ordinary consuming group,
    so the rewritten pattern does not match the same input as the original.

    Args:
        pcre: The PCRE string to rewrite.
        log: When True, print the pattern before and after the rewrite.

    Returns:
        The rewritten pattern, or `pcre` unchanged if it contains no `(?!`.
    """
    if "(?!" not in pcre:
        return pcre

    # Replace the whole "(?!" token rather than the bare "?!" so that
    # unrelated "?!" sequences elsewhere in the pattern survive.
    new_pcre = pcre.replace("(?!", "(")
    if log:
        print("Negative lookahead found '(?!', removing it.")
        print(f"OLD: {pcre}")
        print(f"NEW: {new_pcre}\n")
    return new_pcre

def strip_positive_lookahead(pcre: str, log: bool = True) -> str:
    """Turn every positive lookahead `(?=...)` in a PCRE into a plain group `(...)`.

    Only a `?=` that directly follows an opening bracket is dropped; the
    brackets are kept so the enclosed sub-pattern is still grouped. A `?=`
    anywhere else in the pattern (for instance an escaped `?` followed by a
    literal `=`) is left untouched.

    Note that the former assertion body becomes an ordinary consuming group,
    so the rewritten pattern does not match the same input as the original.

    Args:
        pcre: The PCRE string to rewrite.
        log: When True, print the pattern before and after the rewrite.

    Returns:
        The rewritten pattern, or `pcre` unchanged if it contains no `(?=`.
    """
    if "(?=" not in pcre:
        return pcre

    # Replace the whole "(?=" token rather than the bare "?=" so that
    # unrelated "?=" sequences elsewhere in the pattern survive.
    new_pcre = pcre.replace("(?=", "(")
    if log:
        print("Positive lookahead found '(?=', removing it.")
        print(f"OLD: {pcre}")
        print(f"NEW: {new_pcre}\n")
    return new_pcre

def strip_negative_lookbehind(pcre: str, log: bool = True) -> str:
    """Turn every negative lookbehind `(?<!...)` in a PCRE into a plain group `(...)`.

    Only a `?<!` that directly follows an opening bracket is dropped; the
    brackets are kept so the enclosed sub-pattern is still grouped. A `?<!`
    anywhere else in the pattern is left untouched.

    Note that the former assertion body becomes an ordinary consuming group,
    so the rewritten pattern does not match the same input as the original.

    Args:
        pcre: The PCRE string to rewrite.
        log: When True, print the pattern before and after the rewrite.

    Returns:
        The rewritten pattern, or `pcre` unchanged if it contains no `(?<!`.
    """
    if "(?<!" not in pcre:
        return pcre

    # Replace the whole "(?<!" token rather than the bare "?<!" so that
    # unrelated "?<!" sequences elsewhere in the pattern survive.
    new_pcre = pcre.replace("(?<!", "(")
    if log:
        print("Negative lookbehind found '(?<!', removing it.")
        print(f"OLD: {pcre}")
        print(f"NEW: {new_pcre}\n")
    return new_pcre

def strip_positive_lookbehind(pcre: str, log: bool = True) -> str:
    """Turn every positive lookbehind `(?<=...)` in a PCRE into a plain group `(...)`.

    Only a `?<=` that directly follows an opening bracket is dropped; the
    brackets are kept so the enclosed sub-pattern is still grouped. A `?<=`
    anywhere else in the pattern is left untouched.

    Note that the former assertion body becomes an ordinary consuming group,
    so the rewritten pattern does not match the same input as the original.

    Args:
        pcre: The PCRE string to rewrite.
        log: When True, print the pattern before and after the rewrite.

    Returns:
        The rewritten pattern, or `pcre` unchanged if it contains no `(?<=`.
    """
    if "(?<=" not in pcre:
        return pcre

    # Replace the whole "(?<=" token rather than the bare "?<=" so that
    # unrelated "?<=" sequences elsewhere in the pattern survive.
    new_pcre = pcre.replace("(?<=", "(")
    if log:
        print("Positive lookbehind found '(?<=', removing it.")
        print(f"OLD: {pcre}")
        print(f"NEW: {new_pcre}\n")
    return new_pcre

In [3]:
# Load the Snort rules file as a list of lines
filename = "snort3-community.rules"
with open(filename) as f:
    rules = f.readlines()

# Rewrites applied to every extracted PCRE, in this exact order:
# word boundary first, then the lookaheads, then the lookbehinds
pcre_rewrites = (
    strip_word_boundary,
    strip_positive_lookahead,
    strip_negative_lookahead,
    strip_positive_lookbehind,
    strip_negative_lookbehind,
)

# Count the lines that parse and collect one record per rule that has a PCRE
num_rules = 0
pcre_rules = []
for rule in rules:
    try:
        parsed = Parser(rule)  # may raise; such lines are reported and skipped
        num_rules += 1  # only lines that parsed are counted

        pcre = None  # Perl Compatible Regular Expression string
        msg = None  # message string
        sid = None  # SNORT unique ID number

        # Extract and rewrite the PCRE; with several 'pcre' options the last one wins
        for key in parsed.options:
            option = parsed.options[key]
            if option[0] != 'pcre':
                continue
            pcre = str(option[1]).strip('"')  # drop surrounding quotes
            for rewrite in pcre_rewrites:
                pcre = rewrite(pcre)

        # Rules without a PCRE are counted above but not recorded
        if pcre is None:
            continue

        # Extract the message text, without its surrounding quotes
        for key in parsed.options:
            option = parsed.options[key]
            if option[0] == 'msg':
                msg = option[1][0].strip('"')

        # Extract the numeric rule ID
        for key in parsed.options:
            option = parsed.options[key]
            if option[0] == 'sid':
                sid = int(option[1][0])

        # Record this rule
        record = {
            "SID": sid,
            "MSG": msg,
            "PCRE": pcre,
            "PCRE_LENGTH": len(pcre),
        }
        pcre_rules.append(record)

    except Exception as err:
        print(f"ERROR (rule skipped): {err}")


Negative lookahead found '(?!', removing it.
OLD: /^CEL(?!\n)\s[^\n]{100}/ims
NEW: /^CEL(\n)\s[^\n]{100}/ims

Word boundary found '\b', removing it.
OLD: /^Command\s+?completed\b/ms
NEW: /^Command\s+?completed/ms

Negative lookahead found '(?!', removing it.
OLD: /^STAT(?!\n)\s[^\n]{190}/im
NEW: /^STAT(\n)\s[^\n]{190}/im

Positive lookahead found '(?=', removing it.
OLD: /^Content-Type\x3a\s*(?=[av])(video\/x\-ms\-(w[vm]x|asf)|a(udio\/x\-ms\-w(m[av]|ax)|pplication\/x\-ms\-wm[zd]))/ims
NEW: /^Content-Type\x3a\s*([av])(video\/x\-ms\-(w[vm]x|asf)|a(udio\/x\-ms\-w(m[av]|ax)|pplication\/x\-ms\-wm[zd]))/ims

Negative lookahead found '(?!', removing it.
OLD: /^SITE(?!\n)\s[^\n]{100}/ims
NEW: /^SITE(\n)\s[^\n]{100}/ims

Negative lookahead found '(?!', removing it.
OLD: /^CMD(?!\n)\s[^\n]{200}/ims
NEW: /^CMD(\n)\s[^\n]{200}/ims

Negative lookahead found '(?!', removing it.
OLD: /^SYST(?!\n)\s[^\n]{100}/ims
NEW: /^SYST(\n)\s[^\n]{100}/ims

Negative lookahead found '(?!', removing it.
OLD: /^USER

In [4]:
# Summarise how many rules parsed and what share of them carry a PCRE
print(f"Snort file: '{filename}'")
print(f"Number of rules: {num_rules}")
print(f"Number of PCRE strings: {len(pcre_rules)}")
# Guard the ratio: num_rules stays 0 when no line of the file could be parsed,
# which would otherwise raise ZeroDivisionError here
pcre_share = len(pcre_rules) / num_rules * 100 if num_rules else 0.0
print(f"Percentage of PCRE rules out of all rules: {pcre_share:.2f}%")

Snort file: 'snort3-community.rules'
Number of rules: 4025
Number of PCRE strings: 1034
Percentage of PCRE rules out of all rules: 25.69%


In [5]:
# Persist the collected PCRE rule records (SID, MSG, PCRE, PCRE_LENGTH) as JSON.
# The path is held in one variable so the log message always names the file
# that is actually written.
output_filename = "snort_pcre_rules.json"
print(f"INFO: Writing PCRE rules to file '{output_filename}'")
with open(output_filename, 'w') as f:
    json.dump(pcre_rules, f, indent=4)

INFO: Writing unique PCRE strings to file 'snort_pcre_strings.txt'
