### Parser
Parser functions that extract HTTP requests, together with their properties (URL, query parameters and body parameters), from the dataset.

In [1]:

import re
import string
import urllib.parse as urlparse
from setuptools import Feature

# Patterns that carve individual requests out of the raw dataset text, keyed
# by request method.  Each one starts at the method name and lazily consumes
# characters (newlines included) up to the next "GET"/"POST" token or the end
# of the input.
# Raw strings are used so that regex escapes such as \Z are handed to the
# regex engine untouched instead of being parsed as Python string escapes.
HTTP_METHODS = {
    "GET": r"GET(.|\n)+?(?=GET|POST|\Z)",
    "POST": r"POST(.|\n)+?(?=GET|POST|\Z)",
}

# Shortest run of characters that starts with "http" and ends right before
# the next space.
URL_REGEX = r"http.+?(?= )"
# A run of non-newline characters that has two newlines directly before it
# and two newlines directly after it.
BODY_REGEX = r"(?<=\n\n).+(?=\n\n)"



def parse(path, request_reg: str):
    """Read the file at *path* and return every match of *request_reg* in it.

    Args:
        path: Location of the file to read.
        request_reg: Regular expression describing a single request.

    Returns:
        A list holding the full text of each non-overlapping match, in the
        order the matches occur in the file.  The list is empty when nothing
        matches.
    """
    with open(path) as file:
        data = file.read()

    # Matching works on the in-memory text, so the file is already closed here.
    return [match.group(0) for match in re.finditer(request_reg, data, re.MULTILINE)]

def parseParamsFromUrl(request):
    """Return the query-string parameters of the URL found in *request*.

    The URL is located with ``parseUrl``; its query component is decoded with
    ``urllib.parse.parse_qs``, so the result maps each parameter name to the
    list of values given for it.
    """
    query = urlparse.urlparse(parseUrl(request)).query
    return urlparse.parse_qs(query)

def parseUrl(request):
    """Return the first URL found in *request*.

    The URL is the text matched by ``URL_REGEX``.

    Returns:
        The matched URL, or an empty string when *request* contains no match.
        ``re.search`` returns ``None`` in that case, so the missing match is
        handled explicitly rather than failing with ``AttributeError``; this
        mirrors ``parseParamsFromBody``, which yields an empty result when the
        body is missing.
    """
    match = re.search(URL_REGEX, request)
    return match.group(0) if match is not None else ""

def parseParamsFromBody(request):
    """Return the parameters encoded in the body of *request*.

    The body is the text matched by ``BODY_REGEX`` and is decoded with
    ``urllib.parse.parse_qs``.  An empty dict is returned when no body is
    found.
    """
    match = re.search(BODY_REGEX, request)
    if match is None:
        return {}

    return urlparse.parse_qs(match.group(0))


### Features
Calculator classes are used to extract a feature from a given string.
To keep the feature set easy to extend, every feature has its own class that takes care of its calculation.

In [2]:
import string
from abc import ABCMeta, abstractmethod


class FeatureCalculator(metaclass=ABCMeta):
    """Abstract base class for objects that compute one feature of a string.

    Concrete calculators implement :meth:`transform`; the base class itself
    cannot be instantiated.
    """

    def __init__(self):
        pass

    @abstractmethod
    def transform(self, s: str, request_type=False):
        """Compute the feature value for *s*.

        Args:
            s: Text to compute the feature from.
            request_type: Flag telling the calculator that *s* may be parsed
                as a request.  The request-specific calculators in this file
                return 0 when it is false; the others ignore it.

        Returns:
            The feature value.
        """


In [5]:
class LengthFeatureCalculator(FeatureCalculator):
    """Feature: total number of characters in the string."""

    def transform(self, s: str, request_type=False):
        """Return ``len(s)``.

        Args:
            s: Text to measure.
            request_type: Accepted for interface compatibility; not used.
        """
        return len(s)


In [None]:
class LettersFeatureCalculator(FeatureCalculator):
    """Feature: number of alphabetic characters in the string."""

    def transform(self, s: str, request_type=False):
        """Return how many characters of *s* satisfy ``str.isalpha``.

        Args:
            s: Text to inspect.
            request_type: Accepted for interface compatibility; not used.
        """
        return sum(c.isalpha() for c in s)
    

In [None]:
class NonAlphaFeatureCalculator(FeatureCalculator):
    """Feature: number of non-alphabetic characters in the string."""

    def transform(self, s: str, request_type=False):
        """Return how many characters of *s* do not satisfy ``str.isalpha``.

        Digits, whitespace and punctuation all count as non-alphabetic.

        Args:
            s: Text to inspect.
            request_type: Accepted for interface compatibility; not used.
        """
        return sum(not c.isalpha() for c in s)
    

In [None]:
class PathLengthFeatureCalculator(FeatureCalculator):
    """Feature: length of the URL contained in a request."""

    def transform(self, s: str, request_type=False):
        """Return the length of the URL that ``parseUrl`` finds in *s*.

        Note that the whole URL returned by ``parseUrl`` is measured, not
        only its path component.

        Args:
            s: Request text.
            request_type: When false, *s* is not parsed and 0 is returned.
        """
        if not request_type:
            return 0

        return len(parseUrl(s))
    

In [None]:
class PathNonAlphaFeatureCalculator(FeatureCalculator):
    """Feature: number of non-alphabetic characters in a request's URL."""

    def transform(self, s: str, request_type=False):
        """Count the characters of the URL in *s* that fail ``str.isalpha``.

        The URL is obtained with ``parseUrl`` and is inspected as a whole,
        not only its path component.

        Args:
            s: Request text.
            request_type: When false, *s* is not parsed and 0 is returned.
        """
        if not request_type:
            return 0

        return sum(not c.isalpha() for c in parseUrl(s))



In [None]:
import collections
import math

class EntropyFeatureCalculator(FeatureCalculator):
    """Feature: Shannon entropy, in bits, of the string's characters."""

    def transform(self, s: str, request_type=False):
        """Return ``-sum(p * log2(p))`` over the character frequencies of *s*.

        Args:
            s: Text whose character distribution is measured.
            request_type: Accepted for interface compatibility; not used.

        Returns:
            The entropy; 0 for an empty string.
        """
        total = len(s)  # computed once instead of once per distinct character
        if not total:
            return 0

        # Each term is negated individually rather than negating the final
        # sum, so a string made of a single repeated character yields 0.0
        # instead of -0.0.
        return sum(
            -(count / total) * math.log2(count / total)
            for count in collections.Counter(s).values())
    

In [None]:
class DigitsFeature(FeatureCalculator):
    """Feature: number of digit characters in the string.

    Derives from ``FeatureCalculator`` like every other calculator in this
    file, so it can be used interchangeably with them.
    """

    def transform(self, s: str, request_type=False):
        """Return how many characters of *s* satisfy ``str.isdigit``.

        Args:
            s: Text to inspect.
            request_type: Accepted for interface compatibility; not used.
        """
        return sum(c.isdigit() for c in s)

In [None]:
class ArgumentsLengthFeatureCalculator(FeatureCalculator):
    """Feature: combined length of a request's parameter names."""

    def transform(self, s: str, request_type=False):
        """Return the summed length of all parameter names found in *s*.

        Parameters are collected from the URL (``parseParamsFromUrl``) and
        from the body (``parseParamsFromBody``).  A name that occurs in both
        places is counted once, because the two mappings are merged.  Only
        the names are measured, not the values.

        Args:
            s: Request text.
            request_type: When false, *s* is not parsed and 0 is returned.
        """
        if not request_type:
            return 0

        url_params = parseParamsFromUrl(s)
        body_params = parseParamsFromBody(s)
        params = {**url_params, **body_params}

        # Iterating a dict yields its keys, i.e. the parameter names.
        return sum(len(name) for name in params)
    

In [None]:
class ArgumentsNumberFeatureCalculator(FeatureCalculator):
    """Feature: number of distinct parameter names in a request."""

    def transform(self, s: str, request_type=False):
        """Return how many distinct parameter names *s* carries.

        Parameters are collected from the URL (``parseParamsFromUrl``) and
        from the body (``parseParamsFromBody``); a name present in both is
        counted once because the two mappings are merged.

        Args:
            s: Request text.
            request_type: When false, *s* is not parsed and 0 is returned.
        """
        if not request_type:
            return 0

        url_params = parseParamsFromUrl(s)
        body_params = parseParamsFromBody(s)

        return len({**url_params, **body_params})
