In [17]:
import re
from datetime import datetime

LOG_RE = re.compile(r'''
    (?P<host>\S+)\s+                # host (or -)
    (?P<ident>\S+)\s+               # ident (or -)
    \[(?P<time>[^\]]+)\]\s+         # [timestamp]
    "(?P<method>\S+)\s+             # "METHOD
     (?P<path>[^"]+?)\s+            #  path (lazy; may itself contain spaces)
     (?P<proto>[^"\s]+)"\s+         #  PROTO" (one whitespace-free token)
    (?P<status>\d{3})\s+            # status
    (?P<size>\S+)\s+                # size in bytes or -
    "(?P<referer>[^"]*)"\s+         # "referer" (may be empty)
    (?P<token>\S+)\s+               # custom token (session id or cookie)
    "(?P<agent>[^"]*)"              # "user-agent" (may be empty)
''', re.VERBOSE)

def parse_apache_line(line):
    """Parse one access-log line into a dict of typed fields.

    The pattern is searched for (not anchored), so any text before or after
    the matched portion of ``line`` is ignored.

    Args:
        line: A log line such as
            ``- - [01/Nov/2019:16:32:09 +0000] "GET / HTTP/1.1" 200 2770 "-" - "UA"``.

    Returns:
        A dict with the keys:
            ``timestamp`` - timezone-aware ``datetime`` parsed from the
                bracketed time field.
            ``method``, ``path``, ``protocol`` - the three parts of the
                quoted request; ``protocol`` is always the last
                whitespace-free token, so a path containing spaces is kept
                whole.
            ``status`` - ``int``.
            ``size`` - ``int``, or ``None`` when logged as ``-``.
            ``referer`` - ``str``, or ``None`` when logged as ``""`` or ``"-"``.
            ``token`` - ``str``, returned verbatim (``'-'`` is not normalised).
        The host, ident and user-agent fields are matched but not returned.

    Raises:
        ValueError: If the line does not match the expected format, if the
            time field does not follow ``%d/%b/%Y:%H:%M:%S %z``, or if the
            size field is neither ``-`` nor an integer.
    """
    m = LOG_RE.search(line)
    if not m:
        raise ValueError("line did not match expected format")
    d = m.groupdict()

    # example time format: 01/Nov/2019:16:32:09 +0000
    dt = datetime.strptime(d['time'], "%d/%b/%Y:%H:%M:%S %z")

    # '-' is the placeholder the log uses for a missing value; map it to None
    # for size and referer alike (an empty quoted referer is also "missing").
    size = None if d['size'] == '-' else int(d['size'])
    referer = None if d['referer'] in ('', '-') else d['referer']

    parsed = {
        'timestamp': dt,
        'method': d['method'],
        'path': d['path'],
        'protocol': d['proto'],
        'status': int(d['status']),
        'size': size,
        'referer': referer,
        'token': d['token'],
    }
    return parsed

# Sample log lines: a fully populated request, then two requests whose
# referer/token (and, for line2, user-agent) are logged as '-'.
line1 = (
    '- - [01/Nov/2019:16:32:09 +0000] '
    '"POST /storage/store_sess_total_mousemv_db.php HTTP/1.1" 200 449 '
    '"https://160.40.52.164/content/big_data.php" '
    'htodnmm7tjpihgeuqk64c0gjes '
    '"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/77.0.3865.90 Safari/537.36 OPR/64.0.3417.61"'
)
line2 = (
    '- - [30/Oct/2019:09:42:02 +0000] '
    '"GET / HTTP/1.0" 200 2045 "-" - "-"'
)
line3 = (
    '- - [30/Oct/2019:11:50:36 +0000] '
    '"GET / HTTP/1.1" 200 2770 "-" - '
    '"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/77.0.3865.90 Safari/537.36 OPR/64.0.3417.61"'
)
# Only the first sample is parsed and shown here.
print(parse_apache_line(line1))


{'timestamp': datetime.datetime(2019, 11, 1, 16, 32, 9, tzinfo=datetime.timezone.utc), 'method': 'POST', 'path': '/storage/store_sess_total_mousemv_db.php', 'protocol': 'HTTP/1.1', 'status': 200, 'size': 449, 'referer': 'https://160.40.52.164/content/big_data.php', 'token': 'htodnmm7tjpihgeuqk64c0gjes'}


In [9]:
# Start from an empty mapping; later cells add keys to it.
dic = dict()


In [10]:
# Insert keys 1 and 2 (in that order), each mapped to the value 1.
dic.update({1: 1, 2: 1})


In [15]:
# Membership test on the dict's keys: prints True when 3 has not been inserted.
print(not (3 in dic))

True


In [19]:
# Conditional assignment: x becomes 1 only when y is zero, otherwise 0.
y = 1
if y == 0:
    x = 1
else:
    x = 0
print(x)

0


In [26]:
from numpy import std

# numpy's std uses ddof=0 (population standard deviation) by default;
# spelled out here so the 0.5 result for [1, 2] is not surprising.
x = [1, 2]
print(std(x, ddof=0))

0.5


In [1]:
import pandas as pd

In [4]:
# Load the CSV; the relative path is resolved by pandas at read time.
df = pd.read_csv(filepath_or_buffer="bots.csv")

In [5]:
# Preview the first five rows (head's default count, written explicitly).
df.head(5)

Unnamed: 0,session_id,Total_requests,Total_Bytes,Total_GET_requests,Total_POST_requests,Total_3xx_responses,Total_4xx_responses,per_image_requests,per_css_request,per_js_requests,Depth_SD,Max_requests_per_page,Average_requests_per_page,Max_sequential_request,per_sequential_requests,Session_time,Browsing_speed,SD_request_time
0,97hf7ciplt2k54f5j6109nekn0,157,70114,8,149,0.0,0.0,0.0,0.006369,0.031847,0.112145,143,15.7,157,1.0,277.0,0.036101,0.587992
1,46hm95bnvnuglhj1i1906nc80u,29,24199,16,13,0.0,0.0,0.0,0.137931,0.172414,0.405081,9,2.636364,29,1.0,11.0,1.0,0.665079
2,mtilohhtbsshka38svllisk0am,18,17012,9,9,0.0,0.0,0.0,0.055556,0.277778,0.31427,8,1.8,18,1.0,15.0,0.666667,0.957427
3,66vt430cgmgpus1k96japf46pf,326,200283,43,283,0.0,0.0,0.0,0.003067,0.015337,0.306518,264,16.3,326,1.0,512.0,0.039062,0.813757
4,igirlpfg3oft6i3dl8ah549gqi,319,222003,40,279,0.0,0.0,0.0,0.003135,0.015674,0.335409,262,13.869565,319,1.0,528.0,0.043561,0.799822


In [12]:
# Prefix check: compare the leading two characters against "gi".
print("hii/hf"[:2] == "gi")

False
