## Read and Parse the logs

In [1]:
with open('../data/auth.log') as f:
    data = f.readlines()

In [2]:
data[:10]

['Mar 27 13:06:56 ip-10-77-20-248 sshd[1291]: Server listening on 0.0.0.0 port 22.\n',
 'Mar 27 13:06:56 ip-10-77-20-248 sshd[1291]: Server listening on :: port 22.\n',
 'Mar 27 13:06:56 ip-10-77-20-248 systemd-logind[1118]: Watching system buttons on /dev/input/event0 (Power Button)\n',
 'Mar 27 13:06:56 ip-10-77-20-248 systemd-logind[1118]: Watching system buttons on /dev/input/event1 (Sleep Button)\n',
 'Mar 27 13:06:56 ip-10-77-20-248 systemd-logind[1118]: New seat seat0.\n',
 'Mar 27 13:08:09 ip-10-77-20-248 sshd[1361]: Accepted publickey for ubuntu from 85.245.107.41 port 54259 ssh2: RSA SHA256:Kl8kPGZrTiz7g4FO1hyqHdsSBBb5Fge6NWOobN03XJg\n',
 'Mar 27 13:08:09 ip-10-77-20-248 sshd[1361]: pam_unix(sshd:session): session opened for user ubuntu by (uid=0)\n',
 'Mar 27 13:08:09 ip-10-77-20-248 systemd: pam_unix(systemd-user:session): session opened for user ubuntu by (uid=0)\n',
 'Mar 27 13:08:09 ip-10-77-20-248 systemd-logind[1118]: New session 1 of user ubuntu.\n',
 'Mar 27 13:09:37

## Testing Pygrok

In [3]:
import pygrok

In [4]:
from pygrok import Grok
text = 'gary is male, 25 years old and weighs 68.5 kilograms'
pattern = '%{WORD:name} is %{WORD:gender}, %{NUMBER:age} years old and weighs %{NUMBER:weight} kilograms'
grok = Grok(pattern)
print(grok.match(text))

{'gender': 'male', 'age': '25', 'weight': '68.5', 'name': 'gary'}


### Following the instructions from here: https://www.elastic.co/blog/grokking-the-linux-authorization-logs

In [5]:
text = 'Feb 21 00:13:35 localhost sshd[7483]: Accepted password for vagrant from 192.168.33.1 port 58803 ssh2'
pattern = '%{SYSLOGTIMESTAMP:system.auth.timestamp} %{SYSLOGHOST:system.auth.hostname} sshd(?:\\[%{POSINT:system.auth.pid}\\])?: %{DATA:system.auth.ssh.event} %{DATA:system.auth.ssh.method} for (invalid user )?%{DATA:system.auth.user} from %{IPORHOST:system.auth.ip} port %{NUMBER:system.auth.port} ssh2(: %{GREEDYDATA:system.auth.ssh.signature})?'
grok = Grok(pattern)
print(grok.match(text))

None


In [6]:
text = 'Feb 21 21:56:12 localhost sshd[3430]: Invalid user test from 10.0.2.2'
pattern = '%{SYSLOGTIMESTAMP:system.auth.timestamp} %{SYSLOGHOST:system.auth.hostname} sshd(?:\\[%{POSINT:system.auth.pid}\\])?: %{DATA:system.auth.ssh.event} user %{DATA:system.auth.user} from %{IPORHOST:system.auth.ip}'
grok = Grok(pattern)
print(grok.match(text))

None


### The above method doesn't seem to work

## Logalyzer - https://github.com/hatRiot/logalyzer

In [7]:
import re
# import gzip

#
# ParseLogs.py
# Parsing component of Logalyzer.  Compiled in Python 2.6
#

# log object 
# Stuck into a dictionary by user:Log, where log houses
# logs, fails, successes, logged IPs, and commands used
class Log:
    # dump date of first log
    def first_date(self):
        if len(self.logs) > 0:
            date = None
            i = 0
            # sometimes the first few aren't right, so look
            # until we find one
            while i < len(self.logs) and date is None:
                date = ParseDate(self.logs[i])
                i += 1
            return date
    # dump date of last log
    def last_date(self):
        if len(self.logs) > 0:
            return ParseDate(self.logs[len(self.logs) - 1])
    def __init__(self, usr):
        self.usr = usr
        self.logs = []
        self.fail_logs = []
        self.succ_logs = []
        self.ips = []
        self.commands = []

# parse user from various lines
def ParseUsr(line):
    usr = None
    if "Accepted password" in line:
        usr = re.search(r'(\bfor\s)(\w+)', line)
    elif "sudo:" in line:
        usr = re.search(r'(sudo:\s+)(\w+)', line)
    elif "authentication failure" in line:
        usr = re.search(r'USER=\w+', line)
    elif "for invalid user" in line:
        usr = re.search(r'(\buser\s)(\w+)', line)
    if usr is not None:
        return usr.group(2)

# parse an IP from a line
def ParseIP(line):
    ip = re.search(r'(\bfrom\s)(\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b)', line)
    if ip is not None:
        return ip.group(2)

# parse a date from the line
def ParseDate(line):
    date = re.search(r'^[A-Za-z]{3}\s*[0-9]{1,2}\s[0-9]{1,2}:[0-9]{2}:[0-9]{2}', line)
    if date is not None:
        return date.group(0)

# parse a command from a line
def ParseCmd(line):
    # parse command to end of line 
    cmd = re.search(r'(\bCOMMAND=)(.+?$)', line)
    if cmd is not None:
        return cmd.group(2)

# begin parsing the passed LOG
def ParseLogs(log):
    # initialize the dictionary
    logs = {}

    # parse the log
#     f = None
#     try:
#         f = gzip.open(LOG, 'r') if '.gz' in LOG else open(LOG, 'r')
#         log = f.read()
#     except Exception as e:
#         print('[-] Error opening \'%s\': %s'%(LOG,e))
#         return None
#     finally:
#         if f is not None: f.close()

    for line in log.split('\n'):
        # match a login
        if "Accepted password for" in line:
            usr = ParseUsr(line)
            
            # add 'em if they don't exist
            if not usr in logs:
                logs[usr] = Log(usr)
            
            ip = ParseIP(line)
            # set info
            if not ip in logs[usr].ips:
                logs[usr].ips.append(ip)
            logs[usr].succ_logs.append(line.rstrip('\n'))
            logs[usr].logs.append(line.rstrip('\n'))

        # match a failed login
        elif "Failed password for" in line:
            # parse user
            usr = ParseUsr(line)

            if not usr in logs:
                logs[usr] = Log(usr)
                
            ip = ParseIP(line)

            if not ip in logs[usr].ips:
                logs[usr].ips.append(ip)
            logs[usr].fail_logs.append(line.rstrip('\n'))
            logs[usr].logs.append(line.rstrip('\n'))
            
        # match failed auth
        elif ":auth): authentication failure;" in line:
            # so there are three flavors of authfail we care about;
            # su, sudo, and ssh.  Lets parse each.
            usr = re.search(r'(\blogname=)(\w+)', line)
            if usr is not None:
                usr = usr.group(2)
            # parse a fail log to ssh
            if "(sshd:auth)" in line:
                # ssh doesn't have a logname hurr
                usr = ParseUsr(line)
                if not usr in logs:
                    logs[usr] = Log(usr)
                logs[usr].ips.append(ParseIP(line))
            # parse sudo/su fails
            else:    
                if not usr in logs:
                    logs[usr] = Log(usr)
            logs[usr].fail_logs.append(line.rstrip('\n'))
            logs[usr].logs.append(line.rstrip('\n'))
            # match commands
        elif "sudo:" in line:
            # parse user
            usr = ParseUsr(line)
            if not usr in logs:
                logs[usr] = Log(usr)
    
            cmd = ParseCmd(line)
            # append the command if it isn't there already
            if cmd is not None:
                if not cmd in logs[usr].commands:
                    logs[usr].commands.append(cmd)
            logs[usr].logs.append(line.rstrip('\n'))
    return logs

In [8]:
parsed_logs = ParseLogs(''.join(data))

In [9]:
parsed_logs.keys()

dict_keys(['elastic_user_5', 'ems', 'cubrid', 'pam_unix', 'pruebas', 'elastic_user_1', 'openerp', 'elastic_user_9', 'elastic_user_4', 'elastic_user_6', 'guest', 'elastic_user_2', 'cloud', 'test', 'elastic_user_7', None, 'user1', 'johnny', 'elastic_user_3', 'default', 'webconfig', 'ajay', 'elastic_user_0', 'support', 'admin', 'monitor', 'root', 'ubnt', 'ubuntu', 'elastic_user_8', 'pi'])

In [10]:
parsed_logs['root'].__dict__

{'commands': ['/usr/bin/apt-get install zip',
  '/usr/sbin/service sshd restart',
  '/usr/sbin/service filebeat stop',
  '/usr/sbin/service filebeat start'],
 'fail_logs': [],
 'ips': [],
 'logs': ['Mar 27 16:50:03 ip-10-77-20-248 sudo:     root : TTY=pts/0 ; PWD=/usr/share/filebeat/scripts ; USER=root ; COMMAND=/usr/bin/apt-get install zip',
  'Mar 29 11:44:38 ip-10-77-20-248 sudo:     root : TTY=pts/0 ; PWD=/home/ubuntu ; USER=root ; COMMAND=/usr/sbin/service sshd restart',
  'Mar 29 11:45:36 ip-10-77-20-248 sudo:     root : TTY=pts/0 ; PWD=/home/ubuntu ; USER=root ; COMMAND=/usr/sbin/service filebeat stop',
  'Mar 29 11:52:42 ip-10-77-20-248 sudo:     root : TTY=pts/0 ; PWD=/home/ubuntu ; USER=root ; COMMAND=/usr/sbin/service filebeat start',
  'Mar 29 11:53:36 ip-10-77-20-248 sudo:     root : TTY=pts/0 ; PWD=/home/ubuntu ; USER=root ; COMMAND=/usr/sbin/service filebeat stop'],
 'succ_logs': [],
 'usr': 'root'}