In [None]:
import pandas as pd
import re
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Load the training set; the first CSV column is used as the DataFrame index.
train_df = pd.read_csv('./data/train.csv', index_col=0)
# Bare last expression so the notebook renders the frame.
train_df

Unnamed: 0_level_0,level,full_log
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,"Sep 24 10:02:22 localhost kibana: {""type"":""err..."
1,0,Feb 8 16:21:00 localhost logstash: [2021-02-0...
2,0,"Jan 13 01:50:40 localhost kibana: {""type"":""err..."
3,0,"Jan 4 10:18:31 localhost kibana: {""type"":""err..."
4,1,type=SYSCALL msg=audit(1603094402.016:52981): ...
...,...,...
472967,0,Feb 28 10:10:06 localhost logstash: 7738 error:
472968,1,type=SYSCALL msg=audit(1611890993.458:321827):...
472969,0,"Oct 12 02:20:29 localhost kibana: {""type"":""log..."
472970,0,"Jan 15 09:02:43 localhost kibana: {""type"":""err..."


In [None]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 472972 entries, 0 to 472971
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   level     472972 non-null  int64 
 1   full_log  472972 non-null  object
dtypes: int64(1), object(1)
memory usage: 10.8+ MB


In [None]:
train_df['level'].value_counts()

0    334065
1    132517
3      4141
5      2219
2        12
4        10
6         8
Name: level, dtype: int64

## 첫 번째 단어

In [None]:
train_df['full_log'].str.extract('^([^ ]+)(?: |$)')

Unnamed: 0_level_0,0
id,Unnamed: 1_level_1
0,Sep
1,Feb
2,Jan
3,Jan
4,type=SYSCALL
...,...
472967,Feb
472968,type=SYSCALL
472969,Oct
472970,Jan


In [None]:
train_df.head()

Unnamed: 0_level_0,level,full_log
id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0,"Sep 24 10:02:22 localhost kibana: {""type"":""err..."
1,0,Feb 8 16:21:00 localhost logstash: [2021-02-0...
2,0,"Jan 13 01:50:40 localhost kibana: {""type"":""err..."
3,0,"Jan 4 10:18:31 localhost kibana: {""type"":""err..."
4,1,type=SYSCALL msg=audit(1603094402.016:52981): ...


In [None]:
# Leading run of non-space characters of each log line (it must be followed by
# a space or the end of the string); expand=False yields a Series directly.
train_df['first_word'] = train_df['full_log'].str.extract('^([^ ]+)(?: |$)', expand=False)

In [None]:
train_df['first_word'].nunique()

26

In [None]:
train_df['first_word'].value_counts()

Jan              207025
type=SYSCALL     116496
Feb               35544
Dec               25882
Oct               23883
Sep               23193
Nov               21875
Mar               12174
level              2331
ossec:             1318
File                822
System              820
error:              459
type=AVC            369
oscap:              366
E:                  297
NTFS                 25
2021                 22
Trojaned             15
Windows              13
--MARK--:            12
The                  11
2020                  9
juniper               5
type=USER_AVC         3
OpenSCAP              3
Name: first_word, dtype: int64

In [None]:
train_df['first_word'].value_counts().index

Index(['Jan', 'type=SYSCALL', 'Feb', 'Dec', 'Oct', 'Sep', 'Nov', 'Mar',
       'level', 'ossec:', 'File', 'System', 'error:', 'type=AVC', 'oscap:',
       'E:', 'NTFS', '2021', 'Trojaned', 'Windows', '--MARK--:', 'The', '2020',
       'juniper', 'type=USER_AVC', 'OpenSCAP'],
      dtype='object')

In [None]:
# Count logs per (first_word, level) pair, then order the rows by how often
# each first word occurs (most frequent first).
pivot = (
    train_df
    .pivot_table(values='full_log', index='first_word', columns='level', aggfunc='count', fill_value=0)
    .loc[train_df['first_word'].value_counts().index]
)
pivot

level,0,1,2,3,4,5,6
Jan,200574,3019,0,3414,1,14,3
type=SYSCALL,0,116496,0,0,0,0,0
Feb,33114,2381,0,44,0,1,4
Dec,24370,1498,0,14,0,0,0
Oct,20973,2904,0,6,0,0,0
Sep,22392,799,1,1,0,0,0
Nov,18980,2646,0,239,9,1,0
Mar,11368,804,0,1,0,0,1
level,1519,736,0,11,0,65,0
ossec:,0,29,0,0,0,1289,0


## 첫 번째 단어 기준 그룹 생성

In [None]:
# Group rows by their first token so that each log family can be pulled out
# with groups.get_group(<first_word>).
groups = train_df.groupby('first_word')

In [None]:
def show_all(first_word, limit=None):
    """Print the raw logs of one ``first_word`` group, one per paragraph.

    Reads the notebook-level ``groups`` object (``train_df`` grouped by
    ``first_word``), so the cell that creates ``groups`` must run first.

    Parameters
    ----------
    first_word : str
        Group key passed to ``groups.get_group``.
    limit : int, optional
        Non-negative maximum number of logs to print. ``None`` (the default)
        prints the whole group, which matches the previous behaviour.

    Raises
    ------
    KeyError
        Propagated from ``groups.get_group`` when ``first_word`` is not a group.
    """
    logs = groups.get_group(first_word)['full_log']
    if limit is not None:
        # Cap the output so large groups do not flood the notebook.
        logs = logs.head(limit)
    for log in logs:
        # The extra '\n' argument leaves a blank line between consecutive logs.
        print(log, '\n')

## level

In [None]:
level = train_df[train_df['first_word'] == 'level'].copy()
level

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
562,0,"level : 5, log : No mode specified for interfa...",level
838,0,"level : 5, log : PIC pic-slot in FPC fpc-slot ...",level
1076,5,"level : 10, log : fru-name#fru-slot - command",level
1390,0,"level : 5, log : Error getting class usage sta...",level
1569,0,"level : 5, log : Fabric fc-fabric-name interfa...",level
...,...,...,...
471241,0,"level : 3, log : routing socket open error arg...",level
471457,0,"level : 10, log : Symmetric key generation fai...",level
471722,1,"level : 3, log : Invalid XML data '\\data'",level
471998,0,"level : 3, log : pathname: error-message",level


In [None]:
# Raw string: '\S' is not a valid Python string escape, so a plain literal
# triggers an invalid-escape warning. The regex itself is unchanged.
# Captures the non-space token between "level : " and the following comma.
level['log_level'] = level['full_log'].str.extract(r'level : (\S+),')
level

Unnamed: 0_level_0,level,full_log,first_word,log_level
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
562,0,"level : 5, log : No mode specified for interfa...",level,5
838,0,"level : 5, log : PIC pic-slot in FPC fpc-slot ...",level,5
1076,5,"level : 10, log : fru-name#fru-slot - command",level,10
1390,0,"level : 5, log : Error getting class usage sta...",level,5
1569,0,"level : 5, log : Fabric fc-fabric-name interfa...",level,5
...,...,...,...,...
471241,0,"level : 3, log : routing socket open error arg...",level,3
471457,0,"level : 10, log : Symmetric key generation fai...",level,10
471722,1,"level : 3, log : Invalid XML data '\\data'",level,3
471998,0,"level : 3, log : pathname: error-message",level,3


In [None]:
level['log_level'].isnull().sum()

0

In [None]:
pivot_level = level.pivot_table(values='full_log', index='log_level', columns='level', aggfunc='count', fill_value=0)
pivot_level

level,0,1,3,5
log_level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
%{rule.level},10,8,0,0
10,27,13,0,3
2,18,6,0,2
3,593,282,7,18
4,2,2,0,0
5,859,422,4,41
7,10,2,0,0
8,0,1,0,1


-> 더 확인

## 날짜로 시작하는 로그

날짜로 시작하는 로그
- Jan
- Feb
- Dec
- Oct
- Sep
- Nov
- Mar

In [None]:
# Three-letter English month abbreviations, used below to select the logs
# whose first token is a month name.
MONTH = {'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'}

In [None]:
# Keep only the rows whose first token is a month abbreviation. The copy makes
# later column assignments act on an independent frame, not a slice of train_df.
starts_with_month = train_df['first_word'].isin(MONTH)
log_time = train_df.loc[starts_with_month].copy()
log_time

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0,"Sep 24 10:02:22 localhost kibana: {""type"":""err...",Sep
1,0,Feb 8 16:21:00 localhost logstash: [2021-02-0...,Feb
2,0,"Jan 13 01:50:40 localhost kibana: {""type"":""err...",Jan
3,0,"Jan 4 10:18:31 localhost kibana: {""type"":""err...",Jan
6,0,"Jan 22 06:28:59 localhost kibana: {""type"":""err...",Jan
...,...,...,...
472966,0,Mar 5 20:33:06 localhost logstash: 14312 ...,Mar
472967,0,Feb 28 10:10:06 localhost logstash: 7738 error:,Feb
472969,0,"Oct 12 02:20:29 localhost kibana: {""type"":""log...",Oct
472970,0,"Jan 15 09:02:43 localhost kibana: {""type"":""err...",Jan


In [None]:
# Rows whose raw log never contains the substring "localhost".
# Vectorised plain-substring test (regex=False) instead of a per-row lambda.
log_time[~log_time['full_log'].str.contains('localhost', regex=False)]

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2806,0,Nov 19 08:55:07 m2datateksolaris SC Alert: [ID...,Nov
4841,3,Nov 17 14:29:00 sv260 sshd[6563]: Failed none ...,Nov
12837,3,Nov 18 14:02:45 sv260 sshd[8621]: Disconnectin...,Nov
12867,3,Nov 17 14:25:07 sv260 sshd[6481]: Disconnectin...,Nov
13723,3,Nov 17 17:34:26 sv260 sshd[8537]: Failed passw...,Nov
...,...,...,...
465161,0,Nov 19 09:55:06 m2datateksolaris SC Alert: [ID...,Nov
468207,3,Nov 17 14:25:36 sv260 sshd[6506]: Disconnectin...,Nov
468491,3,Nov 17 17:31:10 sv260 sshd[8480]: Disconnectin...,Nov
469314,3,Nov 17 17:30:17 sv260 sshd[8467]: Failed passw...,Nov


In [None]:
# Independent copy of the rows that never contain the substring "localhost",
# so columns can be added to it without touching log_time.
# Vectorised plain-substring test (regex=False) instead of a per-row lambda.
log_time_etc = log_time[~log_time['full_log'].str.contains('localhost', regex=False)].copy()

### localhost

In [None]:
# Raw string: '\S' is not a valid Python string escape. The regex is unchanged.
# Captures the non-space token between "localhost " and the following ": ";
# because of the greedy leading '.+', the last such occurrence in the line wins.
log_time['localhost'] = log_time['full_log'].str.extract(r'.+localhost (\S+): ')
log_time

Unnamed: 0_level_0,level,full_log,first_word,localhost
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,"Sep 24 10:02:22 localhost kibana: {""type"":""err...",Sep,kibana
1,0,Feb 8 16:21:00 localhost logstash: [2021-02-0...,Feb,logstash
2,0,"Jan 13 01:50:40 localhost kibana: {""type"":""err...",Jan,kibana
3,0,"Jan 4 10:18:31 localhost kibana: {""type"":""err...",Jan,kibana
6,0,"Jan 22 06:28:59 localhost kibana: {""type"":""err...",Jan,kibana
...,...,...,...,...
472966,0,Mar 5 20:33:06 localhost logstash: 14312 ...,Mar,logstash
472967,0,Feb 28 10:10:06 localhost logstash: 7738 error:,Feb,logstash
472969,0,"Oct 12 02:20:29 localhost kibana: {""type"":""log...",Oct,kibana
472970,0,"Jan 15 09:02:43 localhost kibana: {""type"":""err...",Jan,kibana


In [None]:
log_time['localhost'].nunique()

1255

In [None]:
log_time['localhost'].value_counts()

kibana                170237
logstash              138415
sudo                   13813
suricata[1447]          5553
suricata[1454]          5076
                       ...  
sshd[27922]                1
unix_chkpwd[26274]         1
sshd[6824]                 1
sshd[26255]                1
sshd[26919]                1
Name: localhost, Length: 1255, dtype: int64

In [None]:
log_time['localhost'].value_counts()[:30]

kibana               170237
logstash             138415
sudo                  13813
suricata[1447]         5553
suricata[1454]         5076
esild-ml-start.sh      4383
suricata[1444]         4125
systemd                2210
suricata[1457]         1537
suricata[1437]          595
auditd[808]             497
suricata[1442]          480
suricata[1434]          398
kernel                  173
suricata[1445]          134
journal                  92
suricata[1450]           18
suricata                 14
yum[26878]               11
auditd[840]               9
augenrules                8
elasticsearch             7
suricata[1441]            5
suricata[1456]            5
auditd[832]               4
sshd[735]                 4
sshd[19037]               4
sshd[13005]               3
sshd[27462]               3
sshd[27404]               3
Name: localhost, dtype: int64

### PID 무시 (프로세스 이름만 추출)

In [None]:
# Raw string: '\[' is not a valid Python string escape. The regex is unchanged.
# Captures the name after "localhost " up to the ':' or '[' that ends it, so a
# bracketed suffix is dropped (e.g. "suricata[1447]" -> "suricata").
log_time['localhost_2'] = log_time['full_log'].str.extract(r'localhost ([^ \[]+)(?::|\[)')
log_time

Unnamed: 0_level_0,level,full_log,first_word,localhost,localhost_2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0,"Sep 24 10:02:22 localhost kibana: {""type"":""err...",Sep,kibana,kibana
1,0,Feb 8 16:21:00 localhost logstash: [2021-02-0...,Feb,logstash,logstash
2,0,"Jan 13 01:50:40 localhost kibana: {""type"":""err...",Jan,kibana,kibana
3,0,"Jan 4 10:18:31 localhost kibana: {""type"":""err...",Jan,kibana,kibana
6,0,"Jan 22 06:28:59 localhost kibana: {""type"":""err...",Jan,kibana,kibana
...,...,...,...,...,...
472966,0,Mar 5 20:33:06 localhost logstash: 14312 ...,Mar,logstash,logstash
472967,0,Feb 28 10:10:06 localhost logstash: 7738 error:,Feb,logstash,logstash
472969,0,"Oct 12 02:20:29 localhost kibana: {""type"":""log...",Oct,kibana,kibana
472970,0,"Jan 15 09:02:43 localhost kibana: {""type"":""err...",Jan,kibana,kibana


In [None]:
log_time['localhost_2'].nunique()

26

In [None]:
log_time['localhost_2'].value_counts()

kibana               170219
logstash             138436
suricata              17949
sudo                  13813
esild-ml-start.sh      4383
systemd                2210
sshd                   1376
auditd                  510
kernel                  173
unix_chkpwd             122
journal                  92
yum                      16
augenrules                8
elasticsearch             7
postfix/master            3
su                        3
rc.local                  2
bluetoothd                2
mcelog                    2
polkitd                   2
pulseaudio                1
useradd                   1
dbus-daemon               1
gnome-session             1
dbus                      1
kdumpctl                  1
Name: localhost_2, dtype: int64

In [None]:
log_time.pivot_table(values='full_log', index='localhost_2', columns=['level'], aggfunc='count', fill_value=0)

level,0,1,2,3,4,5,6
localhost_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
auditd,510,0,0,0,0,0,0
augenrules,8,0,0,0,0,0,0
bluetoothd,2,0,0,0,0,0,0
dbus,1,0,0,0,0,0,0
dbus-daemon,1,0,0,0,0,0,0
elasticsearch,7,0,0,0,0,0,0
esild-ml-start.sh,4383,0,0,0,0,0,0
gnome-session,1,0,0,0,0,0,0
journal,92,0,0,0,0,0,0
kdumpctl,1,0,0,0,0,0,0


-> 조금 더 확인 필요

### localhost가 아닌 경우

In [None]:
# Raw string: '\d' and '\S' are not valid Python string escapes. The regex is
# unchanged. Captures the non-space token right after the HH:MM:SS timestamp.
log_time_etc['second'] = log_time_etc['full_log'].str.extract(r'\d{2}:\d{2}:\d{2} (\S+) ')
log_time_etc

Unnamed: 0_level_0,level,full_log,first_word,second
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2806,0,Nov 19 08:55:07 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris
4841,3,Nov 17 14:29:00 sv260 sshd[6563]: Failed none ...,Nov,sv260
12837,3,Nov 18 14:02:45 sv260 sshd[8621]: Disconnectin...,Nov,sv260
12867,3,Nov 17 14:25:07 sv260 sshd[6481]: Disconnectin...,Nov,sv260
13723,3,Nov 17 17:34:26 sv260 sshd[8537]: Failed passw...,Nov,sv260
...,...,...,...,...
465161,0,Nov 19 09:55:06 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris
468207,3,Nov 17 14:25:36 sv260 sshd[6506]: Disconnectin...,Nov,sv260
468491,3,Nov 17 17:31:10 sv260 sshd[8480]: Disconnectin...,Nov,sv260
469314,3,Nov 17 17:30:17 sv260 sshd[8467]: Failed passw...,Nov,sv260


In [None]:
log_time_etc['second'].nunique()

2

In [None]:
log_time_etc['second'].value_counts()

sv260               238
m2datateksolaris      4
Name: second, dtype: int64

### m2datateksolaris

In [None]:
log_time_etc[log_time_etc['second']=='m2datateksolaris']

Unnamed: 0_level_0,level,full_log,first_word,second
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2806,0,Nov 19 08:55:07 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris
157873,0,Nov 19 08:55:06 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris
441122,0,Nov 19 08:55:09 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris
465161,0,Nov 19 09:55:06 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris


In [None]:
# Print every raw log whose `second` value is 'm2datateksolaris', leaving a
# blank line between entries.
m2_logs = log_time_etc.loc[log_time_etc['second'] == 'm2datateksolaris', 'full_log']
for raw_log in m2_logs:
    print(raw_log, '\n')

Nov 19 08:55:07 m2datateksolaris SC Alert: [ID 556868 daemon.error] SC unretrieved msg: [Chassis | major: Hot removal of HDD2] 

Nov 19 08:55:06 m2datateksolaris SC Alert: [ID 677427 daemon.error] SC unretrieved msg: [Chassis | major: Hot removal of HDD3] 

Nov 19 08:55:09 m2datateksolaris SC Alert: [ID 217180 daemon.error] SC unretrieved msg: [Chassis | major: Host is running] 

Nov 19 09:55:06 m2datateksolaris SC Alert: [ID 887218 daemon.error] Chassis | major: System shutdown has been requested via power button. 



### sv260

In [None]:
# Raw string: '\[' is not a valid Python string escape. The regex is unchanged.
# Captures the name after "sv260 " up to the ':' or '[' that ends it; rows
# without "sv260 " get NaN.
log_time_etc['sv260'] = log_time_etc['full_log'].str.extract(r'sv260 ([^ \[]+)(?::|\[)')
log_time_etc

Unnamed: 0_level_0,level,full_log,first_word,second,sv260
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2806,0,Nov 19 08:55:07 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris,
4841,3,Nov 17 14:29:00 sv260 sshd[6563]: Failed none ...,Nov,sv260,sshd
12837,3,Nov 18 14:02:45 sv260 sshd[8621]: Disconnectin...,Nov,sv260,sshd
12867,3,Nov 17 14:25:07 sv260 sshd[6481]: Disconnectin...,Nov,sv260,sshd
13723,3,Nov 17 17:34:26 sv260 sshd[8537]: Failed passw...,Nov,sv260,sshd
...,...,...,...,...,...
465161,0,Nov 19 09:55:06 m2datateksolaris SC Alert: [ID...,Nov,m2datateksolaris,
468207,3,Nov 17 14:25:36 sv260 sshd[6506]: Disconnectin...,Nov,sv260,sshd
468491,3,Nov 17 17:31:10 sv260 sshd[8480]: Disconnectin...,Nov,sv260,sshd
469314,3,Nov 17 17:30:17 sv260 sshd[8467]: Failed passw...,Nov,sv260,sshd


In [None]:
log_time_etc['sv260'].value_counts()

sshd       237
telnetd      1
Name: sv260, dtype: int64

In [None]:
log_time_etc[log_time_etc['sv260']=='telnetd']

Unnamed: 0_level_0,level,full_log,first_word,second,sv260
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
188247,0,Nov 21 12:23:09 sv260 telnetd[20641]: getpid: ...,Nov,sv260,telnetd


In [None]:
# Isolate the rows whose `second` token is 'sv260' (as an independent copy)
# and tally how many logs fall into each level.
log_sv260 = log_time_etc.loc[log_time_etc['second'] == 'sv260'].copy()
log_sv260['level'].value_counts()

3    229
4      5
0      2
1      2
Name: level, dtype: int64

In [None]:
# Print the raw sv260 logs for each listed level value.
# The loop variable is `lvl`, not `level`: reusing `level` would rebind the
# `level` DataFrame built in the "level" section to a plain int and break any
# re-run of those cells.
for lvl in (0, 1, 4):
    print(f'level = {lvl}', '\n')
    for log in log_sv260[log_sv260['level'] == lvl]['full_log']:
        print(log, '\n')

level = 0 

Nov 21 12:23:09 sv260 telnetd[20641]: getpid: peer died: Error 0 

Nov 17 09:42:12 sv260 sshd[2684]: error: setsockopt SO_KEEPALIVE: Invalid argument 

level = 1 

Nov 18 13:08:45 sv260 sshd[6969]: Accepted password for root from 61.41.101.142 port 59553 ssh2 

Nov 16 11:26:48 sv260 sshd[19238]: Accepted password for root from 61.41.101.142 port 53017 ssh2 

level = 4 

Nov 29 22:16:32 sv260 sshd[39585]: Did not receive identification string from 211.253.243.66 

Nov 21 01:02:56 sv260 sshd[9196]: Did not receive identification string from 211.253.243.66 

Nov 27 08:32:12 sv260 sshd[3110]: Did not receive identification string from 192.168.0.195 

Nov 19 01:03:09 sv260 sshd[13841]: Did not receive identification string from 192.168.0.195 

Nov 8 20:25:42 sv260 sshd[37597]: Did not receive identification string from 192.168.0.181 



In [None]:
log_sv260[log_sv260['level']==3]

Unnamed: 0_level_0,level,full_log,first_word,second,sv260
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4841,3,Nov 17 14:29:00 sv260 sshd[6563]: Failed none ...,Nov,sv260,sshd
12837,3,Nov 18 14:02:45 sv260 sshd[8621]: Disconnectin...,Nov,sv260,sshd
12867,3,Nov 17 14:25:07 sv260 sshd[6481]: Disconnectin...,Nov,sv260,sshd
13723,3,Nov 17 17:34:26 sv260 sshd[8537]: Failed passw...,Nov,sv260,sshd
23420,3,Nov 17 17:39:20 sv260 sshd[8713]: Failed passw...,Nov,sv260,sshd
...,...,...,...,...,...
463132,3,Nov 17 17:28:45 sv260 sshd[8440]: Failed passw...,Nov,sv260,sshd
468207,3,Nov 17 14:25:36 sv260 sshd[6506]: Disconnectin...,Nov,sv260,sshd
468491,3,Nov 17 17:31:10 sv260 sshd[8480]: Disconnectin...,Nov,sv260,sshd
469314,3,Nov 17 17:30:17 sv260 sshd[8467]: Failed passw...,Nov,sv260,sshd


In [None]:
# Raw string: '\]' is not a valid Python string escape. The regex is unchanged.
# Captures everything after the first "]: " in the line (the message text).
log_sv260['log'] = log_sv260['full_log'].str.extract(r'\]: (.+)')
log_sv260

Unnamed: 0_level_0,level,full_log,first_word,second,sv260,log
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4841,3,Nov 17 14:29:00 sv260 sshd[6563]: Failed none ...,Nov,sv260,sshd,Failed none for invalid user test from 61.41.1...
12837,3,Nov 18 14:02:45 sv260 sshd[8621]: Disconnectin...,Nov,sv260,sshd,Disconnecting: Too many authentication failure...
12867,3,Nov 17 14:25:07 sv260 sshd[6481]: Disconnectin...,Nov,sv260,sshd,Disconnecting: Too many authentication failure...
13723,3,Nov 17 17:34:26 sv260 sshd[8537]: Failed passw...,Nov,sv260,sshd,Failed password for root from 61.41.101.142 po...
23420,3,Nov 17 17:39:20 sv260 sshd[8713]: Failed passw...,Nov,sv260,sshd,Failed password for root from 61.41.101.142 po...
...,...,...,...,...,...,...
463132,3,Nov 17 17:28:45 sv260 sshd[8440]: Failed passw...,Nov,sv260,sshd,Failed password for root from 61.41.101.142 po...
468207,3,Nov 17 14:25:36 sv260 sshd[6506]: Disconnectin...,Nov,sv260,sshd,Disconnecting: Too many authentication failure...
468491,3,Nov 17 17:31:10 sv260 sshd[8480]: Disconnectin...,Nov,sv260,sshd,Disconnecting: Too many authentication failure...
469314,3,Nov 17 17:30:17 sv260 sshd[8467]: Failed passw...,Nov,sv260,sshd,Failed password for root from 61.41.101.142 po...


In [None]:
log_sv260['log'].value_counts()

Disconnecting: Too many authentication failures for root [preauth]          64
Invalid user test from 61.41.101.142                                        14
Disconnecting: Too many authentication failures for test [preauth]          11
Failed password for root from 61.41.101.142 port 41298 ssh2                  2
Failed password for root from 61.41.101.142 port 60063 ssh2                  2
                                                                            ..
Failed password for invalid user test from 61.41.101.142 port 52388 ssh2     1
Failed password for root from 61.41.101.142 port 59995 ssh2                  1
Failed password for root from 61.41.101.142 port 41210 ssh2                  1
Failed password for invalid user test from 61.41.101.142 port 52416 ssh2     1
Failed password for root from 61.41.101.142 port 60096 ssh2                  1
Name: log, Length: 135, dtype: int64

In [None]:
# Message text after the first "]: ", cut before a trailing " port <n> ..." part
# so that messages differing only by port number collapse into one category.
# - Raw string: '\]' is not a valid Python string escape.
# - '\s+port\b' (instead of a bare 'port') keeps the whitespace before "port"
#   out of the capture and avoids cutting inside words that merely contain
#   "port".
log_sv260['log'] = log_sv260['full_log'].str.extract(r'\]: (.+?)(?:\s+port\b|$)')
log_sv260['log'].value_counts()

Failed password for root from 61.41.101.142                           104
Disconnecting: Too many authentication failures for root [preauth]     64
Failed password for invalid user test from 61.41.101.142               25
Invalid user test from 61.41.101.142                                   14
Disconnecting: Too many authentication failures for test [preauth]     11
Failed none for invalid user test from 61.41.101.142                   11
Did not receive identification string from 192.168.0.195                2
Accepted password for root from 61.41.101.142                           2
Did not receive identification string from 211.253.243.66               2
Did not receive identification string from 192.168.0.181                1
getpid: peer died: Error 0                                              1
error: setsockopt SO_KEEPALIVE: Invalid argument                        1
Name: log, dtype: int64

In [None]:
log_sv260.pivot_table(values='full_log', index='log', columns='level', aggfunc='count', fill_value=0)

level,0,1,3,4
log,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accepted password for root from 61.41.101.142,0,2,0,0
Did not receive identification string from 192.168.0.181,0,0,0,1
Did not receive identification string from 192.168.0.195,0,0,0,2
Did not receive identification string from 211.253.243.66,0,0,0,2
Disconnecting: Too many authentication failures for root [preauth],0,0,64,0
Disconnecting: Too many authentication failures for test [preauth],0,0,11,0
Failed none for invalid user test from 61.41.101.142,0,0,11,0
Failed password for invalid user test from 61.41.101.142,0,0,25,0
Failed password for root from 61.41.101.142,0,0,104,0
Invalid user test from 61.41.101.142,0,0,14,0


## 연도

In [None]:
groups.get_group('2020')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
63675,3,2020 Oct 30 08:39:31 WinEvtLog: System: ERROR(...,2020
226155,3,2020 Oct 28 13:32:06 WinEvtLog: System: ERROR(...,2020
246839,3,2020 Oct 30 09:55:55 WinEvtLog: System: ERROR(...,2020
275279,3,2020 Oct 20 09:58:04 WinEvtLog: System: ERROR(...,2020
279151,3,2020 Oct 19 10:32:47 WinEvtLog: Application: E...,2020
332210,3,2020 Oct 21 17:59:46 WinEvtLog: System: ERROR(...,2020
361699,3,2020 Oct 23 08:22:32 WinEvtLog: System: ERROR(...,2020
393148,3,2020 Oct 22 09:59:04 WinEvtLog: System: ERROR(...,2020
454020,3,2020 Oct 23 08:23:30 WinEvtLog: System: ERROR(...,2020


In [None]:
groups.get_group('2021')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7983,3,2021 Feb 04 17:33:57 WinEvtLog: Application: E...,2021
35553,3,2021 Feb 08 10:08:16 WinEvtLog: Application: E...,2021
56540,3,2021 Feb 24 16:21:07 WinEvtLog: Application: I...,2021
65234,3,2021 Feb 03 17:45:23 WinEvtLog: Application: E...,2021
126924,3,2021 Feb 04 15:03:53 WinEvtLog: Application: E...,2021
138261,3,2021 Feb 02 12:58:37 WinEvtLog: Application: I...,2021
143074,3,2021 Feb 03 15:15:16 WinEvtLog: Application: E...,2021
147305,3,2021 Feb 05 10:33:56 WinEvtLog: Application: E...,2021
196254,3,2021 Feb 04 13:03:52 WinEvtLog: Application: E...,2021
211913,3,2021 Feb 24 16:15:36 WinEvtLog: Application: I...,2021


In [None]:
show_all('2020')

2020 Oct 30 08:39:31 WinEvtLog: System: ERROR(10010): DCOM: Laptop: DESKTOP-UDJIJER: DESKTOP-UDJIJER: Microsoft.SkypeApp_15.65.78.0_x86__kzf8qxf38zg5c!App.AppXtwmqn4em5r5dpafgj4t4yyxgjfe0hr50.mca  

2020 Oct 28 13:32:06 WinEvtLog: System: ERROR(10010): DCOM: Laptop: DESKTOP-UDJIJER: DESKTOP-UDJIJER: Microsoft.SkypeApp_15.65.78.0_x86__kzf8qxf38zg5c!App.AppXtwmqn4em5r5dpafgj4t4yyxgjfe0hr50.mca  

2020 Oct 30 09:55:55 WinEvtLog: System: ERROR(10010): DCOM: Laptop: DESKTOP-UDJIJER: DESKTOP-UDJIJER: Microsoft.SkypeApp_15.65.78.0_x86__kzf8qxf38zg5c!App.AppXtwmqn4em5r5dpafgj4t4yyxgjfe0hr50.mca  

2020 Oct 20 09:58:04 WinEvtLog: System: ERROR(10010): DCOM: Laptop: DESKTOP-UDJIJER: DESKTOP-UDJIJER: Microsoft.SkypeApp_15.64.80.0_x86__kzf8qxf38zg5c!App.AppXtwmqn4em5r5dpafgj4t4yyxgjfe0hr50.mca  

2020 Oct 19 10:32:47 WinEvtLog: Application: ERROR(256): DPTF: (no user): no domain: DESKTOP-M1KNS65: Intel(R) Dynamic Platform and Thermal Framework : ESIF(8.3.10207.5567) TYPE: ERROR MODULE: DPTF TIME 7

In [None]:
show_all('2021')

2021 Feb 04 17:33:57 WinEvtLog: Application: ERROR(1024): MsiInstaller: SYSTEM: NT AUTHORITY: DESKTOP-0011M0V: Dell SupportAssist Dell SupportAssist 1603 (NULL) (NULL) (NULL) ?  

2021 Feb 08 10:08:16 WinEvtLog: Application: ERROR(1024): MsiInstaller: SYSTEM: NT AUTHORITY: DESKTOP-0011M0V: Dell SupportAssist Dell SupportAssist 1603 (NULL) (NULL) (NULL) ?  

2021 Feb 24 16:21:07 WinEvtLog: Application: INFORMATION(11707): MsiInstaller: DATATEK: DESKTOP-QQU2HFQ: DESKTOP-QQU2HFQ: Product: Microsoft Visual C++ 2010  x64 Redistributable - 10.0.30319 -- Installation completed successfully. (NULL) (NULL) (NULL) (NULL) (NULL) ?  

2021 Feb 03 17:45:23 WinEvtLog: Application: ERROR(1024): MsiInstaller: SYSTEM: NT AUTHORITY: DESKTOP-0011M0V: Dell SupportAssist Dell SupportAssist 1603 (NULL) (NULL) (NULL) ?  

2021 Feb 04 15:03:53 WinEvtLog: Application: ERROR(1024): MsiInstaller: SYSTEM: NT AUTHORITY: DESKTOP-0011M0V: Dell SupportAssist Dell SupportAssist 1603 (NULL) (NULL) (NULL) ?  

2021 Feb 

## OpenSCAP

In [None]:
groups.get_group('OpenSCAP')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
145945,0,OpenSCAP Error: Probe with PID=21944 has been ...,OpenSCAP
271689,0,OpenSCAP Error: Probe with PID=27844 has been ...,OpenSCAP
462254,0,OpenSCAP Error: Probe with PID=7984 has been k...,OpenSCAP


In [None]:
show_all('OpenSCAP')

OpenSCAP Error: Probe with PID=21944 has been killed with signal 9 [sch_pipe.c:178] 

OpenSCAP Error: Probe with PID=27844 has been killed with signal 9 [sch_pipe.c:178] 

OpenSCAP Error: Probe with PID=7984 has been killed with signal 9 [sch_pipe.c:178] 



## juniper

In [None]:
groups.get_group('juniper')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5144,0,juniper,juniper
108472,0,juniper,juniper
170195,3,juniper,juniper
207811,0,juniper,juniper
470486,0,juniper,juniper


## The

In [None]:
groups.get_group('The')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
39057,2,The average number of logs between 10:00 and 1...,The
87600,2,The average number of logs between 5:00 and 6:...,The
109510,2,The average number of logs between 6:00 and 7:...,The
262892,2,The average number of logs between 21:00 and 2...,The
294072,2,The average number of logs between 5:00 and 6:...,The
294127,2,The average number of logs between 4:00 and 5:...,The
343011,2,The average number of logs between 14:00 and 1...,The
345903,2,The average number of logs between 15:00 and 1...,The
376681,2,The average number of logs between 2:00 and 3:...,The
380583,2,The average number of logs between 10:00 and 1...,The


In [None]:
show_all('The')

The average number of logs between 10:00 and 11:00 is 5399. We reached 37090. 

The average number of logs between 5:00 and 6:00 is 222. We reached 77946. 

The average number of logs between 6:00 and 7:00 is 6420. We reached 28494. 

The average number of logs between 21:00 and 22:00 is 5325. We reached 95. 

The average number of logs between 5:00 and 6:00 is 8362. We reached 93476. 

The average number of logs between 4:00 and 5:00 is 2806. We reached 11476. 

The average number of logs between 14:00 and 15:00 is 9734. We reached 26958. 

The average number of logs between 15:00 and 16:00 is 4411. We reached 9487. 

The average number of logs between 2:00 and 3:00 is 9428. We reached 75572. 

The average number of logs between 10:00 and 11:00 is 12612. We reached 31532. 

The average number of logs between 13:00 and 14:00 is 2697. We reached 93028. 



- 시간 -> `<TIME>`
- 숫자 -> `<NUM>`

## Windows

In [None]:
groups.get_group('Windows')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4134,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
122200,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
145433,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
150488,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
236515,1,Windows Audit: Winpcap packet filter driver fo...,Windows
282994,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
296413,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
318658,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
363534,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows
391901,1,Windows Audit: Null sessions allowed {PCI_DSS:...,Windows


In [None]:
show_all('Windows')

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Winpcap packet filter driver found {PCI_DSS: 10.6.1}. File: C:\WINDOWS\Sysnative\drivers\npf.sys. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 

Windows Audit: Null sessions allowed {PCI_DSS: 11.4}. 



## Trojaned

In [None]:
groups.get_group('Trojaned')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4728,5,Trojaned version of file '/bin/netstat' detect...,Trojaned
58077,5,Trojaned version of file '/usr/bin/netstat' de...,Trojaned
227579,5,Trojaned version of file '/usr/sbin/netstat' d...,Trojaned
234605,5,Trojaned version of file '/bin/netstat' detect...,Trojaned
269504,5,Trojaned version of file '/usr/bin/netstat' de...,Trojaned
305705,5,Trojaned version of file '/usr/sbin/netstat' d...,Trojaned
322077,5,Trojaned version of file '/usr/sbin/netstat' d...,Trojaned
359733,5,Trojaned version of file '/bin/netstat' detect...,Trojaned
367519,5,Trojaned version of file '/usr/bin/netstat' de...,Trojaned
383455,5,Trojaned version of file '/usr/sbin/ifconfig' ...,Trojaned


In [None]:
show_all('Trojaned')

Trojaned version of file '/bin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/prof|grep|addr\.h' (Generic). 

Trojaned version of file '/usr/bin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/prof|grep|addr\.h' (Generic). 

Trojaned version of file '/usr/sbin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/prof|grep|addr\.h' (Generic). 

Trojaned version of file '/bin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/prof|grep|addr\.h' (Generic). 

Trojaned version of file '/usr/bin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/prof|grep|addr\.h' (Generic). 

Trojaned version of file '/usr/sbin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/prof|grep|addr\.h' (Generic). 

Trojaned version of file '/usr/sbin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/prof|grep|addr\.h' (Generic). 

Trojaned version of file '/bin/netstat' detected. Signature used: 'bash|^/bin/sh|/dev/[^aik]|/p

## NTFS

In [None]:
# Preview the rows whose first token is 'NTFS'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('NTFS')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
26041,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
34838,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
49230,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
67688,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
70121,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
92820,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
96475,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
180259,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
189843,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS
202893,5,NTFS Alternate data stream found: 'C:\Program ...,NTFS


In [None]:
# Print the 'NTFS' messages in full (the table view truncates full_log).
# NOTE(review): show_all is a helper defined earlier in the notebook; its behaviour is inferred from the output — confirm.
show_all('NTFS')

NTFS Alternate data stream found: 'C:\Program Files/Sublime Text 3:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/rempl:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/Sublime Text 3:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/Sublime Text 3:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/rempl:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/rempl:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/rempl:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/CUAssistant:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/rempl:Win32App_1'. Possible hidden content. 

NTFS Alternate data stream found: 'C:\Program Files/rempl:Win32App_1'. Possible hidde

## System

In [None]:
# Pull the 'System' group into its own frame; .copy() makes it independent of the grouped data.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
log_system = groups.get_group('System').copy()
log_system

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
334,1,System Audit: CIS - RHEL7 - 4.1.1 - Network pa...,System
482,1,System Audit: CIS - RHEL7 - 1.4.4 - SELinux se...,System
622,1,System Audit: SSH Hardening - 3: Root can log ...,System
906,1,System Audit: SSH Hardening - 8: Wrong Grace T...,System
1563,1,System Audit: CIS - RHEL7 - 1.4.4 - SELinux se...,System
...,...,...,...
469301,1,System Audit: CIS - RHEL7 - 3.2 - Avahi daemon...,System
469804,1,System Audit: SSH Hardening - 8: Wrong Grace T...,System
470654,1,System Audit: SSH Hardening - 5: Password Auth...,System
471334,1,System Audit: CIS - RHEL7 - 1.4.2 - SELinux no...,System


In [None]:
# Count each distinct message that follows the "System Audit: " prefix.
# str.extract yields a one-column frame here, so this is DataFrame.value_counts.
(
    log_system['full_log']
    .str.extract('System Audit: (.+)')
    .value_counts()
)

SSH Hardening - 5: Password Authentication {PCI_DSS: 2.2.4}. File: /etc/ssh/sshd_config. Reference: 5 .                                                                                                                                                                               49
SSH Hardening - 4: No Public Key authentication {PCI_DSS: 2.2.4}. File: /etc/ssh/sshd_config. Reference: 4 .                                                                                                                                                                          44
CIS - Testing against the CIS Red Hat Enterprise Linux 7 Benchmark v1.1.0. File: /etc/redhat-release. Reference: https://benchmarks.cisecurity.org/tools2/linux/CIS_Red_Hat_Enterprise_Linux_7_Benchmark_v1.1.0.pdf .                                                                 43
SSH Hardening - 6: Empty passwords allowed {PCI_DSS: 2.2.4}. File: /etc/ssh/sshd_config. Reference: 6 .                                                      

## File

In [None]:
# Grab the 'File' group for inspection and display it.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
log_file = groups.get_group('File')
log_file

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
515,3,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File
794,5,File '/var/www/html_update/management/hashconf...,File
1997,3,File '/usr/bin/elasticdump' was added.\n,File
2320,3,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File
2858,5,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File
...,...,...,...
471343,5,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File
471567,5,File '/var/www/html/management/hashbin.txt' is...,File
471822,5,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File
472052,5,File '/var/esild/etc/ossec.conf' checksum chan...,File


In [None]:
# How the `level` label is distributed across the File group.
log_file['level'].value_counts()

5    641
3    181
Name: level, dtype: int64

In [None]:
# Frequency of each exact full_log message in the 'File' group.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('File')['full_log'].value_counts()

File '/var/www/html/management/ossec.conf' is owned by root and has written permissions to anyone.                                                                                                                                                                                                                                                  38
File '/var/www/html_update/management/hashbin.txt' is owned by root and has written permissions to anyone.                                                                                                                                                                                                                                          36
File '/var/www/html_update/management/ossec.conf' is owned by root and has written permissions to anyone.                                                                                                                                                                                                                 

Replace the quoted file path in each message with a \<FILE\> placeholder

In [None]:
# Rebind log_file to an independent copy of the 'File' group so derived columns can be added to it.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
log_file = groups.get_group('File').copy()

In [None]:
# Print every File-group message with the quoted path right after "File "
# collapsed to the <FILE> placeholder; each message is followed by a blank line.
# The '.+' is greedy but has no DOTALL flag, so the match cannot cross a newline.
for log in log_file['full_log'].str.replace(
    pat="(?<=File )'.+'",
    repl="<FILE>",
    regex=True,
):
    print(log, '\n')

File <FILE> was added.
 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> was added.
 

File <FILE> was added.
 

File <FILE> checksum changed.
Old md5sum was: '6bf8548555a7ab4a34b83436539b6495'
New md5sum is : '85c899e225e74c712f8e2d2ef32924c0'
Old sha1sum was: 'c34db755b2af9472d44d2a51e87699991fee9b8e'
New sha1sum is : 'efdfe108fb55c6a564a3c1821af6b388f9808223'
 

File <FILE> was added.
 

File <FILE> checksum changed.
Old modification time was: 'Mon Nov 16 13:01:41 2020', now it is 'Thu Nov 19 08:55:16 2020'
 

File <FILE> checksum changed.
Old md5sum was: '664191fb850026c06ea88ed971daa7fd'
New md5sum is : '9b126e6218876ca57284259416fd8e6e'
Old sha1sum was: '23f3ae9ce553344e9361e5187e81946162119151'
New sha1sum is : '52143ce22a06f4d78e7ae83193b02737902169df'
 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> checksum changed.
Old md5sum was: 'bee03237ea803f91e468ce5404efa622'
New md5sum is : '2f43dfc7a4f84a246bcfda2


File <FILE> checksum changed.
Old md5sum was: '0373e8f5f3390355ef3feb572f5068a9'
New md5sum is : '3a0ca8abff99652b561d8517ea24ad78'
Old sha1sum was: 'ff719b84c5960b8e07310a6862da9da67d526880'
New sha1sum is : '8d1ac075d65e6feae13461ac884afc6ca3a34a4e'
 

File <FILE> checksum changed.
Old md5sum was: '249085b249f96c40fc5e55ec7be2e2db'
New md5sum is : '14923062928febf6920ad4942eed0edd'
Old sha1sum was: 'cd2cc2c0c213701d65b8ac86b7d2fe6beeb7d6fa'
New sha1sum is : 'fd9b0d13e546b6de8900141f1430165152fe2f44'
 

File <FILE> was added.
 

File <FILE> checksum changed.
Old md5sum was: 'a14b17840dddff7cc661e7b751e017b7'
New md5sum is : '5e3cc6d18e4acc3b0e65d3f9d2b3efa2'
Old sha1sum was: '11224088d39572c685b42703664b8b5ea07f94ea'
New sha1sum is : '175c5af9433afe323fc45c9834e71a9e9eca5e94'
 

File <FILE> checksum changed.
Old md5sum was: 'f9e97fa1e2754fe7d0cb8fcb3f82645e'
New md5sum is : '31b119a2cc3e2ad42a5b26d47620be75'
Old sha1sum was: 'c0b78cbc9573e355fd5fd5e4671486f3bc6c175b'
New sha1sum is :

 

File <FILE> checksum changed.
Old md5sum was: '6775ad633fa1200f5c0475524a501178'
New md5sum is : '0b18151a36c48379a163fd93c4ba6f84'
Old sha1sum was: '8b58ec792511ac0ad628fb3fbcaa65fb55f333b1'
New sha1sum is : '0ea96ad2d634020c729f91d9cbd7dc2674c83e10'
 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> checksum changed.
Old md5sum was: 'fec0e960d2563cb976f740a64370e0d2'
New md5sum is : '940515197ce89325ffaab18d48ace0d0'
Old sha1sum was: '5a815f9a33e942e7767111fe81ee1a72845c0a7e'
New sha1sum is : 'da0207ad617f2519cd2893f5e46b3bf820233cd8'
 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> checksum changed.
Old md5sum was: 'd70e3573b1f16875592411aa704d94bf'
New md5sum is : '54629603f299e5781b2d31b8477e7f2f'
Old sha1sum was: '0e17bf5913c0a4151e1bbed7dcb222ea3885ffb4'
New sha1sum is : '364cf9574d355639bad1ce00ed61f185f8d186cc'
 

File <FILE> is owned b


File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> was added.
 

File <FILE> checksum changed.
Old md5sum was: '59b9b62f32e02e1458c8c048a14ba2db'
New md5sum is : 'b3641f1af170e6e1545c5929b106f545'
Old sha1sum was: 'b0568af5c97f4f9f312f89381e3275618697e53f'
New sha1sum is : 'b62ea54b16c4f1cfa08880579b3d6f0f8df042dd'
 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> was added.
 

File <FILE> checksum changed.
Old md5sum was: '6b43b97dbb8a3cd1c3800d723beb40b5'
New md5sum is : '845929e56db72b29e7ba4df795ed25fa'
Old sha1sum was: '5c5f49f0123c3b7ec81be64e619f9274e3c2434b'
New sha1sum is : '1273d8cfc7946cb893aea8729260e388fe24b87e'
 

File <FILE> is owned by root and has written permissions to anyone. 

File <FILE> was added.
 

File <FILE> checksum changed.
Old md5sum was: 'e8b3f6570e2ac60f76a60fad1e28df32'
New md5sum is : 'fc7aebda3d3096298d4170fb2367f396'

In [None]:
# Store the path-masked message (quoted path after "File " -> <FILE>) in a
# `log` column, then display the frame.
log_file['log'] = log_file['full_log'].str.replace(
    pat="(?<=File )'.+'",
    repl="<FILE>",
    regex=True,
)
log_file

Unnamed: 0_level_0,level,full_log,first_word,log
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
515,3,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File,File <FILE> was added.\n
794,5,File '/var/www/html_update/management/hashconf...,File,File <FILE> is owned by root and has written p...
1997,3,File '/usr/bin/elasticdump' was added.\n,File,File <FILE> was added.\n
2320,3,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File,File <FILE> was added.\n
2858,5,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File,File <FILE> checksum changed.\nOld md5sum was:...
...,...,...,...,...
471343,5,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File,File <FILE> checksum changed.\nOld md5sum was:...
471567,5,File '/var/www/html/management/hashbin.txt' is...,File,File <FILE> is owned by root and has written p...
471822,5,File 'HKEY_LOCAL_MACHINE\System\CurrentControl...,File,File <FILE> checksum changed.\nOld md5sum was:...
472052,5,File '/var/esild/etc/ossec.conf' checksum chan...,File,File <FILE> checksum changed.\nSize changed fr...


In [None]:
# Reduce each masked message to its first run of non-newline characters
# (".+" cannot cross a newline), then count how often each template occurs.
log_file['log'] = (
    log_file['log']
    .str.extract("(.+)(?:\n|$)", expand=True)
)
log_file['log'].value_counts()

File <FILE> checksum changed.                                          397
File <FILE> is owned by root and has written permissions to anyone.    244
File <FILE> was added.                                                 181
Name: log, dtype: int64

In [None]:
# Cross-tabulate message template against `level`; each cell is a row count
# and combinations that never occur show as 0.
log_file.pivot_table(
    index='log',
    columns='level',
    values='full_log',
    aggfunc='count',
    fill_value=0,
)

level,3,5
log,Unnamed: 1_level_1,Unnamed: 2_level_1
File <FILE> checksum changed.,0,397
File <FILE> is owned by root and has written permissions to anyone.,0,244
File <FILE> was added.,181,0


## --MARK--:

In [None]:
# Preview the rows whose first token is '--MARK--:'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('--MARK--:')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9365,0,--MARK--: =?s_w*?!5u]7c/s?Z1(FATAl?o0khglYB]Q0...,--MARK--:
79217,0,"--MARK--: Lnsb5;CD,&vN2!S_+!hlX0?4N9FCI%WFSD]D...",--MARK--:
88229,0,"--MARK--: QhU7AXVHGsFdh,3wL4bR;,x&q1?1PQV2(ik?...",--MARK--:
127292,0,"--MARK--: %&)Y,UK%AV1Q$*H_U!C1HyPwEtE?ox8@1xQO...",--MARK--:
180373,0,"--MARK--: [kOlJY!(1OE83l7Rj,mQQvgoT&Cu!p_BAYrB...",--MARK--:
187935,0,--MARK--: 2k06(D+8h?6q;fFD34mn2lmfGI'(vl6K4Sh#...,--MARK--:
205605,0,"--MARK--: N[$9H&R]stqazsx,D)'26MPV?C[E/AeRpf]+...",--MARK--:
263076,0,--MARK--: Bve5Z-oLF9#)LT;.snZlXJbizTDVmEOjQP86...,--MARK--:
326054,0,--MARK--: 0KY#u2STO^[TWE^uuL.+#a3!+;KeBpHtbuyJ...,--MARK--:
340462,0,--MARK--: Klq)#FqoV_]r_@[C!s.t)E!9xlY^M?^PWIM7...,--MARK--:


In [None]:
# Print the '--MARK--:' messages in full (the table view truncates full_log).
# NOTE(review): show_all is a helper defined earlier in the notebook; its behaviour is inferred from the output — confirm.
show_all('--MARK--:')

--MARK--: =?s_w*?!5u]7c/s?Z1(FATAl?o0khglYB]Q0OTwjGzB!nVlVSdiJnnN/VJJ#qi091Xao?iduzHEdkUw&rDtQQc2sV7(6a8$U#j0MC&wG,YRG$rbf+@1c0-n6E^.2Cv&3=T7mXZ$D]1nf9yl6@QER690Yup*5Zt48Lu'9gc65Ixx]B2'JMqv_qAY0=XEdVmW6NpRRFqy+3S&mKFPru+erp3d]mRJ;v@D*8nfRlrFu!1vU;pNsHl5524*pD)VOkSOf$62(&)J?E6FERs4KV;'y6cZxVcO6Rf/@+s?kK5LSVHo'D2](^l_zzD9]5o&m!&;Kw^@*.?tBL.,L.K))IKkU*6kD4+DmgYrYG9$WZn)hTf0ozna4wdJ&f5u]te.8&)vSJdMUuNU55H??J3LNru!T@PZja(b5h.atdRP6T/Kq5A5M)!dIL36c0h-_TPg3*%5d*+(*5H!?EU3_c/wEYGq 

--MARK--: Lnsb5;CD,&vN2!S_+!hlX0?4N9FCI%WFSD]Dh9rOuBt6yg+ljcZ_1RJuBBq+pll&V5vxRDYl)r1w*H9q6*;DGr/ul,$EN1Gw=w2h17iWBi+idDd)KxN.u=v'c%B,*v]B'qTD)W(!_l[PJb@@k.wSJj^euz[).+)B!&K&)qb.n+Xw,SiXNygqoVPLXLOS5,zkMY?!Ya-S$kw6pSKxc/XVdwmJ$78bQzpDF=QK,)Qt)&mj&$(fnnPpcW2UWt&L$v@8jWnut6D8glF*tgx5$5V3LNrD]x$Je[''+q3pBse]%n@'(XJMOcCGwI)Y]4G[2)?k08YRF*V1A6KuHuU1)2zqv^4rL52wf@erroRp_fy*pP+P(je7p/YP%8??ph1BR;a'O36VW!9oQtjV_s9Vo!9FKuwO/XWO--ZwTrpe-cEkg[X,$#@pBW1?,7Q^V,q,3*7utvb!^VDlYyRZ=dlJINCAfce(R6DyL;hrTneF#kXmkcsHq[a9!G%gq*?hYp@-Ui

## E:

In [None]:
# Preview the rows whose first token is 'E:'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('E:')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1642,0,E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/p...,E:
1701,0,E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/p...,E:
3256,0,E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/p...,E:
3862,0,E: probe_rpminfo: RPM: db5 error(-30973) from ...,E:
4318,0,E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/p...,E:
...,...,...,...
468744,0,E: probe_rpmverifyfile: RPM: rpmdb: BDB0113 Th...,E:
469612,0,E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/p...,E:
469694,0,E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/p...,E:
471535,0,E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/p...,E:


In [None]:
# How the `level` label is distributed across the 'E:' group.
groups.get_group('E:')['level'].value_counts()

0    297
Name: level, dtype: int64

In [None]:
# Frequency of each exact full_log message in the 'E:' group.
groups.get_group('E:')['full_log'].value_counts()

E: probe_rpminfo: RPM: db5 error(-30973) from dbenv->failchk: BDB0087 DB_RUNRECOVERY: Fatal error, run database recovery               124
E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/process 26357/140408208971520 failed: BDB1507 Thread died in Berkeley DB library          105
E: probe_rpmverifyfile: RPM: db5 error(-30973) from dbenv->failchk: BDB0087 DB_RUNRECOVERY: Fatal error, run database recovery          23
E: probe_rpmverifyfile: RPM: rpmdb: BDB0113 Thread/process 26357/140408208971520 failed: BDB1507 Thread died in Berkeley DB library     17
E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/process 13362/139912875566848 failed: BDB1507 Thread died in Berkeley DB library           16
E: probe_rpmverifyfile: RPM: rpmdb: BDB0113 Thread/process 13362/139912875566848 failed: BDB1507 Thread died in Berkeley DB library      7
E: probe_rpminfo: RPM: rpmdb: BDB0113 Thread/process 22079/139845236422400 failed: BDB1507 Thread died in Berkeley DB library            2
E: probe_rpminfo: RPM: rpmd

## oscap:

In [None]:
# Preview the rows whose first token is 'oscap:'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('oscap:')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3396,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016030...",oscap:
3661,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016020...",oscap:
5658,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016030...",oscap:
5695,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016019...",oscap:
7206,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016052...",oscap:
...,...,...,...
466619,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016007...",oscap:
467561,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016019...",oscap:
468063,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016020...",oscap:
468233,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016080...",oscap:


In [None]:
# How the `level` label is distributed across the 'oscap:' group.
groups.get_group('oscap:')['level'].value_counts()

3    198
5    168
Name: level, dtype: int64

In [None]:
# Print the 'oscap:' messages in full (the table view truncates full_log).
# NOTE(review): show_all is a helper defined earlier in the notebook; its behaviour is inferred from the output — confirm.
show_all('oscap:')

oscap: msg: "xccdf-result", scan-id: "0001603070557", content: "ssg-centos-7-ds.xml", title: "Record Events that Modify the System's Discretionary Access Controls - setxattr", id: "xccdf_org.ssgproject.content_rule_audit_rules_dac_modification_setxattr", result: "fail", severity: "low", description: "At a minimum, the audit system should collect file permission changes for all users and root. If the auditd daemon is configured to use the augenrules program to read audit rules during daemon startup (the default), add the following line to a file with suffix .rules in the directory /etc/audit/rules.d: -a always,exit -F arch=b32 -S setxattr -F auid>=1000 -F auid!=4294967295 -F key=perm_mod If the system is 64 bit then also add the following line: -a always,exit -F arch=b64 -S setxattr -F auid>=1000 -F auid!=4294967295 -F key=perm_mod If the auditd daemon is configured to use the auditctl utility to read audit rules during daemon startup, add the following line to /etc/audit/audit.rules fi

In [None]:
# Take an independent copy of the 'oscap:' group so derived columns can be added to it.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
log_oscap = groups.get_group('oscap:').copy()

In [None]:
# Capture the text inside the double quotes after `title: ` (non-greedy, so it
# ends at the next double quote) into a `title` column, then display the frame.
log_oscap['title'] = (
    log_oscap['full_log']
    .str.extract('title: "(.+?)"', expand=True)
)
log_oscap

Unnamed: 0_level_0,level,full_log,first_word,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3396,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016030...",oscap:,Record Events that Modify the System's Discret...
3661,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016020...",oscap:,Record Events that Modify the System's Discret...
5658,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016030...",oscap:,Ensure auditd Collects Information on the Use ...
5695,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016019...",oscap:,Record Events that Modify the System's Discret...
7206,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016052...",oscap:,Record Events that Modify the System's Discret...
...,...,...,...,...
466619,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016007...",oscap:,Set Lockout Time For Failed Password Attempts
467561,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016019...",oscap:,Set Password Strength Minimum Lowercase Charac...
468063,3,"oscap: msg: ""xccdf-result"", scan-id: ""00016020...",oscap:,Record Events that Modify the System's Discret...
468233,5,"oscap: msg: ""xccdf-result"", scan-id: ""00016080...",oscap:,Set Password Strength Minimum Uppercase Charac...


In [None]:
# Frequency of each `title` value in log_oscap.
log_oscap['title'].value_counts()

Record Events that Modify the System's Discretionary Access Controls - fchmod          16
Ensure auditd Collects System Administrator Actions                                    15
Ensure auditd Collects Information on the Use of Privileged Commands                   12
Record Events that Modify the System's Discretionary Access Controls - setxattr        12
Record Events that Modify the System's Network Environment                             11
Record Events that Modify the System's Discretionary Access Controls - lchown          11
Record attempts to alter time through settimeofday                                     11
Record Events that Modify the System's Discretionary Access Controls - lsetxattr       10
Record Events that Modify the System's Discretionary Access Controls - fremovexattr    10
Record Attempts to Alter the localtime File                                            10
Ensure auditd Collects Unauthorized Access Attempts to Files (unsuccessful)            10
Ensure aud

In [None]:
# Cross-tabulate rule title against `level`; each cell is a row count
# and combinations that never occur show as 0.
log_oscap.pivot_table(
    index='title',
    columns='level',
    values='full_log',
    aggfunc='count',
    fill_value=0,
)

level,3,5
title,Unnamed: 1_level_1,Unnamed: 2_level_1
Common Profile for General-Purpose Systems,0,2
Configure Periodic Execution of AIDE,0,5
Configure auditd admin_space_left Action on Low Disk Space,0,5
Configure auditd space_left Action on Low Disk Space,0,4
Configure auditd to use audispd's syslog plugin,0,4
Disable At Service (atd),1,0
Disable Automatic Bug Reporting Tool (abrtd),5,0
Enable GNOME3 Screensaver Idle Activation,0,5
Enable GNOME3 Screensaver Lock After Idle Period,0,6
Enable Smart Card Login,0,6


## error:

In [None]:
# Preview the rows whose first token is 'error:'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('error:')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
585,0,error: cannot open Packages index using db5 - ...,error:
871,0,error: cannot open Packages index using db5 - ...,error:
992,0,error: db5 error(-30973) from dbenv->failchk: ...,error:
2638,0,error: cannot open Packages database in,error:
3613,0,error: cannot open Packages index using db5 - ...,error:
...,...,...,...
464123,0,error: cannot open Packages database in,error:
467463,0,error: rpmdb: BDB0113 Thread/process 26357/140...,error:
467518,0,error: rpmdb: BDB0113 Thread/process 26357/140...,error:
468068,0,error: cannot open Packages database in,error:


In [None]:
# How the `level` label is distributed across the 'error:' group.
groups.get_group('error:')['level'].value_counts()

0    459
Name: level, dtype: int64

In [None]:
# Frequency of each exact full_log message in the 'error:' group.
groups.get_group('error:')['full_log'].value_counts()

error: cannot open Packages database in                                                                          138
error: db5 error(-30973) from dbenv->failchk: BDB0087 DB_RUNRECOVERY: Fatal error, run database recovery         132
error: rpmdb: BDB0113 Thread/process 26357/140408208971520 failed: BDB1507 Thread died in Berkeley DB library    123
error: cannot open Packages index using db5 -  (-30973)                                                           45
error: rpmdb: BDB0113 Thread/process 13362/139912875566848 failed: BDB1507 Thread died in Berkeley DB library     18
error: rpmdb: BDB0113 Thread/process 30478/140432988804864 failed: BDB1507 Thread died in Berkeley DB library      2
error: rpmdb: BDB0113 Thread/process 22079/139845236422400 failed: BDB1507 Thread died in Berkeley DB library      1
Name: full_log, dtype: int64

## ossec:

In [None]:
# Preview the rows whose first token is 'ossec:'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('ossec:')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
590,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
621,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
870,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
1853,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
1877,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
...,...,...,...
471468,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
471837,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
471864,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
472400,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:


In [None]:
# How the `level` label is distributed across the 'ossec:' group.
groups.get_group('ossec:')['level'].value_counts()

5    1289
1      29
Name: level, dtype: int64

In [None]:
# Take an independent copy of the 'ossec:' group for the per-level inspection.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
log_ossec = groups.get_group('ossec:').copy()

In [None]:
# Show only the ossec rows labelled level 1.
log_ossec.loc[log_ossec['level'].eq(1)]

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
17817,1,ossec: File rotated (inode changed): '/var/log...,ossec:
55922,1,ossec: File rotated (inode changed): '/var/log...,ossec:
64931,1,ossec: File rotated (inode changed): '/var/log...,ossec:
95525,1,ossec: File rotated (inode changed): '/var/log...,ossec:
109160,1,ossec: File rotated (inode changed): '/var/log...,ossec:
117075,1,ossec: File rotated (inode changed): '/var/log...,ossec:
127872,1,ossec: File rotated (inode changed): '/var/log...,ossec:
132274,1,ossec: File rotated (inode changed): '/var/log...,ossec:
139846,1,ossec: File rotated (inode changed): '/var/log...,ossec:
139915,1,ossec: File rotated (inode changed): '/var/log...,ossec:


In [None]:
# Frequency of each exact message among the level-1 ossec rows.
log_ossec.loc[log_ossec['level'].eq(1), 'full_log'].value_counts()

ossec: File rotated (inode changed): '/var/log/suricata/eve.json'.    8
ossec: File rotated (inode changed): '/var/log/audit/audit.log'.      8
ossec: File rotated (inode changed): '/var/log/maillog'.              3
ossec: File rotated (inode changed): '/var/log/secure'.               3
ossec: File rotated (inode changed): '/var/log/messages'.             2
ossec: Agent started: 'Solaris-46->192.168.0.46'.                     1
ossec: Agent started: 'TTA_HP-UX11.31_Itanium->211.253.243.71'.       1
ossec: Agent started: 'sjlee->192.168.0.189'.                         1
ossec: Agent started: 'Windows10->192.168.0.190'.                     1
ossec: Agent started: 'tukim->192.168.0.181'.                         1
Name: full_log, dtype: int64

In [None]:
# Show only the ossec rows labelled level 5.
log_ossec.loc[log_ossec['level'].eq(5)]

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
590,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
621,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
870,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
1853,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
1877,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
...,...,...,...
471468,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
471837,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
471864,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:
472400,5,ossec: output: 'netstat listening ports':\ntcp...,ossec:


In [None]:
# Count the quoted name that follows "output: " in each ossec message.
# Rows without an "output: '...'" part extract as NaN and do not appear in the counts.
(
    log_ossec['full_log']
    .str.extract("output: '(.+?)'")
    .value_counts()
)

netstat listening ports    1289
dtype: int64

## type=USER_AVC

In [None]:
# Preview the rows whose first token is 'type=USER_AVC'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('type=USER_AVC')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10516,1,type=USER_AVC msg=audit(1603070359.442:167): p...,type=USER_AVC
110767,1,type=USER_AVC msg=audit(1605233330.597:600): p...,type=USER_AVC
339998,1,type=USER_AVC msg=audit(1613363937.137:2481): ...,type=USER_AVC


In [None]:
# Print the 'type=USER_AVC' messages in full (the table view truncates full_log).
# NOTE(review): show_all is a helper defined earlier in the notebook; its behaviour is inferred from the output — confirm.
show_all('type=USER_AVC')

type=USER_AVC msg=audit(1603070359.442:167): pid=820 uid=81 auid=4294967295 ses=4294967295 subj=system_u:system_r:system_dbusd_t:s0-s0:c0.c1023 msg='avc:  denied  { send_msg } for msgtype=method_return dest=:1.69 spid=815 tpid=7464 scontext=system_u:system_r:systemd_logind_t:s0 tcontext=system_u:system_r:httpd_t:s0 tclass=dbus  exe="/usr/bin/dbus-daemon" sauid=81 hostname=? addr=? terminal=?' 

type=USER_AVC msg=audit(1605233330.597:600): pid=831 uid=81 auid=4294967295 ses=4294967295 subj=system_u:system_r:system_dbusd_t:s0-s0:c0.c1023 msg='avc:  denied  { send_msg } for msgtype=method_return dest=:1.187 spid=890 tpid=10423 scontext=system_u:system_r:systemd_logind_t:s0 tcontext=system_u:system_r:httpd_t:s0 tclass=dbus  exe="/usr/bin/dbus-daemon" sauid=81 hostname=? addr=? terminal=?' 

type=USER_AVC msg=audit(1613363937.137:2481): pid=813 uid=81 auid=4294967295 ses=4294967295 subj=system_u:system_r:system_dbusd_t:s0-s0:c0.c1023 msg='avc:  denied  { send_msg } for msgtype=method_call i

## type=AVC

In [None]:
# Preview the rows whose first token is 'type=AVC'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('type=AVC')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
171,1,type=AVC msg=audit(1613366627.272:4960): avc: ...,type=AVC
205,1,type=AVC msg=audit(1608016269.687:144817): avc...,type=AVC
288,1,type=AVC msg=audit(1608012869.400:142921): avc...,type=AVC
365,1,type=AVC msg=audit(1613362948.863:2329): avc: ...,type=AVC
1490,1,type=AVC msg=audit(1608009012.412:140705): avc...,type=AVC
...,...,...,...
463717,1,type=AVC msg=audit(1613364043.098:3757): avc: ...,type=AVC
467401,1,type=AVC msg=audit(1600740224.421:52502): avc:...,type=AVC
470305,1,type=AVC msg=audit(1602655801.484:113985): avc...,type=AVC
471284,1,type=AVC msg=audit(1610086680.367:53452): avc:...,type=AVC


In [None]:
# How the `level` label is distributed across the 'type=AVC' group.
groups.get_group('type=AVC')['level'].value_counts()

1    369
Name: level, dtype: int64

In [None]:
# Print the first 100 'type=AVC' messages in full, each followed by a blank line.
for log in groups.get_group('type=AVC')['full_log'].head(100):
    print(log, '\n')

type=AVC msg=audit(1613366627.272:4960): avc:  denied  { execute } for  pid=10016 comm="sudo" name="agent_control" dev="dm-0" ino=404839312 scontext=system_u:system_r:httpd_t:s0 tcontext=system_u:object_r:var_t:s0 tclass=file permissive=1 type=AVC msg=audit(1613366627.272:4960): avc:  denied  { read open } for  pid=10016 comm="sudo" path="/var/esild/bin/agent_control" dev="dm-0" ino=404839312 scontext=system_u:system_r:httpd_t:s0 tcontext=system_u:object_r:var_t:s0 tclass=file permissive=1 type=AVC msg=audit(1613366627.272:4960): avc:  denied  { execute_no_trans } for  pid=10016 comm="sudo" path="/var/esild/bin/agent_control" dev="dm-0" ino=404839312 scontext=system_u:system_r:httpd_t:s0 tcontext=system_u:object_r:var_t:s0 tclass=file permissive=1 type=SYSCALL msg=audit(1613366627.272:4960): arch=c000003e syscall=59 success=yes exit=0 a0=55936cea74e8 a1=55936cea11d8 a2=55936cec57f0 a3=5 items=0 ppid=9939 pid=10016 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 

## type=SYSCALL

In [None]:
# Preview the rows whose first token is 'type=SYSCALL'.
# NOTE(review): `groups` comes from an earlier cell (looks like a groupby on first_word) — confirm.
groups.get_group('type=SYSCALL')

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,1,type=SYSCALL msg=audit(1603094402.016:52981): ...,type=SYSCALL
5,1,type=SYSCALL msg=audit(1611889244.855:247124):...,type=SYSCALL
10,1,type=SYSCALL msg=audit(1611895525.367:489785):...,type=SYSCALL
15,1,type=SYSCALL msg=audit(1603072847.573:38657): ...,type=SYSCALL
22,1,type=SYSCALL msg=audit(1603159410.494:164830):...,type=SYSCALL
...,...,...,...
472945,1,type=SYSCALL msg=audit(1611893440.465:417650):...,type=SYSCALL
472951,1,type=SYSCALL msg=audit(1611890997.087:322803):...,type=SYSCALL
472953,1,type=SYSCALL msg=audit(1611895531.931:497276):...,type=SYSCALL
472955,1,type=SYSCALL msg=audit(1611887841.572:191604):...,type=SYSCALL


In [None]:
# How the `level` label is distributed across the 'type=SYSCALL' group.
groups.get_group('type=SYSCALL')['level'].value_counts()

1    116496
Name: level, dtype: int64

In [None]:
# Print the first 100 'type=SYSCALL' messages in full, each followed by a blank line.
for log in groups.get_group('type=SYSCALL')['full_log'].head(100):
    print(log, '\n')

type=SYSCALL msg=audit(1603094402.016:52981): arch=c000003e syscall=2 success=yes exit=3 a0=7ff220e805a4 a1=80000 a2=1 a3=7ff2210864f8 items=1 ppid=5877 pid=5878 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=45 comm="date" exe="/usr/bin/date" subj=system_u:system_r:sysstat_t:s0-s0:c0.c1023 key="audit-wazuh-r" type=CWD msg=audit(1603094402.016:52981):  cwd="/root" type=PATH msg=audit(1603094402.016:52981): item=0 name="/etc/ld.so.cache" inode=69008420 dev=fd:00 mode=0100644 ouid=0 ogid=0 rdev=00:00 obj=unconfined_u:object_r:ld_so_cache_t:s0 objtype=NORMAL type=PROCTITLE msg=audit(1603094402.016:52981): proctitle=64617465002B2564 

type=SYSCALL msg=audit(1611889244.855:247124): arch=c000003e syscall=2 success=yes exit=3 a0=7f1c14d535a4 a1=80000 a2=1 a3=7f1c14f594f8 items=1 ppid=100038 pid=100039 auid=4294967295 uid=0 gid=980 euid=0 suid=0 fsuid=0 egid=980 sgid=980 fsgid=980 tty=(none) ses=4294967295 comm="ps" exe="/usr/bin/ps" subj=system_u:system_r:unconf

# 한글 포함

In [None]:
# Keep only the rows whose log text contains at least one Hangul syllable.
# na=False makes a missing full_log count as "no match", so the boolean mask
# never holds NaN (a NaN in the mask would make the row filter raise).
korean = train_df[
    train_df['full_log'].str.contains('[가-힣]', regex=True, na=False)
]
korean

Unnamed: 0_level_0,level,full_log,first_word
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,0,Jan 19 09:19:14 localhost logstash: [2021-01-1...,Jan
12,0,Dec 28 14:48:52 localhost logstash: [2020-12-2...,Dec
59,0,Jan 28 04:40:05 localhost logstash: [2021-01-2...,Jan
60,0,Jan 15 06:01:11 localhost logstash: [2021-01-1...,Jan
61,0,Jan 4 10:53:32 localhost logstash: [2021-01-0...,Jan
...,...,...,...
472919,0,Jan 18 08:21:00 localhost logstash: [2021-01-1...,Jan
472925,0,Sep 24 01:52:01 localhost logstash: [2020-09-2...,Sep
472954,0,Jan 18 10:54:00 localhost logstash: [2021-01-1...,Jan
472956,0,Jan 20 00:20:01 localhost logstash: [2021-01-2...,Jan


In [None]:
# Which first tokens the Korean-containing logs start with, and how often.
korean['first_word'].value_counts()

Jan    36284
Dec     4644
Nov     3124
Sep     2847
Oct     2806
Feb      322
Name: first_word, dtype: int64

In [None]:
# How the `level` label is distributed across the Korean-containing logs.
korean['level'].value_counts()

0    50027
Name: level, dtype: int64

In [None]:
# Number of distinct extracted Korean spans.
# The pattern is greedy: a span runs from one Hangul syllable to the last Hangul
# syllable on the same line, so any non-Korean text in between is captured too.
korean['full_log'].str.extract('([가-힣].*[가-힣])').nunique()

0    22
dtype: int64

In [None]:
# Print every distinct Hangul span, each followed by a blank line.
# expand=False yields the single capture group directly as a Series.
for kor in (
    korean['full_log']
    .str.extract('([가-힣].*[가-힣])', expand=False)
    .unique()
):
    print(kor, '\n')

연결이 거부됨 

호스트로 갈 루트가 없음 

네트워크가 접근 불가능합니다 

구매 오더 ( PO-5674967 ).zip\",\"sample-sha1\":\"\",\"sample-type\":\"unknown\",\"tlp\":\"GREEN\",\"path\":\"/var/esild/ctas/migration/B_악성코드 

악성코드 

열린 파일이 너무 많음 

경로 /org/gnome/Shell의 객체에 'org.gnome.Shell' 인터페이스가 없습니다 

연결이 거부됨 (Connection refused) {:url=>http://localhost:9200/, :error_message=>"Elasticsearch Unreachable: [http://localhost:9200/][Manticore::SocketException] 연결이 거부됨 

파이프가 깨어짐 (Write failed) {:url=>http://localhost:9200/, :error_message=>"Elasticsearch Unreachable: [http://localhost:9200/][Manticore::SocketException] 파이프가 깨어짐 

파일 “/var/lib/gdm/.local/share/icc/edid-ce8e43f2db967b9a8fd3e14110146a62.icc” 열기 실패: 허가 거부 

악성코드-HASH/202005/2020-05-29.csv\",\"confidence\":\"Low\",\"host\":\"localhost.localdomain\",\"sample-name\":\"전산 및 비전산자료 보존 요청서 

연결이 거부됨 (Connection refused) {:url=>http://127.0.0.1:9200/, :error_message=>"Elasticsearch Unreachable: [http://127.0.0.1:9200/][Manticore::SocketException] 연결이 거부됨 

파이프가 깨어짐 

잠글 수 없습

In [None]:
# Frequency of each distinct Hangul span, most common first.
# The extraction stays a one-column DataFrame so DataFrame.value_counts is used.
(
    korean['full_log']
    .str.extract('([가-힣].*[가-힣])', expand=True)
    .value_counts()
)

연결이 거부됨                                                                                                                                                                                                45386
호스트로 갈 루트가 없음                                                                                                                                                                                           4269
악성코드                                                                                                                                                                                                     113
네트워크가 접근 불가능합니다                                                                                                                                                                                          100
열린 파일이 너무 많음                                                                                                                                                                        