In [1]:
import json
import os
import sys
import time
import pandas as pd
import numpy as np
from drain3 import TemplateMiner
from drain3.template_miner_config import TemplateMinerConfig

import logging
# Module-level logger. Records at INFO and above are written to stdout as the bare
# message text (no level or timestamp prefix), so they show up in the cell output.
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')

In [2]:
# Expose only the GPU with CUDA index 1 to this process.
# NOTE(review): the variable is set before the TensorFlow import, presumably because
# CUDA reads it at initialisation -- keep this line above the import.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
import tensorflow as tf

# GPU memory cap; multiplied by 1024 below before being passed as memory_limit.
# NOTE(review): presumably GiB converted to the MB unit memory_limit expects --
# confirm against the installed TensorFlow version.
LIMIT = 12
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Only the first visible GPU is configured, as a single capped virtual device.
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024*LIMIT)])
    # NOTE(review): logical_gpus is not read by any cell visible in this notebook.
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')

In [3]:
# Build the Drain3 template miner from the settings in "drain3.ini" (a relative path,
# so it is resolved against the current working directory), with profiling disabled.
config = TemplateMinerConfig()
config.load("drain3.ini")
config.profiling_enabled = False
# No persistence handler is passed here.
# NOTE(review): presumably the mined clusters then live only in memory for this
# kernel session -- confirm against the Drain3 documentation.
template_miner = TemplateMiner(config=config)

Starting Drain3 template miner


In [4]:
# Locations of the input logs. Train and test currently point at the same directory.
train_path = "/tf/jingsai/liantong/data"
result_path = ''  # not read by any cell visible in this notebook
test_path = "/tf/jingsai/liantong/data"
# Alternative kept for reference: take the three paths from the command line
# (NOTE(review): presumably for running this code as a script -- confirm).
# train_path = sys.argv[1]
# result_path = sys.argv[2]
# test_path = sys.argv[3]
# print(train_path)
# print(result_path)
# print(test_path)

In [5]:
# Running total of log lines fed to the template miner (incremented by the mining loop).
line_count = 0

# Read the train and test logs into one list so templates are mined over both.
# The test set is opened from test_path (not train_path) so the two directories may
# differ, and the encoding is pinned to UTF-8 to match the cell that re-reads these
# same files when building the DataFrames.
with open(os.path.join(train_path, "sysmonitor_trainset.txt"), encoding='utf-8') as f, \
        open(os.path.join(test_path, "sysmonitor_testset.txt"), encoding='utf-8') as t:
    lines = f.readlines() + t.readlines()

In [6]:
# Feed every raw log line to the Drain3 miner, logging throughput every `batch_size`
# lines and logging each input that creates or changes a cluster template.
start_time = time.time()
batch_start_time = start_time
batch_size = 10000
# Reset the counter here so that re-running this cell reports the lines processed by
# this run instead of adding to a stale total from a previous run.
line_count = 0

for line in lines:
    line = line.rstrip()
    # A blank line has no timestamp or message; indexing the empty split() result
    # below would raise IndexError, so skip it.
    if not line:
        continue
    # The timestamp is the leading token, terminated by whitespace or '|'.
    timestring = line.split(maxsplit=1)[0].split(sep="|",maxsplit=1)[0]
    # Drop the timestamp and its one-character separator; the rest is the message.
    line = line[len(timestring)+1:].strip()
    result = template_miner.add_log_message(line)
    line_count += 1
    if line_count % batch_size == 0:
        time_took = time.time() - batch_start_time
        # Guard against a zero interval on coarse clocks.
        rate = batch_size / time_took if time_took > 0 else float('inf')
        logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, "
                    f"{len(template_miner.drain.clusters)} clusters so far.")
        batch_start_time = time.time()
    # Only log lines that created a cluster or changed an existing template.
    if result["change_type"] != "none":
        result_json = json.dumps(result)
        logger.info(f"Input ({line_count}): " + line)
        logger.info("Result: " + result_json)

time_took = time.time() - start_time
rate = line_count / time_took if time_took > 0 else float('inf')
logger.info(f"--- Done processing file in {time_took:.2f} sec. Total of {line_count} lines, rate {rate:.1f} lines/sec, "
            f"{len(template_miner.drain.clusters)} clusters")



Input (1): sysmonitor[39896]|pkill[54810](parent:getosstat[40372]) send SIGTERM to iostat[17504]. <-[/bin/sh /usr/bin/getosstat(40372)]
Result: {"change_type": "cluster_created", "cluster_id": 1, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|pkill[<:NUM:>](parent:getosstat[<:NUM:>]) send SIGTERM to iostat[<:NUM:>]. <-[/bin/sh /usr/bin/getosstat(<:NUM:>)]", "cluster_count": 1}
Input (2): sysmonitor[39896]|pkill[54834](parent:getosstat[40372]) send SIGTERM to sar[17544]. <-[/bin/sh /usr/bin/getosstat(40372)]
Result: {"change_type": "cluster_template_changed", "cluster_id": 1, "cluster_size": 2, "template_mined": "sysmonitor[<:NUM:>]|pkill[<:NUM:>](parent:getosstat[<:NUM:>]) send SIGTERM to <:*:> <-[/bin/sh /usr/bin/getosstat(<:NUM:>)]", "cluster_count": 1}
Input (7): sysmonitor[39896]|systemd[1](parent:swapper/0[0]) send SIGTERM to irqbalance[72173].
Result: {"change_type": "cluster_created", "cluster_id": 2, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|systemd[<

Input (23775): sysmonitor[39896]|neutron-evs-age[15405](parent:neutron-evs-age[15247]) send SIGKILL to ovsdb-client[48479]. <-[/bin/bash /bin/neutron-evs-agentControl -S CHANGE -(15247)]<-[sudo -n numactl --preferred=0 neutron-evs-agentCont(15204)]<-[python /usr/bin/cpsRunControl --compo
Result: {"change_type": "cluster_created", "cluster_id": 11, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|neutron-evs-age[<:NUM:>](parent:neutron-evs-age[<:NUM:>]) send SIGKILL to ovsdb-client[<:NUM:>]. <-[/bin/bash /bin/neutron-evs-agentControl -S CHANGE -(<:NUM:>)]<-[sudo -n numactl --preferred=<:NUM:> neutron-evs-agentCont(<:NUM:>)]<-[python /usr/bin/cpsRunControl --compo", "cluster_count": 11}
Input (23777): sysmonitor[39896]|neutron-evs-age[15405](parent:neutron-evs-age[15247]) send SIGKILL to safe_ovsdb-clie[48469]. <-[/bin/bash /bin/neutron-evs-agentControl -S CHANGE -(15247)]<-[sudo -n numactl --preferred=0 neutron-evs-agentCont(15204)]<-[python /usr/bin/cpsRunControl --co
Result: 

Input (23805): sysmonitor[39896]|kill[22444](parent:xargs[22407]) send SIGKILL to nova-rootwrap-d[4048]. <-[xargs kill -9(22407)]<-[/bin/bash /bin/nova-computeControl -S CHANGE -N {'i(22402)]<-[/bin/bash /bin/nova-computeControl -S CHANGE -N {'i(14716)]<-[/bin/bash /bin/nova-computeContr
Result: {"change_type": "cluster_created", "cluster_id": 20, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|kill[<:NUM:>](parent:xargs[<:NUM:>]) send SIGKILL to nova-rootwrap-d[<:NUM:>]. <-[xargs kill <:NUM:>(<:NUM:>)]<-[/bin/bash /bin/nova-computeControl -S CHANGE -N {'i(<:NUM:>)]<-[/bin/bash /bin/nova-computeControl -S CHANGE -N {'i(<:NUM:>)]<-[/bin/bash /bin/nova-computeContr", "cluster_count": 20}
Input (23806): sysmonitor[39896]|sudo[4041](parent:systemd[1]) send SIGKILL to sudo[4041].
Result: {"change_type": "cluster_created", "cluster_id": 21, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|sudo[<:NUM:>](parent:systemd[<:NUM:>]) send SIGKILL to sudo[<:NUM:>].", "cluster_coun

Input (44457): sysmonitor[39580]|custom periodic monitor starting up
Result: {"change_type": "cluster_created", "cluster_id": 34, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|custom periodic monitor starting up", "cluster_count": 34}
Input (44458): sysmonitor[39580]|local disk io delay monitor starting up
Result: {"change_type": "cluster_created", "cluster_id": 35, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|local disk io delay monitor starting up", "cluster_count": 35}
Input (44459): sysmonitor[39580]|disk inode monitor starting up
Result: {"change_type": "cluster_created", "cluster_id": 36, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|disk inode monitor starting up", "cluster_count": 36}
Input (44460): sysmonitor[39580]|disk monitor starting up
Result: {"change_type": "cluster_created", "cluster_id": 37, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|disk monitor starting up", "cluster_count": 37}
Input (44461): sysmonitor[39580]|psc

Input (44515): sysmonitor[39580]|Watch path is in /var/log, watch /var/log/sysmonitor/process_monitor_ucompute.log for only delete event
Result: {"change_type": "cluster_created", "cluster_id": 55, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|Watch path is in /var/log, watch /var/log/sysmonitor/process monitor ucompute.log for only delete event", "cluster_count": 55}
Input (44520): sysmonitor[39580]|Watch path is in /var/log, watch /var/log/sysmonitor/process_monitor_unetwork.log for only delete event
Result: {"change_type": "cluster_template_changed", "cluster_id": 55, "cluster_size": 2, "template_mined": "sysmonitor[<:NUM:>]|Watch path is in /var/log, watch /var/log/sysmonitor/process monitor <:*:> for only delete event", "cluster_count": 55}
Input (44522): sysmonitor[39580]|file name is "/etc/uvplogdump.d/uvplogdump.conf", watch event is 0x200
Result: {"change_type": "cluster_created", "cluster_id": 56, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|file name

Input (44634): sysmonitor[39580]|ext3 /proc/mounts:
Result: {"change_type": "cluster_created", "cluster_id": 69, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|ext3 /proc/mounts:", "cluster_count": 69}
Input (44635): sysmonitor[39580]|ext4 /proc/mounts:/dev/mapper/cpsVG-bak_rootfs
Result: {"change_type": "cluster_created", "cluster_id": 70, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|ext4 /proc/mounts:/dev/mapper/cpsVG-bak rootfs", "cluster_count": 70}
Input (44641): sysmonitor[39580]|custom daemon monitor: child process[40842] name qemu_hotreplace_alarm exit[1].
Result: {"change_type": "cluster_created", "cluster_id": 71, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|custom daemon monitor: child process[<:NUM:>] name qemu hotreplace alarm exit[<:NUM:>].", "cluster_count": 71}
Input (44644): sysmonitor[39580]|process monitor started
Result: {"change_type": "cluster_created", "cluster_id": 72, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]

Input (44730): sysmonitor[39580]|1 events queued
Result: {"change_type": "cluster_created", "cluster_id": 97, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|<:NUM:> events queued", "cluster_count": 97}
Input (44731): sysmonitor[39580]|1th event handled
Result: {"change_type": "cluster_created", "cluster_id": 98, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|1th event handled", "cluster_count": 98}
Input (44732): sysmonitor[39580]|File "/etc/qemu/qemu.conf" was deleted. It's maybe changed,rc:19
Result: {"change_type": "cluster_created", "cluster_id": 99, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|File \"/etc/qemu/qemu.conf\" was deleted. It's maybe changed,rc:<:NUM:>", "cluster_count": 99}
Input (44733): sysmonitor[39580]|memory usage resume: 64.7%
Result: {"change_type": "cluster_created", "cluster_id": 100, "cluster_size": 1, "template_mined": "sysmonitor[<:NUM:>]|memory usage resume: <:NUM:>.<:NUM:>%", "cluster_count": 100}
Input (44752): sysmoni

Input (46716): sysmonitor[38230]|timeout[51350](parent:sudo[51343]) send SIGTERM to ntpdate[57101]. <-[sudo -n timeout 30 sh /etc/ntp/control/ntp-client-s(51343)]<-[sh -c { sudo -n timeout 30 sh /etc/ntp/control/ntp-(51342)]<-[/usr/bin/python2.7 /bin/cps_starter/cpsstarter.py F(43825)]
Result: {"change_type": "cluster_template_changed", "cluster_id": 114, "cluster_size": 2, "template_mined": "sysmonitor[<:NUM:>]|timeout[<:NUM:>](parent:sudo[<:NUM:>]) send SIGTERM to <:*:> <-[sudo -n timeout <:NUM:> sh /etc/ntp/control/ntp-client-s(<:NUM:>)]<-[sh -c { sudo -n timeout <:NUM:> sh /etc/ntp/control/ntp-(<:NUM:>)]<-[/usr/bin/python2.<:NUM:> /bin/cps starter/cpsstarter.py F(<:NUM:>)]", "cluster_count": 114}
--- Done processing file in 3.84 sec. Total of 47870 lines, rate 12460.1 lines/sec, 114 clusters


In [7]:
def _load_sysmonitor_rows(log_path):
    """Parse one sysmonitor log file into ``[LineId, timestamp, message, line]`` rows.

    Each line is expected to start with a timestamp whose first 19 characters have
    the form ``YYYY-MM-DDTHH:MM:SS`` and which is terminated by whitespace or ``|``.
    Anything in the timestamp token after those 19 characters is ignored when
    computing the timestamp.

    Args:
        log_path: Path of the UTF-8 encoded log file.

    Returns:
        A list of rows. ``LineId`` is the 1-based line number in the file (skipped
        lines still consume a number), ``timestamp`` is the value returned by
        ``time.mktime`` for the parsed time, ``message`` is the stripped text after
        the timestamp token and its separator, and ``line`` is the raw line.
    """
    rows = []
    with open(log_path, 'r', encoding='utf-8') as f:
        for line_id, line in enumerate(f, start=1):
            try:
                timestring = line.split(maxsplit=1)[0].split(sep="|", maxsplit=1)[0]
                timestamp = time.mktime(time.strptime(timestring[:19], "%Y-%m-%dT%H:%M:%S"))
            except (IndexError, ValueError, OverflowError):
                # IndexError: blank line. ValueError: the prefix is not a timestamp.
                # OverflowError: the date is outside the range mktime supports.
                # Such lines are skipped; anything else (e.g. KeyboardInterrupt)
                # is allowed to propagate.
                continue
            message = line[len(timestring)+1:].strip()
            rows.append([line_id, timestamp, message, line])
    return rows


sys_train_list = _load_sysmonitor_rows(os.path.join(train_path, "sysmonitor_trainset.txt"))
df_sys_train = pd.DataFrame(sys_train_list,columns=['LineId','timestamp','message','line'])
print(df_sys_train.shape)


sys_test_list = _load_sysmonitor_rows(os.path.join(test_path, "sysmonitor_testset.txt"))
df_sys_test = pd.DataFrame(sys_test_list,columns=['LineId','timestamp','message','line'])
print(df_sys_test.shape)

(38018, 4)
(9852, 4)


In [8]:
def spell_templates(x):
    """Look up the mined template that matches log message ``x``.

    Uses the notebook-level ``template_miner``. Returns a dict with the keys
    ``lcsseq`` (template string), ``lcsseq_id`` (cluster id), ``param`` (list of
    the template's tokens) and ``match`` (1 on a match, 0 otherwise). When nothing
    matches, ``lcsseq`` and ``lcsseq_id`` are NaN and ``param`` is an empty list.
    """
    cluster = template_miner.match(x)
    if not cluster:
        return {'lcsseq': np.nan, 'lcsseq_id': np.nan, 'param': [], 'match': 0}

    tokens = list(cluster.log_template_tokens)
    return {
        'lcsseq': cluster.get_template(),
        'lcsseq_id': cluster.cluster_id,
        'param': tokens,
        'match': 1,
    }

# Match every message against the mined templates, then expand the resulting dicts
# into one column per key. Both frames get exactly the same treatment.
_template_columns = ['lcsseq', 'lcsseq_id', 'param', 'match']

df_sys_train['spell_templates'] = df_sys_train['message'].apply(spell_templates)
df_sys_test['spell_templates'] = df_sys_test['message'].apply(spell_templates)

df_sys_train[_template_columns] = df_sys_train['spell_templates'].apply(pd.Series)
df_sys_test[_template_columns] = df_sys_test['spell_templates'].apply(pd.Series)


In [9]:
# Training rows for which no template matched (lcsseq_id is NaN).
df_sys_train.loc[df_sys_train['lcsseq_id'].isna()]

Unnamed: 0,LineId,timestamp,message,line,spell_templates,lcsseq,lcsseq_id,param,match


In [10]:
# Test rows for which no template matched (lcsseq_id is NaN).
df_sys_test.loc[df_sys_test['lcsseq_id'].isna()]

Unnamed: 0,LineId,timestamp,message,line,spell_templates,lcsseq,lcsseq_id,param,match


In [11]:
def time_gap(df):
    """Return the time elapsed since the previous row, one string per row.

    Rows are taken in their positional order, so the frame's index labels do not
    matter (a filtered or re-indexed frame works as well as a fresh one).

    Args:
        df: DataFrame with a numeric ``timestamp`` column.

    Returns:
        A list of strings with one entry per row. The first entry is ``'0'``; every
        later entry is ``str(int(current - previous))``, i.e. the difference
        truncated toward zero. An empty frame yields an empty list.
    """
    stamps = df["timestamp"].tolist()
    if not stamps:
        return []
    # Pair each timestamp with its predecessor; the first row has none, hence '0'.
    return ['0'] + [str(int(now - last)) for last, now in zip(stamps, stamps[1:])]

# Compute the per-row time gaps and store them as a new column, one frame at a time.
time_gap_train = time_gap(df_sys_train)
df_sys_train['time_gap'] = time_gap_train

time_gap_test = time_gap(df_sys_test)
df_sys_test['time_gap'] = time_gap_test


def paralist(row):
    """Return ``[row['param'], row['time_gap']]`` as a new two-element list.

    The ``param`` value is nested as-is (it is not copied or flattened) and the
    ``time_gap`` value follows it.
    """
    return [row['param'], row['time_gap']]

# Build the ParameterList column row by row (template tokens plus the time gap).
df_sys_train['ParameterList'] = df_sys_train.apply(paralist, axis='columns')
df_sys_test['ParameterList'] = df_sys_test.apply(paralist, axis='columns')

In [12]:
# Rename by column name rather than overwriting every label by position: a positional
# overwrite would silently put labels on the wrong data if an earlier cell ever added
# or reordered a column. Only these three names change; the other columns keep theirs.
# NOTE(review): presumably the new names are what the detection modules called later
# expect -- confirm against execution_path_detect / param_value_detect.
_event_column_names = {
    'message': 'Content',
    'lcsseq': 'EventTemplate',
    'lcsseq_id': 'EventId',
}
df_sys_train = df_sys_train.rename(columns=_event_column_names)
df_sys_test = df_sys_test.rename(columns=_event_column_names)

In [13]:
# Spot-check the ParameterList of the row labelled 0.
df_sys_train.at[0, 'ParameterList']

[['sysmonitor[<:NUM:>]|pkill[<:NUM:>](parent:getosstat[<:NUM:>])',
  'send',
  'SIGTERM',
  'to',
  '<:*:>',
  '<-[/bin/sh',
  '/usr/bin/getosstat(<:NUM:>)]'],
 '0']

In [14]:
# Persist both frames to the working directory, without the index column.
df_sys_train.to_csv('df_sys_train.csv', index=False)
df_sys_test.to_csv('df_sys_test.csv', index=False)

In [20]:
# Execution-path anomaly detection.
# The printed banner below is runtime output and is intentionally left as-is.
print("执行路径异常检测")
# Project-local module; it is called with the train and test frames.
# NOTE(review): the recorded output shows Keras-style epoch logs (500 epochs), so this
# call presumably trains a model -- confirm in execution_path_detect.
from execution_path_detect import execution_path
execution_path(df_sys_train, df_sys_test)

执行路径异常检测
Epoch 1/500
 - 1s - loss: 3.2920 - accuracy: 0.1979
Epoch 2/500
 - 0s - loss: 3.2771 - accuracy: 0.1979
Epoch 3/500
 - 0s - loss: 3.2605 - accuracy: 0.1615
Epoch 4/500
 - 0s - loss: 3.2396 - accuracy: 0.1562
Epoch 5/500
 - 0s - loss: 3.2111 - accuracy: 0.1302
Epoch 6/500
 - 0s - loss: 3.1686 - accuracy: 0.1302
Epoch 7/500
 - 0s - loss: 3.0997 - accuracy: 0.1302
Epoch 8/500
 - 0s - loss: 2.9814 - accuracy: 0.1302
Epoch 9/500
 - 0s - loss: 2.7833 - accuracy: 0.1302
Epoch 10/500
 - 0s - loss: 2.5709 - accuracy: 0.1771
Epoch 11/500
 - 0s - loss: 2.4894 - accuracy: 0.1406
Epoch 12/500
 - 0s - loss: 2.4434 - accuracy: 0.1979
Epoch 13/500
 - 0s - loss: 2.4269 - accuracy: 0.2552
Epoch 14/500
 - 0s - loss: 2.4224 - accuracy: 0.1979
Epoch 15/500
 - 0s - loss: 2.4196 - accuracy: 0.1979
Epoch 16/500
 - 0s - loss: 2.4168 - accuracy: 0.1979
Epoch 17/500
 - 0s - loss: 2.4141 - accuracy: 0.1979
Epoch 18/500
 - 0s - loss: 2.4118 - accuracy: 0.1979
Epoch 19/500
 - 0s - loss: 2.4097 - accuracy: 

Epoch 155/500
 - 0s - loss: 1.9267 - accuracy: 0.3385
Epoch 156/500
 - 0s - loss: 1.9260 - accuracy: 0.3385
Epoch 157/500
 - 0s - loss: 1.9229 - accuracy: 0.3385
Epoch 158/500
 - 0s - loss: 1.9215 - accuracy: 0.3333
Epoch 159/500
 - 0s - loss: 1.9188 - accuracy: 0.3333
Epoch 160/500
 - 0s - loss: 1.9178 - accuracy: 0.3333
Epoch 161/500
 - 0s - loss: 1.9151 - accuracy: 0.3385
Epoch 162/500
 - 0s - loss: 1.9141 - accuracy: 0.3385
Epoch 163/500
 - 0s - loss: 1.9110 - accuracy: 0.3385
Epoch 164/500
 - 0s - loss: 1.9111 - accuracy: 0.3385
Epoch 165/500
 - 0s - loss: 1.9071 - accuracy: 0.3438
Epoch 166/500
 - 0s - loss: 1.9064 - accuracy: 0.3385
Epoch 167/500
 - 0s - loss: 1.9037 - accuracy: 0.3385
Epoch 168/500
 - 0s - loss: 1.9020 - accuracy: 0.3438
Epoch 169/500
 - 0s - loss: 1.9009 - accuracy: 0.3385
Epoch 170/500
 - 0s - loss: 1.8983 - accuracy: 0.3438
Epoch 171/500
 - 0s - loss: 1.8967 - accuracy: 0.3385
Epoch 172/500
 - 0s - loss: 1.8941 - accuracy: 0.3438
Epoch 173/500
 - 0s - loss: 

Epoch 307/500
 - 0s - loss: 1.6464 - accuracy: 0.4115
Epoch 308/500
 - 0s - loss: 1.6382 - accuracy: 0.4115
Epoch 309/500
 - 0s - loss: 1.6444 - accuracy: 0.4167
Epoch 310/500
 - 0s - loss: 1.6359 - accuracy: 0.4219
Epoch 311/500
 - 0s - loss: 1.6434 - accuracy: 0.4271
Epoch 312/500
 - 0s - loss: 1.6357 - accuracy: 0.4219
Epoch 313/500
 - 0s - loss: 1.6436 - accuracy: 0.4271
Epoch 314/500
 - 0s - loss: 1.6376 - accuracy: 0.4219
Epoch 315/500
 - 0s - loss: 1.6427 - accuracy: 0.4271
Epoch 316/500
 - 0s - loss: 1.6365 - accuracy: 0.4115
Epoch 317/500
 - 0s - loss: 1.6420 - accuracy: 0.4323
Epoch 318/500
 - 0s - loss: 1.6331 - accuracy: 0.4271
Epoch 319/500
 - 0s - loss: 1.6383 - accuracy: 0.4427
Epoch 320/500
 - 0s - loss: 1.6308 - accuracy: 0.4167
Epoch 321/500
 - 0s - loss: 1.6350 - accuracy: 0.4375
Epoch 322/500
 - 0s - loss: 1.6256 - accuracy: 0.4271
Epoch 323/500
 - 0s - loss: 1.6294 - accuracy: 0.4479
Epoch 324/500
 - 0s - loss: 1.6196 - accuracy: 0.4323
Epoch 325/500
 - 0s - loss: 

Epoch 459/500
 - 0s - loss: 1.4479 - accuracy: 0.4792
Epoch 460/500
 - 0s - loss: 1.4512 - accuracy: 0.4844
Epoch 461/500
 - 0s - loss: 1.4459 - accuracy: 0.4844
Epoch 462/500
 - 0s - loss: 1.4496 - accuracy: 0.4844
Epoch 463/500
 - 0s - loss: 1.4435 - accuracy: 0.4844
Epoch 464/500
 - 0s - loss: 1.4467 - accuracy: 0.4896
Epoch 465/500
 - 0s - loss: 1.4408 - accuracy: 0.4844
Epoch 466/500
 - 0s - loss: 1.4446 - accuracy: 0.4948
Epoch 467/500
 - 0s - loss: 1.4391 - accuracy: 0.4896
Epoch 468/500
 - 0s - loss: 1.4416 - accuracy: 0.4948
Epoch 469/500
 - 0s - loss: 1.4359 - accuracy: 0.4896
Epoch 470/500
 - 0s - loss: 1.4386 - accuracy: 0.4948
Epoch 471/500
 - 0s - loss: 1.4329 - accuracy: 0.4896
Epoch 472/500
 - 0s - loss: 1.4363 - accuracy: 0.4948
Epoch 473/500
 - 0s - loss: 1.4315 - accuracy: 0.4896
Epoch 474/500
 - 0s - loss: 1.4339 - accuracy: 0.4948
Epoch 475/500
 - 0s - loss: 1.4280 - accuracy: 0.4948
Epoch 476/500
 - 0s - loss: 1.4304 - accuracy: 0.4948
Epoch 477/500
 - 0s - loss: 

In [21]:
# Load the anomaly rows from the working directory and preview the first few.
# NOTE(review): 'execute_path_anormal.csv' is presumably written by execution_path()
# in the previous cell -- confirm; nothing in this notebook creates it directly.
df_anomaly_message = pd.read_csv('execute_path_anormal.csv')
df_anomaly_message.head()

Unnamed: 0,LineId,timestamp,Content,line,spell_templates,EventTemplate,EventId,param,match,time_gap,ParameterList
0,4,1618964000.0,sysmonitor[39896]|bash[35957](parent:bash[3592...,2021-04-21T00:07:47.937864+08:00|sysmonitor[39...,{'lcsseq': 'sysmonitor[<:NUM:>]|bash[<:NUM:>](...,sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bash[...,4,['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bas...,1,0,[['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:ba...
1,9,1618964000.0,sysmonitor[39896]|bash[35957](parent:bash[3592...,2021-04-21T00:07:48.187071+08:00|sysmonitor[39...,{'lcsseq': 'sysmonitor[<:NUM:>]|bash[<:NUM:>](...,sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bash[...,4,['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bas...,1,0,[['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:ba...
2,14,1618965000.0,sysmonitor[39896]|pkill[63786](parent:getossta...,2021-04-21T00:27:00.470193+08:00|sysmonitor[39...,{'lcsseq': 'sysmonitor[<:NUM:>]|pkill[<:NUM:>]...,sysmonitor[<:NUM:>]|pkill[<:NUM:>](parent:geto...,1,['sysmonitor[<:NUM:>]|pkill[<:NUM:>](parent:ge...,1,0,[['sysmonitor[<:NUM:>]|pkill[<:NUM:>](parent:g...
3,23,1618966000.0,sysmonitor[39896]|bash[73676](parent:bash[7359...,2021-04-21T00:45:20.767392+08:00|sysmonitor[39...,{'lcsseq': 'sysmonitor[<:NUM:>]|bash[<:NUM:>](...,sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bash[...,4,['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bas...,1,0,[['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:ba...
4,27,1618966000.0,sysmonitor[39896]|bash[73676](parent:bash[7359...,2021-04-21T00:45:20.917612+08:00|sysmonitor[39...,{'lcsseq': 'sysmonitor[<:NUM:>]|bash[<:NUM:>](...,sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bash[...,4,['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:bas...,1,0,[['sysmonitor[<:NUM:>]|bash[<:NUM:>](parent:ba...


In [17]:
# (rows, columns) of the loaded anomaly frame.
df_anomaly_message.shape

(557, 11)

In [18]:
# Create the directories that hold intermediate files, under ./tmpdata.
tmpdata_path = ["struct", "EventNpy", "SpellResult", "ParamData", "ParamModel", "ExecutePathModel"]
for path in tmpdata_path:
    # exist_ok=True keeps the cell safe to re-run and avoids the gap between checking
    # for the directory and creating it. A regular file at the same path now raises
    # FileExistsError instead of being silently accepted.
    os.makedirs(f"tmpdata/{path}", exist_ok=True)

In [19]:
# Parameter-value-vector anomaly detection.
# The printed banner below is runtime output and is intentionally left as-is.
print("参数值向量异常检测")
# Project-local module; it is called with the train and test frames.
# FIXME: the recorded output of this cell ends in "IndexError: list index out of
# range"; the cause is not visible from this notebook.
from param_value_detect import param_value
param_value(df_sys_train, df_sys_test)

参数值向量异常检测


IndexError: list index out of range

In [None]:
# TODO: use a separate notebook -- save the train/test frames there and reproduce this error.

In [23]:
# df_sys_test[['Content']].drop_duplicates().shape

(8944, 1)

In [24]:
# df_sys_test[['Content']].drop_duplicates().to_csv('sys_messages.csv',index=None)

In [28]:
# df_sys_test[['Content']].drop_duplicates().to_excel('sys_messages.xlsx',index=None)