In [1]:
import pandas as pd
df = pd.read_csv("20171020_183001__full.log", sep='~', header=None, squeeze=True)
logs = df.str.extract('(\d.+)\s-\s+([^\s]+)\s-\s+([^\s]+)\s-\s+(.+)')
logs.rename(columns={0:'timestamp',1:'type',2:'module',3:'message'}, inplace=True)

In [2]:
logs.tail()

Unnamed: 0,timestamp,type,module,message
344417,"2017-10-22 22:28:38,146",ERROR,Microgate.instruments.climateChamber,error occured while retrieving climate cell st...
344418,,,,
344419,,,,
344420,,,,
344421,,,,


In [3]:
#if message is na, then it is stacktrace, as shown here.
df.loc[logs.message.isna()].head()

213                   Traceback (most recent call last):
214      File "/home/elt/E_ELT_M4/Develop/ProductionT...
215                                 self.highSpeedTest()
216      File "/home/elt/E_ELT_M4/Develop/ProductionT...
217        hs_send_command(aoSupp.ao, 0, HS.CMD_MEM_W...
Name: 0, dtype: object

In [3]:
#forward fill timestamp to replace NaN generated by stacktrace, which do not match structure
logs['timestamp'] = logs['timestamp'].fillna(method = 'ffill')
logs['message'] = logs['message'].fillna("stacktrace")
#forward fill the type, this will fill the stacktrace with ERROR type
logs['type'] = logs['type'].fillna(method='ffill')

In [4]:
#make categorical to save memory
logs.type = logs.type.astype('category')
logs.module = logs.module.astype('category')

In [5]:
logs.loc[logs['module'] == 'Microgate.instruments.climateChamber'].head(15)

Unnamed: 0,timestamp,type,module,message
0,"2017-10-20 18:30:01,459",INFO,Microgate.instruments.climateChamber,"Thermal test started, saving data to 20171020_..."
1,"2017-10-20 18:30:01,865",INFO,Microgate.instruments.climateChamber,Time Fri Oct 20 18:30:01 2017 tSet=21.20 tRead...
6,"2017-10-20 18:30:01,883",INFO,Microgate.instruments.climateChamber,"Done: time 0:00:00, cycles 1 stepOfCycle 1 - T..."
7,"2017-10-20 18:30:31,721",INFO,Microgate.instruments.climateChamber,Time Fri Oct 20 18:30:31 2017 tSet=20.60 tRead...
10,"2017-10-20 18:30:31,735",INFO,Microgate.instruments.climateChamber,"Done: time 0:00:30, cycles 1 stepOfCycle 2 - T..."
11,"2017-10-20 18:31:01,714",INFO,Microgate.instruments.climateChamber,Time Fri Oct 20 18:31:01 2017 tSet=20.00 tRead...
205,"2017-10-20 18:31:53,488",INFO,Microgate.instruments.climateChamber,"Done: time 0:01:00, cycles 1 stepOfCycle 3 - T..."
206,"2017-10-20 18:31:53,488",WARNING,Microgate.instruments.climateChamber,sleepTime=-22 is negative!!!
207,"2017-10-20 18:32:01,698",INFO,Microgate.instruments.climateChamber,Time Fri Oct 20 18:32:01 2017 tSet=20.00 tRead...
446,"2017-10-20 18:32:42,001",INFO,Microgate.instruments.climateChamber,"Done: time 0:02:00, cycles 1 stepOfCycle 5 - T..."


In [6]:
logs['isstep'] = logs.message.str.contains('cycles \d+ stepOfCycle \d+')

In [7]:
#use cumulative sum of step marker to get the step
logs['step'] = logs.isstep.iloc[::-1].cumsum()

In [8]:
last = logs['step'].max()
logs['step'] = (last - logs['step']) + 1

In [9]:
#make all log entries with same timestamp belong to the same step (earliest possible)
logs['step'] = logs.groupby('timestamp')['step'].transform(min).astype(int)

In [10]:
logs['info'] = logs['type'] == 'INFO'
logs['warning'] = logs['type'] == 'WARNING'
logs['error'] = logs['type'] == 'ERROR'

In [11]:
logs.tail(10)

Unnamed: 0,timestamp,type,module,message,isstep,step,info,warning,error
344412,"2017-10-22 22:28:36,143",ERROR,Microgate.instruments.climateChamber,error occured while retrieving climate cell st...,False,5264,False,False,True
344413,"2017-10-22 22:28:36,143",ERROR,,stacktrace,False,5264,False,False,True
344414,"2017-10-22 22:28:36,143",ERROR,,stacktrace,False,5264,False,False,True
344415,"2017-10-22 22:28:36,143",ERROR,,stacktrace,False,5264,False,False,True
344416,"2017-10-22 22:28:36,143",ERROR,,stacktrace,False,5264,False,False,True
344417,"2017-10-22 22:28:38,146",ERROR,Microgate.instruments.climateChamber,error occured while retrieving climate cell st...,False,5264,False,False,True
344418,"2017-10-22 22:28:38,146",ERROR,,stacktrace,False,5264,False,False,True
344419,"2017-10-22 22:28:38,146",ERROR,,stacktrace,False,5264,False,False,True
344420,"2017-10-22 22:28:38,146",ERROR,,stacktrace,False,5264,False,False,True
344421,"2017-10-22 22:28:38,146",ERROR,,stacktrace,False,5264,False,False,True


In [12]:
#get a dataframe of what kind of messages is in each step
logsteps = logs.groupby('step')['info', 'warning', 'error'].agg(any)
logsteps.head()

Unnamed: 0_level_0,info,warning,error
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,True,False,False
2,True,False,False
3,True,True,True
4,True,True,True
5,True,False,True


In [12]:
#only info
logsteps.loc[logsteps['info'] & ~logsteps['warning'] & ~logsteps['error']].shape[0]

965

In [15]:
#only warning
logsteps.loc[~logsteps['info'] & logsteps['warning'] & ~logsteps['error']].shape[0]

0

In [16]:
#only error
logsteps.loc[~logsteps['info'] & ~logsteps['warning'] & logsteps['error']].shape[0]

0

In [17]:
#only info and warning
logsteps.loc[logsteps['info'] & logsteps['warning'] & ~logsteps['error']].shape[0]

0

In [18]:
#only info and error
logsteps.loc[logsteps['info'] & ~logsteps['warning'] & logsteps['error']].shape[0]

214

In [19]:
#only warning and error
logsteps.loc[~logsteps['info'] & logsteps['warning'] & logsteps['error']].shape[0]

0

In [20]:
#all three
logsteps.loc[logsteps['info'] & logsteps['warning'] & logsteps['error']].shape[0]

4085

In [13]:
#change the warning message into template
#template mapping
def replace_template_warning(logs):
    mapping = [('ADC jump of \d+ detected on brick #\d+', 'ADC jump of {} detected on brick #{}'),
    ('sleepTime=-\d+ is negative!!!', 'sleepTime={} is negative')]
    for pat, replace in mapping:
        logs.loc[(logs.type == 'WARNING') & (logs.message.str.match(pat)), 'message'] = replace
    #logs.loc[logs.type == 'WARNING'].groupby('message').count()
    return logs

In [14]:
logs = replace_template_warning(logs)
logs.loc[logs.type == 'WARNING'].groupby('message').count()

Unnamed: 0_level_0,timestamp,type,module,isstep,step,info,warning,error
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ADC jump of {} detected on brick #{},15119,15119,15119,15119,15119,15119,15119,15119
Error not fatal,114,114,114,114,114,114,114,114
sleepTime={} is negative,14,14,14,14,14,14,14,14


In [16]:
#change the error message into template
#template error
def replace_template_err(logs):
    mapping = [('ADC reading error = .+%', 'ADC reading error = {}%'),
    ('ADC \d+ on Brick \d+ error .+% -- value \d+\[bit\]', 'ADC {} on Brick {} error {}% -- value {}[bit]'),
    ('can1RErrCnt:\s+\d+', 'can1RErrCnt: {}'), 
    ('wrong brick serial number= \d+ != \d+ detected on device \d+', 'wrong brick serial number= {} != {} detected on device {}'),
    ('ADC \d+ error .+% -- value \d+\[bit\]', 'ADC {} error {}% -- value {}[bit]')]
    for pat, replace in mapping:
        logs.loc[(logs.type == 'ERROR') & (logs.message.str.match(pat)), 'message'] = replace
    #logs.loc[logs.type == 'WARNING'].groupby('message').count()
    return logs

In [17]:
logs = replace_template_err(logs)
logs.loc[logs.type == 'ERROR'].groupby('message').count()

Unnamed: 0_level_0,timestamp,type,module,isstep,step,info,warning,error
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ADC reading error = {}%,21066,21066,21066,21066,21066,21066,21066,21066
ADC {} error {}% -- value {}[bit],87024,87024,87024,87024,87024,87024,87024,87024
Digital test failed,1,1,1,1,1,1,1,1
High speed error command detected,9,9,9,9,9,9,9,9
High speed test failed,9,9,9,9,9,9,9,9
Initialization after power on failed,2,2,2,2,2,2,2,2
No can messages?...,179,179,179,179,179,179,179,179
PIC not enabled force to IDLE,11262,11262,11262,11262,11262,11262,11262,11262
Unexcpected error power cycling...,8,8,8,8,8,8,8,8
can1RErrCnt: {},179,179,179,179,179,179,179,179


In [18]:
#calculate associations: supports
sup_w = logsteps.loc[logsteps['warning']].shape[0] / logsteps.shape[0]
sup_e = logsteps.loc[logsteps['error']].shape[0] / logsteps.shape[0]
sup_i = logsteps.loc[logsteps['info']].shape[0] / logsteps.shape[0]
sup_iw = logsteps.loc[logsteps['info'] & logsteps['warning']].shape[0] / logsteps.shape[0]
sup_ie = logsteps.loc[logsteps['info'] & logsteps['error']].shape[0] / logsteps.shape[0]
sup_we = logsteps.loc[logsteps['warning'] & logsteps['error']].shape[0] / logsteps.shape[0]
sup_iwe = logsteps.loc[logsteps['info'] & logsteps['warning'] & logsteps['error']].shape[0] / logsteps.shape[0]

print("Support for (warning) = {}".format(sup_w))
print("Support for (error) = {}".format(sup_e))
print("Support for (info) = {}".format(sup_i))
print("Support for (info, warning) = {}".format(sup_iw))
print("Support for (info, error) = {}".format(sup_ie))
print("Support for (warning, error) = {}".format(sup_we))
print("Support for (info, warning, error) = {}".format(sup_iwe))

Support for (error) = 0.8166793313069909
Support for (info) = 1.0
Support for (info, error) = 0.8166793313069909


In [19]:
#calculate associations: confidence
con_i_w = sup_iw / sup_i
con_i_e = sup_ie / sup_i
con_w_e = sup_we / sup_w
con_e_w = sup_we / sup_e
con_i_we = sup_iwe / sup_i

print("Confidence for (info)->(warning) = {}".format(con_i_w))
print("Confidence for (info)->(error) = {}".format(con_i_e))
print("Confidence for (warning)->(error) = {}".format(con_w_e))
print("Confidence for (error)->(warning) = {}".format(con_e_w))
print("Confidence for (info)->(warning, error) = {}".format(con_i_we))

Confidence for (info)->(error) = 0.8166793313069909


In [20]:
#calculate associations: lift
lift_w_e = sup_we / (sup_w * sup_e)
lift_i_we = sup_iwe / (sup_i * sup_we)

print("Lift for (warning) -> (error) = {}".format(lift_w_e))
print("Lift for (info) -> (warning, error) = {}".format(lift_i_we))



In [21]:
#mark the steps with stacktrace
logs['stacktrace'] = logs['message'] == 'stacktrace'
logsteps = logs.groupby('step')['info', 'warning', 'error', 'stacktrace'].agg(any)
logsteps.head()

Unnamed: 0_level_0,info,warning,error,stacktrace
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,True,False,False,False
2,True,False,False,False
3,True,True,True,False
4,True,True,True,True
5,True,False,True,False


In [22]:
#calculate association about stacktrace
sup_st = logsteps.loc[logsteps['stacktrace']].shape[0] / logsteps.shape[0]
sup_wst = logsteps.loc[logsteps['warning'] & logsteps['stacktrace']].shape[0] / logsteps.shape[0]

con_w_st = sup_wst / sup_w
con_e_st = sup_st / sup_e #sup_est == sup_st since every stacktrace is also error
con_st_w = sup_wst / sup_st

lift_w_st = sup_wst / (sup_w * sup_st)
lift_e_st = sup_st / (sup_e * sup_st)

print("Support for (stacktrace) is {}".format(sup_st))
print("Support for (warning, stacktrace) is {}".format(sup_wst))
print("Confidence for (warning) -> (stacktrace) is {}".format(con_w_st))
print("Confidence for (error) -> (stacktrace) is {}".format(con_e_st))
print("Confidence for (stacktrace) -> (warning) is {}".format(con_st_w))
print("Lift for (warning) -> (stacktrace) is {}".format(lift_w_st))
print("Lift for (error) -> (stacktrace) is {}".format(lift_e_st))

Support for (stacktrace) is 0.03704407294832827
Confidence for (error) -> (stacktrace) is 0.045359385903698535
Lift for (error) -> (stacktrace) is 1.2244708071644568


In [23]:
#find the association of each error type to stacktrace being printed:
summary = dict(error=list(), sup = list(), sup_with_trace = list(), conf_this_trace = list(), lift_this_trace = list())
for err_msg in logs.loc[logs.type == 'ERROR'].message.unique():
    logs['this_err'] = logs.message == err_msg
    logsteps = logs.groupby('step')['this_err', 'stacktrace'].agg(any)
    
    sup_err = logsteps.loc[logsteps['this_err']].shape[0] / logsteps.shape[0]
    sup_st = logsteps.loc[logsteps['stacktrace']].shape[0] / logsteps.shape[0]
    sup_err_st = logsteps.loc[logsteps['stacktrace'] & logsteps['this_err']].shape[0] / logsteps.shape[0]
    
    con_err_st = sup_err_st / sup_err
    lift = sup_err_st / (sup_err * sup_st)
    """
    print("For error: {}".format(err_msg))
    print("Support for (this, stacktrace) is {}".format(sup_err_st))
    print("Confidence for (this->stacktrace) is {}".format(con_err_st))
    print("Lift for (this->stacktrace) is {}".format(lift))
    print("---------------------------------")
    """
    summary['error'].append(err_msg)
    summary['sup'].append(sup_err)
    summary['sup_with_trace'].append(sup_err_st)
    summary['conf_this_trace'].append(con_err_st)
    summary['lift_this_trace'].append(lift)

summary_frame = pd.DataFrame(summary)
summary_frame

Unnamed: 0,error,sup,sup_with_trace,conf_this_trace,lift_this_trace
0,wrong brick serial number= {} != {} detected o...,0.00285,0.0019,0.666667,17.996581
1,ADC reading error = {}%,0.815919,0.036284,0.04447,1.20047
2,ADC {} error {}% -- value {}[bit],0.815919,0.036284,0.04447,1.20047
3,High speed error command detected,0.00171,0.00171,1.0,26.994872
4,High speed test failed,0.00171,0.00171,1.0,26.994872
5,stacktrace,0.037044,0.037044,1.0,26.994872
6,diagnostic buffers are still pending after 0.0...,0.00133,0.00133,1.0,26.994872
7,Unexcpected error power cycling...,0.00152,0.00152,1.0,26.994872
8,Initialization after power on failed,0.00038,0.00038,1.0,26.994872
9,Digital test failed,0.00019,0.00019,1.0,26.994872


In [24]:
#find the association of each warning type to stacktrace being printed:
summary_w = dict(warning=list(), sup = list(), sup_with_trace = list(), conf_this_trace = list(), lift_this_trace = list())
for warn_msg in logs.loc[logs.type == 'WARNING'].message.unique():
    logs['this_warn'] = logs.message == warn_msg
    logsteps = logs.groupby('step')['this_warn', 'stacktrace'].agg(any)
    
    sup_warn = logsteps.loc[logsteps['this_warn']].shape[0] / logsteps.shape[0]
    sup_st = logsteps.loc[logsteps['stacktrace']].shape[0] / logsteps.shape[0]
    sup_warn_st = logsteps.loc[logsteps['stacktrace'] & logsteps['this_warn']].shape[0] / logsteps.shape[0]
    
    con_warn_st = sup_warn_st / sup_warn
    lift = sup_warn_st / (sup_warn * sup_st)
    
    summary_w['warning'].append(warn_msg)
    summary_w['sup'].append(sup_warn)
    summary_w['sup_with_trace'].append(sup_warn_st)
    summary_w['conf_this_trace'].append(con_warn_st)
    summary_w['lift_this_trace'].append(lift)

summary_w_frame = pd.DataFrame(summary_w)
summary_w_frame

Unnamed: 0,warning,sup,sup_with_trace,conf_this_trace,lift_this_trace
0,Error not fatal,0.00285,0.0019,0.666667,17.996581
1,sleepTime={} is negative,0.00266,0.00171,0.642857,17.353846
2,ADC jump of {} detected on brick #{},0.774506,0.034574,0.044641,1.205069


# **---------------------------Run with another log file---------------------------------**

In [25]:
#try with another file
df2 = pd.read_csv("20171026_175308__full.log", sep='~', header=None, squeeze=True)
logs2 = df2.str.extract('(\d.+)\s-\s+([^\s]+)\s-\s+([^\s]+)\s-\s+(.+)')
logs2.rename(columns={0:'timestamp',1:'type',2:'module',3:'message'}, inplace=True)

#forward fill timestamp to replace NaN generated by stacktrace, which do not match structure
logs2['timestamp'] = logs2['timestamp'].fillna(method = 'ffill')
logs2['message'] = logs2['message'].fillna("stacktrace")
#forward fill the type, this will fill the stacktrace with ERROR type
logs2['type'] = logs2['type'].fillna(method='ffill')
#make categorical to save memory
logs.type = logs2.type.astype('category')
logs.module = logs2.module.astype('category')

In [26]:
logs2['isstep'] = logs2.message.str.contains('cycles \d+ stepOfCycle \d+')
#use cumulative sum of step marker to get the step
logs2['step'] = logs2.isstep.iloc[::-1].cumsum()
last = logs2['step'].max()
logs2['step'] = (last - logs2['step']) + 1
#make all log entries with same timestamp belong to the same step (earliest possible)
logs2['step'] = logs2.groupby('timestamp')['step'].transform(min).astype(int)
logs2['info'] = logs2['type'] == 'INFO'
logs2['warning'] = logs2['type'] == 'WARNING'
logs2['error'] = logs2['type'] == 'ERROR'
logs2.tail()

Unnamed: 0,timestamp,type,module,message,isstep,step,info,warning,error
332062,"2017-10-28 13:53:08,347",INFO,Microgate.instruments.climateChamber,Time Sat Oct 28 13:53:08 2017 tSet=20.00 tRead...,False,5236,True,False,False
332063,"2017-10-28 13:53:08,348",INFO,__main__,State = Off,False,5236,True,False,False
332064,"2017-10-28 13:53:09,163",INFO,Microgate.utils.testing,Saved file 20171026_175308__fullBrickBurnIn026...,False,5236,True,False,False
332065,"2017-10-28 13:53:09,163",INFO,Microgate.instruments.climateChamber,"Done: time 1 day, 20:00:00, cycles 2 stepOfCyc...",True,5236,True,False,False
332066,"2017-10-28 13:53:38,124",INFO,Microgate.instruments.climateChamber,Test Finished,False,5237,True,False,False


In [27]:
#get a dataframe of what kind of messages is in each step
logsteps2 = logs2.groupby('step')['info', 'warning', 'error'].agg(any)
logsteps2.head()

Unnamed: 0_level_0,info,warning,error
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,True,False,False
2,True,False,False
3,True,True,True
4,True,True,True
5,True,True,True


In [28]:
logs2 = replace_template_warning(logs2)
logs2.loc[logs2.type == 'WARNING'].groupby('message').count()

Unnamed: 0_level_0,timestamp,type,module,isstep,step,info,warning,error
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ADC jump of {} detected on brick #{},9198,9198,9198,9198,9198,9198,9198,9198
Error not fatal,271,271,271,271,271,271,271,271
sleepTime={} is negative,41,41,41,41,41,41,41,41


In [29]:
logs2 = replace_template_err(logs2)
logs2.loc[logs2.type == 'ERROR'].groupby('message').count()

Unnamed: 0_level_0,timestamp,type,module,isstep,step,info,warning,error
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ADC reading error = {}%,21730,21730,21730,21730,21730,21730,21730,21730
ADC {} error {}% -- value {}[bit],47675,47675,47675,47675,47675,47675,47675,47675
Digital test failed,1,1,1,1,1,1,1,1
High speed error command detected,15,15,15,15,15,15,15,15
High speed test failed,14,14,14,14,14,14,14,14
Initialization after power on failed,3,3,3,3,3,3,3,3
No can messages?...,1255,1255,1255,1255,1255,1255,1255,1255
PIC not enabled force to IDLE,4478,4478,4478,4478,4478,4478,4478,4478
Unexcpected error power cycling...,35,35,35,35,35,35,35,35
can1RErrCnt: {},1255,1255,1255,1255,1255,1255,1255,1255


In [30]:
#calculate associations: supports
sup_w = logsteps2.loc[logsteps2['warning']].shape[0] / logsteps2.shape[0]
sup_e = logsteps2.loc[logsteps2['error']].shape[0] / logsteps2.shape[0]
sup_i = logsteps2.loc[logsteps2['info']].shape[0] / logsteps2.shape[0]
sup_iw = logsteps2.loc[logsteps2['info'] & logsteps2['warning']].shape[0] / logsteps2.shape[0]
sup_ie = logsteps2.loc[logsteps2['info'] & logsteps2['error']].shape[0] / logsteps2.shape[0]
sup_we = logsteps2.loc[logsteps2['warning'] & logsteps2['error']].shape[0] / logsteps2.shape[0]
sup_iwe = logsteps2.loc[logsteps2['info'] & logsteps2['warning'] & logsteps2['error']].shape[0] / logsteps2.shape[0]

print("Support for (warning) = {}".format(sup_w))
print("Support for (error) = {}".format(sup_e))
print("Support for (info) = {}".format(sup_i))
print("Support for (info, warning) = {}".format(sup_iw))
print("Support for (info, error) = {}".format(sup_ie))
print("Support for (warning, error) = {}".format(sup_we))
print("Support for (info, warning, error) = {}".format(sup_iwe))

Support for (error) = 0.8155432499522628
Support for (info) = 1.0
Support for (info, error) = 0.8155432499522628


In [31]:
#calculate associations: confidence
con_i_w = sup_iw / sup_i
con_i_e = sup_ie / sup_i
con_w_e = sup_we / sup_w
con_e_w = sup_we / sup_e
con_i_we = sup_iwe / sup_i

print("Confidence for (info)->(warning) = {}".format(con_i_w))
print("Confidence for (info)->(error) = {}".format(con_i_e))
print("Confidence for (warning)->(error) = {}".format(con_w_e))
print("Confidence for (error)->(warning) = {}".format(con_e_w))
print("Confidence for (info)->(warning, error) = {}".format(con_i_we))

Confidence for (info)->(error) = 0.8155432499522628


In [32]:
#calculate associations: lift
lift_w_e = sup_we / (sup_w * sup_e)
lift_i_we = sup_iwe / (sup_i * sup_we)

print("Lift for (warning) -> (error) = {}".format(lift_w_e))
print("Lift for (info) -> (warning, error) = {}".format(lift_i_we))



In [33]:
#mark the steps with stacktrace
logs2['stacktrace'] = logs2['message'] == 'stacktrace'
logsteps2 = logs2.groupby('step')['info', 'warning', 'error', 'stacktrace'].agg(any)
logsteps2.head()

Unnamed: 0_level_0,info,warning,error,stacktrace
step,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,True,False,False,False
2,True,False,False,False
3,True,True,True,False
4,True,True,True,False
5,True,True,True,False


In [34]:
#calculate association about stacktrace
sup_st = logsteps2.loc[logsteps2['stacktrace']].shape[0] / logsteps2.shape[0]
sup_wst = logsteps2.loc[logsteps2['warning'] & logsteps2['stacktrace']].shape[0] / logsteps2.shape[0]

con_w_st = sup_wst / sup_w
con_e_st = sup_st / sup_e #sup_est == sup_st since every stacktrace is also error
con_st_w = sup_wst / sup_st

lift_w_st = sup_wst / (sup_w * sup_st)
lift_e_st = sup_st / (sup_e * sup_st)

print("Support for (stacktrace) is {}".format(sup_st))
print("Support for (warning, stacktrace) is {}".format(sup_wst))
print("Confidence for (warning) -> (stacktrace) is {}".format(con_w_st))
print("Confidence for (error) -> (stacktrace) is {}".format(con_e_st))
print("Confidence for (stacktrace) -> (warning) is {}".format(con_st_w))
print("Lift for (warning) -> (stacktrace) is {}".format(lift_w_st))
print("Lift for (error) -> (stacktrace) is {}".format(lift_e_st))

Support for (stacktrace) is 0.24746992552988353
Confidence for (error) -> (stacktrace) is 0.3034418169047062
Lift for (error) -> (stacktrace) is 1.226176539452119


In [36]:
print(logs[logs.message == 'stacktrace'].shape)
print(logs2[logs2.message == 'stacktrace'].shape)

(986, 12)
(5511, 10)


In [40]:
#find the association of each error type to stacktrace being printed:
summary = dict(error=list(), sup = list(), sup_with_trace = list(), conf_this_trace = list(), lift_this_trace = list())
for err_msg in logs2.loc[logs2.type == 'ERROR'].message.unique():
    logs2['this_err'] = logs2.message == err_msg
    logsteps2 = logs2.groupby('step')['this_err', 'stacktrace'].agg(any)
    
    sup_err = logsteps2.loc[logsteps2['this_err']].shape[0] / logsteps2.shape[0]
    sup_st = logsteps2.loc[logsteps2['stacktrace']].shape[0] / logsteps2.shape[0]
    sup_err_st = logsteps2.loc[logsteps2['stacktrace'] & logsteps2['this_err']].shape[0] / logsteps2.shape[0]
    
    con_err_st = sup_err_st / sup_err
    lift = sup_err_st / (sup_err * sup_st)
    """
    print("For error: {}".format(err_msg))
    print("Support for (this, stacktrace) is {}".format(sup_err_st))
    print("Confidence for (this->stacktrace) is {}".format(con_err_st))
    print("Lift for (this->stacktrace) is {}".format(lift))
    print("---------------------------------")
    """
    summary['error'].append(err_msg)
    summary['sup'].append(sup_err)
    summary['sup_with_trace'].append(sup_err_st)
    summary['conf_this_trace'].append(con_err_st)
    summary['lift_this_trace'].append(lift)

summary_frame = pd.DataFrame(summary)
summary_frame

Unnamed: 0,error,sup,sup_with_trace,conf_this_trace,lift_this_trace
0,wrong brick serial number= {} != {} detected o...,0.007829,0.006874,0.878049,3.548103
1,ADC reading error = {}%,0.812106,0.244033,0.300494,1.214264
2,ADC {} error {}% -- value {}[bit],0.812106,0.244033,0.300494,1.214264
3,High speed error command detected,0.002673,0.002673,1.0,4.040895
4,High speed test failed,0.002673,0.002673,1.0,4.040895
5,stacktrace,0.24747,0.24747,1.0,4.040895
6,diagnostic buffers are still pending after 0.0...,0.006492,0.006492,1.0,4.040895
7,Unexcpected error power cycling...,0.006683,0.006683,1.0,4.040895
8,Initialization after power on failed,0.000573,0.000573,1.0,4.040895
9,Digital test failed,0.000191,0.000191,1.0,4.040895


In [41]:
#find the association of each warning type to stacktrace being printed:
summary_w = dict(warning=list(), sup = list(), sup_with_trace = list(), conf_this_trace = list(), lift_this_trace = list())
for warn_msg in logs2.loc[logs2.type == 'WARNING'].message.unique():
    logs2['this_warn'] = logs2.message == warn_msg
    logsteps2 = logs2.groupby('step')['this_warn', 'stacktrace'].agg(any)
    
    sup_warn = logsteps2.loc[logsteps2['this_warn']].shape[0] / logsteps2.shape[0]
    sup_st = logsteps2.loc[logsteps2['stacktrace']].shape[0] / logsteps2.shape[0]
    sup_warn_st = logsteps2.loc[logsteps2['stacktrace'] & logsteps2['this_warn']].shape[0] / logsteps2.shape[0]
    
    con_warn_st = sup_warn_st / sup_warn
    lift = sup_warn_st / (sup_warn * sup_st)
    
    summary_w['warning'].append(warn_msg)
    summary_w['sup'].append(sup_warn)
    summary_w['sup_with_trace'].append(sup_warn_st)
    summary_w['conf_this_trace'].append(con_warn_st)
    summary_w['lift_this_trace'].append(lift)

summary_w_frame = pd.DataFrame(summary_w)
summary_w_frame

Unnamed: 0,warning,sup,sup_with_trace,conf_this_trace,lift_this_trace
0,Error not fatal,0.007829,0.006874,0.878049,3.548103
1,ADC jump of {} detected on brick #{},0.681115,0.218255,0.320437,1.294854
2,sleepTime={} is negative,0.007638,0.00611,0.8,3.232716
