Skip to content

Commit 1330e4a

Browse files
authored
Add error log and metrics to AI telemetry. (#656)
* Accelerate metrics report from every 30 mins to every 5 mins. * Add errCountTest metric. * Refactor SendAiMetrics. AI initialization is in main routine while send metrics is in another go routine. * Add aiMetadata config. * Add SendErrorMetrics function in ai utils. * Going to push error log to AI telemetry. * Add error log to AI telemetry. * Change error message format. * Add error log and metrics to AI telemetry. * Remove unnecessary const. * Change heartbeat back to every 30 mins. * Separate send log from SendErrorMetric function for better reuse. * Change a unit test set name to avoid kernel conflict. * Address comments. Make error log and metrics sending more generic. * Fix typo. * Fix indentation. * Fix AI initialize issue. * Remove unnecessary log. * Use break in if condition.
1 parent f4c0802 commit 1330e4a

File tree

7 files changed

+200
-98
lines changed

7 files changed

+200
-98
lines changed

npm/ipsm/ipsm.go

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// Package ipsm focuses on ipset operations.
12
// Copyright 2018 Microsoft. All rights reserved.
23
// MIT License
34
package ipsm
@@ -95,7 +96,7 @@ func (ipsMgr *IpsetManager) CreateList(listName string) error {
9596
}
9697
log.Logf("Creating List: %+v", entry)
9798
if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 {
98-
log.Errorf("Error: failed to create ipset list %s.", listName)
99+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset list %s.", listName)
99100
return err
100101
}
101102

@@ -116,7 +117,7 @@ func (ipsMgr *IpsetManager) DeleteList(listName string) error {
116117
return nil
117118
}
118119

119-
log.Errorf("Error: failed to delete ipset %s %+v", listName, entry)
120+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset %s %+v", listName, entry)
120121
return err
121122
}
122123

@@ -146,7 +147,7 @@ func (ipsMgr *IpsetManager) AddToList(listName string, setName string) error {
146147
}
147148

148149
if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 {
149-
log.Errorf("Error: failed to create ipset rules. rule: %+v", entry)
150+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset rules. rule: %+v", entry)
150151
return err
151152
}
152153

@@ -158,7 +159,7 @@ func (ipsMgr *IpsetManager) AddToList(listName string, setName string) error {
158159
// DeleteFromList removes an ipset from an ipset list.
159160
func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) error {
160161
if _, exists := ipsMgr.listMap[listName]; !exists {
161-
log.Logf("ipset list with name %s not found", listName)
162+
metrics.SendErrorMetric(util.IpsmID, "ipset list with name %s not found", listName)
162163
return nil
163164
}
164165

@@ -170,7 +171,7 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro
170171
}
171172

172173
if _, err := ipsMgr.Run(entry); err != nil {
173-
log.Errorf("Error: failed to delete ipset entry. %+v", entry)
174+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset entry. %+v", entry)
174175
return err
175176
}
176177

@@ -181,7 +182,7 @@ func (ipsMgr *IpsetManager) DeleteFromList(listName string, setName string) erro
181182

182183
if len(ipsMgr.listMap[listName].elements) == 0 {
183184
if err := ipsMgr.DeleteList(listName); err != nil {
184-
log.Errorf("Error: failed to delete ipset list %s.", listName)
185+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset list %s.", listName)
185186
return err
186187
}
187188
}
@@ -206,7 +207,7 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error {
206207
}
207208
log.Logf("Creating Set: %+v", entry)
208209
if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 {
209-
log.Errorf("Error: failed to create ipset.")
210+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset.")
210211
return err
211212
}
212213

@@ -222,7 +223,7 @@ func (ipsMgr *IpsetManager) CreateSet(setName string, spec []string) error {
222223
// DeleteSet removes a set from ipset.
223224
func (ipsMgr *IpsetManager) DeleteSet(setName string) error {
224225
if _, exists := ipsMgr.setMap[setName]; !exists {
225-
log.Logf("ipset with name %s not found", setName)
226+
metrics.SendErrorMetric(util.IpsmID, "ipset with name %s not found", setName)
226227
return nil
227228
}
228229

@@ -236,7 +237,7 @@ func (ipsMgr *IpsetManager) DeleteSet(setName string) error {
236237
return nil
237238
}
238239

239-
log.Errorf("Error: failed to delete ipset %s. Entry: %+v", setName, entry)
240+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset %s. Entry: %+v", setName, entry)
240241
return err
241242
}
242243

@@ -285,7 +286,7 @@ func (ipsMgr *IpsetManager) AddToSet(setName, ip, spec, podUid string) error {
285286
}
286287

287288
if errCode, err := ipsMgr.Run(entry); err != nil && errCode != 1 {
288-
log.Logf("Error: failed to create ipset rules. %+v", entry)
289+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to create ipset rules. %+v", entry)
289290
return err
290291
}
291292

@@ -329,7 +330,7 @@ func (ipsMgr *IpsetManager) DeleteFromSet(setName, ip, podUid string) error {
329330
return nil
330331
}
331332

332-
log.Errorf("Error: failed to delete ipset entry. Entry: %+v", entry)
333+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to delete ipset entry. Entry: %+v", entry)
333334
return err
334335
}
335336

@@ -354,7 +355,7 @@ func (ipsMgr *IpsetManager) Clean() error {
354355
}
355356

356357
if err := ipsMgr.DeleteSet(setName); err != nil {
357-
log.Errorf("Error: failed to clean ipset")
358+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to clean ipset")
358359
return err
359360
}
360361
}
@@ -365,7 +366,7 @@ func (ipsMgr *IpsetManager) Clean() error {
365366
}
366367

367368
if err := ipsMgr.DeleteList(listName); err != nil {
368-
log.Errorf("Error: failed to clean ipset list")
369+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to clean ipset list")
369370
return err
370371
}
371372
}
@@ -379,13 +380,13 @@ func (ipsMgr *IpsetManager) Destroy() error {
379380
operationFlag: util.IpsetFlushFlag,
380381
}
381382
if _, err := ipsMgr.Run(entry); err != nil {
382-
log.Errorf("Error: failed to flush ipset")
383+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to flush ipset")
383384
return err
384385
}
385386

386387
entry.operationFlag = util.IpsetDestroyFlag
387388
if _, err := ipsMgr.Run(entry); err != nil {
388-
log.Errorf("Error: failed to destroy ipset")
389+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to destroy ipset")
389390
return err
390391
}
391392

@@ -405,7 +406,7 @@ func (ipsMgr *IpsetManager) Run(entry *ipsEntry) (int, error) {
405406
if msg, failed := err.(*exec.ExitError); failed {
406407
errCode := msg.Sys().(syscall.WaitStatus).ExitStatus()
407408
if errCode > 0 {
408-
log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n"))
409+
metrics.SendErrorMetric(util.IpsmID, "Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n"))
409410
}
410411

411412
return errCode, err
@@ -422,7 +423,7 @@ func (ipsMgr *IpsetManager) Save(configFile string) error {
422423

423424
cmd := exec.Command(util.Ipset, util.IpsetSaveFlag, util.IpsetFileFlag, configFile)
424425
if err := cmd.Start(); err != nil {
425-
log.Errorf("Error: failed to save ipset to file.")
426+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to save ipset to file.")
426427
return err
427428
}
428429
cmd.Wait()
@@ -438,7 +439,7 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error {
438439

439440
f, err := os.Stat(configFile)
440441
if err != nil {
441-
log.Errorf("Error: failed to get file %s stat from ipsm.Restore", configFile)
442+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to get file %s stat from ipsm.Restore", configFile)
442443
return err
443444
}
444445

@@ -450,12 +451,12 @@ func (ipsMgr *IpsetManager) Restore(configFile string) error {
450451

451452
cmd := exec.Command(util.Ipset, util.IpsetRestoreFlag, util.IpsetFileFlag, configFile)
452453
if err := cmd.Start(); err != nil {
453-
log.Errorf("Error: failed to restore ipset from file.")
454+
metrics.SendErrorMetric(util.IpsmID, "Error: failed to to restore ipset from file.")
454455
return err
455456
}
456457
cmd.Wait()
457458

458459
//TODO based on the set name and number of entries in the config file, update IPSetInventory
459460

460461
return nil
461-
}
462+
}

npm/ipsm/ipsm_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ func TestDestroy(t *testing.T) {
468468
}
469469
}()
470470

471-
if err := ipsMgr.AddToSet("test-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil {
471+
if err := ipsMgr.AddToSet("test-destroy-set", "1.2.3.4", util.IpsetNetHashFlag, ""); err != nil {
472472
t.Errorf("TestDestroy failed @ ipsMgr.AddToSet")
473473
}
474474

npm/iptm/iptm.go

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
/*
1+
// Part of this file is modified from the iptables package from Kubernetes.
2+
// https://github.com/kubernetes/kubernetes/blob/master/pkg/util/iptables
23

3-
Part of this file is modified from iptables package from Kuberenetes.
4-
https://github.com/kubernetes/kubernetes/blob/master/pkg/util/iptables
5-
6-
*/
74
package iptm
85

96
import (
@@ -15,7 +12,6 @@ import (
1512
"time"
1613

1714
"golang.org/x/sys/unix"
18-
1915
"github.com/Azure/azure-container-networking/log"
2016
"github.com/Azure/azure-container-networking/npm/metrics"
2117
"github.com/Azure/azure-container-networking/npm/util"
@@ -92,7 +88,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error {
9288
iptMgr.OperationFlag = util.IptablesInsertionFlag
9389
entry.Specs = append([]string{index}, entry.Specs...)
9490
if _, err = iptMgr.Run(entry); err != nil {
95-
log.Errorf("Error: failed to add AZURE-NPM chain to FORWARD chain.")
91+
metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM chain to FORWARD chain.")
9692
return err
9793
}
9894
}
@@ -113,7 +109,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error {
113109
if !exists {
114110
iptMgr.OperationFlag = util.IptablesAppendFlag
115111
if _, err := iptMgr.Run(entry); err != nil {
116-
log.Errorf("Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.")
112+
metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.")
117113
return err
118114
}
119115
}
@@ -139,7 +135,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error {
139135
if !exists {
140136
iptMgr.OperationFlag = util.IptablesAppendFlag
141137
if _, err := iptMgr.Run(entry); err != nil {
142-
log.Errorf("Error: failed to add AZURE-NPM-EGRESS-PORT chain to AZURE-NPM chain.")
138+
metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-INGRESS-PORT chain to AZURE-NPM chain.")
143139
return err
144140
}
145141
}
@@ -165,7 +161,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error {
165161
if !exists {
166162
iptMgr.OperationFlag = util.IptablesAppendFlag
167163
if _, err := iptMgr.Run(entry); err != nil {
168-
log.Errorf("Error: failed to add AZURE-NPM-TARGET-SETS chain to AZURE-NPM chain.")
164+
metrics.SendErrorMetric(util.IptmID, "Error: failed to add AZURE-NPM-TARGET-SETS chain to AZURE-NPM chain.")
169165
return err
170166
}
171167
}
@@ -188,7 +184,7 @@ func (iptMgr *IptablesManager) InitNpmChains() error {
188184
if !exists {
189185
iptMgr.OperationFlag = util.IptablesAppendFlag
190186
if _, err = iptMgr.Run(entry); err != nil {
191-
log.Logf("Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.")
187+
metrics.SendErrorMetric(util.IptmID, "Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.")
192188
return err
193189
}
194190
}
@@ -218,7 +214,7 @@ func (iptMgr *IptablesManager) UninitNpmChains() error {
218214
iptMgr.OperationFlag = util.IptablesDeletionFlag
219215
errCode, err := iptMgr.Run(entry)
220216
if errCode != iptablesErrDoesNotExist && err != nil {
221-
log.Errorf("Error: failed to remove default rule from FORWARD chain.")
217+
metrics.SendErrorMetric(util.IptmID, "Error: failed to add default allow CONNECTED/RELATED rule to AZURE-NPM chain.")
222218
return err
223219
}
224220

@@ -229,7 +225,7 @@ func (iptMgr *IptablesManager) UninitNpmChains() error {
229225
}
230226
errCode, err := iptMgr.Run(entry)
231227
if errCode != iptablesErrDoesNotExist && err != nil {
232-
log.Errorf("Error: failed to flush iptables chain %s.", chain)
228+
metrics.SendErrorMetric(util.IptmID, "Error: failed to flush iptables chain %s.", chain)
233229
}
234230
}
235231

@@ -270,7 +266,7 @@ func (iptMgr *IptablesManager) AddChain(chain string) error {
270266
return nil
271267
}
272268

273-
log.Errorf("Error: failed to create iptables chain %s.", entry.Chain)
269+
metrics.SendErrorMetric(util.IptmID, "Error: failed to create iptables chain %s.", entry.Chain)
274270
return err
275271
}
276272

@@ -290,7 +286,7 @@ func (iptMgr *IptablesManager) DeleteChain(chain string) error {
290286
return nil
291287
}
292288

293-
log.Errorf("Error: failed to delete iptables chain %s.", entry.Chain)
289+
metrics.SendErrorMetric(util.IptmID, "Error: failed to delete iptables chain %s.", entry.Chain)
294290
return err
295291
}
296292

@@ -309,7 +305,7 @@ func (iptMgr *IptablesManager) Add(entry *IptEntry) error {
309305
iptMgr.OperationFlag = util.IptablesInsertionFlag
310306
}
311307
if _, err := iptMgr.Run(entry); err != nil {
312-
log.Errorf("Error: failed to create iptables rules.")
308+
metrics.SendErrorMetric(util.IptmID, "Error: failed to create iptables rules.")
313309
return err
314310
}
315311

@@ -334,7 +330,7 @@ func (iptMgr *IptablesManager) Delete(entry *IptEntry) error {
334330

335331
iptMgr.OperationFlag = util.IptablesDeletionFlag
336332
if _, err := iptMgr.Run(entry); err != nil {
337-
log.Errorf("Error: failed to delete iptables rules.")
333+
metrics.SendErrorMetric(util.IptmID, "Error: failed to delete iptables rules.")
338334
return err
339335
}
340336

@@ -364,7 +360,7 @@ func (iptMgr *IptablesManager) Run(entry *IptEntry) (int, error) {
364360
if msg, failed := err.(*exec.ExitError); failed {
365361
errCode := msg.Sys().(syscall.WaitStatus).ExitStatus()
366362
if errCode > 0 && iptMgr.OperationFlag != util.IptablesCheckFlag {
367-
log.Errorf("Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n"))
363+
metrics.SendErrorMetric(util.IptmID, "Error: There was an error running command: [%s %v] Stderr: [%v, %s]", cmdName, strings.Join(cmdArgs, " "), err, strings.TrimSuffix(string(msg.Stderr), "\n"))
368364
}
369365

370366
return errCode, err
@@ -393,15 +389,15 @@ func (iptMgr *IptablesManager) Save(configFile string) error {
393389
// create the config file for writing
394390
f, err := os.Create(configFile)
395391
if err != nil {
396-
log.Errorf("Error: failed to open file: %s.", configFile)
392+
metrics.SendErrorMetric(util.IptmID, "Error: failed to open file: %s.", configFile)
397393
return err
398394
}
399395
defer f.Close()
400396

401397
cmd := exec.Command(util.IptablesSave)
402398
cmd.Stdout = f
403399
if err := cmd.Start(); err != nil {
404-
log.Errorf("Error: failed to run iptables-save.")
400+
metrics.SendErrorMetric(util.IptmID, "Error: failed to run iptables-save.")
405401
return err
406402
}
407403
cmd.Wait()
@@ -429,15 +425,15 @@ func (iptMgr *IptablesManager) Restore(configFile string) error {
429425
// open the config file for reading
430426
f, err := os.Open(configFile)
431427
if err != nil {
432-
log.Errorf("Error: failed to open file: %s.", configFile)
428+
metrics.SendErrorMetric(util.IptmID, "Error: failed to open file: %s.", configFile)
433429
return err
434430
}
435431
defer f.Close()
436432

437433
cmd := exec.Command(util.IptablesRestore)
438434
cmd.Stdin = f
439435
if err := cmd.Start(); err != nil {
440-
log.Errorf("Error: failed to run iptables-restore.")
436+
metrics.SendErrorMetric(util.IptmID, "Error: failed to run iptables-restore.")
441437
return err
442438
}
443439
cmd.Wait()
@@ -460,7 +456,7 @@ func grabIptablesLocks() (*os.File, error) {
460456
// Grab 1.6.x style lock.
461457
l, err := os.OpenFile(util.IptablesLockFile, os.O_CREATE, 0600)
462458
if err != nil {
463-
log.Logf("Error: failed to open iptables lock file %s.", util.IptablesLockFile)
459+
metrics.SendErrorMetric(util.IptmID, "Error: failed to open iptables lock file %s.", util.IptablesLockFile)
464460
return nil, err
465461
}
466462

@@ -471,7 +467,7 @@ func grabIptablesLocks() (*os.File, error) {
471467

472468
return true, nil
473469
}); err != nil {
474-
log.Logf("Error: failed to acquire new iptables lock: %v.", err)
470+
metrics.SendErrorMetric(util.IptmID, "Error: failed to acquire new iptables lock: %v.", err)
475471
return nil, err
476472
}
477473

@@ -507,4 +503,4 @@ func grabIptablesFileLock(f *os.File) error {
507503
// // Write table headers.
508504
// writeLine(filterChains, "*filter")
509505

510-
// }
506+
// }

0 commit comments

Comments
 (0)