From 3ab4832efa65cf1c7e8858bc334b25d6aef35cd1 Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Mon, 11 Mar 2019 18:07:36 -0700 Subject: [PATCH 1/8] fixed logging part of telemetry --- cni/network/plugin/main.go | 61 ++++++++++++++++++++------ cni/telemetry/service/telemetrymain.go | 4 ++ telemetry/cnstelemetry.go | 4 ++ telemetry/telemetry.go | 21 ++------- telemetry/telemetrybuffer.go | 45 +++++-------------- telemetry/telemetrybuffer_linux.go | 8 ++-- telemetry/telemetrybuffer_windows.go | 6 +-- 7 files changed, 76 insertions(+), 73 deletions(-) diff --git a/cni/network/plugin/main.go b/cni/network/plugin/main.go index 6845f8a176..1402ba6696 100644 --- a/cni/network/plugin/main.go +++ b/cni/network/plugin/main.go @@ -133,6 +133,52 @@ func handleIfCniUpdate(update func(*skel.CmdArgs) error) (bool, error) { return isupdate, nil } +// startTelemetryService - Kills if any telemetry service runs and start new telemetry service +func startTelemetryService(path string) error { + platform.KillProcessByName(telemetry.TelemetryServiceProcessName) + + log.Printf("[cni] Starting telemetry service process") + + if err := common.StartProcess(path); err != nil { + log.Printf("[Telemetry] Failed to start telemetry service process :%v", err) + return err + } + + log.Printf("[cni] Telemetry service started") + + for attempt := 0; attempt < 5; attempt++ { + if telemetry.CheckIfSockExists() { + break + } + + time.Sleep(200 * time.Millisecond) + } + + return nil +} + +func connectToTelemetryService(tb *telemetry.TelemetryBuffer) { + path := fmt.Sprintf("%v/%v", telemetry.CniInstallDir, telemetry.TelemetryServiceProcessName) + + for attempt := 0; attempt < 2; attempt++ { + if err := tb.Connect(); err != nil { + log.Printf("Connection to telemetry socket failed: %v", err) + tb.Cleanup(telemetry.FdName) + + if isExists, _ := common.CheckIfFileExists(path); !isExists { + log.Printf("Skip starting telemetry service as file didn't exist") + return + } + + startTelemetryService(path) + } else { + tb.Connected = true + log.Printf("Connected to telemetry service") + return + } + } +} + // Main is the entry point for CNI network plugin. func main() { @@ -170,20 +216,7 @@ func main() { } tb := telemetry.NewTelemetryBuffer("") - - for attempt := 0; attempt < 2; attempt++ { - err = tb.Connect() - if err != nil { - log.Printf("Connection to telemetry socket failed: %v", err) - tb.Cleanup(telemetry.FdName) - telemetry.StartTelemetryService() - } else { - tb.Connected = true - log.Printf("Connected to telemetry service") - break - } - } - + connectToTelemetryService(tb) defer tb.Close() t := time.Now() diff --git a/cni/telemetry/service/telemetrymain.go b/cni/telemetry/service/telemetrymain.go index 519c789503..b1fe1daa3a 100644 --- a/cni/telemetry/service/telemetrymain.go +++ b/cni/telemetry/service/telemetrymain.go @@ -17,6 +17,10 @@ func main() { var tb *telemetry.TelemetryBuffer var err error + if err := telemetry.InitTelemetryLogger(); err == nil { + defer telemetry.CloseTelemetryLogger() + } + for { tb = telemetry.NewTelemetryBuffer("") err = tb.StartServer() diff --git a/telemetry/cnstelemetry.go b/telemetry/cnstelemetry.go index b5e15eff04..bcdc45387d 100644 --- a/telemetry/cnstelemetry.go +++ b/telemetry/cnstelemetry.go @@ -25,6 +25,10 @@ const ( // SendCnsTelemetry - handles cns telemetry reports func SendCnsTelemetry(interval int, reports chan interface{}, service *restserver.HTTPRestService, telemetryStopProcessing chan bool) { + if err := InitTelemetryLogger(); err == nil { + defer CloseTelemetryLogger() + } + CONNECT: telemetryBuffer := NewTelemetryBuffer("") err := telemetryBuffer.StartServer() diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index 488044af34..1228f34420 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -17,6 +17,7 @@ import ( "strings" "github.com/Azure/azure-container-networking/common" + "github.com/Azure/azure-container-networking/log" "github.com/Azure/azure-container-networking/platform" ) @@ -237,19 +238,6 @@ func (report *NPMReport) GetReport(clusterID, nodeName, npmVersion, kubernetesVe func (reportMgr *ReportManager) SendReport(tb *TelemetryBuffer) error { var err error if tb != nil && tb.Connected { - telemetryLogger.Printf("[Telemetry] Going to send Telemetry report to hostnetagent") - - switch reportMgr.Report.(type) { - case *CNIReport: - telemetryLogger.Printf("[Telemetry] %+v", reportMgr.Report.(*CNIReport)) - case *NPMReport: - telemetryLogger.Printf("[Telemetry] %+v", reportMgr.Report.(*NPMReport)) - case *DNCReport: - telemetryLogger.Printf("[Telemetry] %+v", reportMgr.Report.(*DNCReport)) - default: - telemetryLogger.Printf("[Telemetry] Invalid report type") - } - report, err := reportMgr.ReportToBytes() if err == nil { // If write fails, try to re-establish connections as server/client @@ -257,8 +245,6 @@ func (reportMgr *ReportManager) SendReport(tb *TelemetryBuffer) error { tb.Cancel() } } - } else { - err = fmt.Errorf("Not connected to telemetry server or tb is nil") } return err @@ -284,13 +270,12 @@ func (reportMgr *ReportManager) SetReportState(telemetryFile string) error { _, err = f.Write(reportBytes) if err != nil { - telemetryLogger.Printf("[Telemetry] Error while writing to file %v", err) + log.Printf("[Telemetry] Error while writing to file %v", err) return fmt.Errorf("[Telemetry] Error while writing to file %v", err) } // set IsNewInstance in report reflect.ValueOf(reportMgr.Report).Elem().FieldByName("IsNewInstance").SetBool(false) - telemetryLogger.Printf("[Telemetry] SetReportState succeeded") return nil } @@ -298,7 +283,7 @@ func (reportMgr *ReportManager) SetReportState(telemetryFile string) error { func (reportMgr *ReportManager) GetReportState(telemetryFile string) bool { // try to set IsNewInstance in report if _, err := os.Stat(telemetryFile); os.IsNotExist(err) { - telemetryLogger.Printf("[Telemetry] File not exist %v", telemetryFile) + log.Printf("[Telemetry] File not exist %v", telemetryFile) reflect.ValueOf(reportMgr.Report).Elem().FieldByName("IsNewInstance").SetBool(true) return false } diff --git a/telemetry/telemetrybuffer.go b/telemetry/telemetrybuffer.go index cb5436a984..967f8ce780 100644 --- a/telemetry/telemetrybuffer.go +++ b/telemetry/telemetrybuffer.go @@ -14,9 +14,7 @@ import ( "strings" "time" - "github.com/Azure/azure-container-networking/common" "github.com/Azure/azure-container-networking/log" - "github.com/Azure/azure-container-networking/platform" ) // FdName - file descriptor name @@ -62,6 +60,14 @@ type Payload struct { CNSReports []CNSReport } +func InitTelemetryLogger() error { + return telemetryLogger.SetTarget(log.TargetLogfile) +} + +func CloseTelemetryLogger() { + telemetryLogger.Close() +} + // NewTelemetryBuffer - create a new TelemetryBuffer func NewTelemetryBuffer(hostReportURL string) *TelemetryBuffer { var tb TelemetryBuffer @@ -78,11 +84,6 @@ func NewTelemetryBuffer(hostReportURL string) *TelemetryBuffer { tb.payload.NPMReports = make([]NPMReport, 0) tb.payload.CNSReports = make([]CNSReport, 0) - err := telemetryLogger.SetTarget(log.TargetLogfile) - if err != nil { - fmt.Printf("Failed to configure logging: %v\n", err) - } - return &tb } @@ -132,9 +133,9 @@ func (tb *TelemetryBuffer) StartServer() error { tb.data <- cnsReport } } else { - telemetryLogger.Printf("Server closing client connection") for index, value := range tb.connections { if value == conn { + telemetryLogger.Printf("Server closing client connection") conn.Close() tb.connections = remove(tb.connections, index) return @@ -228,7 +229,6 @@ func (tb *TelemetryBuffer) Cancel() { // Close - close all connections func (tb *TelemetryBuffer) Close() { if tb.client != nil { - telemetryLogger.Printf("client close") tb.client.Close() tb.client = nil } @@ -364,11 +364,12 @@ func getHostMetadata() (Metadata, error) { if err == nil { var metadata Metadata if err = json.Unmarshal(content, &metadata); err == nil { - telemetryLogger.Printf("[Telemetry] Returning hostmetadata from state") return metadata, nil } } + telemetryLogger.Printf("[Telemetry] Request metadata from wireserver") + req, err := http.NewRequest("GET", metadataURL, nil) if err != nil { return Metadata{}, err @@ -398,27 +399,3 @@ func getHostMetadata() (Metadata, error) { return metareport.Metadata, err } - -// StartTelemetryService - Kills if any telemetry service runs and start new telemetry service -func StartTelemetryService() error { - platform.KillProcessByName(telemetryServiceProcessName) - - telemetryLogger.Printf("[Telemetry] Starting telemetry service process") - path := fmt.Sprintf("%v/%v", cniInstallDir, telemetryServiceProcessName) - if err := common.StartProcess(path); err != nil { - telemetryLogger.Printf("[Telemetry] Failed to start telemetry service process :%v", err) - return err - } - - telemetryLogger.Printf("[Telemetry] Telemetry service started") - - for attempt := 0; attempt < 5; attempt++ { - if checkIfSockExists() { - break - } - - time.Sleep(200 * time.Millisecond) - } - - return nil -} diff --git a/telemetry/telemetrybuffer_linux.go b/telemetry/telemetrybuffer_linux.go index ba6cced8b1..78b71c30b5 100644 --- a/telemetry/telemetrybuffer_linux.go +++ b/telemetry/telemetrybuffer_linux.go @@ -10,9 +10,9 @@ import ( ) const ( - fdTemplate = "/tmp/%s.sock" - telemetryServiceProcessName = "azure-vnet-telemetry" - cniInstallDir = "/opt/cni/bin" + fdTemplate = "/var/run/%s.sock" + TelemetryServiceProcessName = "azure-vnet-telemetry" + CniInstallDir = "/opt/cni/bin" metadataFile = "/tmp/azuremetadata.json" ) @@ -41,7 +41,7 @@ func (tb *TelemetryBuffer) Cleanup(name string) error { return os.Remove(fmt.Sprintf(fdTemplate, name)) } -func checkIfSockExists() bool { +func CheckIfSockExists() bool { if _, err := os.Stat(fmt.Sprintf(fdTemplate, FdName)); !os.IsNotExist(err) { return true } diff --git a/telemetry/telemetrybuffer_windows.go b/telemetry/telemetrybuffer_windows.go index a47efc0052..f0703d2e36 100644 --- a/telemetry/telemetrybuffer_windows.go +++ b/telemetry/telemetrybuffer_windows.go @@ -12,8 +12,8 @@ import ( const ( fdTemplate = "\\\\.\\pipe\\%s" - telemetryServiceProcessName = "azure-vnet-telemetry.exe" - cniInstallDir = "c:\\k\\azurecni\\bin" + TelemetryServiceProcessName = "azure-vnet-telemetry.exe" + CniInstallDir = "c:\\k\\azurecni\\bin" metadataFile = "azuremetadata.json" ) @@ -43,7 +43,7 @@ func (tb *TelemetryBuffer) Cleanup(name string) error { } // Check if telemetry unix domain socket exists -func checkIfSockExists() bool { +func CheckIfSockExists() bool { if _, err := os.Stat(fmt.Sprintf(fdTemplate, FdName)); !os.IsNotExist(err) { return true } From 1fff32bc70dbdeb3ebb7ffdc77d4ef919e825e80 Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Mon, 11 Mar 2019 18:29:18 -0700 Subject: [PATCH 2/8] fixed an issue --- telemetry/telemetrybuffer.go | 1 - 1 file changed, 1 deletion(-) diff --git a/telemetry/telemetrybuffer.go b/telemetry/telemetrybuffer.go index ffa7605f2e..23ea489d33 100644 --- a/telemetry/telemetrybuffer.go +++ b/telemetry/telemetrybuffer.go @@ -259,7 +259,6 @@ func (tb *TelemetryBuffer) Close() { if tb.listener != nil { telemetryLogger.Printf("server close") tb.listener.Close() - tb.listener = nil } tb.mutex.Lock() From 231337bd7eeace7033794b990a55d50ea0aa26a1 Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Mon, 11 Mar 2019 18:36:40 -0700 Subject: [PATCH 3/8] added more test coverage --- telemetry/telemetry_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/telemetry/telemetry_test.go b/telemetry/telemetry_test.go index a12703d107..aead000e32 100644 --- a/telemetry/telemetry_test.go +++ b/telemetry/telemetry_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/Azure/azure-container-networking/common" + "github.com/Azure/azure-container-networking/telemetry" ) var reportManager *ReportManager @@ -94,6 +95,10 @@ func TestMain(m *testing.M) { reportManager.ContentType = "application/json" reportManager.Report = &CNIReport{} + if err := InitTelemetryLogger(); err == nil { + defer CloseTelemetryLogger() + } + tb = NewTelemetryBuffer(hostAgentUrl) err = tb.StartServer() if err == nil { @@ -214,6 +219,10 @@ func TestClientCloseTelemetryConnection(t *testing.T) { go tb.BufferAndPushData(0) } + if !telemetry.CheckIfSockExists() { + t.Errorf("telemetry sock doesn't exist") + } + // create client telemetrybuffer and connect to server tb1 := NewTelemetryBuffer(hostAgentUrl) if err := tb1.Connect(); err != nil { From 157c76a1ab87d2732df17bf06ffe2dada281e59f Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Mon, 11 Mar 2019 18:43:32 -0700 Subject: [PATCH 4/8] fixed an issue --- telemetry/telemetry_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/telemetry/telemetry_test.go b/telemetry/telemetry_test.go index aead000e32..dac0264315 100644 --- a/telemetry/telemetry_test.go +++ b/telemetry/telemetry_test.go @@ -15,7 +15,6 @@ import ( "time" "github.com/Azure/azure-container-networking/common" - "github.com/Azure/azure-container-networking/telemetry" ) var reportManager *ReportManager @@ -219,7 +218,7 @@ func TestClientCloseTelemetryConnection(t *testing.T) { go tb.BufferAndPushData(0) } - if !telemetry.CheckIfSockExists() { + if !CheckIfSockExists() { t.Errorf("telemetry sock doesn't exist") } From 8775e411be83e82e45bc8cc8d240b9c36c74a638 Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Mon, 11 Mar 2019 19:35:11 -0700 Subject: [PATCH 5/8] fixed invalid condition and added UT --- telemetry/telemetry.go | 16 +++++++++++----- telemetry/telemetry_test.go | 8 ++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index 1228f34420..894e9e027f 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -237,8 +237,10 @@ func (report *NPMReport) GetReport(clusterID, nodeName, npmVersion, kubernetesVe // SendReport will send telemetry report to HostNetAgent. func (reportMgr *ReportManager) SendReport(tb *TelemetryBuffer) error { var err error + var report []byte + if tb != nil && tb.Connected { - report, err := reportMgr.ReportToBytes() + report, err = reportMgr.ReportToBytes() if err == nil { // If write fails, try to re-establish connections as server/client if _, err = tb.Write(report); err != nil { @@ -415,7 +417,10 @@ func (report *CNIReport) GetOrchestratorDetails() { } // ReportToBytes - returns the report bytes -func (reportMgr *ReportManager) ReportToBytes() (report []byte, err error) { +func (reportMgr *ReportManager) ReportToBytes() ([]byte, error) { + var err error + var report []byte + switch reportMgr.Report.(type) { case *CNIReport: case *NPMReport: @@ -425,9 +430,10 @@ func (reportMgr *ReportManager) ReportToBytes() (report []byte, err error) { err = fmt.Errorf("[Telemetry] Invalid report type") } - if err == nil { - report, err = json.Marshal(reportMgr.Report) + if err != nil { + return []byte{}, err } - return + report, err = json.Marshal(reportMgr.Report) + return report, err } diff --git a/telemetry/telemetry_test.go b/telemetry/telemetry_test.go index dac0264315..5a529abfb9 100644 --- a/telemetry/telemetry_test.go +++ b/telemetry/telemetry_test.go @@ -162,6 +162,14 @@ func TestSendTelemetry(t *testing.T) { if err != nil { t.Errorf("SendTelemetry failed due to %v", err) } + + i := 3 + rpMgr := &ReportManager{} + rpMgr.Report = &i + err = rpMgr.SendReport(tb) + if err == nil { + t.Errorf("SendTelemetry not failed for incorrect report type") + } } func TestReceiveTelemetryData(t *testing.T) { From 1ee235b94d5090184322a7f1fbf96123383725a9 Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Tue, 12 Mar 2019 15:53:55 -0700 Subject: [PATCH 6/8] initialize telemetry logger as part of telemetry server --- cni/telemetry/service/telemetrymain.go | 4 ---- telemetry/cnstelemetry.go | 4 ---- telemetry/telemetrybuffer.go | 4 +++- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/cni/telemetry/service/telemetrymain.go b/cni/telemetry/service/telemetrymain.go index b1fe1daa3a..519c789503 100644 --- a/cni/telemetry/service/telemetrymain.go +++ b/cni/telemetry/service/telemetrymain.go @@ -17,10 +17,6 @@ func main() { var tb *telemetry.TelemetryBuffer var err error - if err := telemetry.InitTelemetryLogger(); err == nil { - defer telemetry.CloseTelemetryLogger() - } - for { tb = telemetry.NewTelemetryBuffer("") err = tb.StartServer() diff --git a/telemetry/cnstelemetry.go b/telemetry/cnstelemetry.go index ad0a5ea549..a554def7f7 100644 --- a/telemetry/cnstelemetry.go +++ b/telemetry/cnstelemetry.go @@ -25,10 +25,6 @@ const ( // SendCnsTelemetry - handles cns telemetry reports func SendCnsTelemetry(interval int, reports chan interface{}, service *restserver.HTTPRestService, telemetryStopProcessing chan bool) { - if err := InitTelemetryLogger(); err == nil { - defer CloseTelemetryLogger() - } - CONNECT: telemetryBuffer := NewTelemetryBuffer("") err := telemetryBuffer.StartServer() diff --git a/telemetry/telemetrybuffer.go b/telemetry/telemetrybuffer.go index 23ea489d33..ba8fa7b052 100644 --- a/telemetry/telemetrybuffer.go +++ b/telemetry/telemetrybuffer.go @@ -108,6 +108,8 @@ func (tb *TelemetryBuffer) StartServer() error { return err } + InitTelemetryLogger() + telemetryLogger.Printf("Telemetry service started") // Spawn server goroutine to handle incoming connections go func() { @@ -259,6 +261,7 @@ func (tb *TelemetryBuffer) Close() { if tb.listener != nil { telemetryLogger.Printf("server close") tb.listener.Close() + CloseTelemetryLogger() } tb.mutex.Lock() @@ -266,7 +269,6 @@ func (tb *TelemetryBuffer) Close() { for _, conn := range tb.connections { if conn != nil { - telemetryLogger.Printf("connection close as server closed") conn.Close() } } From 0dc3bcfc28ff86793732aba01d102bfa4012b00b Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Wed, 13 Mar 2019 12:05:07 -0700 Subject: [PATCH 7/8] changed checkifsockexists to sockexists --- cni/network/plugin/main.go | 2 +- telemetry/telemetry_test.go | 2 +- telemetry/telemetrybuffer_linux.go | 2 +- telemetry/telemetrybuffer_windows.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cni/network/plugin/main.go b/cni/network/plugin/main.go index 1402ba6696..561c4fc8a6 100644 --- a/cni/network/plugin/main.go +++ b/cni/network/plugin/main.go @@ -147,7 +147,7 @@ func startTelemetryService(path string) error { log.Printf("[cni] Telemetry service started") for attempt := 0; attempt < 5; attempt++ { - if telemetry.CheckIfSockExists() { + if telemetry.SockExists() { break } diff --git a/telemetry/telemetry_test.go b/telemetry/telemetry_test.go index 5a529abfb9..639585f1b3 100644 --- a/telemetry/telemetry_test.go +++ b/telemetry/telemetry_test.go @@ -226,7 +226,7 @@ func TestClientCloseTelemetryConnection(t *testing.T) { go tb.BufferAndPushData(0) } - if !CheckIfSockExists() { + if !SockExists() { t.Errorf("telemetry sock doesn't exist") } diff --git a/telemetry/telemetrybuffer_linux.go b/telemetry/telemetrybuffer_linux.go index 78b71c30b5..b5a68bc33a 100644 --- a/telemetry/telemetrybuffer_linux.go +++ b/telemetry/telemetrybuffer_linux.go @@ -41,7 +41,7 @@ func (tb *TelemetryBuffer) Cleanup(name string) error { return os.Remove(fmt.Sprintf(fdTemplate, name)) } -func CheckIfSockExists() bool { +func SockExists() bool { if _, err := os.Stat(fmt.Sprintf(fdTemplate, FdName)); !os.IsNotExist(err) { return true } diff --git a/telemetry/telemetrybuffer_windows.go b/telemetry/telemetrybuffer_windows.go index f0703d2e36..372bf03f07 100644 --- a/telemetry/telemetrybuffer_windows.go +++ b/telemetry/telemetrybuffer_windows.go @@ -43,7 +43,7 @@ func (tb *TelemetryBuffer) Cleanup(name string) error { } // Check if telemetry unix domain socket exists -func CheckIfSockExists() bool { +func SockExists() bool { if _, err := os.Stat(fmt.Sprintf(fdTemplate, FdName)); !os.IsNotExist(err) { return true } From a4119fb4e47dd055878e722ac1ef079620a7d46e Mon Sep 17 00:00:00 2001 From: Tamilmani Manoharan Date: Wed, 13 Mar 2019 13:34:23 -0700 Subject: [PATCH 8/8] changed interval time to push telemetry data to host --- telemetry/telemetrybuffer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/telemetry/telemetrybuffer.go b/telemetry/telemetrybuffer.go index ba8fa7b052..d88e473277 100644 --- a/telemetry/telemetrybuffer.go +++ b/telemetry/telemetrybuffer.go @@ -28,7 +28,7 @@ const ( FdName = "azure-vnet-telemetry" Delimiter = '\n' azureHostReportURL = "http://168.63.129.16/machine/plugins?comp=netagent&type=payload" - DefaultInterval = 10 * time.Second + DefaultInterval = 30 * time.Second logName = "azure-vnet-telemetry" MaxPayloadSize uint16 = 65535 dnc = "DNC"