diff --git a/aitelemetry/api.go b/aitelemetry/api.go index d23389edd4..d23f055eb2 100644 --- a/aitelemetry/api.go +++ b/aitelemetry/api.go @@ -14,6 +14,13 @@ type Report struct { CustomDimensions map[string]string } +// Application event structure +type Event struct { + EventName string + ResourceID string + Properties map[string]string +} + // Application metrics structure type Metric struct { Name string @@ -54,6 +61,9 @@ type TelemetryHandle interface { // TrackMetric function sends metric to appinsights resource. It overrides few of the existing columns with app information // and for rest it uses custom dimesion TrackMetric(metric Metric) + // TrackEvent function sends events to appinsights resource. It overrides a few of the existing columns + // with app information. + TrackEvent(aiEvent Event) // Close - should be called for each NewAITelemetry call. Will release resources acquired Close(timeout int) } diff --git a/aitelemetry/telemetrywrapper.go b/aitelemetry/telemetrywrapper.go index aa18e7df84..1047b377f2 100644 --- a/aitelemetry/telemetrywrapper.go +++ b/aitelemetry/telemetrywrapper.go @@ -15,6 +15,7 @@ const ( resourceGroupStr = "ResourceGroup" vmSizeStr = "VMSize" osVersionStr = "OSVersion" + osStr = "OS" locationStr = "Region" appNameStr = "AppName" subscriptionIDStr = "SubscriptionID" @@ -207,19 +208,63 @@ func (th *telemetryHandle) TrackLog(report Report) { // Check if metadata is populated if metadata.SubscriptionID != "" { // copy metadata from wireserver to trace - trace.Tags.User().SetAccountId(th.metadata.SubscriptionID) - trace.Tags.User().SetId(th.metadata.VMName) - trace.Properties[locationStr] = th.metadata.Location - trace.Properties[resourceGroupStr] = th.metadata.ResourceGroupName - trace.Properties[vmSizeStr] = th.metadata.VMSize - trace.Properties[osVersionStr] = th.metadata.OSVersion - trace.Properties[vmIDStr] = th.metadata.VMID + trace.Tags.User().SetAccountId(metadata.SubscriptionID) + trace.Tags.User().SetId(metadata.VMName) + 
trace.Properties[locationStr] = metadata.Location + trace.Properties[resourceGroupStr] = metadata.ResourceGroupName + trace.Properties[vmSizeStr] = metadata.VMSize + trace.Properties[osVersionStr] = metadata.OSVersion + trace.Properties[vmIDStr] = metadata.VMID + trace.Tags.Session().SetId(metadata.VMID) } // send to appinsights resource th.client.Track(trace) } +// TrackEvent function sends events to appinsights resource. It overrides a few of the existing columns +// with app information. +func (th *telemetryHandle) TrackEvent(event Event) { + // Initialize new event message + aiEvent := appinsights.NewEventTelemetry(event.EventName) + // OperationId => resourceID (e.g.: NCID) + aiEvent.Tags.Operation().SetId(event.ResourceID) + + // Copy the properties, if supplied + if event.Properties != nil { + for key, value := range event.Properties { + aiEvent.Properties[key] = value + } + } + + // Acquire read lock to read metadata + th.rwmutex.RLock() + metadata := th.metadata + th.rwmutex.RUnlock() + + // Add metadata + if metadata.SubscriptionID != "" { + aiEvent.Tags.User().SetAccountId(metadata.SubscriptionID) + // AnonId => VMName + aiEvent.Tags.User().SetId(metadata.VMName) + // SessionId => VMID + aiEvent.Tags.Session().SetId(metadata.VMID) + aiEvent.Properties[locationStr] = metadata.Location + aiEvent.Properties[resourceGroupStr] = metadata.ResourceGroupName + aiEvent.Properties[vmSizeStr] = metadata.VMSize + aiEvent.Properties[osVersionStr] = metadata.OSVersion + aiEvent.Properties[vmIDStr] = metadata.VMID + aiEvent.Properties[vmNameStr] = metadata.VMName + } + + aiEvent.Tags.Operation().SetParentId(th.appVersion) + aiEvent.Tags.User().SetAuthUserId(runtime.GOOS) + aiEvent.Properties[osStr] = runtime.GOOS + aiEvent.Properties[appNameStr] = th.appName + aiEvent.Properties[versionStr] = th.appVersion + th.client.Track(aiEvent) +} + // TrackMetric function sends metric to appinsights resource. 
It overrides few of the existing columns with app information // and for rest it uses custom dimesion func (th *telemetryHandle) TrackMetric(metric Metric) { @@ -233,12 +278,13 @@ func (th *telemetryHandle) TrackMetric(metric Metric) { // Check if metadata is populated if metadata.SubscriptionID != "" { - aimetric.Properties[locationStr] = th.metadata.Location - aimetric.Properties[subscriptionIDStr] = th.metadata.SubscriptionID - aimetric.Properties[vmNameStr] = th.metadata.VMName + aimetric.Properties[locationStr] = metadata.Location + aimetric.Properties[subscriptionIDStr] = metadata.SubscriptionID + aimetric.Properties[vmNameStr] = metadata.VMName aimetric.Properties[versionStr] = th.appVersion - aimetric.Properties[resourceGroupStr] = th.metadata.ResourceGroupName - aimetric.Properties[vmIDStr] = th.metadata.VMID + aimetric.Properties[resourceGroupStr] = metadata.ResourceGroupName + aimetric.Properties[vmIDStr] = metadata.VMID + aimetric.Tags.Session().SetId(metadata.VMID) } // copy custom dimensions diff --git a/aitelemetry/telemetrywrapper_test.go b/aitelemetry/telemetrywrapper_test.go index 4897fe583d..0c1f3f70e8 100644 --- a/aitelemetry/telemetrywrapper_test.go +++ b/aitelemetry/telemetrywrapper_test.go @@ -129,6 +129,18 @@ func TestTrackLog(t *testing.T) { th.TrackLog(report) } +func TestTrackEvent(t *testing.T) { + event := Event{ + EventName: "testEvent", + ResourceID: "SomeResourceId", + Properties: make(map[string]string), + } + + event.Properties["P1"] = "V1" + event.Properties["P2"] = "V2" + th.TrackEvent(event) +} + func TestClose(t *testing.T) { th.Close(10) } diff --git a/cns/configuration/cns_config.json b/cns/configuration/cns_config.json index fb72310285..44e134e297 100644 --- a/cns/configuration/cns_config.json +++ b/cns/configuration/cns_config.json @@ -5,6 +5,7 @@ "RefreshIntervalInSecs": 15, "DisableAll": false, "HeartBeatIntervalInMins": 30, - "DebugMode": false + "DebugMode": false, + "SnapshotIntervalInMins": 60 } } diff --git a/cns/configuration/configuration.go b/cns/configuration/configuration.go index 
18e7a4c680..edaf70a18d 100644 --- a/cns/configuration/configuration.go +++ b/cns/configuration/configuration.go @@ -26,6 +26,8 @@ type TelemetrySettings struct { DisableTrace bool // Flag to Disable sending metric. DisableMetric bool + // Flag to Disable sending events. + DisableEvent bool // Configure how many bytes can be sent in one call to the data collector TelemetryBatchSizeBytes int // Configure the maximum delay before sending queued telemetry in milliseconds @@ -38,6 +40,8 @@ type TelemetrySettings struct { RefreshIntervalInSecs int // Disable debug logging for telemetry messages DebugMode bool + // Interval for sending snapshot events. + SnapshotIntervalInMins int } // This functions reads cns config file and save it in a structure @@ -89,6 +93,10 @@ func setTelemetrySettingDefaults(telemetrySettings *TelemetrySettings) { // set the default Heartbeat interval to 30 minutes telemetrySettings.HeartBeatIntervalInMins = 30 } + + if telemetrySettings.SnapshotIntervalInMins <= 0 { + telemetrySettings.SnapshotIntervalInMins = 60 + } } // Set Default values of CNS config if not specified diff --git a/cns/logger/constants.go b/cns/logger/constants.go index 7ef87ce12a..3ece449301 100644 --- a/cns/logger/constants.go +++ b/cns/logger/constants.go @@ -8,4 +8,15 @@ const ( //Dimensions OrchestratorTypeStr = "OrchestratorType" NodeIDStr = "NodeID" + // CNS Snapshot properties + CnsNCSnapshotEventStr = "CNSNCSnapshot" + IpConfigurationStr = "IPConfiguration" + LocalIPConfigurationStr = "LocalIPConfiguration" + PrimaryInterfaceIdentifierStr = "PrimaryInterfaceIdentifier" + MultiTenancyInfoStr = "MultiTenancyInfo" + CnetAddressSpaceStr = "CnetAddressSpace" + AllowNCToHostCommunicationStr = "AllowNCToHostCommunication" + AllowHostToNCCommunicationStr = "AllowHostToNCCommunication" + NetworkContainerTypeStr = "NetworkContainerType" + OrchestratorContextStr = "OrchestratorContext" ) diff --git a/cns/logger/log.go b/cns/logger/log.go index d83bc7889e..450551e46e 100644 --- 
a/cns/logger/log.go +++ b/cns/logger/log.go @@ -25,6 +25,7 @@ type CNSLogger struct { NodeID string DisableTraceLogging bool DisableMetricLogging bool + DisableEventLogging bool } // Initialize CNS Logger @@ -35,7 +36,7 @@ func InitLogger(fileName string, logLevel, logTarget int, logDir string) { } // Intialize CNS AI telmetry instance -func InitAI(aiConfig aitelemetry.AIConfig, disableTraceLogging, disableMetricLogging bool) { +func InitAI(aiConfig aitelemetry.AIConfig, disableTraceLogging, disableMetricLogging bool, disableEventLogging bool) { var err error Log.th, err = aitelemetry.NewAITelemetry("", aiMetadata, aiConfig) @@ -47,6 +48,7 @@ func InitAI(aiConfig aitelemetry.AIConfig, disableTraceLogging, disableMetricLog Log.logger.Printf("AI Telemetry Handle created") Log.DisableMetricLogging = disableMetricLogging Log.DisableTraceLogging = disableTraceLogging + Log.DisableEventLogging = disableEventLogging } func InitReportChannel(reports chan interface{}) { @@ -104,6 +106,23 @@ func Debugf(format string, args ...interface{}) { sendTraceInternal(msg) } +// LogEvent stamps the event with the orchestrator type and node ID and sends it to AI telemetry. +// It is a no-op when the telemetry handle is not initialized or event logging is disabled. +func LogEvent(event aitelemetry.Event) { + if Log.th == nil || Log.DisableEventLogging { + return + } + + // Writing to a nil map panics; callers may pass an Event without Properties. + if event.Properties == nil { + event.Properties = make(map[string]string) + } + + event.Properties[OrchestratorTypeStr] = Log.Orchestrator + event.Properties[NodeIDStr] = Log.NodeID + Log.th.TrackEvent(event) +} + func Errorf(format string, args ...interface{}) { Log.logger.Errorf(format, args...) diff --git a/cns/restserver/restserver.go b/cns/restserver/restserver.go index 231a09c173..0366c0149f 100644 --- a/cns/restserver/restserver.go +++ b/cns/restserver/restserver.go @@ -13,6 +13,7 @@ import ( "sync" "time" + "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/cns/common" "github.com/Azure/azure-container-networking/cns/dockerclient" @@ -88,6 +89,7 @@ type networkInfo struct { // HTTPService describes the min API interface that every service should have. 
type HTTPService interface { common.ServiceAPI + SendNCSnapShotPeriodically(int, chan bool) } // NewHTTPRestService creates a new HTTP Service object. @@ -205,6 +207,7 @@ func (service *HTTPRestService) Start(config *common.ServiceConfig) error { logger.SetContextDetails(service.state.OrchestratorType, service.state.NodeID) logger.Printf("[Azure CNS] Listening.") + return nil } @@ -1174,6 +1177,12 @@ func (service *HTTPRestService) createOrUpdateNetworkContainer(w http.ResponseWr reserveResp := &cns.CreateNetworkContainerResponse{Response: resp} err = service.Listener.Encode(w, &reserveResp) + + // If the NC was created successfully, log NC snapshot. + if returnCode == 0 { + logNCSnapshot(req) + } + logger.Response(service.Name, reserveResp, resp.ReturnCode, ReturnCodeToString(resp.ReturnCode), err) } @@ -1961,3 +1970,50 @@ func (service *HTTPRestService) unpublishNetworkContainer(w http.ResponseWriter, err = service.Listener.Encode(w, &response) logger.Response(service.Name, response, response.Response.ReturnCode, ReturnCodeToString(response.Response.ReturnCode), err) } + +func logNCSnapshot(createNetworkContainerRequest cns.CreateNetworkContainerRequest) { + var aiEvent = aitelemetry.Event{ + EventName: logger.CnsNCSnapshotEventStr, + Properties: make(map[string]string), + ResourceID: createNetworkContainerRequest.NetworkContainerid, + } + + aiEvent.Properties[logger.IpConfigurationStr] = fmt.Sprintf("%+v", createNetworkContainerRequest.IPConfiguration) + aiEvent.Properties[logger.LocalIPConfigurationStr] = fmt.Sprintf("%+v", createNetworkContainerRequest.LocalIPConfiguration) + aiEvent.Properties[logger.PrimaryInterfaceIdentifierStr] = createNetworkContainerRequest.PrimaryInterfaceIdentifier + aiEvent.Properties[logger.MultiTenancyInfoStr] = fmt.Sprintf("%+v", createNetworkContainerRequest.MultiTenancyInfo) + aiEvent.Properties[logger.CnetAddressSpaceStr] = fmt.Sprintf("%+v", createNetworkContainerRequest.CnetAddressSpace) + 
aiEvent.Properties[logger.AllowNCToHostCommunicationStr] = fmt.Sprintf("%t", createNetworkContainerRequest.AllowNCToHostCommunication) + aiEvent.Properties[logger.AllowHostToNCCommunicationStr] = fmt.Sprintf("%t", createNetworkContainerRequest.AllowHostToNCCommunication) + aiEvent.Properties[logger.NetworkContainerTypeStr] = createNetworkContainerRequest.NetworkContainerType + aiEvent.Properties[logger.OrchestratorContextStr] = fmt.Sprintf("%s", createNetworkContainerRequest.OrchestratorContext) + + logger.LogEvent(aiEvent) +} + +// Sends network container snapshots to App Insights telemetry. +func (service *HTTPRestService) logNCSnapshots() { + + for _, ncStatus := range service.state.ContainerStatus { + logNCSnapshot(ncStatus.CreateNetworkContainerRequest) + } + + logger.Printf("[Azure CNS] Logging periodic NC snapshots. NC Count %d", len(service.state.ContainerStatus)) +} + +// SendNCSnapShotPeriodically logs NC snapshots once, then every ncSnapshotIntervalInMinutes minutes until stopSnapshot receives. +func (service *HTTPRestService) SendNCSnapShotPeriodically(ncSnapshotIntervalInMinutes int, stopSnapshot chan bool) { + // Emit snapshot on startup and then emit it periodically. + service.logNCSnapshots() + + ticker := time.NewTicker(time.Minute * time.Duration(ncSnapshotIntervalInMinutes)) + defer ticker.Stop() // release the ticker once the stop signal is received + for { + select { + case <-ticker.C: + service.logNCSnapshots() + case <-stopSnapshot: + return + } + } +} diff --git a/cns/service/main.go b/cns/service/main.go index 200651090e..f5df2f8ed9 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -39,6 +39,7 @@ var version string var reports = make(chan interface{}) var telemetryStopProcessing = make(chan bool) var stopheartbeat = make(chan bool) +var stopSnapshots = make(chan bool) // Command line arguments for CNS. 
var args = acn.ArgumentList{ @@ -254,7 +255,7 @@ func main() { DebugMode: ts.DebugMode, } - logger.InitAI(aiConfig, ts.DisableTrace, ts.DisableMetric) + logger.InitAI(aiConfig, ts.DisableTrace, ts.DisableMetric, ts.DisableEvent) logger.InitReportChannel(reports) } @@ -312,6 +313,7 @@ func main() { if !disableTelemetry { go logger.SendToTelemetryService(reports, telemetryStopProcessing) go logger.SendHeartBeat(cnsconfig.TelemetrySettings.HeartBeatIntervalInMins, stopheartbeat) + go httpRestService.SendNCSnapShotPeriodically(cnsconfig.TelemetrySettings.SnapshotIntervalInMins, stopSnapshots) } var netPlugin network.NetPlugin @@ -387,6 +389,7 @@ func main() { if !disableTelemetry { telemetryStopProcessing <- true stopheartbeat <- true + stopSnapshots <- true } // Cleanup.