Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ acncli-binary:

# Build the Azure CNS binary.
azure-cns-binary:
cd $(CNS_DIR) && CGO_ENABLED=0 go build -v -o $(CNS_BUILD_DIR)/azure-cns$(EXE_EXT) -ldflags "-X main.version=$(VERSION) -X $(CNS_AI_PATH)=$(CNS_AI_ID)" -gcflags="-dwarflocationlists=true"
cd $(CNS_DIR) && CGO_ENABLED=0 go build -v -o $(CNS_BUILD_DIR)/azure-cns$(EXE_EXT) -ldflags "-X main.version=$(VERSION) -X $(CNS_AI_PATH)=$(CNS_AI_ID) -X $(CNI_AI_PATH)=$(CNI_AI_ID)" -gcflags="-dwarflocationlists=true"

# Build the Azure NPM binary.
azure-npm-binary:
Expand Down Expand Up @@ -392,8 +392,11 @@ cni-archive: azure-vnet-binary azure-vnet-ipam-binary azure-vnet-ipamv6-binary a

$(MKDIR) $(CNI_MULTITENANCY_BUILD_DIR)
cp cni/azure-$(GOOS)-multitenancy.conflist $(CNI_MULTITENANCY_BUILD_DIR)/10-azure.conflist
cp $(CNI_BUILD_DIR)/azure-vnet$(EXE_EXT) $(CNI_BUILD_DIR)/azure-vnet-ipam$(EXE_EXT) $(CNI_MULTITENANCY_BUILD_DIR)
ifeq ($(GOOS),linux)
cp telemetry/azure-vnet-telemetry.config $(CNI_MULTITENANCY_BUILD_DIR)/azure-vnet-telemetry.config
cp $(CNI_BUILD_DIR)/azure-vnet$(EXE_EXT) $(CNI_BUILD_DIR)/azure-vnet-ipam$(EXE_EXT) $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) $(CNI_MULTITENANCY_BUILD_DIR)
cp $(CNI_BUILD_DIR)/azure-vnet-telemetry$(EXE_EXT) $(CNI_MULTITENANCY_BUILD_DIR)
endif
cd $(CNI_MULTITENANCY_BUILD_DIR) && $(ARCHIVE_CMD) $(CNI_MULTITENANCY_ARCHIVE_NAME) azure-vnet$(EXE_EXT) azure-vnet-ipam$(EXE_EXT) azure-vnet-telemetry$(EXE_EXT) 10-azure.conflist azure-vnet-telemetry.config

$(MKDIR) $(CNI_SWIFT_BUILD_DIR)
Expand Down
2 changes: 2 additions & 0 deletions aitelemetry/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
type Report struct {
Message string
Context string
AppVersion string
CustomDimensions map[string]string
}

Expand All @@ -25,6 +26,7 @@ type Event struct {
type Metric struct {
Name string
Value float64
AppVersion string
CustomDimensions map[string]string
}

Expand Down
9 changes: 9 additions & 0 deletions aitelemetry/telemetrywrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ func (th *telemetryHandle) TrackLog(report Report) {
// Initialize new trace message
trace := appinsights.NewTraceTelemetry(report.Message, appinsights.Warning)

// will be empty if cns used as telemetry service for cni
if th.appVersion == "" {
th.appVersion = report.AppVersion
}

// Override few of existing columns with metadata
trace.Tags.User().SetAuthUserId(runtime.GOOS)
trace.Tags.Operation().SetId(report.Context)
Expand Down Expand Up @@ -295,6 +300,10 @@ func (th *telemetryHandle) TrackMetric(metric Metric) {
metadata := th.metadata
th.rwmutex.RUnlock()

if th.appVersion == "" {
th.appVersion = metric.AppVersion
}

// Check if metadata is populated
if metadata.SubscriptionID != "" {
aimetric.Properties[locationStr] = metadata.Location
Expand Down
43 changes: 30 additions & 13 deletions cni/network/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"encoding/json"
"fmt"
"net"
"os"
"time"

"github.com/Azure/azure-container-networking/aitelemetry"
Expand Down Expand Up @@ -161,6 +162,19 @@ func (plugin *NetPlugin) Start(config *common.PluginConfig) error {
return nil
}

// This function for sending CNI metrics to telemetry service
func logAndSendEvent(plugin *NetPlugin, msg string) {
log.Printf(msg)
sendEvent(plugin, msg)
}

func sendEvent(plugin *NetPlugin, msg string) {
eventMsg := fmt.Sprintf("[%d] %s", os.Getpid(), msg)
plugin.report.Version = plugin.Version
plugin.report.EventMessage = eventMsg
telemetry.SendCNIEvent(plugin.tb, plugin.report)
}

func (plugin *NetPlugin) GetAllEndpointState(networkid string) (*api.AzureCNIState, error) {
st := api.AzureCNIState{
ContainerInterfaces: make(map[string]api.PodNetworkInterfaceInfo),
Expand Down Expand Up @@ -321,7 +335,7 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error {

startTime := time.Now()

telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("[cni-net] Processing ADD command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v StdinData:%s}.",
logAndSendEvent(plugin, fmt.Sprintf("[cni-net] Processing ADD command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v StdinData:%s}.",
args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData))

// Parse network configuration from stdin.
Expand All @@ -339,6 +353,7 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error {
cniMetric.Metric = aitelemetry.Metric{
Name: telemetry.CNIAddTimeMetricStr,
Value: float64(operationTimeMs),
AppVersion: plugin.Version,
CustomDimensions: make(map[string]string),
}
SetCustomDimensions(&cniMetric, nwCfg, err)
Expand Down Expand Up @@ -501,7 +516,7 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error {
}
}

telemetry.SendCNIEvent(plugin.tb, fmt.Sprintf("Allocated IPAddress from ipam:%+v v6:%+v", ipamAddResult.ipv4Result, ipamAddResult.ipv6Result))
sendEvent(plugin, fmt.Sprintf("Allocated IPAddress from ipam:%+v v6:%+v", ipamAddResult.ipv4Result, ipamAddResult.ipv6Result))

defer func() {
if err != nil {
Expand All @@ -512,14 +527,14 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error {
// Create network
if nwInfoErr != nil {
// Network does not exist.
telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("[cni-net] Creating network %v.", networkID))
logAndSendEvent(plugin, fmt.Sprintf("[cni-net] Creating network %v.", networkID))
// opts map needs to get passed in here
if nwInfo, err = plugin.createNetworkInternal(networkID, policies, ipamAddConfig, ipamAddResult); err != nil {
log.Errorf("Create network failed: %w", err)
return err
}

telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("[cni-net] Created network %v with subnet %v.", networkID, ipamAddResult.hostSubnetPrefix.String()))
logAndSendEvent(plugin, fmt.Sprintf("[cni-net] Created network %v with subnet %v.", networkID, ipamAddResult.hostSubnetPrefix.String()))
}

natInfo := getNATInfo(nwCfg.ExecutionMode, options[network.SNATIPKey], nwCfg.MultiTenancy, enableSnatForDNS)
Expand All @@ -546,7 +561,7 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error {
return err
}

telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("CNI ADD succeeded : IP:%+v, VlanID: %v, podname %v, namespace %v numendpoints:%d",
sendEvent(plugin, fmt.Sprintf("CNI ADD succeeded : IP:%+v, VlanID: %v, podname %v, namespace %v numendpoints:%d",
ipamAddResult.ipv4Result.IPs, epInfo.Data[network.VlanIDKey], k8sPodName, k8sNamespace, plugin.nm.GetNumberOfEndpoints("", nwCfg.Name)))

return nil
Expand Down Expand Up @@ -749,7 +764,7 @@ func (plugin *NetPlugin) createEndpointInternal(opt *createEndpointInternalOpt)
}

// Create the endpoint.
telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("[cni-net] Creating endpoint %s.", epInfo.PrettyString()))
logAndSendEvent(plugin, fmt.Sprintf("[cni-net] Creating endpoint %s.", epInfo.PrettyString()))
err = plugin.nm.CreateEndpoint(cnsclient, opt.nwInfo.Id, &epInfo)
if err != nil {
err = plugin.Errorf("Failed to create endpoint: %v", err)
Expand Down Expand Up @@ -859,12 +874,11 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
nwInfo network.NetworkInfo
epInfo *network.EndpointInfo
cniMetric telemetry.AIMetric
msg string
)

startTime := time.Now()

telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("[cni-net] Processing DEL command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v, StdinData:%s}.",
logAndSendEvent(plugin, fmt.Sprintf("[cni-net] Processing DEL command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v, StdinData:%s}.",
args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData))

defer func() {
Expand All @@ -883,13 +897,16 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
}

plugin.setCNIReportDetails(nwCfg, CNI_DEL, "")
plugin.report.ContainerName = k8sPodName + ":" + k8sNamespace

iptables.DisableIPTableLock = nwCfg.DisableIPTableLock

sendMetricFunc := func() {
operationTimeMs := time.Since(startTime).Milliseconds()
cniMetric.Metric = aitelemetry.Metric{
Name: telemetry.CNIDelTimeMetricStr,
Value: float64(operationTimeMs),
AppVersion: plugin.Version,
CustomDimensions: make(map[string]string),
}
SetCustomDimensions(&cniMetric, nwCfg, err)
Expand Down Expand Up @@ -957,7 +974,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
// attempt to release address associated with this Endpoint id
// This is to ensure clean up is done even in failure cases
log.Printf("[cni-net] Failed to query endpoint %s: %v", endpointID, err)
telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("Release ip by ContainerID (endpoint not found):%v", args.ContainerID))
logAndSendEvent(plugin, fmt.Sprintf("Release ip by ContainerID (endpoint not found):%v", args.ContainerID))
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
}
Expand All @@ -970,7 +987,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {

// schedule send metric before attempting delete
defer sendMetricFunc()
telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("Deleting endpoint:%v", endpointID))
logAndSendEvent(plugin, fmt.Sprintf("Deleting endpoint:%v", endpointID))
// Delete the endpoint.
if err = plugin.nm.DeleteEndpoint(networkID, endpointID); err != nil {
// return a retriable error so the container runtime will retry this DEL later
Expand All @@ -982,7 +999,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
if !nwCfg.MultiTenancy {
// Call into IPAM plugin to release the endpoint's addresses.
for _, address := range epInfo.IPAddresses {
telemetry.LogAndSendEvent(plugin.tb, fmt.Sprintf("Release ip:%s", address.IP.String()))
logAndSendEvent(plugin, fmt.Sprintf("Release ip:%s", address.IP.String()))
err = plugin.ipamInvoker.Delete(&address, nwCfg, args, nwInfo.Options)
if err != nil {
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
Expand All @@ -997,8 +1014,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
}
}

plugin.setCNIReportDetails(nwCfg, CNI_DEL, msg)
telemetry.SendCNIEvent(plugin.tb, fmt.Sprintf("CNI DEL succeeded : Released ip %+v podname %v namespace %v", nwCfg.Ipam.Address, k8sPodName, k8sNamespace))
sendEvent(plugin, fmt.Sprintf("CNI DEL succeeded : Released ip %+v podname %v namespace %v", nwCfg.Ipam.Address, k8sPodName, k8sNamespace))

return err
}
Expand Down Expand Up @@ -1038,6 +1054,7 @@ func (plugin *NetPlugin) Update(args *cniSkel.CmdArgs) error {
cniMetric.Metric = aitelemetry.Metric{
Name: telemetry.CNIUpdateTimeMetricStr,
Value: float64(operationTimeMs),
AppVersion: plugin.Version,
CustomDimensions: make(map[string]string),
}
SetCustomDimensions(&cniMetric, nwCfg, err)
Expand Down
1 change: 1 addition & 0 deletions cni/network/plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ func rootExecute() error {
SystemDetails: telemetry.SystemInfo{},
InterfaceDetails: telemetry.InterfaceInfo{},
BridgeDetails: telemetry.BridgeInfo{},
Version: version,
},
}

Expand Down
3 changes: 2 additions & 1 deletion cni/telemetry/service/telemetrymain.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
// Entry point of the telemetry service if started by CNI

import (
"context"
"fmt"
"os"
"runtime"
Expand Down Expand Up @@ -189,7 +190,7 @@ func main() {
err = telemetry.CreateAITelemetryHandle(aiConfig, config.DisableAll, config.DisableTrace, config.DisableMetric)
log.Printf("[Telemetry] AI Handle creation status:%v", err)
log.Logf("[Telemetry] Report to host for an interval of %d seconds", config.ReportToHostIntervalInSeconds)
tb.PushData()
tb.PushData(context.Background())
telemetry.CloseAITelemetryHandle()

log.Close()
Expand Down
35 changes: 35 additions & 0 deletions cns/service/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/Azure/azure-container-networking/processlock"
localtls "github.com/Azure/azure-container-networking/server/tls"
"github.com/Azure/azure-container-networking/store"
"github.com/Azure/azure-container-networking/telemetry"
"github.com/avast/retry-go/v3"
"github.com/pkg/errors"
"go.uber.org/zap"
Expand Down Expand Up @@ -266,6 +267,13 @@ var args = acn.ArgumentList{
Type: "string",
DefaultValue: "",
},
{
Name: acn.OptTelemetryService,
Shorthand: acn.OptTelemetryServiceAlias,
Description: "Flag to start telemetry service to receive telemetry events from CNI. Default, disabled.",
Type: "bool",
DefaultValue: false,
},
}

// init() is executed before main() whenever this package is imported
Expand Down Expand Up @@ -367,6 +375,28 @@ func sendRegisterNodeRequest(httpc *http.Client, httpRestService cns.HTTPService
return nil
}

func startTelemetryService(ctx context.Context) {
var config aitelemetry.AIConfig

err := telemetry.CreateAITelemetryHandle(config, false, false, false)
if err != nil {
log.Errorf("AI telemetry handle creation failed..:%w", err)
return
}

tbtemp := telemetry.NewTelemetryBuffer()
//nolint:errcheck // best effort to cleanup leaked pipe/socket before start
tbtemp.Cleanup(telemetry.FdName)

tb := telemetry.NewTelemetryBuffer()
err = tb.StartServer()
if err != nil {
log.Errorf("Telemetry service failed to start: %w", err)
return
}
tb.PushData(rootCtx)
}

// Main is the entry point for CNS.
func main() {
// Initialize and parse command line arguments.
Expand Down Expand Up @@ -396,6 +426,7 @@ func main() {
clientDebugCmd := acn.GetArg(acn.OptDebugCmd).(string)
clientDebugArg := acn.GetArg(acn.OptDebugArg).(string)
cmdLineConfigPath := acn.GetArg(acn.OptCNSConfigPath).(string)
telemetryDaemonEnabled := acn.GetArg(acn.OptTelemetryService).(bool)

if vers {
printVersion()
Expand Down Expand Up @@ -475,6 +506,10 @@ func main() {
logger.InitAI(aiConfig, ts.DisableTrace, ts.DisableMetric, ts.DisableEvent)
}

if telemetryDaemonEnabled {
go startTelemetryService(rootCtx)
}

// Log platform information.
logger.Printf("Running on %v", platform.GetOSInfo())

Expand Down
4 changes: 4 additions & 0 deletions common/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ const (
OptTelemetry = "telemetry"
OptTelemetryAlias = "dt"

// Enable Telemetry service
OptTelemetryService = "telemetry-service"
OptTelemetryServiceAlias = "ts"

// HTTP connection timeout
OptHttpConnectionTimeout = "http-connection-timeout"
OptHttpConnectionTimeoutAlias = "httpcontimeout"
Expand Down
4 changes: 2 additions & 2 deletions network/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ type apipaClient interface {
}

func (epInfo *EndpointInfo) PrettyString() string {
return fmt.Sprintf("Id:%s ContainerID:%s NetNsPath:%s IfName:%s IfIndex:%d MacAddr:%s IPAddrs:%v Gateways:%v",
return fmt.Sprintf("Id:%s ContainerID:%s NetNsPath:%s IfName:%s IfIndex:%d MacAddr:%s IPAddrs:%v Gateways:%v Data:%+v",
epInfo.Id, epInfo.ContainerID, epInfo.NetNsPath, epInfo.IfName, epInfo.IfIndex, epInfo.MacAddress.String(), epInfo.IPAddresses,
epInfo.Gateways)
epInfo.Gateways, epInfo.Data)
}

// NewEndpoint creates a new endpoint in the network.
Expand Down
2 changes: 1 addition & 1 deletion network/endpoint_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ func (nw *network) newEndpointImplHnsV2(cli apipaClient, epInfo *EndpointInfo) (
}

// Create the HCN endpoint.
log.Printf("[net] Creating hcn endpoint: %+v", hcnEndpoint)
log.Printf("[net] Creating hcn endpoint: %s computenetwork:%s", hcnEndpoint.Name, hcnEndpoint.HostComputeNetwork)
hnsResponse, err := Hnsv2.CreateEndpoint(hcnEndpoint)
if err != nil {
return nil, fmt.Errorf("Failed to create endpoint: %s due to error: %v", hcnEndpoint.Name, err)
Expand Down
3 changes: 1 addition & 2 deletions network/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,8 @@ func (nm *networkManager) restore(isRehydrationRequired bool) error {
}
}

log.Printf("[net] Restored state, %+v\n", nm)
log.Printf("[net] Restored state")
for _, extIf := range nm.ExternalInterfaces {
log.Printf("External Interface %+v", extIf)
for _, nw := range extIf.Networks {
log.Printf("Number of endpoints: %d", len(nw.Endpoints))
}
Expand Down
1 change: 1 addition & 0 deletions telemetry/aiwrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ func SendAITelemetry(cnireport CNIReport) {
report := aitelemetry.Report{
Message: msg,
Context: cnireport.ContainerName,
AppVersion: cnireport.Version,
CustomDimensions: make(map[string]string),
}

Expand Down
Loading