Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 42 additions & 22 deletions cns/restserver/internalapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,66 +157,86 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo
if err != nil {
logger.Errorf("sync host error %v", err)
}
syncHostNCVersion.WithLabelValues(strconv.FormatBool(err == nil)).Observe(time.Since(start).Seconds())
syncHostNCVersionCount.WithLabelValues(strconv.FormatBool(err == nil)).Inc()
syncHostNCVersionLatency.WithLabelValues(strconv.FormatBool(err == nil)).Observe(time.Since(start).Seconds())
}

var errNonExistentContainerStatus = errors.New("nonExistantContainerstatus")

func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) error {
var hostVersionNeedsUpdateContainers []string
outdatedNCs := map[string]struct{}{}
for idx := range service.state.ContainerStatus {
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
hostVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
localNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
if err != nil {
logger.Errorf("Received err when change containerstatus.HostVersion %s to int, err msg %v", service.state.ContainerStatus[idx].HostVersion, err)
continue
}
dncNcVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version)
dncNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version)
if err != nil {
logger.Errorf("Received err when change nc version %s in containerstatus to int, err msg %v", service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version, err)
continue
}
// host NC version is the NC version from NMAgent, if it's smaller than NC version from DNC, then append it to indicate it needs update.
if hostVersion < dncNcVersion {
hostVersionNeedsUpdateContainers = append(hostVersionNeedsUpdateContainers, service.state.ContainerStatus[idx].ID)
} else if hostVersion > dncNcVersion {
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", hostVersion, dncNcVersion)
if localNCVersion < dncNCVersion {
outdatedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
} else if localNCVersion > dncNCVersion {
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", localNCVersion, dncNCVersion)
}
}
if len(hostVersionNeedsUpdateContainers) == 0 {
if len(outdatedNCs) == 0 {
return nil
}
ncList, err := service.nmagentClient.GetNCVersionList(ctx)
ncVersionListResp, err := service.nmagentClient.GetNCVersionList(ctx)
if err != nil {
return errors.Wrap(err, "failed to get nc version list from nmagent")
}

newHostNCVersionList := map[string]string{}
for _, nc := range ncList.Containers {
newHostNCVersionList[nc.NetworkContainerID] = nc.Version
nmaNCs := map[string]string{}
for _, nc := range ncVersionListResp.Containers {
nmaNCs[nc.NetworkContainerID] = nc.Version
}
for _, ncID := range hostVersionNeedsUpdateContainers {
versionStr, ok := newHostNCVersionList[ncID]
for ncID := range outdatedNCs {
nmaNCVersionStr, ok := nmaNCs[ncID]
if !ok {
// NMA doesn't have this NC that we need programmed yet, bail out
continue
}
version, err := strconv.Atoi(versionStr)
nmaNCVersion, err := strconv.Atoi(nmaNCVersionStr)
if err != nil {
return errors.Wrapf(err, "failed to parse container version of %s", ncID)
logger.Errorf("failed to parse container version of %s: %s", ncID, err)
continue
}

// Check whether it exist in service state and get the related nc info
ncInfo, exist := service.state.ContainerStatus[ncID]
if !exist {
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
// this point, our internal state has changed unexpectedly and we should bail out and try again.
return errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
}
localNCVersion, err := strconv.Atoi(ncInfo.HostVersion)
if err != nil {
logger.Errorf("failed to parse host nc version string %s: %s", ncInfo.HostVersion, err)
continue
}
if localNCVersion > nmaNCVersion {
logger.Errorf("NC version from NMA is decreasing: have %d, got %d", localNCVersion, nmaNCVersion)
continue
}
if channelMode == cns.CRD {
service.MarkIpsAsAvailableUntransacted(ncInfo.ID, version)
service.MarkIpsAsAvailableUntransacted(ncInfo.ID, nmaNCVersion)
}
oldHostNCVersion := ncInfo.HostVersion
ncInfo.HostVersion = versionStr
logger.Printf("Updating NC %s host version from %s to %s", ncID, ncInfo.HostVersion, nmaNCVersionStr)
ncInfo.HostVersion = nmaNCVersionStr
logger.Printf("Updated NC %s host version to %s", ncID, ncInfo.HostVersion)
service.state.ContainerStatus[ncID] = ncInfo
logger.Printf("Updated NC %s host version from %s to %s", ncID, oldHostNCVersion, ncInfo.HostVersion)
// if we successfully updated the NC, pop it from the needs update set.
delete(outdatedNCs, ncID)
}
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
// need to return an error indicating that
if len(outdatedNCs) > 0 {
return errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
}
return nil
}
Expand Down
17 changes: 13 additions & 4 deletions cns/restserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,31 @@ var ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
[]string{"previous_state", "next_state"},
)

var syncHostNCVersion = prometheus.NewHistogramVec(
var syncHostNCVersionCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sync_host_nc_version_total",
Help: "Count of Sync Host NC by success or failure",
},
[]string{"ok"},
)

var syncHostNCVersionLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "sync_host_nc_version_seconds",
Name: "sync_host_nc_version_latency_seconds",
Help: "Sync Host NC Latency",
//nolint:gomnd // default bucket consts
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
},
[]string{"success"},
[]string{"ok"},
)

func init() {
metrics.Registry.MustRegister(
httpRequestLatency,
ipAssignmentLatency,
ipConfigStatusStateTransitionTime,
syncHostNCVersion,
syncHostNCVersionCount,
syncHostNCVersionLatency,
)
}

Expand Down