Skip to content

Commit

Permalink
fix: reconcile initial state from CRD regardless of existing podInfo (#…
Browse files Browse the repository at this point in the history
…2022)

* fix: reconcile initial state from CRD regardless of existing podInfo

Signed-off-by: Evan Baker <rbtr@users.noreply.github.com>

* fix: add test for no state and pending release in NNC

Signed-off-by: Evan Baker <rbtr@users.noreply.github.com>

* fix: add test for restoring state and pending release in NNC

Signed-off-by: Evan Baker <rbtr@users.noreply.github.com>
  • Loading branch information
rbtr committed Jun 23, 2023
1 parent 0822d0c commit fa2cbef
Show file tree
Hide file tree
Showing 136 changed files with 4,441 additions and 1,442 deletions.
173 changes: 151 additions & 22 deletions cns/restserver/internalapi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package restserver
import (
"context"
"fmt"
"math/rand"
"os"
"reflect"
"strconv"
Expand All @@ -21,6 +22,8 @@ import (
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/maps"
)

const (
Expand Down Expand Up @@ -336,7 +339,126 @@ func TestReconcileNCWithEmptyState(t *testing.T) {
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
}

validateNCStateAfterReconcile(t, nil, expectedNcCount, expectedAssignedPods)
validateNCStateAfterReconcile(t, nil, expectedNcCount, expectedAssignedPods, nil)
}

// TestReconcileNCWithEmptyStateAndPendingRelease tests the case where there is
// no state (node reboot) and there are pending release IPs in the NNC that
// may have been deallocated and should not be made available for assignment
// to pods.
//
// It builds an NC request with 16 secondary IPs (10.0.0.6 - 10.0.0.21), lists a
// random non-empty subset of their IDs in the NNC's Spec.IPsNotInUse, reconciles
// with no assigned pods, and then verifies that exactly those IDs ended up in
// the PendingRelease state.
func TestReconcileNCWithEmptyStateAndPendingRelease(t *testing.T) {
	restartService()
	setEnv(t)
	setOrchestratorTypeInternal(cns.KubernetesCRD)

	// No pods are assigned in this scenario; the map is keyed by IP address,
	// matching what is passed to ReconcileNCState below.
	expectedAssignedPods := map[string]cns.PodInfo{}
	secondaryIPConfigs := map[string]cns.SecondaryIPConfig{}
	for i := 6; i < 22; i++ {
		ipaddress := "10.0.0." + strconv.Itoa(i)
		secIPConfig := newSecondaryIPConfig(ipaddress, -1)
		ipID := uuid.New()
		secondaryIPConfigs[ipID.String()] = secIPConfig
	}
	// Pick between 1 and len(secondaryIPConfigs) IP IDs to report as not in use.
	// Only the keys of this map are meaningful; the values are always nil, so
	// membership must be tested with the comma-ok form.
	pendingReleaseIPIDs := func() map[string]cns.PodInfo {
		numPending := rand.Intn(len(secondaryIPConfigs)) + 1 //nolint:gosec // weak rand is sufficient in test
		pendingIPs := map[string]cns.PodInfo{}
		for k := range secondaryIPConfigs {
			if numPending == 0 {
				break
			}
			pendingIPs[k] = nil
			numPending--
		}
		return pendingIPs
	}()
	req := generateNetworkContainerRequest(secondaryIPConfigs, "reconcileNc1", "-1")
	returnCode := svc.ReconcileNCState(req, expectedAssignedPods, &v1alpha.NodeNetworkConfig{
		Spec: v1alpha.NodeNetworkConfigSpec{
			IPsNotInUse: maps.Keys(pendingReleaseIPIDs),
		},
	})
	assert.Equal(t, types.Success, returnCode, "Unexpected failure on reconcile with no state")
	// confirm that the correct number of IPs are now PendingRelease
	assert.EqualValues(t, len(pendingReleaseIPIDs), len(svc.GetPendingReleaseIPConfigs()))

	// validate state
	containerStatus := svc.state.ContainerStatus[req.NetworkContainerid]
	assert.Equal(t, req.NetworkContainerid, containerStatus.ID, "NCID is not persisted")
	assert.Equal(t, req.NetworkContainerType, containerStatus.CreateNetworkContainerRequest.NetworkContainerType, "ContainerType is not persisted")
	assert.Equal(t, req.IPConfiguration.IPSubnet.IPAddress, containerStatus.CreateNetworkContainerRequest.IPConfiguration.IPSubnet.IPAddress, "Primary IPAddress doesnt match")
	assert.Equal(t, len(req.SecondaryIPConfigs), len(svc.PodIPConfigState), "Secondary IP count doesnt match in PodIpConfig state")
	for ipID, ipStatus := range svc.PodIPConfigState {
		require.Contains(t, req.SecondaryIPConfigs, ipID, "PodIpConfigState has stale ipID")
		assert.Equal(t, req.SecondaryIPConfigs[ipID].IPAddress, ipStatus.IPAddress, "IPSubnet doesnt match")
		// expectedAssignedPods is keyed by IP address, not by IP ID.
		if _, assigned := expectedAssignedPods[ipStatus.IPAddress]; assigned {
			require.Equal(t, types.Assigned, ipStatus.GetState(), "IPState is not Assigned")
			require.NotNil(t, ipStatus.PodInfo, "PodInfo is nil")
		}
		// The pending map only carries nil values, so a `!= nil` comparison would
		// never be true and the state check would silently be skipped.
		if _, pending := pendingReleaseIPIDs[ipID]; pending {
			require.Equal(t, types.PendingRelease, ipStatus.GetState(), "IPState is not PendingRelease")
		}
	}
}

// TestReconcileNCWithExistingStateAndPendingRelease tests the case where some
// pods already hold IPs from the NC and the NNC additionally lists IPs as not
// in use.
//
// It builds an NC request with 16 secondary IPs (10.0.0.6 - 10.0.0.21), passes
// two pods (on 10.0.0.6 and 10.0.0.7) to the reconcile call, lists a random
// non-empty subset of the remaining IP IDs in Spec.IPsNotInUse, and then
// verifies that the pod IPs are Assigned and the listed IDs are PendingRelease.
func TestReconcileNCWithExistingStateAndPendingRelease(t *testing.T) {
	restartService()
	setEnv(t)
	setOrchestratorTypeInternal(cns.KubernetesCRD)

	secondaryIPConfigs := make(map[string]cns.SecondaryIPConfig)
	for i := 6; i < 22; i++ {
		ipaddress := "10.0.0." + strconv.Itoa(i)
		secIPConfig := newSecondaryIPConfig(ipaddress, -1)
		ipID := uuid.New()
		secondaryIPConfigs[ipID.String()] = secIPConfig
	}
	// Keyed by IP address (not by IP ID).
	expectedAssignedPods := map[string]cns.PodInfo{
		"10.0.0.6": cns.NewPodInfo("", "", "reconcilePod1", "PodNS1"),
		"10.0.0.7": cns.NewPodInfo("", "", "reconcilePod2", "PodNS1"),
	}
	// Pick a random non-empty set of IP IDs to report as not in use, skipping
	// any IP that is expected to be assigned to a pod. Only the keys of this
	// map are meaningful; the values are always nil.
	pendingReleaseIPIDs := func() map[string]cns.PodInfo {
		numPending := rand.Intn(len(secondaryIPConfigs)) + 1 //nolint:gosec // weak rand is sufficient in test
		pendingIPs := map[string]cns.PodInfo{}
		for k := range secondaryIPConfigs {
			if numPending == 0 {
				break
			}
			if _, ok := expectedAssignedPods[secondaryIPConfigs[k].IPAddress]; ok {
				continue
			}
			pendingIPs[k] = nil
			numPending--
		}
		return pendingIPs
	}()
	req := generateNetworkContainerRequest(secondaryIPConfigs, "reconcileNc1", "-1")
	returnCode := svc.ReconcileNCState(req, expectedAssignedPods, &v1alpha.NodeNetworkConfig{
		Spec: v1alpha.NodeNetworkConfigSpec{
			IPsNotInUse: maps.Keys(pendingReleaseIPIDs),
		},
	})
	assert.Equal(t, types.Success, returnCode, "Unexpected failure on reconcile with existing state")
	// confirm that the correct number of IPs are now PendingRelease
	assert.EqualValues(t, len(pendingReleaseIPIDs), len(svc.GetPendingReleaseIPConfigs()))
	// validate state
	containerStatus := svc.state.ContainerStatus[req.NetworkContainerid]
	assert.Equal(t, req.NetworkContainerid, containerStatus.ID, "NCID is not persisted")
	assert.Equal(t, req.NetworkContainerType, containerStatus.CreateNetworkContainerRequest.NetworkContainerType, "ContainerType is not persisted")
	assert.Equal(t, req.IPConfiguration.IPSubnet.IPAddress, containerStatus.CreateNetworkContainerRequest.IPConfiguration.IPSubnet.IPAddress, "Primary IPAddress doesnt match")
	assert.Equal(t, len(req.SecondaryIPConfigs), len(svc.PodIPConfigState), "Secondary IP count doesnt match in PodIpConfig state")
	for ipID, ipStatus := range svc.PodIPConfigState {
		require.Contains(t, req.SecondaryIPConfigs, ipID, "PodIpConfigState has stale ipID")
		assert.Equal(t, req.SecondaryIPConfigs[ipID].IPAddress, ipStatus.IPAddress, "IPSubnet doesnt match")
		// expectedAssignedPods is keyed by IP address; indexing it with the IP ID
		// would never match and the Assigned checks would be skipped.
		if _, assigned := expectedAssignedPods[ipStatus.IPAddress]; assigned {
			require.Equal(t, types.Assigned, ipStatus.GetState(), "IPState is not Assigned")
			require.NotNil(t, ipStatus.PodInfo, "PodInfo is nil")
		}
		// The pending map only carries nil values, so membership has to be tested
		// with the comma-ok form rather than a `!= nil` comparison.
		if _, pending := pendingReleaseIPIDs[ipID]; pending {
			require.Equal(t, types.PendingRelease, ipStatus.GetState(), "IPState is not PendingRelease")
		}
	}
}

func TestReconcileNCWithExistingState(t *testing.T) {
Expand Down Expand Up @@ -378,7 +500,7 @@ func TestReconcileNCWithExistingState(t *testing.T) {
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
}

validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods)
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods, nil)
}

func TestReconcileNCWithExistingStateFromInterfaceID(t *testing.T) {
Expand Down Expand Up @@ -422,7 +544,7 @@ func TestReconcileNCWithExistingStateFromInterfaceID(t *testing.T) {
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
}

validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods)
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods, nil)
}

func TestReconcileNCWithSystemPods(t *testing.T) {
Expand Down Expand Up @@ -466,7 +588,7 @@ func TestReconcileNCWithSystemPods(t *testing.T) {
}

delete(expectedAssignedPods, "192.168.0.1")
validateNCStateAfterReconcile(t, req, expectedNcCount, expectedAssignedPods)
validateNCStateAfterReconcile(t, req, expectedNcCount, expectedAssignedPods, nil)
}

func setOrchestratorTypeInternal(orchestratorType string) {
Expand Down Expand Up @@ -655,24 +777,23 @@ func generateNetworkContainerRequest(secondaryIps map[string]cns.SecondaryIPConf
return &req
}

func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkContainerRequest, expectedNcCount int, expectedAssignedPods map[string]cns.PodInfo) {
func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkContainerRequest, expectedNCCount int, expectedAssignedIPs, expectedPendingIPs map[string]cns.PodInfo) {
if ncRequest == nil {
// check svc ContainerStatus will be empty
if len(svc.state.ContainerStatus) != expectedNcCount {
if len(svc.state.ContainerStatus) != expectedNCCount {
t.Fatalf("CNS has some stale ContainerStatus, count: %d, state: %+v", len(svc.state.ContainerStatus), svc.state.ContainerStatus)
}
} else {
validateNetworkRequest(t, *ncRequest)
}

if len(expectedAssignedPods) != len(svc.PodIPIDByPodInterfaceKey) {
t.Fatalf("Unexpected assigned pods, actual: %d, expected: %d", len(svc.PodIPIDByPodInterfaceKey), len(expectedAssignedPods))
if len(expectedAssignedIPs) != len(svc.PodIPIDByPodInterfaceKey) {
t.Fatalf("Unexpected assigned pods, actual: %d, expected: %d", len(svc.PodIPIDByPodInterfaceKey), len(expectedAssignedIPs))
}

for ipaddress, podInfo := range expectedAssignedPods {
ipId := svc.PodIPIDByPodInterfaceKey[podInfo.Key()]
ipConfigstate := svc.PodIPConfigState[ipId]

for ipaddress, podInfo := range expectedAssignedIPs {
ipID := svc.PodIPIDByPodInterfaceKey[podInfo.Key()]
ipConfigstate := svc.PodIPConfigState[ipID]
if ipConfigstate.GetState() != types.Assigned {
t.Fatalf("IpAddress %s is not marked as assigned to Pod: %+v, ipState: %+v", ipaddress, podInfo, ipConfigstate)
}
Expand All @@ -682,7 +803,7 @@ func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkCon
t.Fatalf("IpAddress %s is not same, for Pod: %+v, actual ipState: %+v", ipaddress, podInfo, ipConfigstate)
}

// Valdate pod context
// Validate pod context
if reflect.DeepEqual(ipConfigstate.PodInfo, podInfo) != true {
t.Fatalf("OrchestrationContext: is not same, expected: %+v, actual %+v", ipConfigstate.PodInfo, podInfo)
}
Expand All @@ -696,18 +817,26 @@ func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkCon

// validate rest of Secondary IPs in Available state
if ncRequest != nil {
for secIpId, secIpConfig := range ncRequest.SecondaryIPConfigs {
if _, exists := expectedAssignedPods[secIpConfig.IPAddress]; exists {
continue
}

for secIPID, secIPConfig := range ncRequest.SecondaryIPConfigs {
// Validate IP state
if secIpConfigState, found := svc.PodIPConfigState[secIpId]; found {
if secIpConfigState.GetState() != types.Available {
t.Fatalf("IPId: %s State is not Available, ipStatus: %+v", secIpId, secIpConfigState)
if secIPConfigState, found := svc.PodIPConfigState[secIPID]; found {
if _, exists := expectedAssignedIPs[secIPConfig.IPAddress]; exists {
if secIPConfigState.GetState() != types.Assigned {
t.Fatalf("IPId: %s State is not Assigned, ipStatus: %+v", secIPID, secIPConfigState)
}
continue
}
if _, exists := expectedPendingIPs[secIPID]; exists {
if secIPConfigState.GetState() != types.PendingRelease {
t.Fatalf("IPId: %s State is not PendingRelease, ipStatus: %+v", secIPID, secIPConfigState)
}
continue
}
if secIPConfigState.GetState() != types.Available {
t.Fatalf("IPId: %s State is not Available, ipStatus: %+v", secIPID, secIPConfigState)
}
} else {
t.Fatalf("IPId: %s, IpAddress: %+v State doesnt exists in PodIp Map", secIpId, secIpConfig)
t.Fatalf("IPId: %s, IpAddress: %+v State doesnt exists in PodIp Map", secIPID, secIPConfig)
}
}
}
Expand Down
40 changes: 15 additions & 25 deletions cns/service/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1139,34 +1139,24 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, scopedcli, clusterSubnetStateChan, &poolOpts)
httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor

// reconcile initial CNS state from CNI or apiserver.
// Only reconcile if there are any existing Pods using NC ips,
// else let the goal state be updated using a regular NNC Reconciler loop
podInfoByIP, err := podInfoByIPProvider.PodInfoByIP()
if err != nil {
return errors.Wrap(err, "failed to provide PodInfoByIP")
}
if len(podInfoByIP) > 0 {
logger.Printf("Reconciling initial CNS state as PodInfoByIP is not empty: %d", len(podInfoByIP))

// apiserver nnc might not be registered or api server might be down and crashloop backoff puts us outside of 5-10 minutes we have for
// aks addons to come up so retry a bit more aggressively here.
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
attempt := 0
err = retry.Do(func() error {
attempt++
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
if err != nil {
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
}
return errors.Wrap(err, "failed to initialize CNS state")
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
logger.Printf("Reconciling initial CNS state")
// apiserver nnc might not be registered or api server might be down and crashloop backoff puts us outside of 5-10 minutes we have for
// aks addons to come up so retry a bit more aggressively here.
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
attempt := 0
err = retry.Do(func() error {
attempt++
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
if err != nil {
return err
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
}
logger.Printf("reconciled initial CNS state after %d attempts", attempt)
return errors.Wrap(err, "failed to initialize CNS state")
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
if err != nil {
return err
}
logger.Printf("reconciled initial CNS state after %d attempts", attempt)

// start the pool Monitor before the Reconciler, since it needs to be ready to receive an
// NodeNetworkConfig update by the time the Reconciler tries to send it.
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ require (
github.com/spf13/viper v1.14.0
github.com/stretchr/testify v1.8.2
go.uber.org/zap v1.24.0
golang.org/x/sys v0.3.0
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
golang.org/x/sys v0.6.0
google.golang.org/grpc v1.52.0
google.golang.org/protobuf v1.28.1
k8s.io/api v0.26.0
Expand Down
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 h1:k/i9J1pBpvlfR+9QsetwPyERsqu1GIbi967PQMq3Ivc=
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down Expand Up @@ -1023,8 +1025,8 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.3.0 h1:qoo4akIqOcDME5bhc/NgxUdovd6BSS2uMsVjB56q1xI=
Expand Down
27 changes: 27 additions & 0 deletions vendor/golang.org/x/exp/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions vendor/golang.org/x/exp/PATENTS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit fa2cbef

Please sign in to comment.