Skip to content

Commit a8b5220

Browse files
authored
Merge pull request kubernetes#83654 from nilo19/qi-cherry-pick-83102-release1.13
Cherry pick of kubernetes#83102: Fix aggressive VM calls for Azure VMSS.
2 parents 17c28f0 + 20fd43c commit a8b5220

File tree

4 files changed

+174
-199
lines changed

4 files changed

+174
-199
lines changed

pkg/cloudprovider/providers/azure/azure_controller_vmss.go

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -84,8 +84,9 @@ func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nod
8484
defer cancel()
8585

8686
// Invalidate the cache right after updating
87-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
88-
defer ss.vmssVMCache.Delete(key)
87+
if err = ss.deleteCacheForNode(vmName); err != nil {
88+
return err
89+
}
8990

9091
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s)", nodeResourceGroup, nodeName, diskName, diskURI)
9192
_, err = ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM)
@@ -155,8 +156,9 @@ func (ss *scaleSet) DetachDisk(diskName, diskURI string, nodeName types.NodeName
155156
defer cancel()
156157

157158
// Invalidate the cache right after updating
158-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
159-
defer ss.vmssVMCache.Delete(key)
159+
if err = ss.deleteCacheForNode(vmName); err != nil {
160+
return nil, err
161+
}
160162

161163
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s)", nodeResourceGroup, nodeName, diskName, diskURI)
162164
return ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM)

pkg/cloudprovider/providers/azure/azure_vmss.go

Lines changed: 87 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ import (
2323
"sort"
2424
"strconv"
2525
"strings"
26+
"sync"
2627

2728
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-03-01/compute"
2829
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-07-01/network"
@@ -54,10 +55,8 @@ type scaleSet struct {
5455
// (e.g. master nodes) may not belong to any scale sets.
5556
availabilitySet VMSet
5657

57-
vmssCache *timedCache
58-
vmssVMCache *timedCache
59-
nodeNameToScaleSetMappingCache *timedCache
60-
availabilitySetNodesCache *timedCache
58+
vmssVMCache *timedCache
59+
availabilitySetNodesCache *timedCache
6160
}
6261

6362
// newScaleSet creates a new scaleSet.
@@ -68,22 +67,12 @@ func newScaleSet(az *Cloud) (VMSet, error) {
6867
availabilitySet: newAvailabilitySet(az),
6968
}
7069

71-
ss.nodeNameToScaleSetMappingCache, err = ss.newNodeNameToScaleSetMappingCache()
72-
if err != nil {
73-
return nil, err
74-
}
75-
7670
ss.availabilitySetNodesCache, err = ss.newAvailabilitySetNodesCache()
7771
if err != nil {
7872
return nil, err
7973
}
8074

81-
ss.vmssCache, err = ss.newVmssCache()
82-
if err != nil {
83-
return nil, err
84-
}
85-
86-
ss.vmssVMCache, err = ss.newVmssVMCache()
75+
ss.vmssVMCache, err = ss.newVMSSVirtualMachinesCache()
8776
if err != nil {
8877
return nil, err
8978
}
@@ -93,39 +82,46 @@ func newScaleSet(az *Cloud) (VMSet, error) {
9382

9483
// getVmssVM gets virtualMachineScaleSetVM by nodeName from cache.
9584
// It returns cloudprovider.InstanceNotFound if node does not belong to any scale sets.
96-
func (ss *scaleSet) getVmssVM(nodeName string) (ssName, instanceID string, vm compute.VirtualMachineScaleSetVM, err error) {
97-
instanceID, err = getScaleSetVMInstanceID(nodeName)
98-
if err != nil {
99-
return ssName, instanceID, vm, err
100-
}
85+
func (ss *scaleSet) getVmssVM(nodeName string) (string, string, *compute.VirtualMachineScaleSetVM, error) {
86+
getter := func(nodeName string) (string, string, *compute.VirtualMachineScaleSetVM, error) {
87+
cached, err := ss.vmssVMCache.Get(vmssVirtualMachinesKey)
88+
if err != nil {
89+
return "", "", nil, err
90+
}
10191

102-
ssName, err = ss.getScaleSetNameByNodeName(nodeName)
103-
if err != nil {
104-
return ssName, instanceID, vm, err
105-
}
92+
virtualMachines := cached.(*sync.Map)
93+
if vm, ok := virtualMachines.Load(nodeName); ok {
94+
result := vm.(*vmssVirtualMachinesEntry)
95+
return result.vmssName, result.instanceID, result.virtualMachine, nil
96+
}
10697

107-
if ssName == "" {
108-
return "", "", vm, cloudprovider.InstanceNotFound
98+
return "", "", nil, nil
10999
}
110100

111-
resourceGroup, err := ss.GetNodeResourceGroup(nodeName)
101+
_, err := getScaleSetVMInstanceID(nodeName)
112102
if err != nil {
113-
return "", "", vm, err
103+
return "", "", nil, err
114104
}
115105

116-
klog.V(4).Infof("getVmssVM gets scaleSetName (%q) and instanceID (%q) for node %q", ssName, instanceID, nodeName)
117-
key := buildVmssCacheKey(resourceGroup, ss.makeVmssVMName(ssName, instanceID))
118-
cachedVM, err := ss.vmssVMCache.Get(key)
106+
vmssName, instanceID, vm, err := getter(nodeName)
119107
if err != nil {
120-
return ssName, instanceID, vm, err
108+
return "", "", nil, err
109+
}
110+
if vm != nil {
111+
return vmssName, instanceID, vm, nil
121112
}
122113

123-
if cachedVM == nil {
124-
klog.Errorf("Can't find node (%q) in any scale sets", nodeName)
125-
return ssName, instanceID, vm, cloudprovider.InstanceNotFound
114+
klog.V(3).Infof("Couldn't find VMSS VM with nodeName %s, refreshing the cache", nodeName)
115+
ss.vmssVMCache.Delete(vmssVirtualMachinesKey)
116+
vmssName, instanceID, vm, err = getter(nodeName)
117+
if err != nil {
118+
return "", "", nil, err
126119
}
127120

128-
return ssName, instanceID, *(cachedVM.(*compute.VirtualMachineScaleSetVM)), nil
121+
if vm == nil {
122+
return "", "", nil, cloudprovider.InstanceNotFound
123+
}
124+
return vmssName, instanceID, vm, nil
129125
}
130126

131127
// GetPowerStatusByNodeName returns the power state of the specified node.
@@ -150,20 +146,49 @@ func (ss *scaleSet) GetPowerStatusByNodeName(name string) (powerState string, er
150146

151147
// getVmssVMByInstanceID gets the scale set VM from cache by resource group, scale set name and instance ID.
152148
// The node must belong to one of scale sets.
153-
func (ss *scaleSet) getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID string) (vm compute.VirtualMachineScaleSetVM, err error) {
154-
vmName := ss.makeVmssVMName(scaleSetName, instanceID)
155-
key := buildVmssCacheKey(resourceGroup, vmName)
156-
cachedVM, err := ss.vmssVMCache.Get(key)
149+
func (ss *scaleSet) getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID string) (*compute.VirtualMachineScaleSetVM, error) {
150+
getter := func() (vm *compute.VirtualMachineScaleSetVM, found bool, err error) {
151+
cached, err := ss.vmssVMCache.Get(vmssVirtualMachinesKey)
152+
if err != nil {
153+
return nil, false, err
154+
}
155+
156+
virtualMachines := cached.(*sync.Map)
157+
virtualMachines.Range(func(key, value interface{}) bool {
158+
vmEntry := value.(*vmssVirtualMachinesEntry)
159+
if strings.EqualFold(vmEntry.resourceGroup, resourceGroup) &&
160+
strings.EqualFold(vmEntry.vmssName, scaleSetName) &&
161+
strings.EqualFold(vmEntry.instanceID, instanceID) {
162+
vm = vmEntry.virtualMachine
163+
found = true
164+
return false
165+
}
166+
167+
return true
168+
})
169+
170+
return vm, found, nil
171+
}
172+
173+
vm, found, err := getter()
157174
if err != nil {
158-
return vm, err
175+
return nil, err
176+
}
177+
if found {
178+
return vm, nil
159179
}
160180

161-
if cachedVM == nil {
162-
klog.Errorf("couldn't find vmss virtual machine by scaleSetName (%s) and instanceID (%s)", scaleSetName, instanceID)
163-
return vm, cloudprovider.InstanceNotFound
181+
klog.V(3).Infof("Couldn't find VMSS VM with scaleSetName %q and instanceID %q, refreshing the cache", scaleSetName, instanceID)
182+
ss.vmssVMCache.Delete(vmssVirtualMachinesKey)
183+
vm, found, err = getter()
184+
if err != nil {
185+
return nil, err
186+
}
187+
if !found {
188+
return nil, cloudprovider.InstanceNotFound
164189
}
165190

166-
return *(cachedVM.(*compute.VirtualMachineScaleSetVM)), nil
191+
return vm, nil
167192
}
168193

169194
// GetInstanceIDByNodeName gets the cloud provider ID by node name.
@@ -425,9 +450,15 @@ func (ss *scaleSet) listScaleSets(resourceGroup string) ([]string, error) {
425450
return nil, err
426451
}
427452

428-
ssNames := make([]string, len(allScaleSets))
429-
for i := range allScaleSets {
430-
ssNames[i] = *(allScaleSets[i].Name)
453+
ssNames := make([]string, 0)
454+
for _, vmss := range allScaleSets {
455+
name := *vmss.Name
456+
if vmss.Sku != nil && to.Int64(vmss.Sku.Capacity) == 0 {
457+
klog.V(3).Infof("Capacity of VMSS %q is 0, skipping", name)
458+
continue
459+
}
460+
461+
ssNames = append(ssNames, name)
431462
}
432463

433464
return ssNames, nil
@@ -462,7 +493,7 @@ func (ss *scaleSet) getAgentPoolScaleSets(nodes []*v1.Node) (*[]string, error) {
462493
}
463494

464495
nodeName := nodes[nx].Name
465-
ssName, err := ss.getScaleSetNameByNodeName(nodeName)
496+
ssName, _, _, err := ss.getVmssVM(nodeName)
466497
if err != nil {
467498
return nil, err
468499
}
@@ -561,7 +592,7 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
561592
return network.Interface{}, err
562593
}
563594

564-
primaryInterfaceID, err := ss.getPrimaryInterfaceID(vm)
595+
primaryInterfaceID, err := ss.getPrimaryInterfaceID(*vm)
565596
if err != nil {
566597
klog.Errorf("error: ss.GetPrimaryInterface(%s), ss.getPrimaryInterfaceID(), err=%v", nodeName, err)
567598
return network.Interface{}, err
@@ -722,8 +753,9 @@ func (ss *scaleSet) EnsureHostInPool(service *v1.Service, nodeName types.NodeNam
722753
}
723754

724755
// Invalidate the cache since we would update it.
725-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
726-
defer ss.vmssVMCache.Delete(key)
756+
if err = ss.deleteCacheForNode(vmName); err != nil {
757+
return err
758+
}
727759

728760
// Update vmssVM with backoff.
729761
ctx, cancel := getContextWithCancel()
@@ -851,8 +883,9 @@ func (ss *scaleSet) ensureBackendPoolDeletedFromNode(service *v1.Service, nodeNa
851883
}
852884

853885
// Invalidate the cache since we would update it.
854-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
855-
defer ss.vmssVMCache.Delete(key)
886+
if err = ss.deleteCacheForNode(nodeName); err != nil {
887+
return err
888+
}
856889

857890
// Update vmssVM with backoff.
858891
ctx, cancel := getContextWithCancel()

0 commit comments

Comments
 (0)