Skip to content

Commit

Permalink
Fix tolerations for Kubernetes >= 1.24 (#3731)
Browse files Browse the repository at this point in the history
The taints for control-plane Nodes are changed for cluster version
>= 1.24. Add a new toleration for Pods running on control-plane
Nodes to make sure they can be scheduled.

Signed-off-by: Xu Liu <xliu2@vmware.com>
  • Loading branch information
xliuxu committed May 6, 2022
1 parent 50d33be commit 7673d42
Show file tree
Hide file tree
Showing 10 changed files with 31 additions and 19 deletions.
2 changes: 1 addition & 1 deletion build/charts/antrea/README.md
Expand Up @@ -61,7 +61,7 @@ Kubernetes: `>= 1.16.0-0`
| controller.podLabels | object | `{}` | Labels to be added to antrea-controller Pod. |
| controller.priorityClassName | string | `"system-cluster-critical"` | Priority class to use for the antrea-controller Pod. |
| controller.selfSignedCert | bool | `true` | Indicates whether to use auto-generated self-signed TLS certificates. If false, a Secret named "antrea-controller-tls" must be provided with the following keys: ca.crt, tls.crt, tls.key. |
| controller.tolerations | list | `[{"key":"CriticalAddonsOnly","operator":"Exists"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/master"}]` | Tolerations for the antrea-controller Pod. |
| controller.tolerations | list | `[{"key":"CriticalAddonsOnly","operator":"Exists"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/master"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/control-plane"}]` | Tolerations for the antrea-controller Pod. |
| defaultMTU | int | `0` | Default MTU to use for the host gateway interface and the network interface of each Pod. By default, antrea-agent will discover the MTU of the Node's primary interface and adjust it to accommodate for tunnel encapsulation overhead if applicable. |
| egress.exceptCIDRs | list | `[]` | CIDR ranges to which outbound Pod traffic will not be SNAT'd by Egresses. |
| enableBridgingMode | bool | `false` | Enable bridging mode of Pod network on Nodes, in which the Node's transport interface is connected to the OVS bridge. |
Expand Down
3 changes: 3 additions & 0 deletions build/charts/antrea/values.yaml
Expand Up @@ -203,6 +203,9 @@ controller:
# Allow it to schedule onto master nodes.
- key: node-role.kubernetes.io/master
effect: NoSchedule
# Control-plane taint for Kubernetes >= 1.24.
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
# -- Node selector for the antrea-controller Pod.
nodeSelector:
kubernetes.io/os: linux
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-aks.yml
Expand Up @@ -3723,6 +3723,8 @@ spec:
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/master
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
serviceAccountName: antrea-controller
containers:
- name: antrea-controller
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-eks.yml
Expand Up @@ -3725,6 +3725,8 @@ spec:
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/master
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
serviceAccountName: antrea-controller
containers:
- name: antrea-controller
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-gke.yml
Expand Up @@ -3723,6 +3723,8 @@ spec:
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/master
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
serviceAccountName: antrea-controller
containers:
- name: antrea-controller
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-ipsec.yml
Expand Up @@ -3772,6 +3772,8 @@ spec:
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/master
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
serviceAccountName: antrea-controller
containers:
- name: antrea-controller
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea.yml
Expand Up @@ -3723,6 +3723,8 @@ spec:
operator: Exists
- effect: NoSchedule
key: node-role.kubernetes.io/master
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
serviceAccountName: antrea-controller
containers:
- name: antrea-controller
Expand Down
28 changes: 15 additions & 13 deletions test/e2e/framework.go
Expand Up @@ -379,12 +379,19 @@ func labelNodeRoleControlPlane() string {
return labelNodeRoleControlPlane
}

func controlPlaneNoScheduleToleration() corev1.Toleration {
func controlPlaneNoScheduleTolerations() []corev1.Toleration {
// Tolerate both taints: control-plane Nodes still use "master" in older K8s versions (e.g. v1.20), while K8s >= 1.24 uses "control-plane".
return corev1.Toleration{
Key: "node-role.kubernetes.io/master",
Operator: corev1.TolerationOpExists,
Effect: corev1.TaintEffectNoSchedule,
return []corev1.Toleration{
{
Key: "node-role.kubernetes.io/master",
Operator: corev1.TolerationOpExists,
Effect: corev1.TaintEffectNoSchedule,
},
{
Key: "node-role.kubernetes.io/control-plane",
Operator: corev1.TolerationOpExists,
Effect: corev1.TaintEffectNoSchedule,
},
}
}

Expand Down Expand Up @@ -1119,8 +1126,7 @@ func (data *TestData) CreatePodOnNodeInNamespace(name, ns string, nodeName, ctrN
}
if nodeName == controlPlaneNodeName() {
// tolerate NoSchedule taint if we want Pod to run on control-plane Node
noScheduleToleration := controlPlaneNoScheduleToleration()
podSpec.Tolerations = []corev1.Toleration{noScheduleToleration}
podSpec.Tolerations = controlPlaneNoScheduleTolerations()
}
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -2390,9 +2396,7 @@ func (data *TestData) createAgnhostPodOnNodeWithAnnotations(name string, ns stri

func (data *TestData) createDaemonSet(name string, ns string, ctrName string, image string, cmd []string, args []string) (*appsv1.DaemonSet, func() error, error) {
podSpec := corev1.PodSpec{
Tolerations: []corev1.Toleration{
controlPlaneNoScheduleToleration(),
},
Tolerations: controlPlaneNoScheduleTolerations(),
Containers: []corev1.Container{
{
Name: ctrName,
Expand Down Expand Up @@ -2464,9 +2468,7 @@ func (data *TestData) waitForDaemonSetPods(timeout time.Duration, dsName string,

func (data *TestData) createStatefulSet(name string, ns string, size int32, ctrName string, image string, cmd []string, args []string, mutateFunc func(*appsv1.StatefulSet)) (*appsv1.StatefulSet, func() error, error) {
podSpec := corev1.PodSpec{
Tolerations: []corev1.Toleration{
controlPlaneNoScheduleToleration(),
},
Tolerations: controlPlaneNoScheduleTolerations(),
Containers: []corev1.Container{
{
Name: ctrName,
Expand Down
3 changes: 1 addition & 2 deletions test/e2e/networkpolicy_test.go
Expand Up @@ -816,8 +816,7 @@ func testIngressPolicyWithEndPort(t *testing.T, data *TestData) {
}
if nodeName == controlPlaneNodeName() {
// tolerate NoSchedule taint if we want Pod to run on control-plane Node
noScheduleToleration := controlPlaneNoScheduleToleration()
podSpec.Tolerations = []corev1.Toleration{noScheduleToleration}
podSpec.Tolerations = controlPlaneNoScheduleTolerations()
}
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand Down
4 changes: 1 addition & 3 deletions test/e2e/performance_test.go
Expand Up @@ -50,8 +50,6 @@ var (
customizePolicyRules = flag.Int("perf.http.policy_rules", 0, "Number of CIDRs in the network policy")
httpConcurrency = flag.Int("perf.http.concurrency", 1, "Number of multiple requests to make at a time")
realizeTimeout = flag.Duration("perf.realize.timeout", 5*time.Minute, "Timeout of the realization of network policies")
// tolerate NoSchedule taint to let the Pod run on control-plane Node
noScheduleToleration = controlPlaneNoScheduleToleration()
labelSelector = &metav1.LabelSelector{
MatchLabels: map[string]string{"app": perfTestAppLabel},
}
Expand Down Expand Up @@ -118,7 +116,7 @@ func createPerfTestPodDefinition(name, containerName, image string) *corev1.Pod
"kubernetes.io/hostname": controlPlaneNodeName(),
}

podSpec.Tolerations = []corev1.Toleration{noScheduleToleration}
podSpec.Tolerations = controlPlaneNoScheduleTolerations()
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Expand Down

0 comments on commit 7673d42

Please sign in to comment.