Skip to content

Commit

Permalink
Watch node object for podCIDR instead of crash + wait for restart
Browse files Browse the repository at this point in the history
This can save up to 10 seconds that would otherwise be spent waiting for the
kubelet to restart the pod after the crash.
  • Loading branch information
jingyuanliang committed May 24, 2024
1 parent f19530f commit 4608b25
Show file tree
Hide file tree
Showing 31 changed files with 260 additions and 96 deletions.
39 changes: 25 additions & 14 deletions scripts/install-cni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ log() {
echo "$@"
}

# Print an error message to stderr and abort the script.
# Arguments: message words; echoed after a "FATAL:" prefix.
# Exits:     always terminates the calling shell with status 1.
fatal() {
  {
    echo "FATAL:" "$@"
  } 1>&2
  exit 1
}

# shellcheck disable=SC2317,SC2329 # when called with $1=calico_ready
calico_ready() {
log "Listing items matching /host/etc/cni/net.d/*calico*.conflist"
Expand Down Expand Up @@ -110,15 +115,23 @@ else
cni_spec=${cni_spec//@cniBandwidthPlugin/}
fi

token=$(</var/run/secrets/kubernetes.io/serviceaccount/token)
host=${KUBERNETES_SERVICE_HOST}
# If host contains a colon (:), it is an IPv6 address, hence needs wrapping
# with [..].
if [[ "${host}" =~ : ]]; then
host="[$host]"
fi
node_url="https://$host:${KUBERNETES_SERVICE_PORT}/api/v1/nodes/${HOSTNAME}"
response=$(curl -k -s -H "Authorization: Bearer $token" "$node_url")
# Watch URL for this node's object (named by ${HOSTNAME}) on the API server.
# ${host} was bracketed above if it is an IPv6 literal.
node_url="https://$host:${KUBERNETES_SERVICE_PORT}/api/v1/watch/nodes/${HOSTNAME}"
# Try the watch up to three times; stop at the first attempt that yields a
# node object with a podCIDR.
for attempt in {1..3}; do
log "Watching attempt #${attempt} at ${node_url}"
# Pipeline, innermost first:
#   curl -k -s -N : skip TLS verification, silent, unbuffered output; the
#                   service-account token is re-read from disk on each attempt.
#   jq            : unwrap each event's .object and keep it only when
#                   .spec.podCIDR is non-null; -c prints one object per line,
#                   --unbuffered flushes after each one.
#   grep -m1 .    : take the first non-empty line and stop.
# NOTE(review): grep reads from a process substitution rather than a plain
# pipe, presumably so the command substitution returns as soon as grep exits
# instead of waiting for the watch stream to end; confirm.
response=$(grep -m1 . <(curl -k -s -N -H "Authorization: Bearer $(</var/run/secrets/kubernetes.io/serviceaccount/token)" "$node_url" | jq --unbuffered -c '.object | select(.spec.podCIDR != null)'))
# A non-empty response means a matching node object was captured.
[[ -n "${response}" ]] && break
done
# All attempts came back empty: abort via fatal (exit 1).
if [[ -z "${response}" ]]; then
fatal "Could not successfully watch node and wait for podCIDR."
fi
log "Node object fetched from $node_url"
log "${response}"

if [ "${MIGRATE_TO_DPV2:-}" == "true" ]; then
DPV2_MIGRATION_READY=$(jq '.metadata.labels."cloud.google.com/gke-dpv2-migration-ready"' <<<"$response")
Expand All @@ -134,7 +147,8 @@ if [[ "${ENABLE_CILIUM_PLUGIN}" == "true" ]]; then
if [[ -n "${CILIUM_FAST_START_NAMESPACES:-}" ]]; then
cilium_cni_config=$(jq --arg namespaces "${CILIUM_FAST_START_NAMESPACES:-}" '.["dpv2-fast-start-namespaces"] = $namespaces' <<<"${cilium_cni_config}")
fi
log "Adding Cilium plug-in to the CNI config: ${cilium_cni_config//$'\n'/ }"
log "Adding Cilium plug-in to the CNI config:"
log "${cilium_cni_config//$'\n'/ }"
cni_spec=${cni_spec//@cniCiliumPlugin/, ${cilium_cni_config}}
else
log "Not using Cilium plug-in."
Expand Down Expand Up @@ -218,8 +232,7 @@ function fillSubnetsInCniSpecV2Template {
SUBNETS_REPLACEMENT+=("$(jq -nc --arg subnet "${subnet}" '[{"subnet": $subnet}]')")
ROUTES_REPLACEMENT+=('{"dst": "::/0"}')
else
log "[ERROR] Subnet detected in .spec.podCIDRs '${subnet}' is not a valid IP range"
exit 1
fatal "Subnet detected in .spec.podCIDRs '${subnet}' is not a valid IP range"
fi
done

Expand Down Expand Up @@ -261,13 +274,9 @@ function fillSubnetsInCniSpecLegacyTemplate {
log "PodCIDR IPv4 detected: '${primary_subnet:-}'"
cni_spec=${cni_spec//@ipv4Subnet/[{\"subnet\": \"${primary_subnet:-}\"\}]}
elif is_ipv6_range "${primary_subnet:-}" ; then
log "Primary IPv6 pod range detected '${primary_subnet:-}'. It will only work with new spec template."
exit 1
fatal "Primary IPv6 pod range detected '${primary_subnet:-}'. It will only work with new spec template."
else
log "Response from $node_url"
log "$node"
log "Failed to fetch PodCIDR from K8s API server, primary_subnet=${primary_subnet:-}. Exiting (1)..."
exit 1
fatal "Failed to fetch PodCIDR from K8s API server, primary_subnet=${primary_subnet:-}."
fi

if [ -n "${node_ipv6_addr:-}" ] && [ "${node_ipv6_addr}" != "null" ]; then
Expand Down Expand Up @@ -433,7 +442,8 @@ cilium_wait_or_ignore() {
}

# Log the CNI spec that is about to be written (newlines collapsed to spaces
# for logging), pass ${output_file} and ${cni_spec} to write_file, then call
# success.
# Globals: output_file (read), cni_spec (read).
# NOTE(review): this rendering shows both a single-line and a two-line form of
# the "Creating CNI spec" log; presumably these are the removed and added
# sides of the diff. Confirm against the actual file.
write_and_success() {
log "Creating CNI spec at '${output_file}' with content: ${cni_spec//$'\n'/ }"
log "Creating CNI spec at '${output_file}' with content:"
log "${cni_spec//$'\n'/ }"
write_file "${output_file}" "${cni_spec}"
success
}
Expand All @@ -455,7 +465,8 @@ cilium_watchdog_failure_retry=${CILIUM_WATCHDOG_FAILURE_RETRY:-60}
cilium_watchdog_fast_start_wait=${CILIUM_WATCHDOG_FAST_START_WAIT:-60}

if [[ -n "${CILIUM_FAST_START_NAMESPACES:-}" ]]; then
log "Cilium has fast-start; writing CNI config upfront then wait for ${cilium_watchdog_fast_start_wait}s and start to check Cilium health."
log "Cilium has fast-start; writing CNI config upfront then wait for ${cilium_watchdog_fast_start_wait}s and start to check Cilium health. Content:"
log "${cni_spec//$'\n'/ }"
write_file "${output_file}" "${cni_spec}"
sleep "${cilium_watchdog_fast_start_wait}"s
fi
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-basic-v2-ipv4.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://10.0.0.1:443/api/v1/nodes/*)
echo '{
*https://10.0.0.1:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -38,7 +38,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
#unsupported
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-basic-v2-ipv6.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*"https://[fd20::1]:443/api/v1/nodes/"*)
echo '{
*"https://[fd20::1]:443/api/v1/watch/nodes/"*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -38,7 +38,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
#unsupported
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-basic-v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -38,7 +38,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
#unsupported
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-basic.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -36,7 +36,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
#unsupported
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-calico-v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -43,7 +43,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
#unsupported
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-calico.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -41,7 +41,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
#unsupported
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-cilium-faststart-v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -40,7 +40,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*http://localhost:63197/*)
echo 'healthz'
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-cilium-faststart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -38,7 +38,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*http://localhost:63197/*)
echo 'healthz'
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-cilium-v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -40,7 +40,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*http://localhost:63197/*)
echo 'healthz'
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-cilium.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ function before_test() {
*http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0*)
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
echo '{
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -38,7 +38,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*http://localhost:63197/*)
echo 'healthz'
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-directpath-v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ function before_test() {
# call to GCE metadata server
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
# call to kube-apiserver
echo '{
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -42,7 +42,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
# unmatched call
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-directpath.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ function before_test() {
# call to GCE metadata server
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
# call to kube-apiserver
echo '{
echo '{"object":{
"metadata": {
"labels": {
},
Expand All @@ -40,7 +40,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
# unmatched call
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-dualstack-v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ function before_test() {
# call to GCE metadata server
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
# call to kube-apiserver
echo '{
echo '{"object":{
"metadata": {
"labels": {
"cloud.google.com/gke-stack-type": "IPV4_IPV6"
Expand All @@ -42,7 +42,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
# unmatched call
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-dualstack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ function before_test() {
# call to GCE metadata server
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
# call to kube-apiserver
echo '{
echo '{"object":{
"metadata": {
"labels": {
"cloud.google.com/gke-stack-type": "IPV4_IPV6"
Expand All @@ -40,7 +40,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
# unmatched call
Expand Down
6 changes: 3 additions & 3 deletions scripts/testcase/testcase-ipv6-v2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ function before_test() {
# call to GCE metadata server
echo '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
;;
*https://kubernetes.default.svc:443/api/v1/nodes/*)
*https://kubernetes.default.svc:443/api/v1/watch/nodes/*)
# call to kube-apiserver
echo '{
echo '{"object":{
"metadata": {
"labels": {
"cloud.google.com/gke-stack-type": "IPV6"
Expand All @@ -41,7 +41,7 @@ function before_test() {
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}'
}}'
;;
*)
# unmatched call
Expand Down
Loading

0 comments on commit 4608b25

Please sign in to comment.