From 86f861440b8f1bdba67b0566a29a82a3cf9ae24b Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Tue, 10 May 2022 15:31:30 -0700 Subject: [PATCH 1/6] add cleanup network script --- cni/scripts/cleanupnetwork.ps1 | 76 +++++++++++++++++++++++++++ hack/toolbox/manifests/webserver.yaml | 5 +- 2 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 cni/scripts/cleanupnetwork.ps1 diff --git a/cni/scripts/cleanupnetwork.ps1 b/cni/scripts/cleanupnetwork.ps1 new file mode 100644 index 0000000000..b40ce5e489 --- /dev/null +++ b/cni/scripts/cleanupnetwork.ps1 @@ -0,0 +1,76 @@ +$Global:ClusterConfiguration = ConvertFrom-Json ((Get-Content "c:\k\kubeclusterconfig.json" -ErrorAction Stop) | out-string) + +$global:NetworkMode = "L2Bridge" +$global:ContainerRuntime = $Global:ClusterConfiguration.Cri.Name +$global:NetworkPlugin = $Global:ClusterConfiguration.Cni.Name +$global:HNSModule = "c:\k\hns.psm1" + +ipmo $global:HNSModule + +$networkname = $global:NetworkMode.ToLower() +if ($global:NetworkPlugin -eq "azure") { + $networkname = "azure" +} + +$hnsNetwork = Get-HnsNetwork | ? 
Name -EQ $networkname +if ($hnsNetwork) { + # Cleanup all containers + Write-Host "Cleaning up containers" + if ($global:ContainerRuntime -eq "containerd") { + ctr.exe -n k8s.io c ls -q | ForEach-Object { ctr -n k8s.io tasks kill $_ } + ctr.exe -n k8s.io c ls -q | ForEach-Object { ctr -n k8s.io c rm $_ } + } + else { + docker.exe ps -q | ForEach-Object { docker rm $_ -f } + } + + Write-Host "Cleaning up persisted HNS policy lists" + # Initially a workaround for https://github.com/kubernetes/kubernetes/pull/68923 in < 1.14, + # and https://github.com/kubernetes/kubernetes/pull/78612 for <= 1.15 + # + # October patch 10.0.17763.1554 introduced a breaking change + # which requires the hns policy list to be removed before network if it gets into a bad state + # See https://github.com/Azure/aks-engine/pull/3956#issuecomment-720797433 for more info + # Kubeproxy doesn't fail becuase errors are not handled: + # https://github.com/delulu/kubernetes/blob/524de768bb64b7adff76792ca3bf0f0ece1e849f/pkg/proxy/winkernel/proxier.go#L532 + Get-HnsPolicyList | Remove-HnsPolicyList + + Write-Host "Cleaning up old HNS network found" + Remove-HnsNetwork $hnsNetwork + Start-Sleep 10 +} + + +if ($global:NetworkPlugin -eq "azure") { + Write-Host "NetworkPlugin azure, starting kubelet." + + Write-Host "Cleaning stale CNI data" + # Kill all cni instances & stale data left by cni + # Cleanup all files related to cni + taskkill /IM azure-vnet.exe /f + taskkill /IM azure-vnet-ipam.exe /f + + # azure-cni logs currently end up in c:\windows\system32 when machines are configured with containerd. 
+ # https://github.com/containerd/containerd/issues/4928 + $filesToRemove = @( + "c:\k\azure-vnet.json", + "c:\k\azure-vnet.json.lock", + "c:\k\azure-vnet-ipam.json", + "c:\k\azure-vnet-ipam.json.lock" + "c:\k\azure-vnet-ipamv6.json", + "c:\k\azure-vnet-ipamv6.json.lock" + "c:\windows\system32\azure-vnet.json", + "c:\windows\system32\azure-vnet.json.lock", + "c:\windows\system32\azure-vnet-ipam.json", + "c:\windows\system32\azure-vnet-ipam.json.lock" + "c:\windows\system32\azure-vnet-ipamv6.json", + "c:\windows\system32\azure-vnet-ipamv6.json.lock" + ) + + foreach ($file in $filesToRemove) { + if (Test-Path $file) { + Write-Host "Deleting stale file at $file" + Remove-Item $file + } + } +} diff --git a/hack/toolbox/manifests/webserver.yaml b/hack/toolbox/manifests/webserver.yaml index 8c52d0e8f9..87bfa9ceda 100644 --- a/hack/toolbox/manifests/webserver.yaml +++ b/hack/toolbox/manifests/webserver.yaml @@ -4,7 +4,6 @@ kind: Service metadata: name: toolbox spec: - ipFamily: IPv6 type: LoadBalancer selector: app: toolbox @@ -19,7 +18,6 @@ kind: Service metadata: name: matmerr-http-v4 spec: - ipFamily: IPv4 type: LoadBalancer selector: app: matmerr-http-v4 @@ -45,11 +43,10 @@ spec: app: toolbox spec: nodeSelector: - beta.kubernetes.io/os: linux kubernetes.io/role: agent containers: - name: toolbox - image: matmerr/toolbox:v6.2 + image: acnpublic.azurecr.io/toolbox:latest env: - name: TCP_PORT value: "8085" From ab4b26018ecda361e5203548e358c1783d60c177 Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Mon, 16 May 2022 10:11:57 -0700 Subject: [PATCH 2/6] generalize --- cni/scripts/cleanupnetwork.ps1 | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/cni/scripts/cleanupnetwork.ps1 b/cni/scripts/cleanupnetwork.ps1 index b40ce5e489..24f41ebdfb 100644 --- a/cni/scripts/cleanupnetwork.ps1 +++ b/cni/scripts/cleanupnetwork.ps1 @@ -1,9 +1,12 @@ -$Global:ClusterConfiguration = ConvertFrom-Json ((Get-Content 
"c:\k\kubeclusterconfig.json" -ErrorAction Stop) | out-string) + # ./cleanupnetwork.ps1 -CniName azure + param ( + [Parameter(Mandatory=$true)][string]$CniName + ) + +Invoke-WebRequest -Uri https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1 -OutFile "c:\hns.psm1" -UseBasicParsing $global:NetworkMode = "L2Bridge" -$global:ContainerRuntime = $Global:ClusterConfiguration.Cri.Name -$global:NetworkPlugin = $Global:ClusterConfiguration.Cni.Name -$global:HNSModule = "c:\k\hns.psm1" +$global:HNSModule = "c:\hns.psm1" ipmo $global:HNSModule @@ -13,17 +16,7 @@ if ($global:NetworkPlugin -eq "azure") { } $hnsNetwork = Get-HnsNetwork | ? Name -EQ $networkname -if ($hnsNetwork) { - # Cleanup all containers - Write-Host "Cleaning up containers" - if ($global:ContainerRuntime -eq "containerd") { - ctr.exe -n k8s.io c ls -q | ForEach-Object { ctr -n k8s.io tasks kill $_ } - ctr.exe -n k8s.io c ls -q | ForEach-Object { ctr -n k8s.io c rm $_ } - } - else { - docker.exe ps -q | ForEach-Object { docker rm $_ -f } - } - +if ($hnsNetwork) { Write-Host "Cleaning up persisted HNS policy lists" # Initially a workaround for https://github.com/kubernetes/kubernetes/pull/68923 in < 1.14, # and https://github.com/kubernetes/kubernetes/pull/78612 for <= 1.15 @@ -38,6 +31,8 @@ if ($hnsNetwork) { Write-Host "Cleaning up old HNS network found" Remove-HnsNetwork $hnsNetwork Start-Sleep 10 +} else { + Write-Host "no hns network found with name" $networkname } @@ -73,4 +68,6 @@ if ($global:NetworkPlugin -eq "azure") { Remove-Item $file } } +} else { + Write-Host "network plugin name not recognized, default is \azure" $networkname } From 69fc421d61f4295546eafc0deb1f05717e17f37b Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Mon, 16 May 2022 13:03:36 -0700 Subject: [PATCH 3/6] generic cwd --- cni/scripts/cleanupnetwork.ps1 | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/cni/scripts/cleanupnetwork.ps1 
b/cni/scripts/cleanupnetwork.ps1 index 24f41ebdfb..f8a7cc880c 100644 --- a/cni/scripts/cleanupnetwork.ps1 +++ b/cni/scripts/cleanupnetwork.ps1 @@ -1,6 +1,7 @@ - # ./cleanupnetwork.ps1 -CniName azure + # ./cleanupnetwork.ps1 -CniDirectory c:\k -NetworkName azure param ( - [Parameter(Mandatory=$true)][string]$CniName + [string]$CniDirectory = "c:\k", + [Parameter(Mandatory=$true)][string]$NetworkName ) Invoke-WebRequest -Uri https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1 -OutFile "c:\hns.psm1" -UseBasicParsing @@ -48,18 +49,12 @@ if ($global:NetworkPlugin -eq "azure") { # azure-cni logs currently end up in c:\windows\system32 when machines are configured with containerd. # https://github.com/containerd/containerd/issues/4928 $filesToRemove = @( - "c:\k\azure-vnet.json", - "c:\k\azure-vnet.json.lock", - "c:\k\azure-vnet-ipam.json", - "c:\k\azure-vnet-ipam.json.lock" - "c:\k\azure-vnet-ipamv6.json", - "c:\k\azure-vnet-ipamv6.json.lock" - "c:\windows\system32\azure-vnet.json", - "c:\windows\system32\azure-vnet.json.lock", - "c:\windows\system32\azure-vnet-ipam.json", - "c:\windows\system32\azure-vnet-ipam.json.lock" - "c:\windows\system32\azure-vnet-ipamv6.json", - "c:\windows\system32\azure-vnet-ipamv6.json.lock" + $CniDirectory+"\azure-vnet.json", + $CniDirectory+"\azure-vnet.json.lock", + $CniDirectory+"\azure-vnet-ipam.json", + $CniDirectory+"\azure-vnet-ipam.json.lock" + $CniDirectory+"\azure-vnet-ipamv6.json", + $CniDirectory+"\azure-vnet-ipamv6.json.lock" ) foreach ($file in $filesToRemove) { From 4f944c805f8deb99d644f3a091e813b1e9ff00ef Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Mon, 16 May 2022 16:23:42 -0700 Subject: [PATCH 4/6] loop through with prefix --- cni/scripts/cleanupnetwork.ps1 | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/cni/scripts/cleanupnetwork.ps1 b/cni/scripts/cleanupnetwork.ps1 index f8a7cc880c..c564e4d558 100644 --- a/cni/scripts/cleanupnetwork.ps1 
+++ b/cni/scripts/cleanupnetwork.ps1 @@ -16,27 +16,16 @@ if ($global:NetworkPlugin -eq "azure") { $networkname = "azure" } -$hnsNetwork = Get-HnsNetwork | ? Name -EQ $networkname -if ($hnsNetwork) { - Write-Host "Cleaning up persisted HNS policy lists" - # Initially a workaround for https://github.com/kubernetes/kubernetes/pull/68923 in < 1.14, - # and https://github.com/kubernetes/kubernetes/pull/78612 for <= 1.15 - # - # October patch 10.0.17763.1554 introduced a breaking change - # which requires the hns policy list to be removed before network if it gets into a bad state - # See https://github.com/Azure/aks-engine/pull/3956#issuecomment-720797433 for more info - # Kubeproxy doesn't fail becuase errors are not handled: - # https://github.com/delulu/kubernetes/blob/524de768bb64b7adff76792ca3bf0f0ece1e849f/pkg/proxy/winkernel/proxier.go#L532 - Get-HnsPolicyList | Remove-HnsPolicyList - Write-Host "Cleaning up old HNS network found" - Remove-HnsNetwork $hnsNetwork - Start-Sleep 10 -} else { - Write-Host "no hns network found with name" $networkname +foreach($net in Get-HnsNetwork) { + Get-HnsPolicyList | Remove-HnsPolicyList + if ($net.Name.StartsWith("azure")) { + Write-Host "Cleaning up old HNS network:" $net.Name + Remove-HnsNetwork $net + Start-Sleep 10 + } } - if ($global:NetworkPlugin -eq "azure") { Write-Host "NetworkPlugin azure, starting kubelet." 
From 42454fc1ab428bc2a951b0f5ffdd4a19c09ec53b Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Thu, 19 May 2022 10:04:09 -0700 Subject: [PATCH 5/6] simplify checks --- cni/scripts/cleanupnetwork.ps1 | 56 +++++++++++++--------------------- 1 file changed, 21 insertions(+), 35 deletions(-) diff --git a/cni/scripts/cleanupnetwork.ps1 b/cni/scripts/cleanupnetwork.ps1 index c564e4d558..4abe4840f8 100644 --- a/cni/scripts/cleanupnetwork.ps1 +++ b/cni/scripts/cleanupnetwork.ps1 @@ -6,17 +6,9 @@ Invoke-WebRequest -Uri https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1 -OutFile "c:\hns.psm1" -UseBasicParsing -$global:NetworkMode = "L2Bridge" $global:HNSModule = "c:\hns.psm1" - ipmo $global:HNSModule -$networkname = $global:NetworkMode.ToLower() -if ($global:NetworkPlugin -eq "azure") { - $networkname = "azure" -} - - foreach($net in Get-HnsNetwork) { Get-HnsPolicyList | Remove-HnsPolicyList if ($net.Name.StartsWith("azure")) { @@ -26,32 +18,26 @@ foreach($net in Get-HnsNetwork) { } } -if ($global:NetworkPlugin -eq "azure") { - Write-Host "NetworkPlugin azure, starting kubelet." - - Write-Host "Cleaning stale CNI data" - # Kill all cni instances & stale data left by cni - # Cleanup all files related to cni - taskkill /IM azure-vnet.exe /f - taskkill /IM azure-vnet-ipam.exe /f - - # azure-cni logs currently end up in c:\windows\system32 when machines are configured with containerd. 
- # https://github.com/containerd/containerd/issues/4928 - $filesToRemove = @( - $CniDirectory+"\azure-vnet.json", - $CniDirectory+"\azure-vnet.json.lock", - $CniDirectory+"\azure-vnet-ipam.json", - $CniDirectory+"\azure-vnet-ipam.json.lock" - $CniDirectory+"\azure-vnet-ipamv6.json", - $CniDirectory+"\azure-vnet-ipamv6.json.lock" - ) - - foreach ($file in $filesToRemove) { - if (Test-Path $file) { - Write-Host "Deleting stale file at $file" - Remove-Item $file - } +Write-Host "Cleaning stale CNI data" +# Kill all cni instances & stale data left by cni +# Cleanup all files related to cni +taskkill /IM azure-vnet.exe /f +taskkill /IM azure-vnet-ipam.exe /f + +# azure-cni logs currently end up in c:\windows\system32 when machines are configured with containerd. +# https://github.com/containerd/containerd/issues/4928 +$filesToRemove = @( # one interpolated string per path: ',' binds tighter than '+', so $dir+"a", $dir+"b" collapses into a single space-joined string + "$CniDirectory\azure-vnet.json", + "$CniDirectory\azure-vnet.json.lock", + "$CniDirectory\azure-vnet-ipam.json", + "$CniDirectory\azure-vnet-ipam.json.lock", + "$CniDirectory\azure-vnet-ipamv6.json", + "$CniDirectory\azure-vnet-ipamv6.json.lock" +) + +foreach ($file in $filesToRemove) { + if (Test-Path $file) { + Write-Host "Deleting stale file at $file" + Remove-Item $file } -} else { - Write-Host "network plugin name not recognized, default is \azure" $networkname } From 09d82ce4f431e97790a2aadfa5523d26ee1c4805 Mon Sep 17 00:00:00 2001 From: Mathew Merrick Date: Thu, 19 May 2022 10:39:35 -0700 Subject: [PATCH 6/6] remove network option --- cni/scripts/cleanupnetwork.ps1 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cni/scripts/cleanupnetwork.ps1 b/cni/scripts/cleanupnetwork.ps1 index 4abe4840f8..6d290ca9de 100644 --- a/cni/scripts/cleanupnetwork.ps1 +++ b/cni/scripts/cleanupnetwork.ps1 @@ -1,7 +1,6 @@ # ./cleanupnetwork.ps1 -CniDirectory c:\k -NetworkName azure param ( - [string]$CniDirectory = "c:\k", - [Parameter(Mandatory=$true)][string]$NetworkName + [string]$CniDirectory = "c:\windows\system32" ) 
Invoke-WebRequest -Uri https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1 -OutFile "c:\hns.psm1" -UseBasicParsing