New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve kubeadm reset #37831
Improve kubeadm reset #37831
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
/* | ||
Copyright 2014 The Kubernetes Authors. | ||
Copyright 2016 The Kubernetes Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
|
@@ -21,145 +21,207 @@ import ( | |
"io" | ||
"os" | ||
"os/exec" | ||
"path/filepath" | ||
"path" | ||
"strings" | ||
|
||
"github.com/spf13/cobra" | ||
|
||
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm" | ||
"k8s.io/kubernetes/cmd/kubeadm/app/preflight" | ||
kubeadmutil "k8s.io/kubernetes/cmd/kubeadm/app/util" | ||
"k8s.io/kubernetes/pkg/util/initsystem" | ||
) | ||
|
||
// NewCmdReset returns "kubeadm reset" command. | ||
// NewCmdReset returns the "kubeadm reset" command | ||
func NewCmdReset(out io.Writer) *cobra.Command { | ||
var skipPreFlight bool | ||
var skipPreFlight, removeNode bool | ||
cmd := &cobra.Command{ | ||
Use: "reset", | ||
Short: "Run this to revert any changes made to this host by 'kubeadm init' or 'kubeadm join'.", | ||
Run: func(cmd *cobra.Command, args []string) { | ||
r, err := NewReset(skipPreFlight) | ||
r, err := NewReset(skipPreFlight, removeNode) | ||
kubeadmutil.CheckErr(err) | ||
kubeadmutil.CheckErr(r.Run(out)) | ||
}, | ||
} | ||
|
||
cmd.PersistentFlags().BoolVar( | ||
&skipPreFlight, "skip-preflight-checks", false, | ||
"skip preflight checks normally run before modifying the system", | ||
"Skip preflight checks normally run before modifying the system", | ||
) | ||
|
||
cmd.PersistentFlags().BoolVar( | ||
&removeNode, "remove-node", true, | ||
"Remove this node from the pool of nodes in this cluster", | ||
) | ||
|
||
return cmd | ||
} | ||
|
||
type Reset struct{} | ||
type Reset struct { | ||
removeNode bool | ||
} | ||
|
||
func NewReset(skipPreFlight bool) (*Reset, error) { | ||
func NewReset(skipPreFlight, removeNode bool) (*Reset, error) { | ||
if !skipPreFlight { | ||
fmt.Println("Running pre-flight checks") | ||
err := preflight.RunResetCheck() | ||
if err != nil { | ||
fmt.Println("[preflight] Running pre-flight checks...") | ||
|
||
if err := preflight.RunResetCheck(); err != nil { | ||
return nil, &preflight.PreFlightError{Msg: err.Error()} | ||
} | ||
} else { | ||
fmt.Println("Skipping pre-flight checks") | ||
fmt.Println("[preflight] Skipping pre-flight checks...") | ||
} | ||
|
||
return &Reset{}, nil | ||
return &Reset{ | ||
removeNode: removeNode, | ||
}, nil | ||
} | ||
|
||
// cleanDir removes everything in a directory, but not the directory itself: | ||
func cleanDir(path string) { | ||
// If the directory doesn't even exist there's nothing to do, and we do | ||
// not consider this an error: | ||
if _, err := os.Stat(path); os.IsNotExist(err) { | ||
return | ||
} | ||
// Run reverts any changes made to this host by "kubeadm init" or "kubeadm join". | ||
func (r *Reset) Run(out io.Writer) error { | ||
|
||
d, err := os.Open(path) | ||
// Drain and maybe remove the node from the cluster | ||
err := drainAndRemoveNode(r.removeNode) | ||
if err != nil { | ||
fmt.Printf("failed to remove directory: [%v]\n", err) | ||
} | ||
defer d.Close() | ||
names, err := d.Readdirnames(-1) | ||
if err != nil { | ||
fmt.Printf("failed to remove directory: [%v]\n", err) | ||
} | ||
for _, name := range names { | ||
err = os.RemoveAll(filepath.Join(path, name)) | ||
if err != nil { | ||
fmt.Printf("failed to remove directory: [%v]\n", err) | ||
} | ||
} | ||
} | ||
|
||
// resetConfigDir is used to cleanup the files kubeadm writes in /etc/kubernetes/. | ||
func resetConfigDir(configDirPath string) { | ||
dirsToClean := []string{ | ||
filepath.Join(configDirPath, "manifests"), | ||
filepath.Join(configDirPath, "pki"), | ||
fmt.Printf("[reset] Failed to cleanup node: [%v]\n", err) | ||
} | ||
fmt.Printf("Deleting contents of config directories: %v\n", dirsToClean) | ||
for _, dir := range dirsToClean { | ||
cleanDir(dir) | ||
} | ||
|
||
filesToClean := []string{ | ||
filepath.Join(configDirPath, "admin.conf"), | ||
filepath.Join(configDirPath, "kubelet.conf"), | ||
} | ||
fmt.Printf("Deleting files: %v\n", filesToClean) | ||
for _, path := range filesToClean { | ||
err := os.RemoveAll(path) | ||
if err != nil { | ||
fmt.Printf("failed to remove file: [%v]\n", err) | ||
} | ||
} | ||
} | ||
|
||
// Run reverts any changes made to this host by "kubeadm init" or "kubeadm join". | ||
func (r *Reset) Run(out io.Writer) error { | ||
serviceToStop := "kubelet" | ||
initSystem, err := initsystem.GetInitSystem() | ||
if err != nil { | ||
fmt.Printf("%v", err) | ||
fmt.Printf("[reset] Failed to detect init system and stop the kubelet service: %v\n", err) | ||
} else { | ||
fmt.Printf("Stopping the %s service...\n", serviceToStop) | ||
fmt.Printf("[reset] Stopping the %s service...\n", serviceToStop) | ||
if err := initSystem.ServiceStop(serviceToStop); err != nil { | ||
fmt.Printf("failed to stop the %s service", serviceToStop) | ||
fmt.Printf("[reset] Failed to stop the %s service\n", serviceToStop) | ||
} | ||
} | ||
|
||
fmt.Printf("Unmounting directories in /var/lib/kubelet...\n") | ||
fmt.Println("[reset] Unmounting directories in /var/lib/kubelet...") | ||
umountDirsCmd := "cat /proc/mounts | awk '{print $2}' | grep '/var/lib/kubelet' | xargs -r umount" | ||
umountOutputBytes, err := exec.Command("sh", "-c", umountDirsCmd).Output() | ||
if err != nil { | ||
fmt.Printf("failed to unmount directories in /var/lib/kubelet, %s", string(umountOutputBytes)) | ||
fmt.Printf("[reset] Failed to unmount directories in /var/lib/kubelet: %s\n", string(umountOutputBytes)) | ||
} | ||
|
||
dirsToClean := []string{"/var/lib/kubelet"} | ||
// Remove contents from the config and pki directories | ||
resetConfigDir(kubeadmapi.GlobalEnvParams.KubernetesDir, kubeadmapi.GlobalEnvParams.HostPKIPath) | ||
|
||
dirsToClean := []string{"/var/lib/kubelet", "/etc/cni/net.d"} | ||
|
||
// Only clear etcd data when the etcd manifest is found. In case it is not found, we must assume that the user | ||
// provided external etcd endpoints. In that case, it is his own responsibility to reset etcd | ||
if _, err := os.Stat("/etc/kubernetes/manifests/etcd.json"); os.IsNotExist(err) { | ||
dirsToClean = append(dirsToClean, "/var/lib/etcd") | ||
} else { | ||
fmt.Printf("[reset] No etcd manifest found in %q, assuming external etcd.\n", "/etc/kubernetes/manifests/etcd.json") | ||
} | ||
|
||
resetConfigDir("/etc/kubernetes/") | ||
|
||
fmt.Printf("Deleting contents of stateful directories: %v\n", dirsToClean) | ||
fmt.Printf("[reset] Deleting contents of stateful directories: %v\n", dirsToClean) | ||
for _, dir := range dirsToClean { | ||
cleanDir(dir) | ||
} | ||
|
||
dockerCheck := preflight.ServiceCheck{Service: "docker"} | ||
if warnings, errors := dockerCheck.Check(); len(warnings) == 0 && len(errors) == 0 { | ||
fmt.Println("Stopping all running docker containers...") | ||
if err := exec.Command("sh", "-c", "docker ps | grep 'k8s_' | awk '{print $1}' | xargs docker rm --force --volumes").Run(); err != nil { | ||
fmt.Println("failed to stop the running containers") | ||
fmt.Println("[reset] Stopping all running docker containers...") | ||
if err := exec.Command("sh", "-c", "docker ps | grep 'k8s_' | awk '{print $1}' | xargs -r docker rm --force --volumes").Run(); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what would happen if non-docker container runtime used ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't support other runtimes than docker for the moment, so that's not a problem for now. |
||
fmt.Println("[reset] Failed to stop the running containers") | ||
} | ||
} else { | ||
fmt.Println("docker doesn't seem to be running, skipping the removal of kubernetes containers") | ||
fmt.Println("[reset] docker doesn't seem to be running, skipping the removal of running kubernetes containers") | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func drainAndRemoveNode(removeNode bool) error { | ||
|
||
hostname, err := os.Hostname() | ||
if err != nil { | ||
return fmt.Errorf("failed to detect node hostname") | ||
} | ||
hostname = strings.ToLower(hostname) | ||
|
||
// TODO: Use the "native" k8s client for this once we're confident the versioned is working | ||
kubeConfigPath := path.Join(kubeadmapi.GlobalEnvParams.KubernetesDir, "kubelet.conf") | ||
|
||
getNodesCmd := fmt.Sprintf("kubectl --kubeconfig %s get nodes | grep %s", kubeConfigPath, hostname) | ||
output, err := exec.Command("sh", "-c", getNodesCmd).Output() | ||
if err != nil || len(output) == 0 { | ||
// kubeadm shouldn't drain and/or remove the node when it doesn't exist anymore | ||
return nil | ||
} | ||
|
||
fmt.Printf("[reset] Draining node: %q\n", hostname) | ||
|
||
output, err = exec.Command("kubectl", "--kubeconfig", kubeConfigPath, "drain", hostname, "--delete-local-data", "--force", "--ignore-daemonsets").Output() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we need to add HttpProxyCheck() in RunResetCheck(). but we need somehow get API URL which written in kubelet.conf. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would these be covered sufficiently by the err handling? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason I don't want to add those checks is that the drain/remove functions aren't critical in any way, if it doesn't work, it just won't do the drain and remove things. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. checks can be done optional based on state of "remove-node". if user specifies flag, he/she assumes that it would be done gracefully. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still, it will try to drain regardless of The TODO I added was to switch over to the go native client, but we're gonna do that in time for v1.6 when we have more time and are confident that the versioned go client ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment applicable to both invocations of kubectl, before client-go used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This situation is very different with reset however, we're expecting that the system could easily have failed to cluster up, so the drain operation may well fail and we intend to keep on going. Provided the error messages resulting from attempting to drain are logical, it's probably ok to let the error for whatever happens bubble up, as we're going to proceed either way. That said, @luxas what happens on pure nodes here? Is kubectl configured there? Does draining only work on masters? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It works on nodes as well, otherwise I wouldn't have added it here... |
||
if err != nil { | ||
return fmt.Errorf("failed to drain node %q [%s]", hostname, output) | ||
} | ||
|
||
if removeNode { | ||
fmt.Printf("[reset] Removing node: %q\n", hostname) | ||
|
||
output, err = exec.Command("kubectl", "--kubeconfig", kubeConfigPath, "delete", "node", hostname).Output() | ||
if err != nil { | ||
return fmt.Errorf("failed to remove node %q [%s]", hostname, output) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// cleanDir removes everything in a directory, but not the directory itself | ||
func cleanDir(filepath string) error { | ||
// If the directory doesn't even exist there's nothing to do, and we do | ||
// not consider this an error | ||
if _, err := os.Stat(filepath); os.IsNotExist(err) { | ||
return nil | ||
} | ||
|
||
d, err := os.Open(filepath) | ||
if err != nil { | ||
return err | ||
} | ||
defer d.Close() | ||
names, err := d.Readdirnames(-1) | ||
if err != nil { | ||
return err | ||
} | ||
for _, name := range names { | ||
err = os.RemoveAll(path.Join(filepath, name)) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// resetConfigDir is used to cleanup the files kubeadm writes in /etc/kubernetes/. | ||
func resetConfigDir(configPathDir, pkiPathDir string) { | ||
dirsToClean := []string{ | ||
path.Join(configPathDir, "manifests"), | ||
pkiPathDir, | ||
} | ||
fmt.Printf("[reset] Deleting contents of config directories: %v\n", dirsToClean) | ||
for _, dir := range dirsToClean { | ||
err := cleanDir(dir) | ||
if err != nil { | ||
fmt.Printf("[reset] Failed to remove directory: %q [%v]\n", dir, err) | ||
} | ||
} | ||
|
||
filesToClean := []string{ | ||
path.Join(configPathDir, "admin.conf"), | ||
path.Join(configPathDir, "kubelet.conf"), | ||
} | ||
fmt.Printf("[reset] Deleting files: %v\n", filesToClean) | ||
for _, path := range filesToClean { | ||
err := os.RemoveAll(path) | ||
if err != nil { | ||
fmt.Printf("[reset] Failed to remove file: %q [%v]\n", path, err) | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe instead of embedding "shell" script here, write small function to call umount individually and report error on particular path ?