From 31c1029254e6ebacf27a95610d2b249be0a8829b Mon Sep 17 00:00:00 2001 From: jadarsie Date: Fri, 29 Jan 2021 14:42:33 -0800 Subject: [PATCH 1/2] feat: reworked rotate-certs command --- cmd/deploy_test.go | 12 + cmd/root.go | 22 + cmd/root_test.go | 28 + cmd/rotate_certs.go | 940 +++++++++++------- cmd/rotate_certs_test.go | 505 +++------- cmd/rotatecerts/arm.go | 100 ++ cmd/rotatecerts/internal/interfaces.go | 49 + .../internal/mock_internal/client_mock.go | 282 ++++++ cmd/rotatecerts/internal/mock_internal/doc.go | 7 + cmd/rotatecerts/operations.go | 164 +++ cmd/rotatecerts/operations_test.go | 477 +++++++++ cmd/rotatecerts/wait.go | 322 ++++++ cmd/rotatecerts/wait_test.go | 416 ++++++++ docs/topics/rotate-certs.md | 137 +++ parts/k8s/rotate-certs.ps1 | 86 ++ parts/k8s/rotate-certs.sh | 58 ++ pkg/api/apiloader.go | 21 + pkg/api/apiloader_test.go | 23 + pkg/api/types.go | 13 + pkg/api/types_test.go | 32 + pkg/armhelpers/azurestack/compute.go | 21 + pkg/armhelpers/compute.go | 29 + pkg/armhelpers/interfaces.go | 6 + pkg/armhelpers/mockclients.go | 10 + .../certificate-profile/kubernetes.json | 20 + pkg/engine/templates_generated.go | 184 ++++ pkg/helpers/ssh/scp.go | 4 +- pkg/helpers/ssh/ssh.go | 24 +- pkg/helpers/ssh/types.go | 9 + pkg/kubernetes/client.go | 106 +- pkg/kubernetes/composite_client.go | 457 +++++++++ pkg/kubernetes/composite_client_test.go | 639 ++++++++++++ pkg/kubernetes/internal/interfaces.go | 38 + .../internal/mock_internal/client_mock.go | 216 ++++ pkg/kubernetes/internal/mock_internal/doc.go | 7 + test/e2e/cluster.sh | 122 ++- 36 files changed, 4817 insertions(+), 769 deletions(-) create mode 100644 cmd/rotatecerts/arm.go create mode 100644 cmd/rotatecerts/internal/interfaces.go create mode 100644 cmd/rotatecerts/internal/mock_internal/client_mock.go create mode 100644 cmd/rotatecerts/internal/mock_internal/doc.go create mode 100644 cmd/rotatecerts/operations.go create mode 100644 cmd/rotatecerts/operations_test.go create mode 100644 cmd/rotatecerts/wait.go create mode 100644 cmd/rotatecerts/wait_test.go create mode 100644 docs/topics/rotate-certs.md create mode 100644 parts/k8s/rotate-certs.ps1 create mode 100644 parts/k8s/rotate-certs.sh create mode 100644 pkg/engine/profiles/certificate-profile/kubernetes.json create mode 100644 pkg/kubernetes/composite_client.go create mode 100644 pkg/kubernetes/composite_client_test.go create mode 100644 pkg/kubernetes/internal/interfaces.go create mode 100644 pkg/kubernetes/internal/mock_internal/client_mock.go create mode 100644 pkg/kubernetes/internal/mock_internal/doc.go diff --git a/cmd/deploy_test.go b/cmd/deploy_test.go index 8e8c3d3050..15d39a6733 100644 --- a/cmd/deploy_test.go +++ b/cmd/deploy_test.go @@ -5,6 +5,7 @@ package cmd import ( "fmt" + "io/ioutil" "os" "path" "strconv" @@ -1161,3 +1162,14 @@ func TestAPIModelWithContainerMonitoringAddonWithWorkspaceGuidAndKeyConfigInCmd( }) } } + +func makeTmpFile(t *testing.T, name string) (string, func()) { + tmpF, err := ioutil.TempFile(os.TempDir(), name) + if err != nil { + t.Fatalf("unable to create file: %s", err.Error()) + } + + return tmpF.Name(), func() { + defer os.Remove(tmpF.Name()) + } +} diff --git a/cmd/root.go b/cmd/root.go index a47c4a540e..fb0cbcf4ab 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -18,7 +18,9 @@ import ( "github.com/Azure/aks-engine/pkg/armhelpers" "github.com/Azure/aks-engine/pkg/armhelpers/azurestack" "github.com/Azure/aks-engine/pkg/engine" + "github.com/Azure/aks-engine/pkg/engine/transform" "github.com/Azure/aks-engine/pkg/helpers" + 
"github.com/Azure/aks-engine/pkg/i18n" "github.com/Azure/aks-engine/pkg/kubernetes" "github.com/Azure/go-autorest/autorest/azure" "github.com/google/uuid" @@ -357,3 +359,23 @@ func getKubeClient(cs *api.ContainerService, interval, timeout time.Duration) (k } return client, nil } + +func writeArtifacts(outputDirectory string, cs *api.ContainerService, apiVersion string, translator *i18n.Translator) error { + ctx := engine.Context{Translator: translator} + tplgen, err := engine.InitializeTemplateGenerator(ctx) + if err != nil { + return errors.Wrap(err, "initializing template generator") + } + tpl, params, err := tplgen.GenerateTemplateV2(cs, engine.DefaultGeneratorCode, BuildTag) + if err != nil { + return errors.Wrap(err, "generating template") + } + if tpl, err = transform.PrettyPrintArmTemplate(tpl); err != nil { + return errors.Wrap(err, "pretty-printing template") + } + if params, err = transform.BuildAzureParametersFile(params); err != nil { + return errors.Wrap(err, "pretty-printing template parameters") + } + w := &engine.ArtifactWriter{Translator: translator} + return w.WriteTLSArtifacts(cs, apiVersion, tpl, params, outputDirectory, true, false) +} diff --git a/cmd/root_test.go b/cmd/root_test.go index c61cccb3fc..a727ecca3e 100644 --- a/cmd/root_test.go +++ b/cmd/root_test.go @@ -21,6 +21,7 @@ import ( "github.com/Azure/aks-engine/pkg/armhelpers" "github.com/Azure/aks-engine/pkg/armhelpers/azurestack/testserver" "github.com/Azure/aks-engine/pkg/helpers" + "github.com/Azure/aks-engine/pkg/i18n" . "github.com/onsi/gomega" ) @@ -581,3 +582,30 @@ func prepareCustomCloudProfile() (*api.ContainerService, error) { return cs, nil } + +func TestWriteArtifacts(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + cs := api.CreateMockContainerService("testcluster", "1.16.14", 3, 2, false) + _, err := cs.SetPropertiesDefaults(api.PropertiesDefaultsParams{ + IsScale: false, + IsUpgrade: false, + PkiKeySize: helpers.DefaultPkiKeySize, + }) + g.Expect(err).NotTo(HaveOccurred()) + + outdir, del := makeTmpDir(t) + defer del() + + err = writeArtifacts(outdir, cs, "vlabs", &i18n.Translator{}) + g.Expect(err).NotTo(HaveOccurred()) +} + +func makeTmpDir(t *testing.T) (string, func()) { + tmpDir, err := ioutil.TempDir(os.TempDir(), "_tmp_dir") + if err != nil { + t.Fatalf("unable to create dir: %s", err.Error()) + } + return tmpDir, func() { defer os.RemoveAll(tmpDir) } +} diff --git a/cmd/rotate_certs.go b/cmd/rotate_certs.go index ce88d9c7d1..59caa71c6d 100644 --- a/cmd/rotate_certs.go +++ b/cmd/rotate_certs.go @@ -4,513 +4,787 @@ package cmd import ( - "bytes" - "context" "fmt" "io/ioutil" "os" "path" - "strconv" + "path/filepath" "strings" "time" - "github.com/leonelquinteros/gotext" - "github.com/pkg/errors" - log "github.com/sirupsen/logrus" - "github.com/spf13/cobra" - "golang.org/x/crypto/ssh" - v1 "k8s.io/api/core/v1" - + ops "github.com/Azure/aks-engine/cmd/rotatecerts" "github.com/Azure/aks-engine/pkg/api" "github.com/Azure/aks-engine/pkg/api/common" - "github.com/Azure/aks-engine/pkg/armhelpers" "github.com/Azure/aks-engine/pkg/engine" - "github.com/Azure/aks-engine/pkg/engine/transform" "github.com/Azure/aks-engine/pkg/helpers" + "github.com/Azure/aks-engine/pkg/helpers/ssh" "github.com/Azure/aks-engine/pkg/i18n" "github.com/Azure/aks-engine/pkg/kubernetes" + "github.com/pkg/errors" + log "github.com/sirupsen/logrus" + "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( rotateCertsName = "rotate-certs" - rotateCertsShortDescription = "(experimental) 
Rotate certificates on an existing AKS Engine-created Kubernetes cluster" - rotateCertsLongDescription = "(experimental) Rotate CA, etcd, kubelet, kubeconfig and apiserver certificates in a cluster built with AKS Engine. Rotating certificates can break component connectivity and leave the cluster in an unrecoverable state. Before performing any of these instructions on a live cluster, it is preferrable to backup your cluster state and migrate critical workloads to another cluster." - kubeSystemNamespace = "kube-system" + rotateCertsShortDescription = "Rotate certificates on an existing AKS Engine-created Kubernetes cluster" + rotateCertsLongDescription = "Rotate CA, etcd, kubelet, kubeconfig and apiserver certificates in a cluster built with AKS Engine. Rotating certificates can break component connectivity and leave the cluster in an unrecoverable state. Before performing any of these instructions on a live cluster, it is preferrable to backup your cluster state and migrate critical workloads to another cluster." +) + +const ( + rootUserGroup = "root:root" + etcdUserGroup = "etcd:etcd" + keyPermissions = "600" + crtPermissions = "644" + configPermissions = "600" + + kubeAPIServer = "kube-apiserver" + kubeAddonManager = "kube-addon-manager" + kubeControllerManager = "kube-controller-manager" + kubeScheduler = "kube-scheduler" + + kubeProxyLabels = "component=kube-proxy,k8s-app=kube-proxy,tier=node" + kubeSchedulerLabels = "component=kube-scheduler,tier=control-plane" + + rotateCertsDefaultInterval = 10 * time.Second + rotateCertsDefaultTimeout = 20 * time.Minute + + vmasSSHPort = 22 + vmssSSHPort = 50001 ) +type nodeMap = map[string]*ssh.RemoteHost +type fileMap = map[string]*ssh.RemoteFile + type rotateCertsCmd struct { authProvider // user input - resourceGroupName string - sshFilepath string - masterFQDN string - location string - apiModelPath string - outputDirectory string - - // derived - containerService *api.ContainerService - apiVersion string - locale *gotext.Locale - client armhelpers.AKSEngineClient - masterNodes []v1.Node - agentNodes []v1.Node - sshConfig *ssh.ClientConfig - sshCommandExecuter func(command, masterFQDN, hostname string, port string, config *ssh.ClientConfig) (string, error) + resourceGroupName string + location string + apiModelPath string + newCertsPath string + sshHostURI string + linuxSSHPrivateKeyPath string + outputDirectory string + force bool + + // computed + backupDirectory string + apiVersion string + cs *api.ContainerService + loader *api.Apiloader + newCertsProfile *api.CertificateProfile + kubeClient *kubernetes.CompositeClientSet + armClient *ops.ARMClientWrapper + saTokenNamespaces []string + nodes nodeMap + generateCerts bool + linuxAuthConfig *ssh.AuthConfig + windowsAuthConfig *ssh.AuthConfig + jumpbox *ssh.JumpBox + sshPort int } func newRotateCertsCmd() *cobra.Command { rcc := rotateCertsCmd{ - authProvider: &authArgs{}, - sshCommandExecuter: executeCmd, + authProvider: &authArgs{}, + generateCerts: true, } - command := &cobra.Command{ Use: rotateCertsName, Short: rotateCertsShortDescription, Long: rotateCertsLongDescription, - RunE: rcc.run, + RunE: func(cmd *cobra.Command, args []string) error { + if err := rcc.validateArgs(); err != nil { + return errors.Wrap(err, "validating rotate-certs args") + } + if err := rcc.loadAPIModel(); err != nil { + return errors.Wrap(err, "loading API model") + } + if err := rcc.init(); err != nil { + return errors.Wrap(err, "loading API model") + } + cmd.SilenceUsage = true + return rcc.run() + }, } - f := 
command.Flags() - f.StringVarP(&rcc.location, "location", "l", "", "location the cluster is deployed in (required)") - f.StringVarP(&rcc.resourceGroupName, "resource-group", "g", "", "the resource group where the cluster is deployed (required)") - f.StringVarP(&rcc.apiModelPath, "api-model", "m", "", "path to the generated apimodel.json file (required)") - f.StringVarP(&rcc.sshFilepath, "ssh", "", "", "the filepath of a valid private ssh key to access the cluster's nodes (required)") - f.StringVar(&rcc.masterFQDN, "master-FQDN", "", "FQDN for the master load balancer") - f.StringVar(&rcc.masterFQDN, "apiserver", "", "apiserver endpoint (required)") - f.StringVarP(&rcc.outputDirectory, "output-directory", "o", "", "output directory where generated TLS artifacts will be saved (derived from DNS prefix if absent)") - _ = f.MarkDeprecated("master-FQDN", "--apiserver is preferred") + f.StringVarP(&rcc.location, "location", "l", "", "Azure location where the cluster is deployed") + f.StringVarP(&rcc.resourceGroupName, "resource-group", "g", "", "the resource group where the cluster is deployed") + f.StringVarP(&rcc.apiModelPath, "api-model", "m", "", "path to the generated apimodel.json file") + f.StringVar(&rcc.sshHostURI, "ssh-host", "", "FQDN, or IP address, of an SSH listener that can reach all nodes in the cluster") + f.StringVar(&rcc.linuxSSHPrivateKeyPath, "linux-ssh-private-key", "", "path to a valid private SSH key to access the cluster's Linux nodes") + _ = command.MarkFlagRequired("location") + _ = command.MarkFlagRequired("resource-group") + _ = command.MarkFlagRequired("api-model") + _ = command.MarkFlagRequired("ssh-host") + _ = command.MarkFlagRequired("linux-ssh-private-key") + + f.StringVarP(&rcc.newCertsPath, "certificate-profile", "", "", "path to a JSON file containing the new set of certificates") + f.BoolVarP(&rcc.force, "force", "", false, "force execution even if API Server is not responsive") addAuthFlags(rcc.getAuthArgs(), f) return command } -func (rcc *rotateCertsCmd) run(cmd *cobra.Command, args []string) error { - - log.Debugf("Start rotating certs") - - var err error - +func (rcc *rotateCertsCmd) validateArgs() (err error) { + locale, err := i18n.LoadTranslations() + if err != nil { + return errors.Wrap(err, "loading translation files") + } + rcc.loader = &api.Apiloader{ + Translator: &i18n.Translator{ + Locale: locale, + }, + } if err = rcc.getAuthArgs().validateAuthArgs(); err != nil { return errors.Wrap(err, "failed to get validate auth args") } - - if rcc.client, err = rcc.authProvider.getClient(); err != nil { - return errors.Wrap(err, "failed to get client") - } - - ctx, cancel := context.WithTimeout(context.Background(), armhelpers.DefaultARMOperationTimeout) - defer cancel() - _, err = rcc.client.EnsureResourceGroup(ctx, rcc.resourceGroupName, rcc.location, nil) + armClient, err := rcc.authProvider.getClient() if err != nil { - return errors.Wrap(err, "ensuring resource group") + return errors.Wrap(err, "failed to get ARM client") } - - // load the cluster configuration. 
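The new flag wiring leans on cobra's own required-flag enforcement (`MarkFlagRequired`) while `validateArgs` still verifies that the supplied paths exist. A minimal, self-contained sketch of that pattern, assuming a hypothetical `demo` command and flag set that are not part of this patch:

```go
package main

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

func main() {
	var apiModel, sshHost string
	cmd := &cobra.Command{
		Use: "demo",
		RunE: func(cmd *cobra.Command, args []string) error {
			// cobra guarantees the flags were set; path existence
			// still needs an explicit check, as in validateArgs.
			if _, err := os.Stat(apiModel); os.IsNotExist(err) {
				return fmt.Errorf("specified --api-model does not exist (%s)", apiModel)
			}
			cmd.SilenceUsage = true // past this point, errors are not usage errors
			fmt.Println("rotating certs via", sshHost)
			return nil
		},
	}
	f := cmd.Flags()
	f.StringVarP(&apiModel, "api-model", "m", "", "path to the generated apimodel.json file")
	f.StringVar(&sshHost, "ssh-host", "", "SSH listener that can reach all nodes")
	_ = cmd.MarkFlagRequired("api-model")
	_ = cmd.MarkFlagRequired("ssh-host")
	if err := cmd.Execute(); err != nil {
		os.Exit(1)
	}
}
```

With `MarkFlagRequired`, cobra fails fast with a usage error before `RunE` runs, which is why the patch can defer `cmd.SilenceUsage = true` until after its own validation succeeds.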
- if _, err = os.Stat(rcc.apiModelPath); os.IsNotExist(err) { - return errors.Errorf("specified api model does not exist (%s)", rcc.apiModelPath) + rcc.armClient = ops.NewARMClientWrapper(armClient, rotateCertsDefaultInterval, rotateCertsDefaultTimeout) + rcc.location = helpers.NormalizeAzureRegion(rcc.location) + if rcc.location == "" { + return errors.New("--location must be specified") } - - rcc.locale, err = i18n.LoadTranslations() - if err != nil { - return errors.Wrap(err, "loading translation files") + if rcc.sshHostURI == "" { + return errors.New("--ssh-host must be specified") } - - log.Debugf("Loading container service") - - apiloader := &api.Apiloader{ - Translator: &i18n.Translator{ - Locale: rcc.locale, - }, + if rcc.linuxSSHPrivateKeyPath == "" { + return errors.New("--linux-ssh-private-key must be specified") + } else if _, err = os.Stat(rcc.linuxSSHPrivateKeyPath); os.IsNotExist(err) { + return errors.Errorf("specified --linux-ssh-private-key does not exist (%s)", rcc.linuxSSHPrivateKeyPath) } - rcc.containerService, rcc.apiVersion, err = apiloader.LoadContainerServiceFromFile(rcc.apiModelPath, true, true, nil) - if err != nil { - return errors.Wrap(err, "parsing the api model") + if rcc.apiModelPath == "" { + return errors.New("--api-model must be specified") + } else if _, err = os.Stat(rcc.apiModelPath); os.IsNotExist(err) { + return errors.Errorf("specified --api-model does not exist (%s)", rcc.apiModelPath) } + if rcc.newCertsPath != "" { + rcc.generateCerts = false + if _, err = os.Stat(rcc.newCertsPath); os.IsNotExist(err) { + return errors.Errorf("specified --certificate-profile does not exist (%s)", rcc.newCertsPath) + } + } if rcc.outputDirectory == "" { - if rcc.containerService.Properties.MasterProfile != nil { - rcc.outputDirectory = path.Join("_output", rcc.containerService.Properties.MasterProfile.DNSPrefix) - } else { - return errors.New("can't determine output directory from nil MasterProfile") + rcc.outputDirectory = path.Join(filepath.Dir(rcc.apiModelPath), "_rotate_certs_output") + if err = os.MkdirAll(rcc.outputDirectory, 0755); err != nil { + return errors.Errorf("error creating output directory (%s)", rcc.outputDirectory) } } - - log.Debugf("Getting cluster nodes") - - err = rcc.getClusterNodes() - if err != nil { - return errors.Wrap(err, "listing cluster nodes") + if _, err := ioutil.ReadDir(rcc.outputDirectory); err != nil { + return errors.Wrapf(err, "reading output directory %s", rcc.outputDirectory) } + return nil +} - log.Infoln("Generating new certificates") - - // reset the certificateProfile and use the exisiting certificate generation code to generate new certificates. 
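Note that `validateArgs` now derives the output directory from the api-model's location (a `_rotate_certs_output` directory alongside apimodel.json) instead of taking an `--output-directory` flag. A small sketch of that derivation; the sample path in `main` is hypothetical:

```go
package main

import (
	"fmt"
	"os"
	"path"
	"path/filepath"
)

// deriveOutputDir mirrors the default in validateArgs: a
// "_rotate_certs_output" directory created next to the api-model.
func deriveOutputDir(apiModelPath string) (string, error) {
	dir := path.Join(filepath.Dir(apiModelPath), "_rotate_certs_output")
	if err := os.MkdirAll(dir, 0755); err != nil {
		return "", fmt.Errorf("error creating output directory (%s)", dir)
	}
	return dir, nil
}

func main() {
	// Hypothetical api-model location used only for illustration.
	dir, err := deriveOutputDir("_output/mycluster/apimodel.json")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("new TLS artifacts will be written to", dir)
}
```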
- rcc.containerService.Properties.CertificateProfile = &api.CertificateProfile{} - certsGenerated, _, err := rcc.containerService.SetDefaultCerts(api.DefaultCertParams{ - PkiKeySize: helpers.DefaultPkiKeySize, - }) - if !certsGenerated || err != nil { - return errors.Wrap(err, "generating new certificates") +func (rcc *rotateCertsCmd) loadAPIModel() (err error) { + if rcc.cs, rcc.apiVersion, err = rcc.loader.LoadContainerServiceFromFile(rcc.apiModelPath, false, false, nil); err != nil { + return errors.Wrap(err, "error parsing api-model") } - - if _, err = os.Stat(rcc.sshFilepath); os.IsNotExist(err) { - return errors.Errorf("specified ssh filepath does not exist (%s)", rcc.sshFilepath) + if rcc.newCertsPath != "" { + // TODO validate certificates metadata + if rcc.newCertsProfile, err = rcc.loader.LoadCertificateProfileFromFile(rcc.newCertsPath); err != nil { + return errors.Wrap(err, "error parsing certificate-profile") + } } - rcc.setSSHConfig() - - log.Infoln("Rotating apiserver certificate") - - err = rcc.rotateApiserver() - if err != nil { - return errors.Wrap(err, "rotating apiserver") + if rcc.cs.Properties.IsCustomCloudProfile() { + if err = writeCustomCloudProfile(rcc.cs); err != nil { + return errors.Wrap(err, "error writing custom cloud profile") + } + if err = rcc.cs.Properties.SetCustomCloudSpec(api.AzureCustomCloudSpecParams{IsUpgrade: false, IsScale: true}); err != nil { + return errors.Wrap(err, "error parsing the api model") + } } - - log.Infoln("Rotating kubelet certificate") - - err = rcc.rotateKubelet() - if err != nil { - return errors.Wrap(err, "rotating kubelet") + if rcc.cs.Location == "" { + rcc.cs.Location = rcc.location + } else if rcc.cs.Location != rcc.location { + return errors.New("--location flag does not match api-model location") + } + if rcc.cs.Properties.WindowsProfile != nil && !rcc.cs.Properties.WindowsProfile.GetSSHEnabled() { + return errors.New("SSH not enabled on Windows nodes. 
SSH is required in order to rotate agent nodes certificates") } - log.Infoln("Rotating etcd certificates") + return +} - err = rcc.rotateEtcd(ctx) - if err != nil { - return errors.Wrap(err, "rotating etcd cluster") - } +func (rcc *rotateCertsCmd) init() (err error) { + rcc.saTokenNamespaces = rcc.getNamespacesWithSATokensToRotate() + rcc.backupDirectory = path.Join(filepath.Dir(rcc.apiModelPath), "_rotate_certs_backup") - log.Infoln("Updating kubeconfig") - err = rcc.updateKubeconfig() - if err != nil { - return errors.Wrap(err, "updating kubeconfig") + rcc.linuxAuthConfig = &ssh.AuthConfig{ + User: rcc.cs.Properties.LinuxProfile.AdminUsername, + PrivateKeyPath: rcc.linuxSSHPrivateKeyPath, } + if rcc.cs.Properties.WindowsProfile != nil { + rcc.windowsAuthConfig = &ssh.AuthConfig{ + User: rcc.cs.Properties.WindowsProfile.AdminUsername, + Password: rcc.cs.Properties.WindowsProfile.AdminPassword, + } + } + rcc.sshPort = vmssSSHPort + if rcc.cs.Properties.MasterProfile.IsAvailabilitySet() { + rcc.sshPort = vmasSSHPort + } + rcc.jumpbox = &ssh.JumpBox{URI: rcc.sshHostURI, Port: rcc.sshPort, OperatingSystem: api.Linux, AuthConfig: rcc.linuxAuthConfig} + return +} - log.Debugf("Deleting Service Accoutns") - err = rcc.deleteServiceAccounts() +func (rcc *rotateCertsCmd) run() (err error) { + if err = rcc.backupCerts(); err != nil { + return errors.Wrap(err, "backing up current state") + } + if err = rcc.updateCertificateProfile(); err != nil { + return errors.Wrap(err, "updating certificate profile") + } + rcc.kubeClient, err = rcc.getKubeClient() if err != nil { - return errors.Wrap(err, "deleting service accounts") + return errors.Wrap(err, "creating Kubernetes client") } - log.Debugf("Deleting all pods") - err = rcc.deleteAllPods() - if err != nil { - return errors.Wrap(err, "deleting all the pods") + if !rcc.force { + var resumeClusterAutoscaler func() error + resumeClusterAutoscaler, err = ops.PauseClusterAutoscaler(rcc.kubeClient) + defer func() { + if e := resumeClusterAutoscaler(); e != nil { + log.Warn(e) + } + }() + if err != nil { + return err + } + if err = rcc.waitForNodesReady(rcc.cs.Properties.GetMasterVMNameList()); err != nil { + return err + } + if err = rcc.waitForControlPlaneReadiness(); err != nil { + return err + } } - err = rcc.writeArtifacts() - if err != nil { - return errors.Wrap(err, "writing artifacts") + if err = rcc.rotateMasterCerts(); err != nil { + return errors.Wrap(err, "rotating certificates") + } + if err = rcc.rotateAgentCerts(); err != nil { + return errors.Wrap(err, "rotating certificates") } - log.Infoln("Successfully rotated etcd and cluster certificates.") + if err = rcc.updateAPIModel(); err != nil { + return errors.Wrap(err, "updating apimodel") + } + log.Infoln("Certificate rotation completed") return nil } -func (rcc *rotateCertsCmd) writeArtifacts() error { - ctx := engine.Context{ - Translator: &i18n.Translator{ - Locale: rcc.locale, - }, +func (rcc *rotateCertsCmd) backupCerts() error { + log.Infof("Backing up artifacts to directory %s", rcc.backupDirectory) + if err := writeArtifacts(rcc.backupDirectory, rcc.cs, rcc.apiVersion, rcc.loader.Translator); err != nil { + return errors.Wrap(err, "writing artifacts") } - templateGenerator, err := engine.InitializeTemplateGenerator(ctx) - if err != nil { - return errors.Wrap(err, "initializing template generator") + return nil +} + +func (rcc *rotateCertsCmd) updateCertificateProfile() error { + if rcc.generateCerts { + if err := rcc.generateTLSArtifacts(); err != nil { + return errors.Wrap(err, 
"generating artifacts") + } + } else { + rcc.cs.Properties.CertificateProfile = rcc.newCertsProfile } - template, parameters, err := templateGenerator.GenerateTemplateV2(rcc.containerService, engine.DefaultGeneratorCode, BuildTag) - if err != nil { - return errors.Wrapf(err, "generating template %s", rcc.apiModelPath) + log.Infof("Writing artifacts to output directory %s", rcc.outputDirectory) + if err := writeArtifacts(rcc.outputDirectory, rcc.cs, rcc.apiVersion, rcc.loader.Translator); err != nil { + return errors.Wrap(err, "writing artifacts") } + return nil +} - if template, err = transform.PrettyPrintArmTemplate(template); err != nil { - return errors.Wrap(err, "pretty-printing template") - } - if parameters, err = transform.BuildAzureParametersFile(parameters); err != nil { - return errors.Wrap(err, "pretty-printing template parameters") +func (rcc *rotateCertsCmd) generateTLSArtifacts() error { + log.Infoln("Generating new certificates") + rcc.cs.Properties.CertificateProfile = &api.CertificateProfile{} + if ok, _, err := rcc.cs.SetDefaultCerts(api.DefaultCertParams{PkiKeySize: helpers.DefaultPkiKeySize}); !ok || err != nil { + return errors.Wrap(err, "generating new certificates") } + return nil +} - writer := &engine.ArtifactWriter{ - Translator: &i18n.Translator{ - Locale: rcc.locale, - }, +// getControlPlaneNodes ... +func (rcc *rotateCertsCmd) getControlPlaneNodes() nodeMap { + nodes := make(nodeMap) + for _, master := range rcc.cs.Properties.GetMasterVMNameList() { + nodes[master] = &ssh.RemoteHost{ + URI: master, + Port: 22, + OperatingSystem: api.Linux, + AuthConfig: rcc.linuxAuthConfig, + Jumpbox: rcc.jumpbox, + } } - return writer.WriteTLSArtifacts(rcc.containerService, rcc.apiVersion, template, parameters, rcc.outputDirectory, true, false) + return nodes } -func (rcc *rotateCertsCmd) getClusterNodes() error { - kubeClient, err := rcc.getKubeClient() +// getAgentNodes ... +func (rcc *rotateCertsCmd) getAgentNodes() (nodeMap, error) { + nodeList, err := rcc.kubeClient.ListNodes() if err != nil { - return errors.Wrap(err, "failed to get Kubernetes Client") + return nil, err + } + nodes := make(nodeMap) + for _, nli := range nodeList.Items { + node := &ssh.RemoteHost{ + URI: nli.Name, + Port: 22, + Jumpbox: rcc.jumpbox, + } + switch api.OSType(strings.Title(nli.Status.NodeInfo.OperatingSystem)) { + case api.Linux: + node.OperatingSystem = api.Linux + node.AuthConfig = rcc.linuxAuthConfig + case api.Windows: + node.OperatingSystem = api.Windows + node.AuthConfig = rcc.windowsAuthConfig + default: + return nil, errors.Errorf("listing nodes, could not determine operating system of node %s", nli.Name) + } + nodes[node.URI] = node } - nodeList, err := kubeClient.ListNodes() - if err != nil { - return errors.Wrap(err, "failed to get cluster nodes") + for k, v := range nodes { + if isMaster(v) { + delete(nodes, k) + } } - for _, node := range nodeList.Items { - if strings.Contains(node.Name, "master") { - rcc.masterNodes = append(rcc.masterNodes, node) - } else { - rcc.agentNodes = append(rcc.agentNodes, node) + return nodes, nil +} + +// distributeCerts copies the new set of certificates to the cluster nodes. 
+func (rcc *rotateCertsCmd) distributeCerts() (err error) { + log.Info("Distributing certificates") + upload := func(files fileMap, node *ssh.RemoteHost) error { + for _, file := range files { + var co string + if co, err = ssh.CopyToRemote(node, file); err != nil { + log.Debugf("Remote command output: %s", co) + return errors.Wrap(err, "uploading certificate") + } + } + return nil + } + masterCerts, linuxCerts, windowsCerts, e := getFilesToDistribute(rcc.cs, "/etc/kubernetes/rotate-certs/certs") + if e != nil { + return errors.Wrap(e, "collecting files to distribute") + } + for _, node := range rcc.nodes { + log.Debugf("Uploading certificates to node %s", node.URI) + if isMaster(node) { + err = upload(masterCerts, node) + } else if isLinuxAgent(node) { + err = upload(linuxCerts, node) + } else if isWindowsAgent(node) { + err = upload(windowsCerts, node) + } + if err != nil { + return err } } return nil } -func (rcc *rotateCertsCmd) rebootAllNodes(ctx context.Context) error { - vmListPage, err := rcc.client.ListVirtualMachines(ctx, rcc.resourceGroupName) +func (rcc *rotateCertsCmd) rotateMasterCerts() (err error) { + rcc.nodes = rcc.getControlPlaneNodes() if err != nil { - return errors.Wrap(err, "failed to list Virtual Machines in resource group "+rcc.resourceGroupName) + return errors.Wrap(err, "listing cluster nodes") } - vmssListPage, err := rcc.client.ListVirtualMachineScaleSets(ctx, rcc.resourceGroupName) - if err != nil { - return errors.Wrap(err, "failed to list Virtual Machine Scale Sets in resource group "+rcc.resourceGroupName) + if err = rcc.distributeCerts(); err != nil { + return errors.Wrap(err, "distributing certificates") } - for _, vm := range vmListPage.Values() { - err = rcc.client.RestartVirtualMachine(ctx, rcc.resourceGroupName, *vm.Name) - if err != nil { - return errors.Wrap(err, "failed to restart Virtual Machine "+*vm.Name) - } + if err = rcc.backupRemote(); err != nil { + return err } - for _, vmss := range vmssListPage.Values() { - err = rcc.client.RestartVirtualMachineScaleSets(ctx, rcc.resourceGroupName, *vmss.Name, nil) - if err != nil { - return errors.Wrap(err, "failed to restart Virtual Machine Scale Sets "+*vmss.Name) - } + if err = rcc.rotateMasters(); err != nil { + return err + } + if err = rcc.cleanupRemote(); err != nil { + return err + } + if err = rcc.waitForNodesReady(keys(rcc.nodes)); err != nil { + return err + } + if err = rcc.waitForControlPlaneReadiness(); err != nil { + return err } return nil } -func (rcc *rotateCertsCmd) deleteAllPods() error { - kubeClient, err := rcc.getKubeClient() +func (rcc *rotateCertsCmd) rotateAgentCerts() (err error) { + rcc.nodes, err = rcc.getAgentNodes() if err != nil { - return errors.Wrap(err, "failed to get Kubernetes Client") + return errors.Wrap(err, "listing cluster nodes") } - pods, err := kubeClient.ListAllPods() - if err != nil { - return errors.Wrap(err, "failed to get pods") + if err = rcc.distributeCerts(); err != nil { + return errors.Wrap(err, "distributing certificates") } - for _, pod := range pods.Items { - log.Debugf("Deleting pod %s", pod.Name) - err = kubeClient.DeletePod(&pod) - if err != nil { - return errors.Wrap(err, "failed to delete pod "+pod.Name) + if err = rcc.backupRemote(); err != nil { + return err + } + if err = rcc.rotateAgents(); err != nil { + return err + } + if err = rcc.cleanupRemote(); err != nil { + return err + } + if err = rcc.waitForNodesReady(keys(rcc.nodes)); err != nil { + return err + } + log.Info("Recreating service account tokens") + if err = 
ops.RotateServiceAccountTokens(rcc.kubeClient, rcc.saTokenNamespaces); err != nil { + return err + } + if err = rcc.waitForKubeSystemReadiness(); err != nil { + log.Errorf("waitForKubeSystemReadiness returned an error: %s", err.Error()) + } + return nil +} + +func (rcc *rotateCertsCmd) backupRemote() error { + log.Info("Backing up node certificates") + step := "backup" + for _, node := range rcc.nodes { + if err := execStepsSequence(isLinux, node, execRemoteFunc(remoteBashScript(step))); err != nil { + return errors.Wrapf(err, "executing %s function on remote host %s", step, node.URI) + } + if err := execStepsSequence(isWindowsAgent, node, execRemoteFunc(remotePowershellScript("Backup"))); err != nil { + return errors.Wrapf(err, "executing %s function on remote host %s", step, node.URI) + } + } + return nil +} + +func (rcc *rotateCertsCmd) rotateMasters() error { + log.Info("Rotating control plane certificates") + step := "cp_certs" + for _, node := range rcc.nodes { + log.Debugf("Node: %s. Step: %s", node.URI, step) + if err := execStepsSequence(isMaster, node, execRemoteFunc(remoteBashScript(step))); err != nil { + return errors.Wrapf(err, "executing %s function on remote host %s", step, node.URI) + } + } + if err := rcc.rebootNodes(rcc.cs.Properties.GetMasterVMNameList()...); err != nil { + return err + } + if err := rcc.waitForVMsRunning(keys(rcc.nodes)); err != nil { + return err + } + if err := rcc.waitForNodesReady(keys(rcc.nodes)); err != nil { + return err + } + log.Info("Rotating proxy certificates") + step = "cp_proxy" + for _, node := range rcc.nodes { + log.Debugf("Node: %s. Step: %s", node.URI, step) + if err := execStepsSequence(isMaster, node, execRemoteFunc(remoteBashScript(step))); err != nil { + return errors.Wrapf(err, "executing %s function on remote host %s", step, node.URI) + } + } + return nil +} + +func (rcc *rotateCertsCmd) rotateAgents() error { + log.Info("Rotating agent certificates") + step := "agent_certs" + for _, node := range rcc.nodes { + log.Debugf("Node: %s. 
Step: %s", node.URI, step) + if err := execStepsSequence(isLinuxAgent, node, execRemoteFunc(remoteBashScript(step)), deletePodFunc(rcc.kubeClient, kubeProxyLabels)); err != nil { + return errors.Wrapf(err, "executing %s function on remote host %s", step, node.URI) + } + if err := execStepsSequence(isWindowsAgent, node, execRemoteFunc(remotePowershellScript("Start-CertRotation"))); err != nil { + return errors.Wrapf(err, "executing Start-CertRotation function on remote host %s", node.URI) + } } + return nil +} - for _, host := range rcc.masterNodes { - cmd := "sudo bash -c \"cat > ~/.kube/config << EOL \n" + strings.Replace(kubeconfig, "\"", "\\\"", -1) + "EOL\"" - out, err := rcc.sshCommandExecuter(cmd, rcc.masterFQDN, host.Name, "22", rcc.sshConfig) - if err != nil { - log.Printf("Command %s output: %s\n", cmd, out) - return errors.Wrap(err, "failed replacing kubeconfig file") +func (rcc *rotateCertsCmd) cleanupRemote() error { + log.Infoln("Deleting temporary artifacts from cluster nodes") + step := "cleanup" + for _, node := range rcc.nodes { + log.Debugf("Node: %s. Step: %s", node.URI, step) + if err := execStepsSequence(isLinux, node, execRemoteFunc(remoteBashScript(step))); err != nil { + return errors.Wrapf(err, "executing %s function on remote host %s", step, node.URI) + } + if err := execStepsSequence(isWindowsAgent, node, execRemoteFunc(remotePowershellScript("Clean"))); err != nil { + return errors.Wrapf(err, "executing %s function on remote host %s", step, node.URI) } } return nil } -func (rcc *rotateCertsCmd) getKubeClient() (kubernetes.Client, error) { - kubeconfig, err := engine.GenerateKubeConfig(rcc.containerService.Properties, rcc.location) - if err != nil { - return nil, errors.Wrap(err, "generating kubeconfig") +func (rcc *rotateCertsCmd) updateAPIModel() error { + log.Infof("Generating new artifacts") + if err := writeArtifacts(filepath.Dir(rcc.apiModelPath), rcc.cs, rcc.apiVersion, rcc.loader.Translator); err != nil { + return errors.Wrap(err, "writing artifacts") } - var kubeClient kubernetes.Client - if rcc.client != nil { - kubeClient, err = rcc.client.GetKubernetesClient("", kubeconfig, time.Second*1, time.Duration(60)*time.Minute) - if err != nil { - return nil, errors.Wrap(err, "failed to get a Kubernetes client") - } - return kubeClient, nil + if err := os.RemoveAll(rcc.outputDirectory); err != nil { + return errors.Wrap(err, "deleting output directory") } - return nil, errors.Wrap(err, "AKSEngineClient was nil") + return nil } -// Rotate etcd CA and certificates in all of the master nodes. 
-func (rcc *rotateCertsCmd) rotateEtcd(ctx context.Context) error { - caPrivateKeyCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/ca.key << EOL \n" + rcc.containerService.Properties.CertificateProfile.CaPrivateKey + "EOL\"" - caCertificateCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/ca.crt << EOL \n" + rcc.containerService.Properties.CertificateProfile.CaCertificate + "EOL\"" - etcdServerPrivateKeyCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/etcdserver.key << EOL \n" + rcc.containerService.Properties.CertificateProfile.EtcdServerPrivateKey + "EOL\"" - etcdServerCertificateCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/etcdserver.crt << EOL \n" + rcc.containerService.Properties.CertificateProfile.EtcdServerCertificate + "EOL\"" - etcdClientPrivateKeyCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/etcdclient.key << EOL \n" + rcc.containerService.Properties.CertificateProfile.EtcdClientPrivateKey + "EOL\"" - etcdClientCertificateCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/etcdclient.crt << EOL \n" + rcc.containerService.Properties.CertificateProfile.EtcdClientCertificate + "EOL\"" - - for i, host := range rcc.masterNodes { - log.Debugf("Ranging over node: %s\n", host.Name) - etcdPeerPrivateKeyCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/etcdpeer" + strconv.Itoa(i) + ".key << EOL \n" + rcc.containerService.Properties.CertificateProfile.EtcdPeerPrivateKeys[i] + "EOL\"" - etcdPeerCertificateCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/etcdpeer" + strconv.Itoa(i) + ".crt << EOL \n" + rcc.containerService.Properties.CertificateProfile.EtcdPeerCertificates[i] + "EOL\"" - - for _, cmd := range []string{caPrivateKeyCmd, caCertificateCmd} { - out, err := rcc.sshCommandExecuter(cmd, rcc.masterFQDN, host.Name, "22", rcc.sshConfig) - if err != nil { - log.Printf("Command %s output: %s\n", cmd, out) - return errors.Wrap(err, "failed replacing certificate file") - } - } - - for _, cmd := range []string{etcdServerPrivateKeyCmd, etcdServerCertificateCmd, etcdClientPrivateKeyCmd, etcdClientCertificateCmd, etcdPeerPrivateKeyCmd, etcdPeerCertificateCmd} { - out, err := rcc.sshCommandExecuter(cmd, rcc.masterFQDN, host.Name, "22", rcc.sshConfig) - if err != nil { - log.Printf("Command %s output: %s\n", cmd, out) - return errors.Wrap(err, "failed replacing certificate file") - } +func execStepsSequence(cond nodeCondition, node *ssh.RemoteHost, steps ...func(node *ssh.RemoteHost) error) error { + if !cond(node) { + return nil + } + for _, step := range steps { + if err := step(node); err != nil { + return err } } + return nil +} - log.Infoln("Rebooting all nodes... 
This might take a few minutes") - err := rcc.rebootAllNodes(ctx) - if err != nil { - return errors.Wrap(err, "rebooting the nodes") +func execRemoteFunc(script string) func(node *ssh.RemoteHost) error { + return func(node *ssh.RemoteHost) error { + out, err := ssh.ExecuteRemote(node, script) + if err != nil { + log.Debugf("Remote command output: %s", out) + } + return err } +} - for _, host := range rcc.masterNodes { - log.Debugf("Restarting etcd on node %s", host.Name) - out, err := rcc.sshCommandExecuter("sudo systemctl restart etcd", rcc.masterFQDN, host.Name, "22", rcc.sshConfig) +func deletePodFunc(client *kubernetes.CompositeClientSet, labels string) func(node *ssh.RemoteHost) error { + return func(node *ssh.RemoteHost) error { + err := client.DeletePods(metav1.NamespaceSystem, metav1.ListOptions{ + FieldSelector: fmt.Sprintf("spec.nodeName=%s", node.URI), + LabelSelector: labels, + }) if err != nil { - log.Printf("Command `sudo systemctl restart etcd` output: %s\n", out) - return errors.Wrap(err, "failed to restart etcd") + return errors.Wrapf(err, "deleting pod with labels %s from node %s", labels, node.URI) } + return nil } +} +// waitForControlPlaneReadiness checks that the control plane components are in a healthy state before we move to the next step. +func (rcc *rotateCertsCmd) waitForControlPlaneReadiness() error { + log.Info("Checking health of control plane components") + pods := make([]string, 0) + for _, n := range rcc.cs.Properties.GetMasterVMNameList() { + for _, c := range []string{kubeAddonManager, kubeAPIServer, kubeControllerManager, kubeScheduler} { + pods = append(pods, fmt.Sprintf("%s-%s", c, n)) + } + } + if err := ops.WaitForReady(rcc.kubeClient, metav1.NamespaceSystem, pods, rotateCertsDefaultInterval, rotateCertsDefaultTimeout, rcc.nodes); err != nil { + return errors.Wrap(err, "waiting for control plane containers to reach the Ready state within the timeout period") + } return nil } -// From the first master node, rotate apiserver certificates in the nodes. 
-func (rcc *rotateCertsCmd) rotateApiserver() error { - caCertificateCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/ca.crt << EOL \n" + rcc.containerService.Properties.CertificateProfile.CaCertificate + "EOL\"" - apiServerPrivateKeyCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/apiserver.key << EOL \n" + rcc.containerService.Properties.CertificateProfile.APIServerPrivateKey + "EOL\"" - apiServerCertificateCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/apiserver.crt << EOL \n" + rcc.containerService.Properties.CertificateProfile.APIServerCertificate + "EOL\"" - - for _, host := range rcc.masterNodes { - log.Debugf("Ranging over node: %s\n", host.Name) - for _, cmd := range []string{apiServerPrivateKeyCmd, apiServerCertificateCmd} { - out, err := rcc.sshCommandExecuter(cmd, rcc.masterFQDN, host.Name, "22", rcc.sshConfig) - if err != nil { - log.Printf("Command %s output: %s\n", cmd, out) - return errors.Wrap(err, "failed replacing certificate file") - } - } +func (rcc *rotateCertsCmd) waitForNodesReady(nodes []string) error { + log.Infof("Waiting for cluster nodes readiness: %s", nodes) + if err := ops.WaitForNodesReady(rcc.kubeClient, nodes, rotateCertsDefaultInterval, rotateCertsDefaultTimeout); err != nil { + return errors.Wrap(err, "waiting for cluster nodes readiness") } + return nil +} - for _, host := range rcc.agentNodes { - log.Debugf("Ranging over node: %s\n", host.Name) - for _, cmd := range []string{caCertificateCmd} { - out, err := rcc.sshCommandExecuter(cmd, rcc.masterFQDN, host.Name, "22", rcc.sshConfig) - if err != nil { - log.Printf("Command %s output: %s\n", cmd, out) - return errors.Wrap(err, "failed replacing certificate file") - } +func (rcc *rotateCertsCmd) waitForVMsRunning(nodes []string) error { + if rcc.cs.Properties.MasterProfile.IsAvailabilitySet() { + if err := ops.WaitForVMsRunning(rcc.armClient, rcc.resourceGroupName, nodes, rotateCertsDefaultInterval, rotateCertsDefaultTimeout); err != nil { + return errors.Wrap(err, "waiting for VMs to reach the running state") + } + } else { + vmssName := fmt.Sprintf("%svmss", rcc.cs.Properties.GetMasterVMPrefix()) + count := rcc.cs.Properties.MasterProfile.Count + if err := ops.WaitForVMSSIntancesRunning(rcc.armClient, rcc.resourceGroupName, vmssName, count, rotateCertsDefaultInterval, rotateCertsDefaultTimeout); err != nil { + return errors.Wrap(err, "waiting for VMs to reach the running state") } } return nil } -func (rcc *rotateCertsCmd) rotateKubelet() error { - clientCertificateCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/client.crt << EOL \n" + rcc.containerService.Properties.CertificateProfile.ClientCertificate + "EOL\"" - clientPrivateKeyCmd := "sudo bash -c \"cat > /etc/kubernetes/certs/client.key << EOL \n" + rcc.containerService.Properties.CertificateProfile.ClientPrivateKey + "EOL\"" +// waitForKubeSystemReadiness checks that all kube-system pods are in a healthy state before we move to the next step. 
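`waitForNodesReady` and `waitForVMsRunning` above both delegate to polling helpers in cmd/rotatecerts/wait.go with a 10-second interval and 20-minute ceiling. The generic retry-until-deadline shape, assuming nothing about the real helpers beyond what the calls here show:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// pollUntil retries check every interval until it succeeds or the
// timeout elapses — the retry-until-deadline shape shared by the
// WaitFor* helpers (which additionally aggregate per-node status).
func pollUntil(interval, timeout time.Duration, check func() error) error {
	deadline := time.Now().Add(timeout)
	for {
		err := check()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out after %s: %w", timeout, err)
		}
		time.Sleep(interval)
	}
}

func main() {
	tries := 0
	err := pollUntil(10*time.Millisecond, time.Second, func() error {
		if tries++; tries < 3 {
			return errors.New("VM not yet in the running state")
		}
		return nil
	})
	fmt.Println(err) // <nil> once the third attempt succeeds
}
```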
+func (rcc *rotateCertsCmd) waitForKubeSystemReadiness() error { + log.Info("Checking health of all kube-system pods") + timeout := time.Duration(len(rcc.nodes)) * time.Duration(float64(time.Minute)*1.25) + if rotateCertsDefaultTimeout > timeout { + timeout = rotateCertsDefaultTimeout + } + if err := ops.WaitForAllInNamespaceReady(rcc.kubeClient, metav1.NamespaceSystem, rotateCertsDefaultInterval, timeout, rcc.nodes); err != nil { + return errors.Wrap(err, "waiting for kube-system containers to reach the Ready state within the timeout period") + } + return nil +} - for _, host := range append(rcc.masterNodes, rcc.agentNodes...) { - log.Debugf("Ranging over node: %s\n", host.Name) - for _, cmd := range []string{clientCertificateCmd, clientPrivateKeyCmd} { - out, err := rcc.sshCommandExecuter(cmd, rcc.masterFQDN, host.Name, "22", rcc.sshConfig) - if err != nil { - log.Printf("Command %s output: %s\n", cmd, out) - return errors.Wrap(err, "failed replacing certificate file") +func (rcc *rotateCertsCmd) rebootNodes(nodes ...string) error { + log.Info("Rebooting control plane nodes") + if rcc.cs.Properties.MasterProfile.IsAvailabilitySet() { + for _, node := range nodes { + log.Debugf("Node: %s. Step: reboot", node) + if err := rcc.armClient.RestartVirtualMachine(rcc.resourceGroupName, node); err != nil { + return errors.Wrapf(err, "rebooting host %s", node) } } + } else { + vmssName := fmt.Sprintf("%svmss", rcc.cs.Properties.GetMasterVMPrefix()) + if err := rcc.armClient.RestartVirtualMachineScaleSets(rcc.resourceGroupName, vmssName); err != nil { + return errors.Wrapf(err, "rebooting vmss %s", vmssName) + } } return nil } -func (rcc *rotateCertsCmd) setSSHConfig() { - rcc.sshConfig = &ssh.ClientConfig{ - HostKeyCallback: ssh.InsecureIgnoreHostKey(), - User: "azureuser", - Auth: []ssh.AuthMethod{ - publicKeyFile(rcc.sshFilepath), - }, +func (rcc *rotateCertsCmd) getKubeClient() (*kubernetes.CompositeClientSet, error) { + configPathSuffix := path.Join("kubeconfig", fmt.Sprintf("kubeconfig.%s.json", rcc.location)) + + oldConfigPath := path.Join(rcc.backupDirectory, configPathSuffix) + oldConfig, err := ioutil.ReadFile(oldConfigPath) + if err != nil { + return nil, errors.Wrapf(err, "reading %s", oldConfigPath) + } + oldCAClient, err := kubernetes.NewClient("", string(oldConfig), rotateCertsDefaultInterval, rotateCertsDefaultTimeout) + if err != nil { + return nil, errors.Wrapf(err, "creating client from %s", oldConfigPath) + } + + newConfigPath := path.Join(rcc.outputDirectory, configPathSuffix) + newConfig, err := ioutil.ReadFile(newConfigPath) + if err != nil { + return nil, errors.Wrapf(err, "reading %s", newConfigPath) + } + newCAClient, err := kubernetes.NewClient("", string(newConfig), rotateCertsDefaultInterval, rotateCertsDefaultTimeout) + if err != nil { + return nil, errors.Wrapf(err, "creating client from %s", newConfigPath) } + + return kubernetes.NewCompositeClient(oldCAClient, newCAClient, rotateCertsDefaultInterval, rotateCertsDefaultTimeout), nil } -func publicKeyFile(file string) ssh.AuthMethod { - buffer, err := ioutil.ReadFile(file) +func getFilesToDistribute(cs *api.ContainerService, dir string) (fileMap, fileMap, fileMap, error) { + p := cs.Properties.CertificateProfile + + kubeconfig, err := remoteKubeConfig(cs, dir) if err != nil { - return nil + return nil, nil, nil, errors.Wrap(err, "generating new kubeconfig") } - key, err := ssh.ParsePrivateKey(buffer) + linuxScript, err := loadLinuxScript() if err != nil { - return nil + return nil, nil, nil, errors.Wrap(err, "loading 
rotate-certs.sh") } - return ssh.PublicKeys(key) + windowsScript, err := loadWindowsScript() + if err != nil { + return nil, nil, nil, errors.Wrap(err, "loading rotate-certs.ps1") + } + + masterFiles := fileMap{ + "apiserver.crt": ssh.NewRemoteFile(path.Join(dir, "apiserver.crt"), crtPermissions, rootUserGroup, []byte(p.APIServerCertificate)), + "apiserver.key": ssh.NewRemoteFile(path.Join(dir, "apiserver.key"), keyPermissions, rootUserGroup, []byte(p.APIServerPrivateKey)), + "ca.crt": ssh.NewRemoteFile(path.Join(dir, "ca.crt"), crtPermissions, rootUserGroup, []byte(p.CaCertificate)), + "ca.key": ssh.NewRemoteFile(path.Join(dir, "ca.key"), keyPermissions, rootUserGroup, []byte(p.CaPrivateKey)), + "client.crt": ssh.NewRemoteFile(path.Join(dir, "client.crt"), crtPermissions, rootUserGroup, []byte(p.ClientCertificate)), + "client.key": ssh.NewRemoteFile(path.Join(dir, "client.key"), keyPermissions, rootUserGroup, []byte(p.ClientPrivateKey)), + "etcdclient.crt": ssh.NewRemoteFile(path.Join(dir, "etcdclient.crt"), crtPermissions, rootUserGroup, []byte(p.EtcdClientCertificate)), + "etcdclient.key": ssh.NewRemoteFile(path.Join(dir, "etcdclient.key"), keyPermissions, rootUserGroup, []byte(p.EtcdClientPrivateKey)), + "etcdserver.crt": ssh.NewRemoteFile(path.Join(dir, "etcdserver.crt"), crtPermissions, rootUserGroup, []byte(p.EtcdServerCertificate)), + "etcdserver.key": ssh.NewRemoteFile(path.Join(dir, "etcdserver.key"), keyPermissions, etcdUserGroup, []byte(p.EtcdServerPrivateKey)), + "kubectlClient.crt": ssh.NewRemoteFile(path.Join(dir, "kubectlClient.crt"), crtPermissions, rootUserGroup, []byte(p.KubeConfigCertificate)), + "kubectlClient.key": ssh.NewRemoteFile(path.Join(dir, "kubectlClient.key"), keyPermissions, rootUserGroup, []byte(p.KubeConfigPrivateKey)), + "kubeconfig": kubeconfig, + "script": linuxScript, + } + for i := 0; i < cs.Properties.MasterProfile.Count; i++ { + crt := fmt.Sprintf("etcdpeer%d.crt", i) + masterFiles[crt] = ssh.NewRemoteFile(path.Join(dir, crt), crtPermissions, etcdUserGroup, []byte(p.EtcdPeerCertificates[i])) + key := fmt.Sprintf("etcdpeer%d.key", i) + masterFiles[key] = ssh.NewRemoteFile(path.Join(dir, key), keyPermissions, etcdUserGroup, []byte(p.EtcdPeerPrivateKeys[i])) + } + linuxFiles := fileMap{ + "ca.crt": masterFiles["ca.crt"], + "client.crt": masterFiles["client.crt"], + "client.key": masterFiles["client.key"], + "script": linuxScript, + } + windowsFiles := fileMap{ + "ca.crt": ssh.NewRemoteFile(fmt.Sprintf("$env:temp\\%s", "ca.crt"), "", "", []byte(p.CaCertificate)), + "client.crt": ssh.NewRemoteFile(fmt.Sprintf("$env:temp\\%s", "client.crt"), "", "", []byte(p.ClientCertificate)), + "client.key": ssh.NewRemoteFile(fmt.Sprintf("$env:temp\\%s", "client.key"), "", "", []byte(p.ClientPrivateKey)), + "script": windowsScript, + } + return masterFiles, linuxFiles, windowsFiles, nil } -func executeCmd(command, masterFQDN, hostname string, port string, config *ssh.ClientConfig) (string, error) { - // Dial connection to the master via public load balancer - lbClient, err := ssh.Dial("tcp", fmt.Sprintf("%s:%s", masterFQDN, port), config) +func remoteKubeConfig(cs *api.ContainerService, dir string) (*ssh.RemoteFile, error) { + adminUsername := fmt.Sprintf("%s:%s", cs.Properties.LinuxProfile.AdminUsername, cs.Properties.LinuxProfile.AdminUsername) + kubeconfig, err := engine.GenerateKubeConfig(cs.Properties, cs.Location) if err != nil { - return "", errors.Wrap(err, "Dialing LB") + return nil, err } + return ssh.NewRemoteFile(path.Join(dir, "kubeconfig"), 
configPermissions, adminUsername, []byte(kubeconfig)), nil +} - // Dial a connection to the agent host, from the master - conn, err := lbClient.Dial("tcp", fmt.Sprintf("%s:%s", hostname, port)) +func loadLinuxScript() (*ssh.RemoteFile, error) { + c, err := engine.Asset("k8s/rotate-certs.sh") if err != nil { - return "", errors.Wrap(err, "Dialing host") + return nil, err } + return ssh.NewRemoteFile("/etc/kubernetes/rotate-certs/rotate-certs.sh", "744", rootUserGroup, c), nil +} - ncc, chans, reqs, err := ssh.NewClientConn(conn, hostname, config) +func loadWindowsScript() (*ssh.RemoteFile, error) { + c, err := engine.Asset("k8s/rotate-certs.ps1") if err != nil { - return "", errors.Wrap(err, "starting new client connection to host") + return nil, err } + return ssh.NewRemoteFile("$env:temp\\rotate-certs.ps1", "", "", c), nil +} - sClient := ssh.NewClient(ncc, chans, reqs) +func remoteBashScript(step string) string { + return fmt.Sprintf("bash -euxo pipefail -c \"if [ -f /etc/kubernetes/rotate-certs/rotate-certs.sh ]; then sudo /etc/kubernetes/rotate-certs/rotate-certs.sh %s |& sudo tee -a /var/log/azure/rotate-certs.log; fi\"", step) +} - session, err := sClient.NewSession() +func remotePowershellScript(step string) string { + filePath := "$env:temp\\rotate-certs.ps1" + return fmt.Sprintf("powershell -noprofile -command \"cd c:\\k\\; Import-Module %s; iex %s | Out-File -Append -Encoding utf8 rotate-certs.log\"", filePath, step) +} - if err != nil { - return "", errors.Wrap(err, "opening SSH session") - } - defer session.Close() +type nodeCondition func(*ssh.RemoteHost) bool - var stdoutBuf bytes.Buffer - session.Stdout = &stdoutBuf +func isMaster(node *ssh.RemoteHost) bool { + return strings.HasPrefix(node.URI, common.LegacyControlPlaneVMPrefix) +} +func isLinux(node *ssh.RemoteHost) bool { return node.OperatingSystem == api.Linux } +func isWindowsAgent(node *ssh.RemoteHost) bool { return node.OperatingSystem == api.Windows } +func isLinuxAgent(node *ssh.RemoteHost) bool { return isLinux(node) && !isMaster(node) } - err = session.Run(command) - if err != nil { - return fmt.Sprintf("%s -> %s", hostname, stdoutBuf.String()), errors.Wrap(err, "running command") +func (rcc *rotateCertsCmd) getNamespacesWithSATokensToRotate() []string { + // TODO parametize addons namespace so hard-coding their names is not required. 
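The removed `executeCmd` above shows the tunneling this patch moves into `pkg/helpers/ssh`: dial the jumpbox, open a TCP tunnel to the target node over that connection, then run a second SSH handshake through the tunnel. A self-contained sketch of the same dance with golang.org/x/crypto/ssh; the addresses and key path are hypothetical:

```go
package main

import (
	"fmt"
	"io/ioutil"

	"golang.org/x/crypto/ssh"
)

// dialThroughJumpbox reaches an inner node by dialing the jumpbox,
// tunneling a TCP connection to the node over it, and performing a
// second SSH handshake through that tunnel — the Dial/Dial/NewClientConn
// dance from the removed executeCmd, now encapsulated by the JumpBox
// type in pkg/helpers/ssh.
func dialThroughJumpbox(jumpboxAddr, nodeAddr string, cfg *ssh.ClientConfig) (*ssh.Client, error) {
	jb, err := ssh.Dial("tcp", jumpboxAddr, cfg)
	if err != nil {
		return nil, fmt.Errorf("dialing jumpbox: %w", err)
	}
	conn, err := jb.Dial("tcp", nodeAddr)
	if err != nil {
		return nil, fmt.Errorf("dialing node from jumpbox: %w", err)
	}
	ncc, chans, reqs, err := ssh.NewClientConn(conn, nodeAddr, cfg)
	if err != nil {
		return nil, fmt.Errorf("ssh handshake with node: %w", err)
	}
	return ssh.NewClient(ncc, chans, reqs), nil
}

func main() {
	key, err := ioutil.ReadFile("/home/azureuser/.ssh/id_rsa") // hypothetical key path
	if err != nil {
		panic(err)
	}
	signer, err := ssh.ParsePrivateKey(key)
	if err != nil {
		panic(err)
	}
	cfg := &ssh.ClientConfig{
		User:            "azureuser",
		Auth:            []ssh.AuthMethod{ssh.PublicKeys(signer)},
		HostKeyCallback: ssh.InsecureIgnoreHostKey(), // sketch only; pin host keys in production
	}
	client, err := dialThroughJumpbox("cluster.example.com:22", "k8s-master-1:22", cfg)
	if err != nil {
		panic(err)
	}
	defer client.Close()
	fmt.Println("connected through jumpbox")
}
```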
+ // TODO maybe add an extra cli param so user can add extra namespaces + namespaces := []string{metav1.NamespaceSystem} + if rcc.cs.Properties.OrchestratorProfile.KubernetesConfig.IsAddonEnabled(common.DashboardAddonName) { + namespaces = append(namespaces, "kubernetes-dashboard") + } + if rcc.cs.Properties.OrchestratorProfile.KubernetesConfig.IsAddonEnabled(common.AzureArcOnboardingAddonName) { + namespaces = append(namespaces, "azure-arc") + } + if rcc.cs.Properties.OrchestratorProfile.KubernetesConfig.IsAddonEnabled(common.AzurePolicyAddonName) { + namespaces = append(namespaces, "gatekeeper-system") } + if rcc.cs.Properties.OrchestratorProfile.KubernetesConfig.IsAddonEnabled(common.ScheduledMaintenanceAddonName) { + namespaces = append(namespaces, "drainsafe-system") + } + return namespaces +} - return fmt.Sprintf("%s -> %s", hostname, stdoutBuf.String()), nil +func keys(nodes nodeMap) []string { + n := make([]string, 0) + for k := range nodes { + n = append(n, k) + } + return n } diff --git a/cmd/rotate_certs_test.go b/cmd/rotate_certs_test.go index 4bc4a671a1..b7c887340f 100644 --- a/cmd/rotate_certs_test.go +++ b/cmd/rotate_certs_test.go @@ -4,413 +4,174 @@ package cmd import ( - "context" - "fmt" - "io/ioutil" - "os" "testing" - "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute" + "github.com/Azure/aks-engine/pkg/armhelpers" "github.com/google/uuid" . "github.com/onsi/gomega" "github.com/pkg/errors" "github.com/spf13/cobra" - "golang.org/x/crypto/ssh" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - - "github.com/Azure/aks-engine/pkg/api" - "github.com/Azure/aks-engine/pkg/api/common" - "github.com/Azure/aks-engine/pkg/armhelpers" - "github.com/Azure/aks-engine/pkg/helpers" ) -func mockExecuteCmd(command, masterFQDN, hostname string, port string, config *ssh.ClientConfig) (string, error) { - if masterFQDN != "valid" { - return "error running command", errors.New("executeCmd failed") - } - return "success", nil -} - func TestNewRotateCertsCmd(t *testing.T) { t.Parallel() - - output := newRotateCertsCmd() - if output.Use != rotateCertsName || output.Short != rotateCertsShortDescription || output.Long != rotateCertsLongDescription { - t.Fatalf("rotate-certs command should have use %s equal %s, short %s equal %s and long %s equal to %s", output.Use, rotateCertsName, output.Short, rotateCertsShortDescription, output.Long, rotateCertsLongDescription) - } - - expectedFlags := []string{"location", "resource-group", "apiserver", "api-model", "ssh"} - for _, f := range expectedFlags { - if output.Flags().Lookup(f) == nil { - t.Fatalf("rotate-certs command should have flag %s", f) - } - } -} - -func TestRotateCertsCmdRun(t *testing.T) { - t.Parallel() - - tmpSSHFile, del := makeTmpFile(t, "_test_ssh") - defer del() - - tmpOutputDir, del := makeTmpDir(t) - defer del() - - rcc := &rotateCertsCmd{ - client: &armhelpers.MockAKSEngineClient{}, - authProvider: &mockAuthProvider{ - authArgs: &authArgs{}, - getClientMock: &armhelpers.MockAKSEngineClient{}, - }, - apiModelPath: "../pkg/engine/testdata/key-vault-certs/kubernetes.json", - outputDirectory: tmpOutputDir, - location: "westus", - sshFilepath: tmpSSHFile, - sshCommandExecuter: mockExecuteCmd, - masterFQDN: "valid", - } - - r := &cobra.Command{} - f := r.Flags() - addAuthFlags(rcc.getAuthArgs(), f) - fakeRawSubscriptionID := "6dc93fae-9a76-421f-bbe5-cc6460ea81cb" - fakeSubscriptionID, err := uuid.Parse(fakeRawSubscriptionID) - fakeClientID := "b829b379-ca1f-4f1d-91a2-0d26b244680d" - 
fakeClientSecret := "0se43bie-3zs5-303e-aav5-dcf231vb82ds" - if err != nil { - t.Fatalf("Invalid SubscriptionId in Test: %s", err) - } - - rcc.getAuthArgs().SubscriptionID = fakeSubscriptionID - rcc.getAuthArgs().rawSubscriptionID = fakeRawSubscriptionID - rcc.getAuthArgs().rawClientID = fakeClientID - rcc.getAuthArgs().ClientSecret = fakeClientSecret - err = rcc.run(r, []string{}) - if err != nil { - t.Fatalf("Failed to run rotate-certs command: %s", err) - } -} - -func TestGetClusterNodes(t *testing.T) { - t.Parallel() - g := NewGomegaWithT(t) - mockClient := &armhelpers.MockAKSEngineClient{MockKubernetesClient: &armhelpers.MockKubernetesClient{}} - mockClient.MockKubernetesClient.FailListNodes = true - rcc := rotateCertsCmd{ - authProvider: &authArgs{}, - client: mockClient, - containerService: api.CreateMockContainerService("testcluster", "1.10.13", 3, 2, false), - } - err := rcc.getClusterNodes() - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to get cluster nodes")) - - mockClient.MockKubernetesClient.FailListNodes = false - err = rcc.getClusterNodes() - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(len(rcc.masterNodes)).To(Equal(1)) - g.Expect(len(rcc.agentNodes)).To(Equal(1)) -} -func TestDeleteAllPods(t *testing.T) { - t.Parallel() - - g := NewGomegaWithT(t) - mockClient := &armhelpers.MockAKSEngineClient{MockKubernetesClient: &armhelpers.MockKubernetesClient{}} - mockClient.MockKubernetesClient.FailListPods = true - rcc := rotateCertsCmd{ - authProvider: &authArgs{}, - client: mockClient, - containerService: api.CreateMockContainerService("testcluster", "1.10.13", 3, 2, false), - } - err := rcc.deleteAllPods() - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to get pods")) + command := newRotateCertsCmd() + g.Expect(command.Use).Should(Equal(rotateCertsName)) + g.Expect(command.Short).Should(Equal(rotateCertsShortDescription)) + g.Expect(command.Long).Should(Equal(rotateCertsLongDescription)) - mockClient.MockKubernetesClient.FailListPods = false - mockClient.MockKubernetesClient.FailDeletePod = true - mockClient.MockKubernetesClient.PodsList = &v1.PodList{ - Items: []v1.Pod{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: common.KubeDNSAddonName, - Namespace: "kube-system", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod", - Namespace: "kube-system", - }, - }, - }, - } - err = rcc.deleteAllPods() + command.SetArgs([]string{}) + err := command.Execute() g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to delete pod")) - mockClient.MockKubernetesClient.FailDeletePod = false - err = rcc.deleteAllPods() - g.Expect(err).NotTo(HaveOccurred()) -} - -func TestRebootAllNodes(t *testing.T) { - t.Parallel() - - ctx := context.Background() - g := NewGomegaWithT(t) - mockClient := &armhelpers.MockAKSEngineClient{MockKubernetesClient: &armhelpers.MockKubernetesClient{}} - mockClient.FailListVirtualMachines = true - mockClient.FailListVirtualMachineScaleSets = false - mockClient.FakeListVirtualMachineScaleSetsResult = func() []compute.VirtualMachineScaleSet { - scalesetName := "scalesetName" - sku := compute.Sku{} - location := "eastus" - return []compute.VirtualMachineScaleSet{ - { - Name: &scalesetName, - Sku: &sku, - Location: &location, - }, + for _, f := range []string{"location", "ssh-host", "api-model", "linux-ssh-private-key"} { + if command.Flags().Lookup(f) == nil { + t.Fatalf("rotate-certs command should have flag %s", f) } } - rcc := rotateCertsCmd{ - 
authProvider: &authArgs{}, - client: mockClient, - containerService: api.CreateMockContainerService("testcluster", "1.10.13", 3, 2, false), - resourceGroupName: "test-rg", - } - err := rcc.rebootAllNodes(ctx) - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to list Virtual Machines in resource group test-rg")) - - mockClient.FailListVirtualMachines = false - mockClient.FailListVirtualMachineScaleSets = true - err = rcc.rebootAllNodes(ctx) - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to list Virtual Machine Scale Sets in resource group test-rg")) - - mockClient.FailListVirtualMachines = false - mockClient.FailListVirtualMachineScaleSets = false - mockClient.FailRestartVirtualMachine = true - mockClient.FailRestartVirtualMachineScaleSets = false - err = rcc.rebootAllNodes(ctx) - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to restart Virtual Machine")) - - mockClient.FailRestartVirtualMachine = false - mockClient.FailRestartVirtualMachineScaleSets = true - err = rcc.rebootAllNodes(ctx) - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to restart Virtual Machine Scale Sets")) - - mockClient.FailRestartVirtualMachine = false - mockClient.FailRestartVirtualMachineScaleSets = false - err = rcc.rebootAllNodes(ctx) - g.Expect(err).NotTo(HaveOccurred()) } -func TestDeleteServiceAccounts(t *testing.T) { +func TestRotateCertsCmdValidateArgs(t *testing.T) { t.Parallel() - g := NewGomegaWithT(t) - mockClient := &armhelpers.MockAKSEngineClient{MockKubernetesClient: &armhelpers.MockKubernetesClient{}} - mockClient.MockKubernetesClient.FailListServiceAccounts = true - mockClient.MockKubernetesClient.ServiceAccountList = &v1.ServiceAccountList{ - Items: []v1.ServiceAccount{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: common.KubeDNSAddonName, - Namespace: "kube-system", - }, + + existingFile := "../examples/kubernetes.json" + missingFile := "./random/file" + + cases := []struct { + rcc *rotateCertsCmd + expectedErr error + assert func(*rotateCertsCmd) + name string + }{ + { + rcc: &rotateCertsCmd{ + apiModelPath: existingFile, + linuxSSHPrivateKeyPath: existingFile, + sshHostURI: "server.example.com", + location: "southcentralus", }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-sa", - Namespace: "kube-system", - }, + expectedErr: nil, + name: "Valid input", + }, + { + rcc: &rotateCertsCmd{ + linuxSSHPrivateKeyPath: existingFile, + sshHostURI: "server.example.com", + location: "southcentralus", }, + expectedErr: errors.New("--api-model must be specified"), + name: "Missing api-model", }, - } - rcc := rotateCertsCmd{ - authProvider: &authArgs{}, - client: mockClient, - containerService: api.CreateMockContainerService("testcluster", "1.10.13", 3, 2, false), - } - err := rcc.deleteServiceAccounts() - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to get cluster service accounts in namespace kube-system")) - - mockClient.MockKubernetesClient.FailListServiceAccounts = false - mockClient.MockKubernetesClient.FailDeleteServiceAccount = true - err = rcc.deleteServiceAccounts() - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("failed to delete service account kube-dns")) - - mockClient.MockKubernetesClient.FailDeleteServiceAccount = false - err = rcc.deleteServiceAccounts() - g.Expect(err).NotTo(HaveOccurred()) -} - -func TestWriteArtifacts(t *testing.T) { - t.Parallel() - - g := 
NewGomegaWithT(t) - cs := api.CreateMockContainerService("testcluster", "1.16.14", 3, 2, false) - _, err := cs.SetPropertiesDefaults(api.PropertiesDefaultsParams{ - IsScale: false, - IsUpgrade: false, - PkiKeySize: helpers.DefaultPkiKeySize, - }) - g.Expect(err).NotTo(HaveOccurred()) - outdir, del := makeTmpDir(t) - defer del() - - rcc := rotateCertsCmd{ - authProvider: &authArgs{}, - containerService: cs, - apiVersion: "vlabs", - outputDirectory: outdir, - } - - err = rcc.writeArtifacts() - g.Expect(err).NotTo(HaveOccurred()) -} - -func TestUpdateKubeconfig(t *testing.T) { - t.Parallel() - - g := NewGomegaWithT(t) - cs := api.CreateMockContainerService("testcluster", "1.10.13", 3, 2, false) - _, err := cs.SetPropertiesDefaults(api.PropertiesDefaultsParams{ - IsScale: false, - IsUpgrade: false, - PkiKeySize: helpers.DefaultPkiKeySize, - }) - g.Expect(err).NotTo(HaveOccurred()) - - rcc := rotateCertsCmd{ - authProvider: &authArgs{}, - containerService: cs, - apiVersion: "vlabs", - sshCommandExecuter: mockExecuteCmd, - masterFQDN: "valid", - masterNodes: []v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-1234-0", common.LegacyControlPlaneVMPrefix), - }, + { + rcc: &rotateCertsCmd{ + apiModelPath: missingFile, + linuxSSHPrivateKeyPath: existingFile, + sshHostURI: "server.example.com", + location: "southcentralus", }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-1234-2", common.LegacyControlPlaneVMPrefix), - }, + expectedErr: errors.Errorf("specified --api-model does not exist (%s)", missingFile), + name: "Invalid api-model", + }, + { + rcc: &rotateCertsCmd{ + apiModelPath: existingFile, + linuxSSHPrivateKeyPath: existingFile, + sshHostURI: "server.example.com", + location: "", }, + expectedErr: errors.New("--location must be specified"), + name: "Missing location", }, - } - err = rcc.updateKubeconfig() - g.Expect(err).NotTo(HaveOccurred()) - - rcc.masterFQDN = "invalid" - err = rcc.updateKubeconfig() - g.Expect(err).To(HaveOccurred()) -} - -func TestRotateCerts(t *testing.T) { - t.Parallel() - - ctx := context.Background() - g := NewGomegaWithT(t) - cs := api.CreateMockContainerService("testcluster", "1.10.13", 3, 2, false) - _, err := cs.SetPropertiesDefaults(api.PropertiesDefaultsParams{ - IsScale: false, - IsUpgrade: false, - PkiKeySize: helpers.DefaultPkiKeySize, - }) - g.Expect(err).NotTo(HaveOccurred()) - - mockClient := &armhelpers.MockAKSEngineClient{MockKubernetesClient: &armhelpers.MockKubernetesClient{}} - rcc := rotateCertsCmd{ - authProvider: &authArgs{}, - containerService: cs, - sshCommandExecuter: mockExecuteCmd, - masterFQDN: "valid", - client: mockClient, - masterNodes: []v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-1234-0", common.LegacyControlPlaneVMPrefix), - }, + { + rcc: &rotateCertsCmd{ + apiModelPath: existingFile, + linuxSSHPrivateKeyPath: existingFile, + sshHostURI: "", + location: "southcentralus", }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-1234-1", common.LegacyControlPlaneVMPrefix), - }, + expectedErr: errors.New("--ssh-host must be specified"), + name: "Missing SSH host", + }, + { + rcc: &rotateCertsCmd{ + apiModelPath: existingFile, + linuxSSHPrivateKeyPath: "", + sshHostURI: "server.example.com", + location: "southcentralus", }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-1234-2", common.LegacyControlPlaneVMPrefix), - }, + expectedErr: errors.New("--linux-ssh-private-key must be specified"), + name: "Missing SSH private key", + }, + { + rcc: &rotateCertsCmd{ + 
apiModelPath: existingFile, + linuxSSHPrivateKeyPath: missingFile, + sshHostURI: "server.example.com", + location: "southcentralus", }, + expectedErr: errors.Errorf("specified --linux-ssh-private-key does not exist (%s)", missingFile), + name: "Invalid SSH private key", }, - agentNodes: []v1.Node{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "k8s-agents-1234-0", - }, + { + rcc: &rotateCertsCmd{ + apiModelPath: existingFile, + linuxSSHPrivateKeyPath: existingFile, + newCertsPath: missingFile, + sshHostURI: "server.example.com", + location: "southcentralus", }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "k8s-agents-1234-1", - }, + expectedErr: errors.Errorf("specified --certificate-profile does not exist (%s)", missingFile), + name: "Invalid new certs profile path", + }, + { + rcc: &rotateCertsCmd{ + apiModelPath: existingFile, + linuxSSHPrivateKeyPath: existingFile, + newCertsPath: existingFile, + sshHostURI: "server.example.com", + location: "southcentralus", }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "k8s-agents-1234-2", - }, + expectedErr: nil, + assert: func(rcc *rotateCertsCmd) { + g.Expect(rcc.generateCerts).To(Equal(false)) }, + name: "Unset generateCerts if newCertsPath is set", }, } - - err = rcc.rotateEtcd(ctx) - g.Expect(err).NotTo(HaveOccurred()) - - err = rcc.rotateApiserver() - g.Expect(err).NotTo(HaveOccurred()) - - err = rcc.rotateKubelet() - g.Expect(err).NotTo(HaveOccurred()) - - rcc.masterFQDN = "invalid" - err = rcc.rotateEtcd(ctx) - g.Expect(err).To(HaveOccurred()) - - err = rcc.rotateApiserver() - g.Expect(err).To(HaveOccurred()) - - err = rcc.rotateKubelet() - g.Expect(err).To(HaveOccurred()) -} - -func makeTmpFile(t *testing.T, name string) (string, func()) { - tmpF, err := ioutil.TempFile(os.TempDir(), name) - if err != nil { - t.Fatalf("unable to create file: %s", err.Error()) - } - - return tmpF.Name(), func() { - defer os.Remove(tmpF.Name()) - } -} - -func makeTmpDir(t *testing.T) (string, func()) { - tmpDir, err := ioutil.TempDir(os.TempDir(), "_tmp_dir") - if err != nil { - t.Fatalf("unable to create dir: %s", err.Error()) - } - return tmpDir, func() { - defer os.RemoveAll(tmpDir) + for _, tc := range cases { + c := tc + t.Run(c.name, func(t *testing.T) { + c.rcc.authProvider = &mockAuthProvider{ + authArgs: &authArgs{}, + getClientMock: &armhelpers.MockAKSEngineClient{}, + } + cmd := &cobra.Command{} + f := cmd.Flags() + addAuthFlags(c.rcc.getAuthArgs(), f) + fakeRawSubscriptionID := "6dc93fae-9a76-421f-bbe5-cc6460ea81cb" + fakeSubscriptionID, err := uuid.Parse(fakeRawSubscriptionID) + fakeClientID := "b829b379-ca1f-4f1d-91a2-0d26b244680d" + fakeClientSecret := "0se43bie-3zs5-303e-aav5-dcf231vb82ds" + if err != nil { + t.Fatalf("Invalid SubscriptionId in Test: %s", err) + } + c.rcc.getAuthArgs().SubscriptionID = fakeSubscriptionID + c.rcc.getAuthArgs().rawSubscriptionID = fakeRawSubscriptionID + c.rcc.getAuthArgs().rawClientID = fakeClientID + c.rcc.getAuthArgs().ClientSecret = fakeClientSecret + + err = c.rcc.validateArgs() + if c.expectedErr != nil { + g.Expect(err.Error()).To(Equal(c.expectedErr.Error())) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + if c.assert != nil { + c.assert(c.rcc) + } + }) } } diff --git a/cmd/rotatecerts/arm.go b/cmd/rotatecerts/arm.go new file mode 100644 index 0000000000..9c7ba22707 --- /dev/null +++ b/cmd/rotatecerts/arm.go @@ -0,0 +1,100 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. 
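+//
+// Usage sketch for the wrapper below (illustrative only; the client value and
+// resource names are assumptions, not part of this change):
+//
+//	arm := NewARMClientWrapper(azClient, 10*time.Second, 30*time.Minute)
+//	// each wrapped call retries on the given interval until the timeout elapses
+//	if err := arm.RestartVirtualMachine("example-rg", "k8s-master-00000000-0"); err != nil {
+//		log.Fatal(err)
+//	}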
+
+package rotatecerts
+
+import (
+	"context"
+	"strings"
+	"time"
+
+	"github.com/Azure/aks-engine/pkg/armhelpers"
+	"github.com/pkg/errors"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/util/retry"
+)
+
+// ARMClientWrapper is an ARM client with simple retry logic
+type ARMClientWrapper struct {
+	client    armhelpers.AKSEngineClient
+	timeout   time.Duration
+	backoff   wait.Backoff
+	retryFunc func(err error) bool
+}
+
+// NewARMClientWrapper returns an ARM client with simple retry logic
+func NewARMClientWrapper(client armhelpers.AKSEngineClient, interval, timeout time.Duration) *ARMClientWrapper {
+	return &ARMClientWrapper{
+		client:  client,
+		timeout: timeout,
+		backoff: wait.Backoff{
+			Steps:    int(int64(timeout/time.Millisecond) / int64(interval/time.Millisecond)),
+			Duration: interval,
+		},
+		retryFunc: func(err error) bool { return err != nil },
+	}
+}
+
+// GetVirtualMachinePowerState returns the specified virtual machine's power state
+func (arm *ARMClientWrapper) GetVirtualMachinePowerState(resourceGroup, vmName string) (string, error) {
+	var err error
+	status := ""
+	err = retry.OnError(arm.backoff, arm.retryFunc, func() error {
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+		defer cancel()
+		status, err = arm.client.GetVirtualMachinePowerState(ctx, resourceGroup, vmName)
+		if err != nil {
+			return errors.Errorf("fetching virtual machine resource")
+		}
+		return nil
+	})
+	return status, err
+}
+
+// GetVirtualMachineScaleSetInstancePowerState returns the power state of the specified scale set virtual machine instance
+func (arm *ARMClientWrapper) GetVirtualMachineScaleSetInstancePowerState(resourceGroup, vmssName, instanceID string) (string, error) {
+	var err error
+	status := ""
+	err = retry.OnError(arm.backoff, arm.retryFunc, func() error {
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+		defer cancel()
+		status, err = arm.client.GetVirtualMachineScaleSetInstancePowerState(ctx, resourceGroup, vmssName, instanceID)
+		if err != nil {
+			return errors.Errorf("fetching virtual machine scale set instance")
+		}
+		return nil
+	})
+	return status, err
+}
+
+// RestartVirtualMachine restarts the specified virtual machine
+func (arm *ARMClientWrapper) RestartVirtualMachine(resourceGroup, vmName string) error {
+	var err error
+	err = retry.OnError(arm.backoff, arm.retryFunc, func() error {
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+		defer cancel()
+		if err = arm.client.RestartVirtualMachine(ctx, resourceGroup, vmName); err != nil {
+			return errors.Errorf("restarting virtual machine")
+		}
+		return nil
+	})
+	return err
+}
+
+// RestartVirtualMachineScaleSets restarts the specified virtual machine scale set
+func (arm *ARMClientWrapper) RestartVirtualMachineScaleSets(resourceGroup, vmssName string) error {
+	var err error
+	err = retry.OnError(arm.backoff, arm.retryFunc, func() error {
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
+		defer cancel()
+		if err = arm.client.RestartVirtualMachineScaleSets(ctx, resourceGroup, vmssName, nil); err != nil {
+			return errors.Errorf("restarting virtual machine scale set")
+		}
+		return nil
+	})
+	return err
+}
+
+func isVirtualMachineRunning(status string) bool {
+	return strings.EqualFold(status, "PowerState/running")
+}
diff --git a/cmd/rotatecerts/internal/interfaces.go b/cmd/rotatecerts/internal/interfaces.go
new file mode 100644
index 0000000000..8b64c9c000
--- /dev/null
+++ b/cmd/rotatecerts/internal/interfaces.go
@@ -0,0 +1,49 @@
+// Copyright (c)
Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +package internal + +import ( + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type KubeClient interface { + // ListPods returns Pods based on the passed in list options. + ListPods(namespace string, opts metav1.ListOptions) (*v1.PodList, error) + // ListNodes returns a list of Nodes registered in the api server. + ListNodes() (*v1.NodeList, error) + // ListServiceAccounts returns a list of Service Accounts in the provided namespace. + ListServiceAccounts(namespace string, opts metav1.ListOptions) (*v1.ServiceAccountList, error) + // ListDeployments returns a list of deployments in the provided namespace. + ListDeployments(namespace string, opts metav1.ListOptions) (*appsv1.DeploymentList, error) + // ListDaemonSets returns a list of daemonsets in the provided namespace. + ListDaemonSets(namespace string, opts metav1.ListOptions) (*appsv1.DaemonSetList, error) + // GetDeployment returns a given deployment in a namespace. + GetDeployment(namespace, name string) (*appsv1.Deployment, error) + // PatchDeployment applies a JSON patch to a deployment in the provided namespace. + PatchDeployment(namespace, name, jsonPatch string) (*appsv1.Deployment, error) + // PatchDaemonSet applies a JSON patch to a daemonset in the provided namespace. + PatchDaemonSet(namespace, name, jsonPatch string) (*appsv1.DaemonSet, error) + // DeletePods deletes all pods in a namespace that match the option filters. + DeletePods(namespace string, opts metav1.ListOptions) error + // DeleteServiceAccount deletes the passed in service account. + DeleteServiceAccount(secret *v1.ServiceAccount) error + // DeleteSecret deletes the passed in secret. + DeleteSecret(secret *v1.Secret) error +} + +type ARMClient interface { + // RestartVirtualMachine restarts the specified virtual machine. + RestartVirtualMachine(resourceGroup, vmName string) error + + // RestartVirtualMachineScaleSets restarts the specified virtual machine scale set. + RestartVirtualMachineScaleSets(resourceGroup, vmssName string) error + + // GetVirtualMachinePowerState returns the virtual machine's Power state. + GetVirtualMachinePowerState(resourceGroup, vmName string) (string, error) + + // GetVirtualMachineScaleSetInstancePowerState returns the virtual machine's Power state. + GetVirtualMachineScaleSetInstancePowerState(resourceGroup, vmssName, instanceID string) (string, error) +} diff --git a/cmd/rotatecerts/internal/mock_internal/client_mock.go b/cmd/rotatecerts/internal/mock_internal/client_mock.go new file mode 100644 index 0000000000..8acf8917e4 --- /dev/null +++ b/cmd/rotatecerts/internal/mock_internal/client_mock.go @@ -0,0 +1,282 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +// Code generated by MockGen. DO NOT EDIT. +// Source: ../interfaces.go + +// Package mock_internal is a generated GoMock package. 
+package mock_internal + +import ( + gomock "github.com/golang/mock/gomock" + v1 "k8s.io/api/apps/v1" + v10 "k8s.io/api/core/v1" + v11 "k8s.io/apimachinery/pkg/apis/meta/v1" + reflect "reflect" +) + +// MockKubeClient is a mock of KubeClient interface +type MockKubeClient struct { + ctrl *gomock.Controller + recorder *MockKubeClientMockRecorder +} + +// MockKubeClientMockRecorder is the mock recorder for MockKubeClient +type MockKubeClientMockRecorder struct { + mock *MockKubeClient +} + +// NewMockKubeClient creates a new mock instance +func NewMockKubeClient(ctrl *gomock.Controller) *MockKubeClient { + mock := &MockKubeClient{ctrl: ctrl} + mock.recorder = &MockKubeClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use +func (m *MockKubeClient) EXPECT() *MockKubeClientMockRecorder { + return m.recorder +} + +// ListPods mocks base method +func (m *MockKubeClient) ListPods(namespace string, opts v11.ListOptions) (*v10.PodList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListPods", namespace, opts) + ret0, _ := ret[0].(*v10.PodList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListPods indicates an expected call of ListPods +func (mr *MockKubeClientMockRecorder) ListPods(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListPods", reflect.TypeOf((*MockKubeClient)(nil).ListPods), namespace, opts) +} + +// ListNodes mocks base method +func (m *MockKubeClient) ListNodes() (*v10.NodeList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListNodes") + ret0, _ := ret[0].(*v10.NodeList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListNodes indicates an expected call of ListNodes +func (mr *MockKubeClientMockRecorder) ListNodes() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListNodes", reflect.TypeOf((*MockKubeClient)(nil).ListNodes)) +} + +// ListServiceAccounts mocks base method +func (m *MockKubeClient) ListServiceAccounts(namespace string, opts v11.ListOptions) (*v10.ServiceAccountList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListServiceAccounts", namespace, opts) + ret0, _ := ret[0].(*v10.ServiceAccountList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListServiceAccounts indicates an expected call of ListServiceAccounts +func (mr *MockKubeClientMockRecorder) ListServiceAccounts(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListServiceAccounts", reflect.TypeOf((*MockKubeClient)(nil).ListServiceAccounts), namespace, opts) +} + +// ListDeployments mocks base method +func (m *MockKubeClient) ListDeployments(namespace string, opts v11.ListOptions) (*v1.DeploymentList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListDeployments", namespace, opts) + ret0, _ := ret[0].(*v1.DeploymentList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListDeployments indicates an expected call of ListDeployments +func (mr *MockKubeClientMockRecorder) ListDeployments(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListDeployments", reflect.TypeOf((*MockKubeClient)(nil).ListDeployments), namespace, opts) +} + +// ListDaemonSets mocks base method +func (m *MockKubeClient) ListDaemonSets(namespace string, opts v11.ListOptions) (*v1.DaemonSetList, error) { + m.ctrl.T.Helper() + ret := 
m.ctrl.Call(m, "ListDaemonSets", namespace, opts) + ret0, _ := ret[0].(*v1.DaemonSetList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListDaemonSets indicates an expected call of ListDaemonSets +func (mr *MockKubeClientMockRecorder) ListDaemonSets(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListDaemonSets", reflect.TypeOf((*MockKubeClient)(nil).ListDaemonSets), namespace, opts) +} + +// GetDeployment mocks base method +func (m *MockKubeClient) GetDeployment(namespace, name string) (*v1.Deployment, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetDeployment", namespace, name) + ret0, _ := ret[0].(*v1.Deployment) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetDeployment indicates an expected call of GetDeployment +func (mr *MockKubeClientMockRecorder) GetDeployment(namespace, name interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetDeployment", reflect.TypeOf((*MockKubeClient)(nil).GetDeployment), namespace, name) +} + +// PatchDeployment mocks base method +func (m *MockKubeClient) PatchDeployment(namespace, name, jsonPatch string) (*v1.Deployment, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PatchDeployment", namespace, name, jsonPatch) + ret0, _ := ret[0].(*v1.Deployment) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// PatchDeployment indicates an expected call of PatchDeployment +func (mr *MockKubeClientMockRecorder) PatchDeployment(namespace, name, jsonPatch interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PatchDeployment", reflect.TypeOf((*MockKubeClient)(nil).PatchDeployment), namespace, name, jsonPatch) +} + +// PatchDaemonSet mocks base method +func (m *MockKubeClient) PatchDaemonSet(namespace, name, jsonPatch string) (*v1.DaemonSet, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PatchDaemonSet", namespace, name, jsonPatch) + ret0, _ := ret[0].(*v1.DaemonSet) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// PatchDaemonSet indicates an expected call of PatchDaemonSet +func (mr *MockKubeClientMockRecorder) PatchDaemonSet(namespace, name, jsonPatch interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PatchDaemonSet", reflect.TypeOf((*MockKubeClient)(nil).PatchDaemonSet), namespace, name, jsonPatch) +} + +// DeletePods mocks base method +func (m *MockKubeClient) DeletePods(namespace string, opts v11.ListOptions) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeletePods", namespace, opts) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeletePods indicates an expected call of DeletePods +func (mr *MockKubeClientMockRecorder) DeletePods(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeletePods", reflect.TypeOf((*MockKubeClient)(nil).DeletePods), namespace, opts) +} + +// DeleteServiceAccount mocks base method +func (m *MockKubeClient) DeleteServiceAccount(secret *v10.ServiceAccount) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteServiceAccount", secret) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteServiceAccount indicates an expected call of DeleteServiceAccount +func (mr *MockKubeClientMockRecorder) DeleteServiceAccount(secret interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, 
"DeleteServiceAccount", reflect.TypeOf((*MockKubeClient)(nil).DeleteServiceAccount), secret) +} + +// DeleteSecret mocks base method +func (m *MockKubeClient) DeleteSecret(secret *v10.Secret) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteSecret", secret) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteSecret indicates an expected call of DeleteSecret +func (mr *MockKubeClientMockRecorder) DeleteSecret(secret interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteSecret", reflect.TypeOf((*MockKubeClient)(nil).DeleteSecret), secret) +} + +// MockARMClient is a mock of ARMClient interface +type MockARMClient struct { + ctrl *gomock.Controller + recorder *MockARMClientMockRecorder +} + +// MockARMClientMockRecorder is the mock recorder for MockARMClient +type MockARMClientMockRecorder struct { + mock *MockARMClient +} + +// NewMockARMClient creates a new mock instance +func NewMockARMClient(ctrl *gomock.Controller) *MockARMClient { + mock := &MockARMClient{ctrl: ctrl} + mock.recorder = &MockARMClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use +func (m *MockARMClient) EXPECT() *MockARMClientMockRecorder { + return m.recorder +} + +// RestartVirtualMachine mocks base method +func (m *MockARMClient) RestartVirtualMachine(resourceGroup, vmName string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RestartVirtualMachine", resourceGroup, vmName) + ret0, _ := ret[0].(error) + return ret0 +} + +// RestartVirtualMachine indicates an expected call of RestartVirtualMachine +func (mr *MockARMClientMockRecorder) RestartVirtualMachine(resourceGroup, vmName interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RestartVirtualMachine", reflect.TypeOf((*MockARMClient)(nil).RestartVirtualMachine), resourceGroup, vmName) +} + +// RestartVirtualMachineScaleSets mocks base method +func (m *MockARMClient) RestartVirtualMachineScaleSets(resourceGroup, vmssName string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RestartVirtualMachineScaleSets", resourceGroup, vmssName) + ret0, _ := ret[0].(error) + return ret0 +} + +// RestartVirtualMachineScaleSets indicates an expected call of RestartVirtualMachineScaleSets +func (mr *MockARMClientMockRecorder) RestartVirtualMachineScaleSets(resourceGroup, vmssName interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RestartVirtualMachineScaleSets", reflect.TypeOf((*MockARMClient)(nil).RestartVirtualMachineScaleSets), resourceGroup, vmssName) +} + +// GetVirtualMachinePowerState mocks base method +func (m *MockARMClient) GetVirtualMachinePowerState(resourceGroup, vmName string) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVirtualMachinePowerState", resourceGroup, vmName) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetVirtualMachinePowerState indicates an expected call of GetVirtualMachinePowerState +func (mr *MockARMClientMockRecorder) GetVirtualMachinePowerState(resourceGroup, vmName interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVirtualMachinePowerState", reflect.TypeOf((*MockARMClient)(nil).GetVirtualMachinePowerState), resourceGroup, vmName) +} + +// GetVirtualMachineScaleSetInstancePowerState mocks base method +func (m *MockARMClient) 
GetVirtualMachineScaleSetInstancePowerState(resourceGroup, vmssName, instanceID string) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetVirtualMachineScaleSetInstancePowerState", resourceGroup, vmssName, instanceID) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetVirtualMachineScaleSetInstancePowerState indicates an expected call of GetVirtualMachineScaleSetInstancePowerState +func (mr *MockARMClientMockRecorder) GetVirtualMachineScaleSetInstancePowerState(resourceGroup, vmssName, instanceID interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetVirtualMachineScaleSetInstancePowerState", reflect.TypeOf((*MockARMClient)(nil).GetVirtualMachineScaleSetInstancePowerState), resourceGroup, vmssName, instanceID) +} diff --git a/cmd/rotatecerts/internal/mock_internal/doc.go b/cmd/rotatecerts/internal/mock_internal/doc.go new file mode 100644 index 0000000000..efaec5352d --- /dev/null +++ b/cmd/rotatecerts/internal/mock_internal/doc.go @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +//go:generate mockgen -destination client_mock.go --package mock_internal --source ../interfaces.go Client +//go:generate /usr/bin/env bash -c "cat ../../../../scripts/copyright.txt client_mock.go > _client_mock.go && mv _client_mock.go client_mock.go" + +package mock_internal //nolint diff --git a/cmd/rotatecerts/operations.go b/cmd/rotatecerts/operations.go new file mode 100644 index 0000000000..5c6d6871b4 --- /dev/null +++ b/cmd/rotatecerts/operations.go @@ -0,0 +1,164 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +package rotatecerts + +import ( + "fmt" + "math/rand" + "time" + + "github.com/Azure/aks-engine/cmd/rotatecerts/internal" + "github.com/Azure/aks-engine/pkg/api/common" + "github.com/pkg/errors" + log "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// PauseClusterAutoscaler scales to zero the replica count of the cluster autoscaler deployment +// and returns a function that scales back to the original replica count. +// +// It NOPs if the original replica count is zero. 
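+//
+// A typical call site might look like the following sketch (error handling
+// trimmed; "client" stands for any internal.KubeClient implementation):
+//
+//	resume, err := PauseClusterAutoscaler(client)
+//	if err != nil {
+//		return err
+//	}
+//	defer func() { _ = resume() }()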
+func PauseClusterAutoscaler(client internal.KubeClient) (func() error, error) {
+	name := common.ClusterAutoscalerAddonName
+
+	deploy, err := client.GetDeployment(metav1.NamespaceSystem, name)
+	if err != nil && !apierrors.IsNotFound(err) {
+		return nil, errors.Wrapf(err, "getting %s deployment", name)
+	}
+	if apierrors.IsNotFound(err) || *deploy.Spec.Replicas == 0 {
+		return func() error { return nil }, nil
+	}
+
+	patch := func(msg string, count int32) error {
+		log.Info(msg)
+		json := fmt.Sprintf(`{"spec":{"replicas": %d}}`, count)
+		if _, err = client.PatchDeployment(metav1.NamespaceSystem, name, json); err != nil {
+			return errors.Wrapf(err, "applying patch to %s deployment", name)
+		}
+		return nil
+	}
+
+	if err := patch(fmt.Sprintf("Pausing %s, setting replica count to 0", name), 0); err != nil {
+		return nil, err
+	}
+
+	return func() error {
+		c := *deploy.Spec.Replicas
+		err := patch(fmt.Sprintf("Resuming %s, setting replica count to %d", name, c), c)
+		log.Warnln("Run \"aks-engine upgrade\" to refresh the cluster-autoscaler node template")
+		if err != nil {
+			return err
+		}
+		return nil
+	}, nil
+}
+
+// RotateServiceAccountTokens deletes service account tokens referenced by daemonsets and deployments
+// from the namespaces of interest and triggers a rollout once the tokens are deleted.
+//
+// Service account tokens are signed by the cluster CA;
+// deleting them after the CA is rotated ensures that KCM will regenerate tokens signed by the new CA.
+func RotateServiceAccountTokens(client internal.KubeClient, namespaces []string) error {
+	for _, ns := range namespaces {
+		deleteSATokens, err := deleteSATokensFunc(client, ns)
+		if err != nil {
+			return err
+		}
+		if deleteSATokens == nil {
+			// no tokens to rotate in this namespace
+			continue
+		}
+		if err = deleteDeploymentSATokensAndForceRollout(client, ns, deleteSATokens); err != nil {
+			return err
+		}
+		if err = deleteDaemonSetSATokensAndForceRollout(client, ns, deleteSATokens); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func deleteDeploymentSATokensAndForceRollout(client internal.KubeClient, ns string, deleteSATokens func(string) error) error {
+	random := rand.New(rand.NewSource(time.Now().UnixNano()))
+	patch := fmt.Sprintf(`{"spec":{"template":{"metadata":{"annotations":{"ca-rotation":"%d"}}}}}`, random.Int31())
+
+	deployList, err := client.ListDeployments(ns, metav1.ListOptions{})
+	if err != nil {
+		return errors.Wrapf(err, "listing %s deployments", ns)
+	}
+	for _, deploy := range deployList.Items {
+		if deploy.Spec.Template.Spec.ServiceAccountName != "" {
+			// delete SA tokens
+			if err = deleteSATokens(deploy.Spec.Template.Spec.ServiceAccountName); err != nil {
+				return err
+			}
+		}
+		// trigger rollout so the deploy replicas mount the newly generated sa token
+		if _, err := client.PatchDeployment(ns, deploy.Name, patch); err != nil {
+			return errors.Wrapf(err, "patching %s deployment %s", ns, deploy.Name)
+		}
+	}
+	return nil
+}
+
+func deleteDaemonSetSATokensAndForceRollout(client internal.KubeClient, ns string, deleteSATokens func(string) error) error {
+	random := rand.New(rand.NewSource(time.Now().UnixNano()))
+	patch := fmt.Sprintf(`{"spec":{"template":{"metadata":{"annotations":{"ca-rotation":"%d"}}}}}`, random.Int31())
+
+	dsList, err := client.ListDaemonSets(ns, metav1.ListOptions{})
+	if err != nil {
+		return errors.Wrapf(err, "listing %s daemonsets", ns)
+	}
+	for _, ds := range dsList.Items {
+		if ds.Spec.Template.Spec.ServiceAccountName != "" {
+			// delete SA tokens
+			if err =
deleteSATokens(ds.Spec.Template.Spec.ServiceAccountName); err != nil { + return err + } + } + // trigger rollout so the ds replicas mount the newly generated sa token + if _, err = client.PatchDaemonSet(ns, ds.Name, patch); err != nil { + return errors.Wrapf(err, "patching %s daemonset %s", ns, ds.Name) + } + } + return nil +} + +func deleteSATokensFunc(client internal.KubeClient, ns string) (func(string) error, error) { + saList, err := client.ListServiceAccounts(ns, metav1.ListOptions{}) + if err != nil { + return nil, errors.Wrapf(err, "listing %s service accounts", ns) + } + if len(saList.Items) == 0 { + return nil, nil + } + saMap := make(map[string]v1.ServiceAccount) + for _, sa := range saList.Items { + saMap[sa.Name] = sa + } + return func(name string) error { + sa, ok := saMap[name] + if !ok { + return nil + } + for _, s := range sa.Secrets { + err := client.DeleteSecret(&v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns, + Name: s.Name, + }, + }) + if err != nil && !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "deleting %s secret %s", ns, s.Name) + } + } + if err := client.DeleteServiceAccount(&sa); err != nil && !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "deleting %s service account %s", ns, sa.Name) + } + delete(saMap, name) + return nil + }, nil +} diff --git a/cmd/rotatecerts/operations_test.go b/cmd/rotatecerts/operations_test.go new file mode 100644 index 0000000000..acdb40d3fd --- /dev/null +++ b/cmd/rotatecerts/operations_test.go @@ -0,0 +1,477 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +package rotatecerts + +import ( + "testing" + + mock "github.com/Azure/aks-engine/cmd/rotatecerts/internal/mock_internal" + gomock "github.com/golang/mock/gomock" + . 
"github.com/onsi/gomega" + "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ( + errAPIGeneric = errors.New("generic api error") + errAPINotFound = &apierrors.StatusError{ + ErrStatus: metav1.Status{ + Reason: metav1.StatusReasonNotFound, + }, + } +) + +func TestPauseClusterAutoscaler(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + t.Run("GetDeployment fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().GetDeployment(gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).Times(1) + + resume, err := PauseClusterAutoscaler(mock) + g.Expect(resume).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) + + t.Run("Deployment does not exist", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().GetDeployment(gomock.Any(), gomock.Any()).Return(nil, errAPINotFound).Times(1) + mock.EXPECT().PatchDeployment(gomock.Any(), gomock.Any(), gomock.Any()).Times(0) + + resume, err := PauseClusterAutoscaler(mock) + g.Expect(resume).ToNot(BeNil()) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(resume()).ToNot(HaveOccurred()) + }) + + t.Run("Deployment replica count is zero", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + var replicas int32 = 0 + deploy := appsv1.Deployment{} + deploy.Spec.Replicas = &replicas + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().GetDeployment(gomock.Any(), gomock.Any()).Return(&deploy, nil).Times(1) + mock.EXPECT().PatchDeployment(gomock.Any(), gomock.Any(), gomock.Any()).Times(0) + + resume, err := PauseClusterAutoscaler(mock) + g.Expect(resume).ToNot(BeNil()) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(resume()).ToNot(HaveOccurred()) + }) + + t.Run("Deployment scale down fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + var replicas int32 = 1 + deploy := appsv1.Deployment{} + deploy.Spec.Replicas = &replicas + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().GetDeployment(gomock.Any(), gomock.Any()).Return(&deploy, nil).Times(1) + mock.EXPECT().PatchDeployment(gomock.Any(), gomock.Any(), gomock.Any()).Return(&deploy, errAPIGeneric).Times(1) + + resume, err := PauseClusterAutoscaler(mock) + g.Expect(resume).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) + + t.Run("Deployment scale ok", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + var replicas int32 = 1 + deploy := appsv1.Deployment{} + deploy.Spec.Replicas = &replicas + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().GetDeployment(gomock.Any(), gomock.Any()).Return(&deploy, nil).Times(1) + mock.EXPECT().PatchDeployment(gomock.Any(), gomock.Any(), gomock.Any()).Return(&deploy, nil).Times(2) + + resume, err := PauseClusterAutoscaler(mock) + g.Expect(resume).ToNot(BeNil()) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(resume()).ToNot(HaveOccurred()) + }) + + t.Run("Deployment scale up fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + var replicas int32 = 1 + deploy := appsv1.Deployment{} + deploy.Spec.Replicas = &replicas + mock := mock.NewMockKubeClient(mockCtrl) + 
mock.EXPECT().GetDeployment(gomock.Any(), gomock.Any()).Return(&deploy, nil).Times(1) + gomock.InOrder( + mock.EXPECT().PatchDeployment(gomock.Any(), gomock.Any(), gomock.Any()).Return(&deploy, nil), + mock.EXPECT().PatchDeployment(gomock.Any(), gomock.Any(), gomock.Any()).Return(&deploy, errAPIGeneric), + ) + + resume, err := PauseClusterAutoscaler(mock) + g.Expect(resume).ToNot(BeNil()) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(resume()).To(HaveOccurred()) + }) +} + +func TestDeleteDeploymentSATokensAndForceRollout(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + deleteSATokens := func(saMap map[string]bool) func(name string) error { + return func(name string) error { + saMap[name] = false + return nil + } + } + + t.Run("ListDeployment fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDeployments(gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).Times(1) + + err := deleteDeploymentSATokensAndForceRollout(mock, "ns", deleteSATokens(nil)) + g.Expect(err).To(HaveOccurred()) + }) + + t.Run("All deployments are patched, SA deleted", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + noSA := appsv1.Deployment{} + noSA.Name = "noSA" + noSA.Spec.Template.Spec.ServiceAccountName = "noSA" + hasSA := appsv1.Deployment{} + hasSA.Name = "hasSA" + hasSA.Spec.Template.Spec.ServiceAccountName = "hasSA" + list := &appsv1.DeploymentList{Items: []appsv1.Deployment{hasSA, noSA}} + + saMap := make(map[string]bool) + saMap[hasSA.Spec.Template.Spec.ServiceAccountName] = true + saMap["random"] = true + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDeployments(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + mock.EXPECT().PatchDeployment(gomock.Any(), "noSA", gomock.Any()).Return(nil, nil).Times(1) + mock.EXPECT().PatchDeployment(gomock.Any(), "hasSA", gomock.Any()).Return(nil, nil).Times(1) + + err := deleteDeploymentSATokensAndForceRollout(mock, "ns", deleteSATokens(saMap)) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(saMap["random"]).To(BeTrue()) + g.Expect(saMap["hasSA"]).To(BeFalse()) + }) + + t.Run("Return error if delete SA fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + hasSA := appsv1.Deployment{} + hasSA.Spec.Template.Spec.ServiceAccountName = "hasSA" + list := &appsv1.DeploymentList{Items: []appsv1.Deployment{hasSA}} + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDeployments(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + + err := deleteDeploymentSATokensAndForceRollout(mock, "ns", func(name string) error { + return errors.New("Delete SA failed") + }) + g.Expect(err).To(HaveOccurred()) + }) + + t.Run("Return error if patch deployment fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + hasSA := appsv1.Deployment{} + hasSA.Spec.Template.Spec.ServiceAccountName = "hasSA" + list := &appsv1.DeploymentList{Items: []appsv1.Deployment{hasSA}} + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDeployments(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + mock.EXPECT().PatchDeployment(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).Times(1) + + err := deleteDeploymentSATokensAndForceRollout(mock, "ns", func(_ string) error { return nil }) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) +} + +func 
TestDeleteDaemonSetSATokensAndForceRollout(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + deleteSATokens := func(saMap map[string]bool) func(name string) error { + return func(name string) error { + saMap[name] = false + return nil + } + } + + t.Run("ListDaemonSets fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDaemonSets(gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).Times(1) + + err := deleteDaemonSetSATokensAndForceRollout(mock, "ns", deleteSATokens(nil)) + g.Expect(err).To(HaveOccurred()) + }) + + t.Run("All daemonsets are patched, SA deleted", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + noSA := appsv1.DaemonSet{} + noSA.Name = "noSA" + noSA.Spec.Template.Spec.ServiceAccountName = "noSA" + hasSA := appsv1.DaemonSet{} + hasSA.Name = "hasSA" + hasSA.Spec.Template.Spec.ServiceAccountName = "hasSA" + list := &appsv1.DaemonSetList{Items: []appsv1.DaemonSet{hasSA, noSA}} + + saMap := make(map[string]bool) + saMap[hasSA.Spec.Template.Spec.ServiceAccountName] = true + saMap["random"] = true + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDaemonSets(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + mock.EXPECT().PatchDaemonSet(gomock.Any(), "noSA", gomock.Any()).Return(nil, nil).Times(1) + mock.EXPECT().PatchDaemonSet(gomock.Any(), "hasSA", gomock.Any()).Return(nil, nil).Times(1) + + err := deleteDaemonSetSATokensAndForceRollout(mock, "ns", deleteSATokens(saMap)) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(saMap["random"]).To(BeTrue()) + g.Expect(saMap["hasSA"]).To(BeFalse()) + }) + + t.Run("Return error if delete SA fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + hasSA := appsv1.DaemonSet{} + hasSA.Spec.Template.Spec.ServiceAccountName = "hasSA" + list := &appsv1.DaemonSetList{Items: []appsv1.DaemonSet{hasSA}} + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDaemonSets(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + + err := deleteDaemonSetSATokensAndForceRollout(mock, "ns", func(name string) error { + return errors.New("Delete SA failed") + }) + g.Expect(err).To(HaveOccurred()) + }) + + t.Run("Return error if patch daemonset fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + hasSA := appsv1.DaemonSet{} + hasSA.Spec.Template.Spec.ServiceAccountName = "hasSA" + list := &appsv1.DaemonSetList{Items: []appsv1.DaemonSet{hasSA}} + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDaemonSets(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + mock.EXPECT().PatchDaemonSet(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).Times(1) + + err := deleteDaemonSetSATokensAndForceRollout(mock, "ns", func(_ string) error { return nil }) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) +} + +func TestDeleteSATokensFunc(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + t.Run("List ServiceAccounts fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + list := &v1.ServiceAccountList{} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListServiceAccounts(gomock.Any(), gomock.Any()).Return(list, errAPIGeneric).Times(1) + + deleteSATokens, err := deleteSATokensFunc(mock, "ns") + g.Expect(deleteSATokens).To(BeNil()) + 
g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) + + t.Run("No ServiceAccounts in namespace", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + list := &v1.ServiceAccountList{} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListServiceAccounts(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + + deleteSATokens, err := deleteSATokensFunc(mock, "ns") + g.Expect(deleteSATokens).To(BeNil()) + g.Expect(err).To(BeNil()) + }) + + t.Run("Expected ServiceAccount not found", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + sa := v1.ServiceAccount{} + sa.Name = "sa" + sa.Secrets = []v1.ObjectReference{ + v1.ObjectReference{ + Name: "sasecret1", + Namespace: "ns", + }, + v1.ObjectReference{ + Name: "sasecret2", + Namespace: "ns", + }, + } + list := &v1.ServiceAccountList{Items: []v1.ServiceAccount{sa}} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListServiceAccounts(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + + deleteSATokens, err := deleteSATokensFunc(mock, "ns") + g.Expect(deleteSATokens).ToNot(BeNil()) + g.Expect(err).To(BeNil()) + err = deleteSATokens("404") + g.Expect(err).To(BeNil()) + }) + + t.Run("Secret to delete not found, service account deleted", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + sa := v1.ServiceAccount{} + sa.Name = "sa" + sa.Secrets = []v1.ObjectReference{ + v1.ObjectReference{ + Name: "sasecret1", + Namespace: "ns", + }, + v1.ObjectReference{ + Name: "sasecret2", + Namespace: "ns", + }, + } + list := &v1.ServiceAccountList{Items: []v1.ServiceAccount{sa}} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListServiceAccounts(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + mock.EXPECT().DeleteSecret(gomock.Any()).Return(errAPINotFound).Times(2) + mock.EXPECT().DeleteServiceAccount(&sa).Return(nil).Times(1) + + deleteSATokens, err := deleteSATokensFunc(mock, "ns") + g.Expect(deleteSATokens).ToNot(BeNil()) + g.Expect(err).To(BeNil()) + err = deleteSATokens(sa.Name) + g.Expect(err).To(BeNil()) + }) + + t.Run("Delete Secret fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + sa := v1.ServiceAccount{} + sa.Name = "sa" + sa.Secrets = []v1.ObjectReference{ + v1.ObjectReference{ + Name: "sasecret1", + Namespace: "ns", + }, + v1.ObjectReference{ + Name: "sasecret2", + Namespace: "ns", + }, + } + list := &v1.ServiceAccountList{Items: []v1.ServiceAccount{sa}} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListServiceAccounts(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + mock.EXPECT().DeleteSecret(gomock.Any()).Return(errAPIGeneric).Times(1) + + deleteSATokens, err := deleteSATokensFunc(mock, "ns") + g.Expect(deleteSATokens).ToNot(BeNil()) + g.Expect(err).To(BeNil()) + err = deleteSATokens(sa.Name) + g.Expect(err).ToNot(BeNil()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) + + t.Run("Secrets deleted, service account deleted", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + sa := v1.ServiceAccount{} + sa.Name = "sa" + sa.Secrets = []v1.ObjectReference{ + v1.ObjectReference{ + Name: "sasecret1", + Namespace: "ns", + }, + v1.ObjectReference{ + Name: "sasecret2", + Namespace: "ns", + }, + } + + list := &v1.ServiceAccountList{Items: []v1.ServiceAccount{sa}} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListServiceAccounts(gomock.Any(), gomock.Any()).Return(list, 
nil).Times(1) + mock.EXPECT().DeleteSecret(gomock.Any()).Return(nil).Times(2) + mock.EXPECT().DeleteServiceAccount(&sa).Return(nil).Times(1) + + deleteSATokens, err := deleteSATokensFunc(mock, "ns") + g.Expect(deleteSATokens).ToNot(BeNil()) + g.Expect(err).To(BeNil()) + err = deleteSATokens(sa.Name) + g.Expect(err).To(BeNil()) + // check is only deleted once + err = deleteSATokens(sa.Name) + g.Expect(err).To(BeNil()) + }) + + t.Run("Secrets deleted, delete service account fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + sa := v1.ServiceAccount{} + sa.Name = "sa" + sa.Secrets = []v1.ObjectReference{ + v1.ObjectReference{ + Name: "sasecret1", + Namespace: "ns", + }, + v1.ObjectReference{ + Name: "sasecret2", + Namespace: "ns", + }, + } + + list := &v1.ServiceAccountList{Items: []v1.ServiceAccount{sa}} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListServiceAccounts(gomock.Any(), gomock.Any()).Return(list, nil).Times(1) + mock.EXPECT().DeleteSecret(gomock.Any()).Return(nil).Times(2) + mock.EXPECT().DeleteServiceAccount(&sa).Return(errAPIGeneric).Times(1) + + deleteSATokens, err := deleteSATokensFunc(mock, "ns") + g.Expect(deleteSATokens).ToNot(BeNil()) + g.Expect(err).To(BeNil()) + err = deleteSATokens(sa.Name) + g.Expect(err).ToNot(BeNil()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) +} diff --git a/cmd/rotatecerts/wait.go b/cmd/rotatecerts/wait.go new file mode 100644 index 0000000000..6fa34026cc --- /dev/null +++ b/cmd/rotatecerts/wait.go @@ -0,0 +1,322 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +package rotatecerts + +import ( + "fmt" + "time" + + "github.com/Azure/aks-engine/cmd/rotatecerts/internal" + "github.com/Azure/aks-engine/pkg/helpers/ssh" + "github.com/Azure/aks-engine/pkg/kubernetes" + "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" +) + +const defaultSuccessesNeeded int = 5 + +type nodesCondition func(*v1.NodeList) bool + +// waitForNodesCondition checks that nodesCondition is met for every node in the cluster +func waitForNodesCondition(client internal.KubeClient, condition nodesCondition, successesNeeded int, interval, timeout time.Duration) (*v1.NodeList, error) { + var nl *v1.NodeList + var err error + var successesCount int + err = wait.PollImmediate(interval, timeout, func() (bool, error) { + nl, err = client.ListNodes() + if err != nil { + return false, err + } + if !condition(nl) { + return false, nil + } + successesCount++ + if successesCount < successesNeeded { + return false, nil + } + return true, nil + }) + return nl, err +} + +// WaitForNodesReady returns true if all requiredNodes reached the Ready state +func WaitForNodesReady(client internal.KubeClient, requiredNodes []string, interval, timeout time.Duration) error { + _, err := waitForNodesCondition(client, allNodesReadyCondition(requiredNodes), defaultSuccessesNeeded, interval, timeout) + return err +} + +func allNodesReadyCondition(requiredNodes []string) nodesCondition { + return func(nl *v1.NodeList) bool { + requiredReady := make(map[string]bool) + for _, name := range requiredNodes { + requiredReady[name] = false + } + for _, nli := range nl.Items { + _, ok := requiredReady[nli.ObjectMeta.Name] + if !ok { + continue + } + ready := kubernetes.IsNodeReady(&nli) + if !ready { + return false + } + requiredReady[nli.ObjectMeta.Name] = ready + } + for _, 
ready := range requiredReady {
+			if !ready {
+				return false
+			}
+		}
+		return true
+	}
+}
+
+type podsCondition func(*v1.PodList) error
+
+// waitForPodsCondition checks that podsCondition is met for every pod in the specified namespace
+func waitForPodsCondition(client internal.KubeClient, namespace string, condition podsCondition, successesNeeded int, interval, timeout time.Duration) error {
+	var listErr, condErr error
+	var successesCount int
+	var pl *v1.PodList
+	err := wait.PollImmediate(interval, timeout, func() (bool, error) {
+		pl, listErr = client.ListPods(namespace, metav1.ListOptions{})
+		if listErr != nil {
+			return false, listErr
+		}
+		if condErr = condition(pl); condErr != nil {
+			return false, nil
+		}
+		successesCount++
+		if successesCount < successesNeeded {
+			return false, nil
+		}
+		return true, nil
+	})
+	if listErr != nil {
+		return errors.Wrapf(listErr, "condition successesCount: %d", successesCount)
+	}
+	if condErr != nil {
+		return errors.Wrapf(condErr, "condition successesCount: %d", successesCount)
+	}
+	return err
+}
+
+// WaitForAllInNamespaceReady returns true if all containers in a given namespace reached the Ready state
+func WaitForAllInNamespaceReady(client internal.KubeClient, namespace string, interval, timeout time.Duration, nodes map[string]*ssh.RemoteHost) error {
+	if err := waitForDaemonSetCondition(client, namespace, allDaemonSetReplicasUpdatedCondition, defaultSuccessesNeeded, interval, timeout); err != nil {
+		return err
+	}
+	if err := waitForDeploymentCondition(client, namespace, allDeploymentReplicasUpdatedCondition, defaultSuccessesNeeded, interval, timeout); err != nil {
+		return err
+	}
+	return waitForPodsCondition(client, namespace, allListedPodsReadyCondition, defaultSuccessesNeeded, interval, timeout)
+}
+
+func allListedPodsReadyCondition(pl *v1.PodList) error {
+	podsNotReady := make([]string, 0)
+	for _, pli := range pl.Items {
+		ready := pli.Status.Phase == v1.PodRunning
+		for _, c := range pli.Status.ContainerStatuses {
+			ready = ready && c.State.Running != nil && c.Ready
+		}
+		if !ready {
+			podsNotReady = append(podsNotReady, pli.Name)
+		}
+	}
+	if len(podsNotReady) != 0 {
+		return errors.Errorf("at least one pod did not reach the Ready state: %s", podsNotReady)
+	}
+	return nil
+}
+
+// WaitForReady returns true if all containers in a given pod list reached the Ready state
+func WaitForReady(client internal.KubeClient, namespace string, pods []string, interval, timeout time.Duration, nodes map[string]*ssh.RemoteHost) error {
+	waitFor := allExpectedPodsReadyCondition(pods)
+	return waitForPodsCondition(client, namespace, waitFor, defaultSuccessesNeeded, interval, timeout)
+}
+
+func allExpectedPodsReadyCondition(expectedPods []string) podsCondition {
+	return func(pl *v1.PodList) error {
+		podReady := make(map[string]bool, len(expectedPods))
+		for _, n := range expectedPods {
+			podReady[n] = false
+		}
+		for _, pli := range pl.Items {
+			_, ok := podReady[pli.ObjectMeta.Name]
+			if !ok {
+				continue
+			}
+			ready := pli.Status.Phase == v1.PodRunning
+			for _, c := range pli.Status.ContainerStatuses {
+				ready = ready && c.State.Running != nil && c.Ready
+			}
+			podReady[pli.ObjectMeta.Name] = ready
+		}
+		podsNotReady := make([]string, 0)
+		for pod, ready := range podReady {
+			if !ready {
+				podsNotReady = append(podsNotReady, pod)
+			}
+		}
+		if len(podsNotReady) != 0 {
+			return errors.Errorf("at least one pod did not reach the Ready state: %s", podsNotReady)
+		}
+		return nil
+	}
+}
+
+type daemonsetCondition func(*appsv1.DaemonSetList) error
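+
+// For reference, a daemonsetCondition returns nil once it is satisfied. A
+// minimal sketch of such a condition (illustrative only, not used below)
+// could assert that every daemonset has all of its scheduled pods ready:
+//
+//	func exampleAllPodsReadyCondition(dsl *appsv1.DaemonSetList) error {
+//		for _, ds := range dsl.Items {
+//			if ds.Status.NumberReady != ds.Status.DesiredNumberScheduled {
+//				return errors.Errorf("daemonset %s not ready", ds.Name)
+//			}
+//		}
+//		return nil
+//	}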
+
+// waitForDaemonSetCondition fetches the daemonsets in a namespace and checks that daemonsetCondition is met for every daemonset in the namespace
+func waitForDaemonSetCondition(client internal.KubeClient, namespace string, condition daemonsetCondition, successesNeeded int, interval, timeout time.Duration) error {
+	var listErr, condErr error
+	var successesCount int
+	var dsl *appsv1.DaemonSetList
+	err := wait.PollImmediate(interval, timeout, func() (bool, error) {
+		dsl, listErr = client.ListDaemonSets(namespace, metav1.ListOptions{})
+		if listErr != nil {
+			return false, listErr
+		}
+		if condErr = condition(dsl); condErr != nil {
+			return false, nil
+		}
+		successesCount++
+		if successesCount < successesNeeded {
+			return false, nil
+		}
+		return true, nil
+	})
+	if listErr != nil {
+		return errors.Wrapf(listErr, "condition successesCount: %d", successesCount)
+	}
+	if condErr != nil {
+		return errors.Wrapf(condErr, "condition successesCount: %d", successesCount)
+	}
+	return err
+}
+
+func allDaemonSetReplicasUpdatedCondition(dsl *appsv1.DaemonSetList) error {
+	dsNotReady := make([]string, 0)
+	for _, dsli := range dsl.Items {
+		desired := dsli.Status.DesiredNumberScheduled
+		current := dsli.Status.CurrentNumberScheduled
+		updated := dsli.Status.UpdatedNumberScheduled
+		if desired != current || desired != updated {
+			dsNotReady = append(dsNotReady, dsli.Name)
+		}
+	}
+	if len(dsNotReady) != 0 {
+		return errors.Errorf("at least one daemonset is still updating replicas: %s", dsNotReady)
+	}
+	return nil
+}
+
+type deploymentCondition func(*appsv1.DeploymentList) error
+
+// waitForDeploymentCondition fetches the deployments in a namespace and checks that deploymentCondition is met for every deployment in the namespace
+func waitForDeploymentCondition(client internal.KubeClient, namespace string, condition deploymentCondition, successesNeeded int, interval, timeout time.Duration) error {
+	var listErr, condErr error
+	var successesCount int
+	var dl *appsv1.DeploymentList
+	err := wait.PollImmediate(interval, timeout, func() (bool, error) {
+		dl, listErr = client.ListDeployments(namespace, metav1.ListOptions{})
+		if listErr != nil {
+			return false, listErr
+		}
+		if condErr = condition(dl); condErr != nil {
+			return false, nil
+		}
+		successesCount++
+		if successesCount < successesNeeded {
+			return false, nil
+		}
+		return true, nil
+	})
+	if listErr != nil {
+		return errors.Wrapf(listErr, "condition successesCount: %d", successesCount)
+	}
+	if condErr != nil {
+		return errors.Wrapf(condErr, "condition successesCount: %d", successesCount)
+	}
+	return err
+}
+
+func allDeploymentReplicasUpdatedCondition(dl *appsv1.DeploymentList) error {
+	deployNotReady := make([]string, 0)
+	for _, dli := range dl.Items {
+		desired := dli.Status.Replicas
+		current := dli.Status.AvailableReplicas
+		updated := dli.Status.UpdatedReplicas
+		if desired != current || desired != updated {
+			deployNotReady = append(deployNotReady, dli.Name)
+		}
+	}
+	if len(deployNotReady) != 0 {
+		return errors.Errorf("at least one deployment is still updating replicas: %s", deployNotReady)
+	}
+	return nil
+}
+
+// WaitForVMsRunning checks that all requiredVMs are running
+func WaitForVMsRunning(client internal.ARMClient, resourceGroupName string, requiredVMs []string, interval, timeout time.Duration) error {
+	var err error
+	err = wait.PollImmediate(interval, timeout, func() (bool, error) {
+		allRunning := true
+		for _, vm := range requiredVMs {
+			var state string
+			state, err =
client.GetVirtualMachinePowerState(resourceGroupName, vm) + if err != nil { + return false, nil + } + running := isVirtualMachineRunning(state) + if err != nil { + return false, err + } + allRunning = allRunning && running + } + if !allRunning { + return false, nil + } + successesCount++ + if successesCount < 1 { + return false, nil + } + return true, nil + }) + return err +} + +// WaitForVMSSIntancesRunning checks that all required scale set VMs are running +func WaitForVMSSIntancesRunning(client internal.ARMClient, resourceGroupName, vmssName string, count int, interval, timeout time.Duration) error { + var err error + var successesCount int + err = wait.PollImmediate(interval, timeout, func() (bool, error) { + allRunning := true + for i := 0; i < count; i++ { + var state string + state, err = client.GetVirtualMachineScaleSetInstancePowerState(resourceGroupName, vmssName, fmt.Sprint(i)) + if err != nil { + return false, nil + } + running := isVirtualMachineRunning(state) + if err != nil { + return false, err + } + allRunning = allRunning && running + } + if !allRunning { + return false, nil + } + successesCount++ + if successesCount < 1 { + return false, nil + } + return true, nil + }) + return err +} diff --git a/cmd/rotatecerts/wait_test.go b/cmd/rotatecerts/wait_test.go new file mode 100644 index 0000000000..11c39dea8b --- /dev/null +++ b/cmd/rotatecerts/wait_test.go @@ -0,0 +1,416 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +package rotatecerts + +import ( + "fmt" + "testing" + "time" + + mock "github.com/Azure/aks-engine/cmd/rotatecerts/internal/mock_internal" + gomock "github.com/golang/mock/gomock" + . "github.com/onsi/gomega" + "github.com/pkg/errors" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" +) + +func TestWaitForNodesCondition(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + falseCond := func(*v1.NodeList) bool { return false } + trueCond := func(*v1.NodeList) bool { return true } + + t.Run("ListNodes fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListNodes().Return(nil, errAPIGeneric).AnyTimes() + + _, err := waitForNodesCondition(mock, falseCond, 1, 1*time.Second, 1*time.Minute) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) + + t.Run("Node condition met within timeout period", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + list := &v1.NodeList{} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListNodes().Return(list, nil).AnyTimes() + + nl, err := waitForNodesCondition(mock, trueCond, 2, 500*time.Millisecond, 2*time.Second) + g.Expect(nl).NotTo(BeNil()) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("Node condition not met within timeout period", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + list := &v1.NodeList{} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListNodes().Return(list, nil).AnyTimes() + + _, err := waitForNodesCondition(mock, falseCond, 1, 500*time.Millisecond, 1*time.Second) + g.Expect(err).To(HaveOccurred()) + g.Expect(fmt.Sprint(err)).To(Equal("timed out waiting for the condition")) + }) +} + +func TestAllNodesReadyCondition(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + nodeReadyCondition := v1.NodeCondition{Type: v1.NodeReady, Status: v1.ConditionTrue} + nodeNotReadyCondition := 
v1.NodeCondition{Type: v1.NodeReady, Status: v1.ConditionFalse}
+
+	t.Run("All nodes ready, expected nodes included", func(t *testing.T) {
+		expected := []string{"m1"}
+		m1 := v1.Node{}
+		m1.Name = "m1"
+		m1.Status.Conditions = []v1.NodeCondition{nodeReadyCondition}
+		a1 := v1.Node{}
+		a1.Name = "a1"
+		a1.Status.Conditions = []v1.NodeCondition{nodeReadyCondition}
+		nl := &v1.NodeList{Items: []v1.Node{m1, a1}}
+		cond := allNodesReadyCondition(expected)(nl)
+		g.Expect(cond).To(BeTrue())
+	})
+
+	t.Run("Some node not ready", func(t *testing.T) {
+		expected := []string{"m1"}
+		m1 := v1.Node{}
+		m1.Name = "m1"
+		m1.Status.Conditions = []v1.NodeCondition{nodeNotReadyCondition}
+		a1 := v1.Node{}
+		a1.Name = "a1"
+		a1.Status.Conditions = []v1.NodeCondition{nodeReadyCondition}
+		nl := &v1.NodeList{Items: []v1.Node{m1, a1}}
+		cond := allNodesReadyCondition(expected)(nl)
+		g.Expect(cond).To(BeFalse())
+
+		m1.Status.Conditions = []v1.NodeCondition{nodeReadyCondition}
+		a1.Status.Conditions = []v1.NodeCondition{nodeNotReadyCondition}
+		cond = allNodesReadyCondition(expected)(&v1.NodeList{Items: []v1.Node{m1, a1}})
+		g.Expect(cond).To(BeFalse())
+	})
+
+	t.Run("Expected node missing", func(t *testing.T) {
+		expected := []string{"m1"}
+		a1 := v1.Node{}
+		a1.Name = "a1"
+		a1.Status.Conditions = []v1.NodeCondition{nodeReadyCondition}
+		nl := &v1.NodeList{Items: []v1.Node{a1}}
+		cond := allNodesReadyCondition(expected)(nl)
+		g.Expect(cond).To(BeFalse())
+	})
+}
+
+func TestWaitForPodsCondition(t *testing.T) {
+	t.Parallel()
+	g := NewGomegaWithT(t)
+	falseCond := func(*v1.PodList) error { return nil }
+	trueCond := func(*v1.PodList) error { return errors.New("condition not met") }
+
+	t.Run("ListPods fails", func(t *testing.T) {
+		mockCtrl := gomock.NewController(t)
+		defer mockCtrl.Finish()
+
+		mock := mock.NewMockKubeClient(mockCtrl)
+		mock.EXPECT().ListPods(gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).AnyTimes()
+
+		err := waitForPodsCondition(mock, "ns", falseCond, 2, 500*time.Millisecond, 200*time.Second)
+		g.Expect(err).To(HaveOccurred())
+		g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric))
+	})
+
+	t.Run("Pod condition met within timeout period", func(t *testing.T) {
+		mockCtrl := gomock.NewController(t)
+		defer mockCtrl.Finish()
+
+		list := &v1.PodList{}
+		mock := mock.NewMockKubeClient(mockCtrl)
+		mock.EXPECT().ListPods(gomock.Any(), gomock.Any()).Return(list, nil).AnyTimes()
+
+		err := waitForPodsCondition(mock, "ns", falseCond, 2, 500*time.Millisecond, 200*time.Second)
+		g.Expect(err).NotTo(HaveOccurred())
+	})
+
+	t.Run("Pod condition not met within timeout period", func(t *testing.T) {
+		mockCtrl := gomock.NewController(t)
+		defer mockCtrl.Finish()
+
+		list := &v1.PodList{}
+		mock := mock.NewMockKubeClient(mockCtrl)
+		mock.EXPECT().ListPods(gomock.Any(), gomock.Any()).Return(list, nil).AnyTimes()
+
+		err := waitForPodsCondition(mock, "ns", trueCond, 1, 500*time.Millisecond, 1*time.Second)
+		g.Expect(err).To(HaveOccurred())
+		g.Expect(fmt.Sprint(err)).To(Equal("condition successesCount: 0: condition not met"))
+	})
+}
+
+func TestAllListedPodsReadyCondition(t *testing.T) {
+	t.Parallel()
+	g := NewGomegaWithT(t)
+
+	podRunningCondition := v1.ContainerStatus{
+		State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
+		Ready: true,
+	}
+	podNotRunningCondition := v1.ContainerStatus{
+		State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}},
+	}
+
+	t.Run("Pod not running", func(t *testing.T) {
+		p1 := v1.Pod{}
+		p1.Name = "p1"
+		p1.Status.Phase = v1.PodPending
+		pl := &v1.PodList{Items: []v1.Pod{p1}}
+		err 
:= allListedPodsReadyCondition(pl) + g.Expect(err).To(HaveOccurred()) + }) + + t.Run("Container not running", func(t *testing.T) { + p1 := v1.Pod{} + p1.Name = "p1" + p1.Status.Phase = v1.PodRunning + p1.Status.ContainerStatuses = []v1.ContainerStatus{podNotRunningCondition} + pl := &v1.PodList{Items: []v1.Pod{p1}} + err := allListedPodsReadyCondition(pl) + g.Expect(err).To(HaveOccurred()) + }) + + t.Run("Container running", func(t *testing.T) { + p1 := v1.Pod{} + p1.Name = "p1" + p1.Status.Phase = v1.PodRunning + p1.Status.ContainerStatuses = []v1.ContainerStatus{podRunningCondition} + pl := &v1.PodList{Items: []v1.Pod{p1}} + err := allListedPodsReadyCondition(pl) + g.Expect(err).ToNot(HaveOccurred()) + }) + + t.Run("No pods", func(t *testing.T) { + err := allListedPodsReadyCondition(&v1.PodList{}) + g.Expect(err).ToNot(HaveOccurred()) + }) +} + +func TestAllExpectedPodsReadyCondition(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + podRunningCondition := v1.ContainerStatus{ + State: v1.ContainerState{Running: &v1.ContainerStateRunning{}}, + Ready: true, + } + podNotRunningCondition := v1.ContainerStatus{ + State: v1.ContainerState{Waiting: &v1.ContainerStateWaiting{}}, + } + + t.Run("All expected pods ready", func(t *testing.T) { + expected := []string{"p1"} + p1 := v1.Pod{} + p1.Name = "p1" + p1.Status.Phase = v1.PodRunning + p1.Status.ContainerStatuses = []v1.ContainerStatus{podRunningCondition} + pl := &v1.PodList{Items: []v1.Pod{p1}} + err := allExpectedPodsReadyCondition(expected)(pl) + g.Expect(err).ToNot(HaveOccurred()) + }) + + t.Run("Some expected pods ready", func(t *testing.T) { + expected := []string{"p1"} + p1 := v1.Pod{} + p1.Name = "p1" + p1.Status.Phase = v1.PodRunning + p1.Status.ContainerStatuses = []v1.ContainerStatus{podNotRunningCondition} + pl := &v1.PodList{Items: []v1.Pod{p1}} + err := allExpectedPodsReadyCondition(expected)(pl) + g.Expect(err).To(HaveOccurred()) + }) + + t.Run("Expected pod missing", func(t *testing.T) { + expected := []string{"p2"} + p1 := v1.Pod{} + p1.Name = "p1" + p1.Status.Phase = v1.PodRunning + p1.Status.ContainerStatuses = []v1.ContainerStatus{podRunningCondition} + pl := &v1.PodList{Items: []v1.Pod{p1}} + err := allExpectedPodsReadyCondition(expected)(pl) + g.Expect(err).To(HaveOccurred()) + }) +} + +func TestWaitForDaemonSetCondition(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + falseCond := func(*appsv1.DaemonSetList) error { return nil } + trueCond := func(*appsv1.DaemonSetList) error { return errors.New("condition not met") } + + t.Run("ListDaemonSets fails", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDaemonSets(gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).AnyTimes() + + err := waitForDaemonSetCondition(mock, "ns", falseCond, 2, 500*time.Millisecond, 200*time.Second) + g.Expect(err).To(HaveOccurred()) + g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric)) + }) + + t.Run("DaemonSet condition met within timeout period", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + list := &appsv1.DaemonSetList{} + mock := mock.NewMockKubeClient(mockCtrl) + mock.EXPECT().ListDaemonSets(gomock.Any(), gomock.Any()).Return(list, nil).AnyTimes() + + err := waitForDaemonSetCondition(mock, "ns", falseCond, 2, 500*time.Millisecond, 200*time.Second) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("DaemonSet condition not met within timeout period", func(t *testing.T) { + 
mockCtrl := gomock.NewController(t)
+		defer mockCtrl.Finish()
+
+		list := &appsv1.DaemonSetList{}
+		mock := mock.NewMockKubeClient(mockCtrl)
+		mock.EXPECT().ListDaemonSets(gomock.Any(), gomock.Any()).Return(list, nil).AnyTimes()
+
+		err := waitForDaemonSetCondition(mock, "ns", trueCond, 1, 500*time.Millisecond, 1*time.Second)
+		g.Expect(err).To(HaveOccurred())
+		g.Expect(fmt.Sprint(err)).To(Equal("condition successesCount: 0: condition not met"))
+	})
+}
+
+func TestAllDaemontSetReplicasUpdatedCondition(t *testing.T) {
+	t.Parallel()
+	g := NewGomegaWithT(t)
+
+	t.Run("Desired replicas updated and available", func(t *testing.T) {
+		d1 := appsv1.DaemonSet{}
+		d1.Name = "p1"
+		d1.Status.DesiredNumberScheduled = 2
+		d1.Status.CurrentNumberScheduled = 2
+		d1.Status.UpdatedNumberScheduled = 2
+		dl := &appsv1.DaemonSetList{Items: []appsv1.DaemonSet{d1}}
+		err := allDaemontSetReplicasUpdatedCondition(dl)
+		g.Expect(err).ToNot(HaveOccurred())
+	})
+
+	t.Run("Not all updated replicas are available", func(t *testing.T) {
+		d1 := appsv1.DaemonSet{}
+		d1.Name = "p1"
+		d1.Status.DesiredNumberScheduled = 2
+		d1.Status.CurrentNumberScheduled = 1
+		d1.Status.UpdatedNumberScheduled = 2
+		dl := &appsv1.DaemonSetList{Items: []appsv1.DaemonSet{d1}}
+		err := allDaemontSetReplicasUpdatedCondition(dl)
+		g.Expect(err).To(HaveOccurred())
+		g.Expect(err).To(MatchError("at least one daemonset is still updating replicas: [p1]"))
+	})
+
+	t.Run("Not all replicas updated their template", func(t *testing.T) {
+		d1 := appsv1.DaemonSet{}
+		d1.Name = "p1"
+		d1.Status.DesiredNumberScheduled = 2
+		d1.Status.CurrentNumberScheduled = 2
+		d1.Status.UpdatedNumberScheduled = 1
+		dl := &appsv1.DaemonSetList{Items: []appsv1.DaemonSet{d1}}
+		err := allDaemontSetReplicasUpdatedCondition(dl)
+		g.Expect(err).To(HaveOccurred())
+		g.Expect(err).To(MatchError("at least one daemonset is still updating replicas: [p1]"))
+	})
+}
+
+func TestWaitForDeploymentCondition(t *testing.T) {
+	t.Parallel()
+	g := NewGomegaWithT(t)
+	falseCond := func(*appsv1.DeploymentList) error { return nil }
+	trueCond := func(*appsv1.DeploymentList) error { return errors.New("condition not met") }
+
+	t.Run("ListDeployments fails", func(t *testing.T) {
+		mockCtrl := gomock.NewController(t)
+		defer mockCtrl.Finish()
+
+		mock := mock.NewMockKubeClient(mockCtrl)
+		mock.EXPECT().ListDeployments(gomock.Any(), gomock.Any()).Return(nil, errAPIGeneric).AnyTimes()
+
+		err := waitForDeploymentCondition(mock, "ns", falseCond, 2, 500*time.Millisecond, 200*time.Second)
+		g.Expect(err).To(HaveOccurred())
+		g.Expect(errors.Cause(err)).To(Equal(errAPIGeneric))
+	})
+
+	t.Run("Deployment condition met within timeout period", func(t *testing.T) {
+		mockCtrl := gomock.NewController(t)
+		defer mockCtrl.Finish()
+
+		list := &appsv1.DeploymentList{}
+		mock := mock.NewMockKubeClient(mockCtrl)
+		mock.EXPECT().ListDeployments(gomock.Any(), gomock.Any()).Return(list, nil).AnyTimes()
+
+		err := waitForDeploymentCondition(mock, "ns", falseCond, 2, 500*time.Millisecond, 200*time.Second)
+		g.Expect(err).NotTo(HaveOccurred())
+	})
+
+	t.Run("Deployment condition not met within timeout period", func(t *testing.T) {
+		mockCtrl := gomock.NewController(t)
+		defer mockCtrl.Finish()
+
+		list := &appsv1.DeploymentList{}
+		mock := mock.NewMockKubeClient(mockCtrl)
+		mock.EXPECT().ListDeployments(gomock.Any(), gomock.Any()).Return(list, nil).AnyTimes()
+
+		err := waitForDeploymentCondition(mock, "ns", trueCond, 1, 500*time.Millisecond, 1*time.Second)
+		g.Expect(err).To(HaveOccurred())
+		
g.Expect(fmt.Sprint(err)).To(Equal("condition successesCount: 0: condition not met")) + }) +} + +func TestAllDeploymentReplicasUpdatedCondition(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + t.Run("Desired replicas updated and available", func(t *testing.T) { + d1 := appsv1.Deployment{} + d1.Name = "p1" + d1.Status.Replicas = 2 + d1.Status.AvailableReplicas = 2 + d1.Status.UpdatedReplicas = 2 + dl := &appsv1.DeploymentList{Items: []appsv1.Deployment{d1}} + err := allDeploymentReplicasUpdatedCondition(dl) + g.Expect(err).ToNot(HaveOccurred()) + }) + + t.Run("Not all updated replicas are available", func(t *testing.T) { + d1 := appsv1.Deployment{} + d1.Name = "p1" + d1.Status.Replicas = 2 + d1.Status.AvailableReplicas = 1 + d1.Status.UpdatedReplicas = 2 + dl := &appsv1.DeploymentList{Items: []appsv1.Deployment{d1}} + err := allDeploymentReplicasUpdatedCondition(dl) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(MatchError("at least one deployment is still updating replicas: [p1]")) + }) + + t.Run("Not all replicas updated their template", func(t *testing.T) { + d1 := appsv1.Deployment{} + d1.Name = "p1" + d1.Status.Replicas = 2 + d1.Status.AvailableReplicas = 2 + d1.Status.UpdatedReplicas = 1 + dl := &appsv1.DeploymentList{Items: []appsv1.Deployment{d1}} + err := allDeploymentReplicasUpdatedCondition(dl) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(MatchError("at least one deployment is still updating replicas: [p1]")) + }) +} diff --git a/docs/topics/rotate-certs.md b/docs/topics/rotate-certs.md new file mode 100644 index 0000000000..394406c1a2 --- /dev/null +++ b/docs/topics/rotate-certs.md @@ -0,0 +1,137 @@ +# Rotating Kubernetes Certificates + +## Prerequisites + +This guide assumes that you already have deployed a cluster using `aks-engine` and the cluster is in a healthy state. + +## Certificate Rotation + +This document provides guidance on how to rotate certificates on an existing AKS Engine cluster and recommendations for adopting `aks-engine rotate-certs` as a tool. + +### Know before you go + +In order to ensure that your `aks-engine rotate-certs` operation runs smoothly, there are a few things you should be aware of before getting started. + +1. You will need access to the API Model (`apimodel.json`) that was generated by `aks-engine deploy` or `aks-engine generate` (by default this file is placed into a relative directory that looks like `_output//`). + +1. An `aks-engine rotate-certs` operation causes API Server downtime. + +1. `aks-engine rotate-certs` expects an API model that conforms to the current state of the cluster. `aks-engine rotate-certs` executes remote commands on the cluster nodes and uses the API Model information to establish a secure SSH connection. `aks-engine rotate-certs` also relies on some resources (such as VMs) to be named in accordance with the original `aks-engine` deployment. + +1. `aks-engine rotate-certs` relies upon a working connection to the cluster control plane during certificate rotation, both (1) to validate each step of the process, and (2) to restart/recreate cluster resources like `kube-system` pods and service account tokens. If you are rotating the certificates of a **private cluster**, you must run `aks-engine rotate-certs` from a host VM that has network access to the control plane, for example a jumpbox VM that resides in the same VNET as the master VMs. For more information on private clusters [refer to this documentation](features.md#feat-private-cluster). + +1. 
If using `aks-engine rotate-certs` in production, it is recommended to stage a certificate rotation test on a cluster that was built to the same specifications (the same cluster configuration + the same version of the `aks-engine` command line tool + the same set of enabled addons) as your production cluster before performing the certificate rotation. AKS Engine supports many different cluster configurations, and the E2E testing that the AKS Engine team runs cannot practically cover every possible configuration. Therefore, it is recommended that you verify in a staging environment that your specific cluster configuration works with `aks-engine rotate-certs` before attempting this potentially destructive operation on your production cluster.
+
+1. `aks-engine rotate-certs` does **not** guarantee backwards compatibility. If you deployed with `aks-engine` version `0.60.x`, you should also execute the certificate rotation process with version `0.60.x`.
+
+### Parameters
+
+|Parameter|Required|Description|
+|-----------------|---|---|
+|--api-model|yes|Relative path to the API model (cluster definition) that declares the expected cluster configuration.|
+|--ssh-host|yes|FQDN or IP address of an SSH listener that can reach all nodes in the cluster.|
+|--linux-ssh-private-key|yes|Path to a valid private SSH key to access the cluster's Linux nodes.|
+|--location|yes|Azure location where the cluster is deployed.|
+|--subscription-id|yes|Azure subscription where the cluster infrastructure is deployed.|
+|--resource-group|yes|Azure resource group where the cluster infrastructure is deployed.|
+|--client-id|depends|The Service Principal Client ID. Required if the auth-method is set to client_secret or client_certificate.|
+|--client-secret|depends|The Service Principal Client secret. Required if the auth-method is set to client_secret.|
+|--certificate-profile|no|Relative path to a JSON file containing the new set of certificates.|
+|--force|no|Force execution even if the API Server is not responsive.|
+
+### Simple steps to rotate certificates
+
+Once you have read all the [requirements](#know-before-you-go), run `aks-engine rotate-certs` with the appropriate arguments:
+
+```bash
+./bin/aks-engine rotate-certs \
+    --location <location> \
+    --api-model <path to apimodel.json> \
+    --linux-ssh-private-key <path to SSH private key> \
+    --ssh-host <SSH host> \
+    --resource-group <resource group name> \
+    --client-id <service principal client ID> \
+    --client-secret <service principal client secret> \
+    --subscription-id <subscription ID>
+```
+
+For example,
+
+```bash
+./bin/aks-engine rotate-certs \
+    --location "westus2" \
+    --api-model "_output/my-cluster/apimodel.json" \
+    --linux-ssh-private-key "~/.ssh/id_rsa" \
+    --ssh-host "my-cluster.westus2.cloudapp.azure.com" \
+    --resource-group "my-cluster" \
+    --client-id "12345678-XXXX-YYYY-ZZZZ-1234567890ab" \
+    --client-secret "12345678-XXXX-YYYY-ZZZZ-1234567890ab" \
+    --subscription-id "12345678-XXXX-YYYY-ZZZZ-1234567890ab"
+```
+
+> Fetching a new set of certificates from Key Vault is not supported at this point.
+
+## Under The Hood
+
+A Kubernetes cluster relies on multiple PKIs to secure the communication between its components (apiserver, kubelet, etcd, etc.). On an AKS Engine cluster, these multiple PKIs share a single certificate authority (CA). On control plane nodes, `aks-engine rotate-certs` rotates all these PKIs at once and reboots the virtual machine. On agent nodes, only `kubelet` and `kube-proxy` are restarted once the node certificates are replaced.
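+
+As a quick spot check (not part of the tool's own output), you can compare a certificate's validity window before and after the rotation. The sketch below assumes the default AKS Engine certificate layout and that `openssl` is available on the node:
+
+```bash
+# On a control plane node: print the validity window of the current apiserver
+# certificate. The "notAfter" date should move forward once rotation completes.
+sudo openssl x509 -in /etc/kubernetes/certs/apiserver.crt -noout -dates
+```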
+
+If the certificate rotation process halts before completion due to a failure or a transient issue (e.g., network connectivity), it is safe to rerun `aks-engine rotate-certs` using the `--force` flag.
+
+At a high level, the `aks-engine rotate-certs` command performs the following tasks:
+
+- back up the current set of certificates to directory `_rotate_certs_backup/` (relative to the `--api-model` path)
+- generate or load the new set of certificates and persist them in local directory `_rotate_certs_output/` (relative to the `--api-model` path)
+- distribute the new certificates to the cluster nodes over SSH
+- rotate the control plane certificates
+- reboot the control plane nodes
+- rotate the agent certificates
+- update the input `apimodel.json` with the new certificate information
+
+### Generating certificates
+
+`aks-engine rotate-certs` is able to generate the new set of certificates that will be deployed to the cluster based on the information found in the API model.
+
+Alternatively, AKS Engine can load a new set of certificates from a JSON file specified in `--certificate-profile`.
+
+```json
+{
+  "certificateProfile": {
+    "caCertificate": "",
+    "caPrivateKey": "",
+    "apiServerCertificate": "",
+    "apiServerPrivateKey": "",
+    "clientCertificate": "",
+    "clientPrivateKey": "",
+    "kubeConfigCertificate": "",
+    "kubeConfigPrivateKey": "",
+    "etcdServerCertificate": "",
+    "etcdServerPrivateKey": "",
+    "etcdClientCertificate": "",
+    "etcdClientPrivateKey": "",
+    "etcdPeerCertificates": ["","",""],
+    "etcdPeerPrivateKeys": ["","",""]
+  }
+}
+```
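+
+For instance, assuming the new certificates were saved to a file named `new-certs.json` (a hypothetical path, following the schema above), the rotation could be invoked as follows:
+
+```bash
+./bin/aks-engine rotate-certs \
+    --location "westus2" \
+    --api-model "_output/my-cluster/apimodel.json" \
+    --linux-ssh-private-key "~/.ssh/id_rsa" \
+    --ssh-host "my-cluster.westus2.cloudapp.azure.com" \
+    --resource-group "my-cluster" \
+    --client-id "12345678-XXXX-YYYY-ZZZZ-1234567890ab" \
+    --client-secret "12345678-XXXX-YYYY-ZZZZ-1234567890ab" \
+    --subscription-id "12345678-XXXX-YYYY-ZZZZ-1234567890ab" \
+    --certificate-profile "new-certs.json"
+```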
+### Certificate distribution
+
+The new certificates are securely copied to each cluster node before the certificate rotation process starts. On Linux nodes, they are placed in directory `/etc/kubernetes/rotate-certs/certs`. On Windows nodes, the directory is `$env:temp`.
+
+## Best Practices
+
+### Use a reliable network connection
+
+`aks-engine rotate-certs` requires the execution of multiple remote commands, which are subject to potential failures, especially if the connection to the cluster nodes is not reliable.
+
+Executing `aks-engine rotate-certs` from a VM running on the target cloud (Azure or Azure Stack) can drastically reduce the occurrence of transient issues.
+
+## Known Limitations
+
+### Cluster-autoscaler
+
+`aks-engine rotate-certs` will not update the ARM template that `cluster-autoscaler` uses to create new cluster nodes. Because the cluster certificates are embedded in the ARM template, the addon won't be able to produce functioning agent nodes after a certificate rotation operation. An `aks-engine upgrade` operation will take care of updating the "new node" template.
+
+## Troubleshooting
+
+`aks-engine rotate-certs` logs the output of every step to file `/var/log/azure/rotate-certs.log` (Linux) and `c:\k\rotate-certs.log` (Windows).
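+
+For example, to follow the rotation progress on a Linux node while the operation runs (this assumes you have SSH access to the node):
+
+```bash
+# Stream the rotation log; each remote step appends its output here.
+sudo tail -f /var/log/azure/rotate-certs.log
+```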
diff --git a/parts/k8s/rotate-certs.ps1 b/parts/k8s/rotate-certs.ps1
new file mode 100644
index 0000000000..e0efd84646
--- /dev/null
+++ b/parts/k8s/rotate-certs.ps1
@@ -0,0 +1,86 @@
+<#
+.DESCRIPTION
+    This script rotates a Windows node's certificates.
+    It assumes that client.key, client.crt and ca.crt will be dropped in $env:temp.
+#>
+
+. c:\AzureData\k8s\windowskubeletfunc.ps1
+. c:\AzureData\k8s\kuberneteswindowsfunctions.ps1
+
+$global:KubeDir = "c:\k"
+
+$global:AgentKeyPath = [io.path]::Combine($env:temp, "client.key")
+$global:AgentCertificatePath = [io.path]::Combine($env:temp, "client.crt")
+$global:CACertificatePath = [io.path]::Combine($env:temp, "ca.crt")
+
+function Prereqs {
+    Assert-FileExists $global:AgentKeyPath
+    Assert-FileExists $global:AgentCertificatePath
+    Assert-FileExists $global:CACertificatePath
+}
+
+function Backup {
+    Copy-Item "c:\k\config" "c:\k\config.bak"
+    Copy-Item "c:\k\ca.crt" "c:\k\ca.crt.bak"
+}
+
+function Update-CACertificate {
+    Write-Log "Write ca root"
+    Write-CACert -CACertificate $global:CACertificate -KubeDir $global:KubeDir
+}
+
+function Update-KubeConfig {
+    Write-Log "Write kube config"
+    $ClusterConfiguration = ConvertFrom-Json ((Get-Content "c:\k\kubeclusterconfig.json" -ErrorAction Stop) | out-string)
+    $MasterIP = $ClusterConfiguration.Kubernetes.ControlPlane.IpAddress
+
+    $CloudProviderConfig = ConvertFrom-Json ((Get-Content "c:\k\azure.json" -ErrorAction Stop) | out-string)
+    $MasterFQDNPrefix = $CloudProviderConfig.ResourceGroup
+
+    $AgentKey = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes((Get-Content -Raw $AgentKeyPath)))
+    $AgentCertificate = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes((Get-Content -Raw $AgentCertificatePath)))
+
+    Write-KubeConfig -CACertificate $global:CACertificate `
+        -KubeDir $global:KubeDir `
+        -MasterFQDNPrefix $MasterFQDNPrefix `
+        -MasterIP $MasterIP `
+        -AgentKey $AgentKey `
+        -AgentCertificate $AgentCertificate
+}
+
+function Force-Kubelet-CertRotation {
+    Remove-Item "/var/lib/kubelet/pki/kubelet-client-current.pem" -Force -ErrorAction Ignore
+    Remove-Item "/var/lib/kubelet/pki/kubelet.crt" -Force -ErrorAction Ignore
+    Remove-Item "/var/lib/kubelet/pki/kubelet.key" -Force -ErrorAction Ignore
+
+    $err = Retry-Command -Command "c:\k\windowsnodereset.ps1" -Args @{Foo="Bar"} -Retries 3 -RetryDelaySeconds 10
+    if(!$err) {
+        Write-Error 'Error resetting Windows node'
+        throw $_
+    }
+}
+
+function Start-CertRotation {
+    try
+    {
+        $global:CACertificate = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes((Get-Content -Raw $CACertificatePath)))
+
+        Prereqs
+        Update-CACertificate
+        Update-KubeConfig
+        Force-Kubelet-CertRotation
+    }
+    catch
+    {
+        Write-Error $_
+        throw $_
+    }
+}
+
+function Clean {
+    Remove-Item "c:\k\config.bak" -Force -ErrorAction Ignore
+    Remove-Item "c:\k\ca.crt.bak" -Force -ErrorAction Ignore
+    Remove-Item $global:AgentKeyPath -Force -ErrorAction Ignore
+    Remove-Item $global:AgentCertificatePath -Force -ErrorAction Ignore
+    Remove-Item $global:CACertificatePath -Force -ErrorAction Ignore
+}
diff --git a/parts/k8s/rotate-certs.sh b/parts/k8s/rotate-certs.sh
new file mode 100644
index 0000000000..bb586f8f9e
--- /dev/null
+++ b/parts/k8s/rotate-certs.sh
@@ -0,0 +1,58 @@
+#!/bin/bash -ex
+
+export WD=/etc/kubernetes/rotate-certs
+export NEW_CERTS_DIR=${WD}/certs
+
+# copied from cse_helpers.sh; sourcing that file does not always work
+systemctl_restart() {
+  retries=$1; wait_sleep=$2; timeout=$3 svcname=$4
+  for i in $(seq 1 $retries); do
+    timeout $timeout systemctl daemon-reload
+    timeout $timeout systemctl restart $svcname && break ||
+      if [ $i -eq $retries ]; then
+        return 1
+      else
+        sleep $wait_sleep
+      fi
+  done
+}
+
+backup() {
+  if [ ! -d /etc/kubernetes/certs.bak ]; then
+    cp -rp /etc/kubernetes/certs/ /etc/kubernetes/certs.bak
+  fi
+}
+
+cp_certs() {
+  cp -p ${NEW_CERTS_DIR}/etcdpeer* /etc/kubernetes/certs/
+  cp -p ${NEW_CERTS_DIR}/etcdclient* /etc/kubernetes/certs/
+  cp -p ${NEW_CERTS_DIR}/etcdserver* /etc/kubernetes/certs/
+  cp -p ${NEW_CERTS_DIR}/ca.* /etc/kubernetes/certs/
+  cp -p ${NEW_CERTS_DIR}/client.* /etc/kubernetes/certs/
+  cp -p ${NEW_CERTS_DIR}/apiserver.* /etc/kubernetes/certs/
+  cp -p ${NEW_CERTS_DIR}/kubeconfig ~/.kube/config
+
+  rm -f /var/lib/kubelet/pki/kubelet-client-current.pem
+}
+
+cp_proxy() {
+  source /etc/environment
+  /etc/kubernetes/generate-proxy-certs.sh
+}
+
+agent_certs() {
+  cp -p ${NEW_CERTS_DIR}/ca.* /etc/kubernetes/certs/
+  cp -p ${NEW_CERTS_DIR}/client.* /etc/kubernetes/certs/
+
+  rm -f /var/lib/kubelet/pki/kubelet-client-current.pem
+  sync
+  sleep 5
+  systemctl_restart 10 5 10 kubelet
+}
+
+cleanup() {
+  rm -rf ${WD}
+  rm -rf /etc/kubernetes/certs.bak
+}
+
+"$@"
diff --git a/pkg/api/apiloader.go b/pkg/api/apiloader.go
index 0fbe8cd71c..df47d1d70c 100644
--- a/pkg/api/apiloader.go
+++ b/pkg/api/apiloader.go
@@ -164,3 +164,24 @@ func (a *Apiloader) LoadAgentPoolProfile(contents []byte) (*AgentPoolProfile, er
 	}
 	return agentPoolProfile, nil
 }
+
+// LoadCertificateProfileFromFile loads a CertificateProfile object from a JSON file
+func (a *Apiloader) LoadCertificateProfileFromFile(jsonFile string) (*CertificateProfile, error) {
+	content, err := ioutil.ReadFile(jsonFile)
+	if err != nil {
+		return nil, a.Translator.Errorf("error reading file %s: %s", jsonFile, err.Error())
+	}
+	return a.LoadCertificateProfile(content)
+}
+
+// LoadCertificateProfile unmarshals raw data into a strongly typed CertificateProfile object
+func (a *Apiloader) LoadCertificateProfile(content []byte) (*CertificateProfile, error) {
+	certificateProfile := &CertificateProfile{}
+	if err := json.Unmarshal(content, &certificateProfile); err != nil {
+		return nil, err
+	}
+	if err := checkJSONKeys(content, reflect.TypeOf(*certificateProfile), reflect.TypeOf(TypeMeta{})); err != nil {
+		return nil, err
+	}
+	return certificateProfile, nil
+}
diff --git a/pkg/api/apiloader_test.go b/pkg/api/apiloader_test.go
index 75d0b57bc0..3666902f4e 100644
--- a/pkg/api/apiloader_test.go
+++ b/pkg/api/apiloader_test.go
@@ -566,3 +566,26 @@ func TestSerializeContainerService(t *testing.T) {
 		t.Errorf("expected SerializedCS JSON %s, but got %s", expected, string(b))
 	}
 }
+
+func TestLoadCertificateProfileFromFile(t *testing.T) {
+	locale := gotext.NewLocale(path.Join("..", "..", "translations"), "en_US")
+	if err := i18n.Initialize(locale); err != nil {
+		t.Error(err)
+	}
+	apiloader := &Apiloader{
+		Translator: &i18n.Translator{
+			Locale: locale,
+		},
+	}
+
+	_, err := apiloader.LoadCertificateProfileFromFile("../engine/profiles/certificate-profile/kubernetes.json")
+	if err != nil {
+		t.Error(err.Error())
+	}
+
+	// Test error scenario
+	_, err = apiloader.LoadCertificateProfileFromFile("../this-file-doesnt-exist.json")
+	if err == nil {
+		t.Errorf("expected error passing a non-existent filepath string to apiloader.LoadCertificateProfileFromFile(), instead got nil")
+	}
+}
diff --git a/pkg/api/types.go b/pkg/api/types.go
index f4d7d3ad3b..01aa289fb7 100644
--- a/pkg/api/types.go
+++ b/pkg/api/types.go
@@ -889,6 +889,19 @@ func (p *Properties) AnyAgentIsLinux() bool {
 	return false
 }
 
+// GetMasterVMNameList returns the control plane VM name list
+func (p *Properties) GetMasterVMNameList() []string {
+	masters := []string{}
+	for 
i := 0; i < p.MasterProfile.Count; i++ { + if p.MasterProfile.IsAvailabilitySet() { + masters = append(masters, fmt.Sprintf("%s%d", p.GetMasterVMPrefix(), i)) + } else { + masters = append(masters, fmt.Sprintf("%svmss00000%d", p.GetMasterVMPrefix(), i)) + } + } + return masters +} + // GetMasterVMPrefix returns the prefix of master VMs func (p *Properties) GetMasterVMPrefix() string { return p.K8sOrchestratorName() + "-master-" + p.GetClusterID() + "-" diff --git a/pkg/api/types_test.go b/pkg/api/types_test.go index 4566f1422f..f5ae157722 100644 --- a/pkg/api/types_test.go +++ b/pkg/api/types_test.go @@ -10,6 +10,7 @@ import ( "testing" "github.com/Azure/go-autorest/autorest/to" + . "github.com/onsi/gomega" "github.com/Azure/aks-engine/pkg/api/common" "github.com/Azure/aks-engine/pkg/helpers" @@ -5212,6 +5213,37 @@ func TestKubernetesConfig_RequiresDocker(t *testing.T) { } } +func TestProperties_GetMasterVMNameList(t *testing.T) { + g := NewGomegaWithT(t) + + p := &Properties{ + OrchestratorProfile: &OrchestratorProfile{ + OrchestratorType: Kubernetes, + }, + MasterProfile: &MasterProfile{ + Count: 3, + DNSPrefix: "myprefix1", + AvailabilityProfile: "AvailabilitySet", + }, + } + actual := p.GetMasterVMNameList() + expected := []string{ + fmt.Sprintf("%s-30819786-0", common.LegacyControlPlaneVMPrefix), + fmt.Sprintf("%s-30819786-1", common.LegacyControlPlaneVMPrefix), + fmt.Sprintf("%s-30819786-2", common.LegacyControlPlaneVMPrefix), + } + g.Expect(actual).To(Equal(expected)) + + p.MasterProfile.AvailabilityProfile = "VirtualMachineScaleSets" + actual = p.GetMasterVMNameList() + expected = []string{ + fmt.Sprintf("%s-30819786-vmss000000", common.LegacyControlPlaneVMPrefix), + fmt.Sprintf("%s-30819786-vmss000001", common.LegacyControlPlaneVMPrefix), + fmt.Sprintf("%s-30819786-vmss000002", common.LegacyControlPlaneVMPrefix), + } + g.Expect(actual).To(Equal(expected)) +} + func TestProperties_GetMasterVMPrefix(t *testing.T) { p := &Properties{ OrchestratorProfile: &OrchestratorProfile{ diff --git a/pkg/armhelpers/azurestack/compute.go b/pkg/armhelpers/azurestack/compute.go index 5078176ba1..85b8cb6cbc 100644 --- a/pkg/armhelpers/azurestack/compute.go +++ b/pkg/armhelpers/azurestack/compute.go @@ -11,6 +11,7 @@ import ( "github.com/Azure/aks-engine/pkg/armhelpers" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2017-03-30/compute" azcompute "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute" + "github.com/pkg/errors" log "github.com/sirupsen/logrus" ) @@ -195,3 +196,23 @@ func (az *AzureClient) GetAvailabilitySetFaultDomainCount(ctx context.Context, r } return count, nil } + +// GetVirtualMachinePowerState returns the virtual machine's PowerState status code +func (az *AzureClient) GetVirtualMachinePowerState(ctx context.Context, resourceGroup, name string) (string, error) { + vm, err := az.virtualMachinesClient.Get(ctx, resourceGroup, name, compute.InstanceView) + if err != nil { + return "", errors.Wrapf(err, "fetching virtual machine resource") + } + for _, status := range *vm.VirtualMachineProperties.InstanceView.Statuses { + if strings.HasPrefix(*status.Code, "PowerState") { + return *status.Code, nil + } + } + return "", nil +} + +// GetVirtualMachineScaleSetInstancePowerState returns the virtual machine's PowerState status code +func (az *AzureClient) GetVirtualMachineScaleSetInstancePowerState(ctx context.Context, resourceGroup, name, instanceID string) (string, error) { + // TODO Pass compute.InstanceView once we upgrade azure stack compute's api 
version + return "", errors.Errorf("operation not supported") +} diff --git a/pkg/armhelpers/compute.go b/pkg/armhelpers/compute.go index 86ea9430f9..d27bb9eea1 100644 --- a/pkg/armhelpers/compute.go +++ b/pkg/armhelpers/compute.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute" + "github.com/pkg/errors" ) // ListVirtualMachines returns (the first page of) the machines in the specified resource group. @@ -150,3 +151,31 @@ func (az *AzureClient) GetAvailabilitySetFaultDomainCount(ctx context.Context, r } return count, nil } + +// GetVirtualMachinePowerState returns the virtual machine's PowerState status code +func (az *AzureClient) GetVirtualMachinePowerState(ctx context.Context, resourceGroup, name string) (string, error) { + vm, err := az.virtualMachinesClient.Get(ctx, resourceGroup, name, compute.InstanceView) + if err != nil { + return "", errors.Wrapf(err, "fetching virtual machine resource") + } + for _, status := range *vm.VirtualMachineProperties.InstanceView.Statuses { + if strings.HasPrefix(*status.Code, "PowerState") { + return *status.Code, nil + } + } + return "", nil +} + +// GetVirtualMachineScaleSetInstancePowerState returns the virtual machine's PowerState status code +func (az *AzureClient) GetVirtualMachineScaleSetInstancePowerState(ctx context.Context, resourceGroup, name, instanceID string) (string, error) { + vm, err := az.virtualMachineScaleSetVMsClient.Get(ctx, resourceGroup, name, instanceID, compute.InstanceView) + if err != nil { + return "", errors.Wrapf(err, "fetching virtual machine resource") + } + for _, status := range *vm.VirtualMachineScaleSetVMProperties.InstanceView.Statuses { + if strings.HasPrefix(*status.Code, "PowerState") { + return *status.Code, nil + } + } + return "", nil +} diff --git a/pkg/armhelpers/interfaces.go b/pkg/armhelpers/interfaces.go index 5d2c90c94c..3248b9358b 100644 --- a/pkg/armhelpers/interfaces.go +++ b/pkg/armhelpers/interfaces.go @@ -160,6 +160,12 @@ type AKSEngineClient interface { // VM availability set IDs provided. 
GetAvailabilitySetFaultDomainCount(ctx context.Context, resourceGroup string, vmasIDs []string) (int, error)
 
+	// GetVirtualMachinePowerState returns the virtual machine's PowerState status code
+	GetVirtualMachinePowerState(ctx context.Context, resourceGroup, name string) (string, error)
+
+	// GetVirtualMachineScaleSetInstancePowerState returns the virtual machine's PowerState status code
+	GetVirtualMachineScaleSetInstancePowerState(ctx context.Context, resourceGroup, name, instanceID string) (string, error)
+
 	//
 	// STORAGE
diff --git a/pkg/armhelpers/mockclients.go b/pkg/armhelpers/mockclients.go
index c2d131df45..b19f6fd0f6 100644
--- a/pkg/armhelpers/mockclients.go
+++ b/pkg/armhelpers/mockclients.go
@@ -1074,3 +1074,13 @@ func (mc *MockAKSEngineClient) GetLogAnalyticsWorkspaceInfo(ctx context.Context,
 	return "00000000-0000-0000-0000-000000000000", "4D+vyd5/jScBmsAwZOF/0GOBQ5kuFQc9JVaW+HlnJ58cyePJcwTpks+rVmvgcXGmmyujLDNEVPiT8pB274a9Yg==", "westus", nil
 }
+
+// GetVirtualMachinePowerState returns the virtual machine's PowerState status code
+func (mc *MockAKSEngineClient) GetVirtualMachinePowerState(ctx context.Context, resourceGroup, name string) (string, error) {
+	return "", nil
+}
+
+// GetVirtualMachineScaleSetInstancePowerState returns the virtual machine's PowerState status code
+func (mc *MockAKSEngineClient) GetVirtualMachineScaleSetInstancePowerState(ctx context.Context, resourceGroup, name, instanceID string) (string, error) {
+	return "", nil
+}
diff --git a/pkg/engine/profiles/certificate-profile/kubernetes.json b/pkg/engine/profiles/certificate-profile/kubernetes.json
new file mode 100644
index 0000000000..ea0a832d3c
--- /dev/null
+++ b/pkg/engine/profiles/certificate-profile/kubernetes.json
@@ -0,0 +1,20 @@
+{
+  "caCertificate": "caCertificate",
+  "caPrivateKey": "caPrivateKey",
+  "apiServerCertificate": "apiServerCertificate",
+  "apiServerPrivateKey": "apiServerPrivateKey",
+  "clientCertificate": "clientCertificate",
+  "clientPrivateKey": "clientPrivateKey",
+  "kubeConfigCertificate": "kubeConfigCertificate",
+  "kubeConfigPrivateKey": "kubeConfigPrivateKey",
+  "etcdClientCertificate": "etcdClientCertificate",
+  "etcdClientPrivateKey": "etcdClientPrivateKey",
+  "etcdServerCertificate": "etcdServerCertificate",
+  "etcdServerPrivateKey": "etcdServerPrivateKey",
+  "etcdPeerCertificates": [
+    "etcdPeerCertificate0"
+  ],
+  "etcdPeerPrivateKeys": [
+    "etcdPeerPrivateKey0"
+  ]
+}
\ No newline at end of file
diff --git a/pkg/engine/templates_generated.go b/pkg/engine/templates_generated.go
index dc20294cbc..930feae98d 100644
--- a/pkg/engine/templates_generated.go
+++ b/pkg/engine/templates_generated.go
@@ -92,6 +92,8 @@
 // ../../parts/k8s/manifests/kubernetesmaster-kube-apiserver.yaml
 // ../../parts/k8s/manifests/kubernetesmaster-kube-controller-manager.yaml
 // ../../parts/k8s/manifests/kubernetesmaster-kube-scheduler.yaml
+// ../../parts/k8s/rotate-certs.ps1
+// ../../parts/k8s/rotate-certs.sh
 // ../../parts/k8s/windowsazurecnifunc.ps1
 // ../../parts/k8s/windowsazurecnifunc.tests.ps1
 // ../../parts/k8s/windowscnifunc.ps1
@@ -18156,6 +18158,184 @@ func k8sManifestsKubernetesmasterKubeSchedulerYaml() (*asset, error) {
 	return a, nil
 }
 
+var _k8sRotateCertsPs1 = []byte(`<#
+.DESCRIPTION
+    This script rotates a Windows node's certificates.
+    It assumes that client.key, client.crt and ca.crt will be dropped in $env:temp.
+#>
+
+. c:\AzureData\k8s\windowskubeletfunc.ps1
+. 
c:\AzureData\k8s\kuberneteswindowsfunctions.ps1
+
+$global:KubeDir = "c:\k"
+
+$global:AgentKeyPath = [io.path]::Combine($env:temp, "client.key")
+$global:AgentCertificatePath = [io.path]::Combine($env:temp, "client.crt")
+$global:CACertificatePath = [io.path]::Combine($env:temp, "ca.crt")
+
+function Prereqs {
+    Assert-FileExists $global:AgentKeyPath
+    Assert-FileExists $global:AgentCertificatePath
+    Assert-FileExists $global:CACertificatePath
+}
+
+function Backup {
+    Copy-Item "c:\k\config" "c:\k\config.bak"
+    Copy-Item "c:\k\ca.crt" "c:\k\ca.crt.bak"
+}
+
+function Update-CACertificate {
+    Write-Log "Write ca root"
+    Write-CACert -CACertificate $global:CACertificate -KubeDir $global:KubeDir
+}
+
+function Update-KubeConfig {
+    Write-Log "Write kube config"
+    $ClusterConfiguration = ConvertFrom-Json ((Get-Content "c:\k\kubeclusterconfig.json" -ErrorAction Stop) | out-string)
+    $MasterIP = $ClusterConfiguration.Kubernetes.ControlPlane.IpAddress
+
+    $CloudProviderConfig = ConvertFrom-Json ((Get-Content "c:\k\azure.json" -ErrorAction Stop) | out-string)
+    $MasterFQDNPrefix = $CloudProviderConfig.ResourceGroup
+
+    $AgentKey = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes((Get-Content -Raw $AgentKeyPath)))
+    $AgentCertificate = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes((Get-Content -Raw $AgentCertificatePath)))
+
+    Write-KubeConfig -CACertificate $global:CACertificate ` + "`" + `
+        -KubeDir $global:KubeDir ` + "`" + `
+        -MasterFQDNPrefix $MasterFQDNPrefix ` + "`" + `
+        -MasterIP $MasterIP ` + "`" + `
+        -AgentKey $AgentKey ` + "`" + `
+        -AgentCertificate $AgentCertificate
+}
+
+function Force-Kubelet-CertRotation {
+    Remove-Item "/var/lib/kubelet/pki/kubelet-client-current.pem" -Force -ErrorAction Ignore
+    Remove-Item "/var/lib/kubelet/pki/kubelet.crt" -Force -ErrorAction Ignore
+    Remove-Item "/var/lib/kubelet/pki/kubelet.key" -Force -ErrorAction Ignore
+
+    $err = Retry-Command -Command "c:\k\windowsnodereset.ps1" -Args @{Foo="Bar"} -Retries 3 -RetryDelaySeconds 10
+    if(!$err) {
+        Write-Error 'Error resetting Windows node'
+        throw $_
+    }
+}
+
+function Start-CertRotation {
+    try
+    {
+        $global:CACertificate = [System.Convert]::ToBase64String([System.Text.Encoding]::UTF8.GetBytes((Get-Content -Raw $CACertificatePath)))
+
+        Prereqs
+        Update-CACertificate
+        Update-KubeConfig
+        Force-Kubelet-CertRotation
+    }
+    catch
+    {
+        Write-Error $_
+        throw $_
+    }
+}
+
+function Clean {
+    Remove-Item "c:\k\config.bak" -Force -ErrorAction Ignore
+    Remove-Item "c:\k\ca.crt.bak" -Force -ErrorAction Ignore
+    Remove-Item $global:AgentKeyPath -Force -ErrorAction Ignore
+    Remove-Item $global:AgentCertificatePath -Force -ErrorAction Ignore
+    Remove-Item $global:CACertificatePath -Force -ErrorAction Ignore
+}
+`)
+
+func k8sRotateCertsPs1Bytes() ([]byte, error) {
+	return _k8sRotateCertsPs1, nil
+}
+
+func k8sRotateCertsPs1() (*asset, error) {
+	bytes, err := k8sRotateCertsPs1Bytes()
+	if err != nil {
+		return nil, err
+	}
+
+	info := bindataFileInfo{name: "k8s/rotate-certs.ps1", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)}
+	a := &asset{bytes: bytes, info: info}
+	return a, nil
+}
+
+var _k8sRotateCertsSh = []byte(`#!/bin/bash -ex
+
+export WD=/etc/kubernetes/rotate-certs
+export NEW_CERTS_DIR=${WD}/certs
+
+# copied from cse_helpers.sh; sourcing that file does not always work
+systemctl_restart() {
+  retries=$1; wait_sleep=$2; timeout=$3 svcname=$4
+  for i in $(seq 1 $retries); do
+    timeout $timeout systemctl daemon-reload
+    timeout $timeout systemctl 
restart $svcname && break || + if [ $i -eq $retries ]; then + return 1 + else + sleep $wait_sleep + fi + done +} + +backup() { + if [ ! -d /etc/kubernetes/certs.bak ]; then + cp -rp /etc/kubernetes/certs/ /etc/kubernetes/certs.bak + fi +} + +cp_certs() { + cp -p ${NEW_CERTS_DIR}/etcdpeer* /etc/kubernetes/certs/ + cp -p ${NEW_CERTS_DIR}/etcdclient* /etc/kubernetes/certs/ + cp -p ${NEW_CERTS_DIR}/etcdserver* /etc/kubernetes/certs/ + cp -p ${NEW_CERTS_DIR}/ca.* /etc/kubernetes/certs/ + cp -p ${NEW_CERTS_DIR}/client.* /etc/kubernetes/certs/ + cp -p ${NEW_CERTS_DIR}/apiserver.* /etc/kubernetes/certs/ + cp -p ${NEW_CERTS_DIR}/kubeconfig ~/.kube/config + + rm -f /var/lib/kubelet/pki/kubelet-client-current.pem +} + +cp_proxy() { + source /etc/environment + /etc/kubernetes/generate-proxy-certs.sh +} + +agent_certs() { + cp -p ${NEW_CERTS_DIR}/ca.* /etc/kubernetes/certs/ + cp -p ${NEW_CERTS_DIR}/client.* /etc/kubernetes/certs/ + + rm -f /var/lib/kubelet/pki/kubelet-client-current.pem + sync + sleep 5 + systemctl_restart 10 5 10 kubelet +} + +cleanup() { + rm -rf ${WD} + rm -rf /etc/kubernetes/certs.bak +} + +"$@" +`) + +func k8sRotateCertsShBytes() ([]byte, error) { + return _k8sRotateCertsSh, nil +} + +func k8sRotateCertsSh() (*asset, error) { + bytes, err := k8sRotateCertsShBytes() + if err != nil { + return nil, err + } + + info := bindataFileInfo{name: "k8s/rotate-certs.sh", size: 0, mode: os.FileMode(0), modTime: time.Unix(0, 0)} + a := &asset{bytes: bytes, info: info} + return a, nil +} + var _k8sWindowsazurecnifuncPs1 = []byte(` # TODO: remove - dead code? @@ -20104,6 +20284,8 @@ var _bindata = map[string]func() (*asset, error){ "k8s/manifests/kubernetesmaster-kube-apiserver.yaml": k8sManifestsKubernetesmasterKubeApiserverYaml, "k8s/manifests/kubernetesmaster-kube-controller-manager.yaml": k8sManifestsKubernetesmasterKubeControllerManagerYaml, "k8s/manifests/kubernetesmaster-kube-scheduler.yaml": k8sManifestsKubernetesmasterKubeSchedulerYaml, + "k8s/rotate-certs.ps1": k8sRotateCertsPs1, + "k8s/rotate-certs.sh": k8sRotateCertsSh, "k8s/windowsazurecnifunc.ps1": k8sWindowsazurecnifuncPs1, "k8s/windowsazurecnifunc.tests.ps1": k8sWindowsazurecnifuncTestsPs1, "k8s/windowscnifunc.ps1": k8sWindowscnifuncPs1, @@ -20260,6 +20442,8 @@ var _bintree = &bintree{nil, map[string]*bintree{ "kubernetesmaster-kube-controller-manager.yaml": {k8sManifestsKubernetesmasterKubeControllerManagerYaml, map[string]*bintree{}}, "kubernetesmaster-kube-scheduler.yaml": {k8sManifestsKubernetesmasterKubeSchedulerYaml, map[string]*bintree{}}, }}, + "rotate-certs.ps1": {k8sRotateCertsPs1, map[string]*bintree{}}, + "rotate-certs.sh": {k8sRotateCertsSh, map[string]*bintree{}}, "windowsazurecnifunc.ps1": {k8sWindowsazurecnifuncPs1, map[string]*bintree{}}, "windowsazurecnifunc.tests.ps1": {k8sWindowsazurecnifuncTestsPs1, map[string]*bintree{}}, "windowscnifunc.ps1": {k8sWindowscnifuncPs1, map[string]*bintree{}}, diff --git a/pkg/helpers/ssh/scp.go b/pkg/helpers/ssh/scp.go index 8961b7d998..86d9542052 100644 --- a/pkg/helpers/ssh/scp.go +++ b/pkg/helpers/ssh/scp.go @@ -15,7 +15,7 @@ import ( // CopyToRemote copies a file to a remote host func CopyToRemote(host *RemoteHost, file *RemoteFile) (combinedOutput string, err error) { - c, err := client(host) + c, err := clientWithRetry(host) if err != nil { return "", errors.Wrap(err, "creating SSH client") } @@ -41,7 +41,7 @@ func CopyFromRemote(host *RemoteHost, remoteFile *RemoteFile, destinationPath st return "", errors.Wrap(err, "opening destination file") } defer f.Close() - c, err 
:= client(host)
+	c, err := clientWithRetry(host)
 	if err != nil {
 		return "", errors.Wrap(err, "creating SSH client")
 	}
diff --git a/pkg/helpers/ssh/ssh.go b/pkg/helpers/ssh/ssh.go
index a502239596..c32974264e 100644
--- a/pkg/helpers/ssh/ssh.go
+++ b/pkg/helpers/ssh/ssh.go
@@ -6,14 +6,17 @@ package ssh
 import (
 	"fmt"
 	"io/ioutil"
+	"time"
 
 	"github.com/pkg/errors"
 	"golang.org/x/crypto/ssh"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/util/retry"
 )
 
 // ExecuteRemote executes a script in a remote host
 func ExecuteRemote(host *RemoteHost, script string) (combinedOutput string, err error) {
-	c, err := client(host)
+	c, err := clientWithRetry(host)
 	if err != nil {
 		return "", errors.Wrap(err, "creating SSH client")
 	}
@@ -42,6 +45,19 @@ func PublicKeyAuth(sshPrivateKeyPath string) (ssh.AuthMethod, error) {
 	return ssh.PublicKeys(k), nil
 }
 
+func clientWithRetry(host *RemoteHost) (*ssh.Client, error) {
+	// TODO Granular retry func
+	retryFunc := func(err error) bool { return true }
+	backoff := wait.Backoff{Steps: 300, Duration: 10 * time.Second}
+	var c *ssh.Client
+	var err error
+	err = retry.OnError(backoff, retryFunc, func() error {
+		c, err = client(host)
+		return err
+	})
+	return c, err
+}
+
 func client(host *RemoteHost) (*ssh.Client, error) {
 	jbConfig, err := config(host.Jumpbox.AuthConfig)
 	if err != nil {
diff --git a/pkg/helpers/ssh/types.go b/pkg/helpers/ssh/types.go
index f44b8cbb97..572d606938 100644
--- a/pkg/helpers/ssh/types.go
+++ b/pkg/helpers/ssh/types.go
@@ -14,6 +14,16 @@ type RemoteFile struct {
 	Content []byte
 }
 
+// NewRemoteFile returns a RemoteFile with the given path, permissions, owner, and content
+func NewRemoteFile(path, permissions, owner string, content []byte) *RemoteFile {
+	return &RemoteFile{
+		Path:        path,
+		Permissions: permissions,
+		Owner:       owner,
+		Content:     content,
+	}
+}
+
 type AuthConfig struct {
 	User     string
 	Password string
diff --git a/pkg/kubernetes/client.go b/pkg/kubernetes/client.go
index 88c6815953..81ef16740a 100644
--- a/pkg/kubernetes/client.go
+++ b/pkg/kubernetes/client.go
@@ -14,6 +14,7 @@ import (
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/clientcmd"
@@ -25,18 +26,14 @@ const (
 	evictionSubresource = "pods/eviction"
 )
 
-// kubernetesClientSetClient is a Kubernetes client hooked up to a live api server.
-type kubernetesClientSetClient struct {
+// ClientSetClient is a Kubernetes client hooked up to a live api server.
+type ClientSetClient struct {
 	clientset         *kubernetes.Clientset
 	interval, timeout time.Duration
 }
 
-// TODO This contructor does not follow best practices
-// https://github.com/golang/go/wiki/CodeReviewComments#interfaces
-
 // NewClient returns a KubernetesClient hooked up to the api server at the apiserverURL. 
-func NewClient(apiserverURL, kubeConfig string, interval, timeout time.Duration) (Client, error) { - // creates the clientset +func NewClient(apiserverURL, kubeConfig string, interval, timeout time.Duration) (*ClientSetClient, error) { config, err := clientcmd.BuildConfigFromKubeconfigGetter(apiserverURL, func() (*clientcmdapi.Config, error) { return clientcmd.Load([]byte(kubeConfig)) }) @@ -47,57 +44,92 @@ func NewClient(apiserverURL, kubeConfig string, interval, timeout time.Duration) if err != nil { return nil, err } - return &kubernetesClientSetClient{clientset: clientset, interval: interval, timeout: timeout}, nil + return &ClientSetClient{clientset: clientset, interval: interval, timeout: timeout}, nil } // ListPods returns Pods running on the passed in node. -func (c *kubernetesClientSetClient) ListPods(node *v1.Node) (*v1.PodList, error) { - return c.clientset.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{ +func (c *ClientSetClient) ListPods(node *v1.Node) (*v1.PodList, error) { + return c.ListPodsByOptions(metav1.NamespaceAll, metav1.ListOptions{ FieldSelector: fields.SelectorFromSet(fields.Set{"spec.nodeName": node.Name}).String()}) } // ListAllPods returns all Pods running. -func (c *kubernetesClientSetClient) ListAllPods() (*v1.PodList, error) { - return c.clientset.CoreV1().Pods(metav1.NamespaceAll).List(metav1.ListOptions{}) +func (c *ClientSetClient) ListAllPods() (*v1.PodList, error) { + return c.ListPodsByOptions(metav1.NamespaceAll, metav1.ListOptions{}) +} + +// ListPodsByOptions returns Pods based on the passed in list options. +func (c *ClientSetClient) ListPodsByOptions(namespace string, opts metav1.ListOptions) (*v1.PodList, error) { + return c.clientset.CoreV1().Pods(namespace).List(opts) } // ListNodes returns a list of Nodes registered in the api server. -func (c *kubernetesClientSetClient) ListNodes() (*v1.NodeList, error) { +func (c *ClientSetClient) ListNodes() (*v1.NodeList, error) { return c.ListNodesByOptions(metav1.ListOptions{}) } -// ListNodes returns a list of Nodes registered in the api server. -func (c *kubernetesClientSetClient) ListNodesByOptions(opts metav1.ListOptions) (*v1.NodeList, error) { +// ListNodesByOptions returns a list of Nodes registered in the api server. +func (c *ClientSetClient) ListNodesByOptions(opts metav1.ListOptions) (*v1.NodeList, error) { return c.clientset.CoreV1().Nodes().List(opts) } // ListServiceAccounts returns a list of Service Accounts in the provided namespace. -func (c *kubernetesClientSetClient) ListServiceAccounts(namespace string) (*v1.ServiceAccountList, error) { - return c.clientset.CoreV1().ServiceAccounts(namespace).List(metav1.ListOptions{}) +func (c *ClientSetClient) ListServiceAccounts(namespace string) (*v1.ServiceAccountList, error) { + return c.ListServiceAccountsByOptions(namespace, metav1.ListOptions{}) +} + +// ListServiceAccountsByOptions returns a list of Service Accounts in the provided namespace. +func (c *ClientSetClient) ListServiceAccountsByOptions(namespace string, opts metav1.ListOptions) (*v1.ServiceAccountList, error) { + return c.clientset.CoreV1().ServiceAccounts(namespace).List(opts) +} + +// ListDeployments returns a list of deployments in the provided namespace. +func (c *ClientSetClient) ListDeployments(namespace string, opts metav1.ListOptions) (*appsv1.DeploymentList, error) { + return c.clientset.AppsV1().Deployments(namespace).List(opts) +} + +// ListDaemonSets returns a list of daemonsets in the provided namespace. 
+func (c *ClientSetClient) ListDaemonSets(namespace string, opts metav1.ListOptions) (*appsv1.DaemonSetList, error) { + return c.clientset.AppsV1().DaemonSets(namespace).List(opts) +} + +// ListSecrets returns a list of secrets in the provided namespace. +func (c *ClientSetClient) ListSecrets(namespace string, opts metav1.ListOptions) (*v1.SecretList, error) { + return c.clientset.CoreV1().Secrets(namespace).List(opts) +} + +// PatchDeployment applies a JSON patch to a deployment in the provided namespace. +func (c *ClientSetClient) PatchDeployment(namespace, name, jsonPatch string) (*appsv1.Deployment, error) { + return c.clientset.AppsV1().Deployments(namespace).Patch(name, types.StrategicMergePatchType, []byte(jsonPatch)) +} + +// PatchDaemonSet applies a JSON patch to a daemonset in the provided namespace. +func (c *ClientSetClient) PatchDaemonSet(namespace, name, jsonPatch string) (*appsv1.DaemonSet, error) { + return c.clientset.AppsV1().DaemonSets(namespace).Patch(name, types.StrategicMergePatchType, []byte(jsonPatch)) } // GetNode returns details about node with passed in name. -func (c *kubernetesClientSetClient) GetNode(name string) (*v1.Node, error) { +func (c *ClientSetClient) GetNode(name string) (*v1.Node, error) { return c.clientset.CoreV1().Nodes().Get(name, metav1.GetOptions{}) } // UpdateNode updates the node in the api server with the passed in info. -func (c *kubernetesClientSetClient) UpdateNode(node *v1.Node) (*v1.Node, error) { +func (c *ClientSetClient) UpdateNode(node *v1.Node) (*v1.Node, error) { return c.clientset.CoreV1().Nodes().Update(node) } // DeleteNode deregisters the node in the api server. -func (c *kubernetesClientSetClient) DeleteNode(name string) error { +func (c *ClientSetClient) DeleteNode(name string) error { return c.clientset.CoreV1().Nodes().Delete(name, &metav1.DeleteOptions{}) } // DeleteServiceAccount deletes the passed in service account. -func (c *kubernetesClientSetClient) DeleteServiceAccount(sa *v1.ServiceAccount) error { +func (c *ClientSetClient) DeleteServiceAccount(sa *v1.ServiceAccount) error { return c.clientset.CoreV1().ServiceAccounts(sa.Namespace).Delete(sa.Name, &metav1.DeleteOptions{}) } // SupportEviction queries the api server to discover if it supports eviction, and returns supported type if it is supported. -func (c *kubernetesClientSetClient) SupportEviction() (string, error) { +func (c *ClientSetClient) SupportEviction() (string, error) { discoveryClient := c.clientset.Discovery() groupList, err := discoveryClient.ServerGroups() if err != nil { @@ -128,27 +160,37 @@ func (c *kubernetesClientSetClient) SupportEviction() (string, error) { } // DeleteClusterRole deletes the passed in cluster role. -func (c *kubernetesClientSetClient) DeleteClusterRole(role *rbacv1.ClusterRole) error { +func (c *ClientSetClient) DeleteClusterRole(role *rbacv1.ClusterRole) error { return c.clientset.RbacV1().ClusterRoles().Delete(role.Name, &metav1.DeleteOptions{}) } // DeleteDaemonSet deletes the passed in daemonset. -func (c *kubernetesClientSetClient) DeleteDaemonSet(daemonset *appsv1.DaemonSet) error { +func (c *ClientSetClient) DeleteDaemonSet(daemonset *appsv1.DaemonSet) error { return c.clientset.AppsV1().DaemonSets(daemonset.Namespace).Delete(daemonset.Name, &metav1.DeleteOptions{}) } // DeleteDeployment deletes the passed in daemonset. 
-func (c *kubernetesClientSetClient) DeleteDeployment(deployment *appsv1.Deployment) error { +func (c *ClientSetClient) DeleteDeployment(deployment *appsv1.Deployment) error { return c.clientset.AppsV1().Deployments(deployment.Namespace).Delete(deployment.Name, &metav1.DeleteOptions{}) } // DeletePod deletes the passed in pod. -func (c *kubernetesClientSetClient) DeletePod(pod *v1.Pod) error { +func (c *ClientSetClient) DeletePod(pod *v1.Pod) error { return c.clientset.CoreV1().Pods(pod.Namespace).Delete(pod.Name, &metav1.DeleteOptions{}) } +// DeletePods deletes all pods in a namespace that match the option filters. +func (c *ClientSetClient) DeletePods(namespace string, opts metav1.ListOptions) error { + return c.clientset.CoreV1().Pods(namespace).DeleteCollection(&metav1.DeleteOptions{}, opts) +} + +// DeleteSecret deletes the passed in secret. +func (c *ClientSetClient) DeleteSecret(secret *v1.Secret) error { + return c.clientset.CoreV1().Secrets(secret.Namespace).Delete(secret.Name, &metav1.DeleteOptions{}) +} + // EvictPod evicts the passed in pod using the passed in api version. -func (c *kubernetesClientSetClient) EvictPod(pod *v1.Pod, policyGroupVersion string) error { +func (c *ClientSetClient) EvictPod(pod *v1.Pod, policyGroupVersion string) error { eviction := &policy.Eviction{ TypeMeta: metav1.TypeMeta{ APIVersion: policyGroupVersion, @@ -163,12 +205,12 @@ func (c *kubernetesClientSetClient) EvictPod(pod *v1.Pod, policyGroupVersion str } // GetPod returns the pod. -func (c *kubernetesClientSetClient) getPod(namespace, name string) (*v1.Pod, error) { +func (c *ClientSetClient) getPod(namespace, name string) (*v1.Pod, error) { return c.clientset.CoreV1().Pods(namespace).Get(name, metav1.GetOptions{}) } // WaitForDelete waits until all pods are deleted. Returns all pods not deleted and an error on failure. -func (c *kubernetesClientSetClient) WaitForDelete(logger *log.Entry, pods []v1.Pod, usingEviction bool) ([]v1.Pod, error) { +func (c *ClientSetClient) WaitForDelete(logger *log.Entry, pods []v1.Pod, usingEviction bool) ([]v1.Pod, error) { verbStr := "deleted" if usingEviction { verbStr = "evicted" @@ -196,16 +238,16 @@ func (c *kubernetesClientSetClient) WaitForDelete(logger *log.Entry, pods []v1.P } // GetDaemonSet returns a given daemonset in a namespace. -func (c *kubernetesClientSetClient) GetDaemonSet(namespace, name string) (*appsv1.DaemonSet, error) { +func (c *ClientSetClient) GetDaemonSet(namespace, name string) (*appsv1.DaemonSet, error) { return c.clientset.AppsV1().DaemonSets(namespace).Get(name, metav1.GetOptions{}) } // GetDeployment returns a given deployment in a namespace. -func (c *kubernetesClientSetClient) GetDeployment(namespace, name string) (*appsv1.Deployment, error) { +func (c *ClientSetClient) GetDeployment(namespace, name string) (*appsv1.Deployment, error) { return c.clientset.AppsV1().Deployments(namespace).Get(name, metav1.GetOptions{}) } // UpdateDeployment updates a deployment to match the given specification. 
-func (c *kubernetesClientSetClient) UpdateDeployment(namespace string, deployment *appsv1.Deployment) (*appsv1.Deployment, error) {
+func (c *ClientSetClient) UpdateDeployment(namespace string, deployment *appsv1.Deployment) (*appsv1.Deployment, error) {
 	return c.clientset.AppsV1().Deployments(namespace).Update(deployment)
 }
diff --git a/pkg/kubernetes/composite_client.go b/pkg/kubernetes/composite_client.go
new file mode 100644
index 0000000000..85e32fc20a
--- /dev/null
+++ b/pkg/kubernetes/composite_client.go
@@ -0,0 +1,457 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license.
+
+package kubernetes
+
+import (
+	"crypto/x509"
+	"net/url"
+	"time"
+
+	"github.com/Azure/aks-engine/pkg/kubernetes/internal"
+	appsv1 "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/util/retry"
+)
+
+// CompositeClientSet wraps a pair of Kubernetes clients hooked up to a live api server.
+//
+// Prefer this client when the cluster CA is expected to change (e.g., during certificate rotation operations).
+type CompositeClientSet struct {
+	oldCAClient internal.Client
+	newCAClient internal.Client
+	timeout     time.Duration
+	backoff     wait.Backoff
+	retryFunc   func(err error) bool
+}
+
+// NewCompositeClient returns a CompositeClientSet that issues every request
+// through both the old-CA-backed and new-CA-backed clients.
+func NewCompositeClient(oldCAClient, newCAClient internal.Client, interval, timeout time.Duration) *CompositeClientSet {
+	return &CompositeClientSet{
+		oldCAClient: oldCAClient,
+		newCAClient: newCAClient,
+		timeout:     timeout,
+		backoff: wait.Backoff{
+			Steps:    int(int64(timeout/time.Millisecond) / int64(interval/time.Millisecond)),
+			Duration: interval,
+			Factor:   1.0,
+			Jitter:   0.0,
+		},
+		retryFunc: retriable, // Inject if ever needed
+	}
+}
+
+// retriable returns true unless err is an x509.UnknownAuthorityError instance,
+// either directly or wrapped in a *url.Error.
+func retriable(err error) bool {
+	switch err := err.(type) {
+	case x509.UnknownAuthorityError:
+		return false
+	case *url.Error:
+		return retriable(err.Unwrap())
+	default:
+		return true
+	}
+}
+
+type listPodsResult struct {
+	x   *v1.PodList
+	err error
+}
+
+// ListPods returns Pods based on the passed in list options.
+func (c *CompositeClientSet) ListPods(namespace string, opts metav1.ListOptions) (*v1.PodList, error) {
+	lastError := wait.ErrWaitTimeout
+	result := func(oldCAClient, newCAClient internal.Client) <-chan listPodsResult {
+		stream := make(chan listPodsResult)
+		exec := func(client internal.Client) {
+			_ = retry.OnError(c.backoff, c.retryFunc, func() error {
+				x, err := client.ListPodsByOptions(namespace, opts)
+				if err != nil {
+					lastError = err
+					return err
+				}
+				stream <- listPodsResult{x, err}
+				return nil
+			})
+		}
+		go exec(oldCAClient)
+		go exec(newCAClient)
+		return stream
+	}(c.oldCAClient, c.newCAClient)
+	for {
+		select {
+		case res := <-result:
+			return res.x, res.err
+		case <-time.After(c.timeout):
+			return nil, lastError
+		}
+	}
+}
+
+type listNodesResult struct {
+	x   *v1.NodeList
+	err error
+}
+
+// ListNodes returns a list of Nodes registered in the api server.
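+//
+// As with every CompositeClientSet method, the request is issued concurrently
+// through the old-CA and new-CA clients: the first successful response wins,
+// x509.UnknownAuthorityError fails fast instead of retrying, and any other
+// error is retried with the configured backoff until the timeout elapses.
+//
+// A minimal usage sketch, assuming kubeconfigs for both CAs are at hand (the
+// variable names below are illustrative, not part of this change):
+//
+//	oldClient, _ := NewClient(apiserverURL, oldCAKubeConfig, interval, timeout)
+//	newClient, _ := NewClient(apiserverURL, newCAKubeConfig, interval, timeout)
+//	composite := NewCompositeClient(oldClient, newClient, interval, timeout)
+//	nodes, err := composite.ListNodes() // succeeds as soon as either CA is accepted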
+func (c *CompositeClientSet) ListNodes() (x *v1.NodeList, err error) { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan listNodesResult { + stream := make(chan listNodesResult) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + x, err := client.ListNodes() + if err != nil { + lastError = err + return err + } + stream <- listNodesResult{x, err} + return nil + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case res := <-result: + return res.x, res.err + case <-time.After(c.timeout): + return nil, lastError + } + } +} + +type listServiceAccountsResult struct { + x *v1.ServiceAccountList + err error +} + +// ListServiceAccounts returns a list of Service Accounts in the provided namespace. +func (c *CompositeClientSet) ListServiceAccounts(namespace string, opts metav1.ListOptions) (*v1.ServiceAccountList, error) { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan listServiceAccountsResult { + stream := make(chan listServiceAccountsResult) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + x, err := client.ListServiceAccountsByOptions(namespace, opts) + if err != nil { + lastError = err + return err + } + stream <- listServiceAccountsResult{x, err} + return nil + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case res := <-result: + return res.x, res.err + case <-time.After(c.timeout): + return nil, lastError + } + } +} + +type listDeploymentsResult struct { + x *appsv1.DeploymentList + err error +} + +// ListDeployments returns a list of deployments in the provided namespace. +func (c *CompositeClientSet) ListDeployments(namespace string, opts metav1.ListOptions) (*appsv1.DeploymentList, error) { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan listDeploymentsResult { + stream := make(chan listDeploymentsResult) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + x, err := client.ListDeployments(namespace, opts) + if err != nil { + lastError = err + return err + } + stream <- listDeploymentsResult{x, err} + return nil + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case res := <-result: + return res.x, res.err + case <-time.After(c.timeout): + return nil, lastError + } + } +} + +type listDaemonSetsResult struct { + x *appsv1.DaemonSetList + err error +} + +// ListDaemonSets returns a list of daemonsets in the provided namespace. 
+func (c *CompositeClientSet) ListDaemonSets(namespace string, opts metav1.ListOptions) (*appsv1.DaemonSetList, error) {
+	lastError := wait.ErrWaitTimeout
+	result := func(oldCAClient, newCAClient internal.Client) <-chan listDaemonSetsResult {
+		stream := make(chan listDaemonSetsResult)
+		exec := func(client internal.Client) {
+			_ = retry.OnError(c.backoff, c.retryFunc, func() error {
+				x, err := client.ListDaemonSets(namespace, opts)
+				if err != nil {
+					lastError = err
+					return err
+				}
+				stream <- listDaemonSetsResult{x, err}
+				return nil
+			})
+		}
+		go exec(oldCAClient)
+		go exec(newCAClient)
+		return stream
+	}(c.oldCAClient, c.newCAClient)
+	for {
+		select {
+		case res := <-result:
+			return res.x, res.err
+		case <-time.After(c.timeout):
+			return nil, lastError
+		}
+	}
+}
+
+type listSecretsResult struct {
+	x   *v1.SecretList
+	err error
+}
+
+// ListSecrets returns a list of secrets in the provided namespace.
+func (c *CompositeClientSet) ListSecrets(namespace string, opts metav1.ListOptions) (*v1.SecretList, error) {
+	lastError := wait.ErrWaitTimeout
+	result := func(oldCAClient, newCAClient internal.Client) <-chan listSecretsResult {
+		stream := make(chan listSecretsResult)
+		exec := func(client internal.Client) {
+			_ = retry.OnError(c.backoff, c.retryFunc, func() error {
+				x, err := client.ListSecrets(namespace, opts)
+				if err != nil {
+					lastError = err
+					return err
+				}
+				stream <- listSecretsResult{x, err}
+				return nil
+			})
+		}
+		go exec(oldCAClient)
+		go exec(newCAClient)
+		return stream
+	}(c.oldCAClient, c.newCAClient)
+	for {
+		select {
+		case res := <-result:
+			return res.x, res.err
+		case <-time.After(c.timeout):
+			return nil, lastError
+		}
+	}
+}
+
+type deploymentResult struct {
+	x   *appsv1.Deployment
+	err error
+}
+
+// GetDeployment returns a given deployment in a namespace.
+func (c *CompositeClientSet) GetDeployment(namespace, name string) (*appsv1.Deployment, error) {
+	lastError := wait.ErrWaitTimeout
+	result := func(oldCAClient, newCAClient internal.Client) <-chan deploymentResult {
+		stream := make(chan deploymentResult)
+		exec := func(client internal.Client) {
+			_ = retry.OnError(c.backoff, c.retryFunc, func() error {
+				x, err := client.GetDeployment(namespace, name)
+				if err == nil || apierrors.IsNotFound(err) {
+					stream <- deploymentResult{x, err}
+					return nil
+				}
+				lastError = err
+				return err
+			})
+		}
+		go exec(oldCAClient)
+		go exec(newCAClient)
+		return stream
+	}(c.oldCAClient, c.newCAClient)
+	for {
+		select {
+		case res := <-result:
+			return res.x, res.err
+		case <-time.After(c.timeout):
+			return nil, lastError
+		}
+	}
+}
+
+// PatchDeployment applies a JSON patch to a deployment in the provided namespace.
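+//
+// A NotFound response is treated as final and handed back to the caller
+// immediately rather than retried until the timeout.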
+func (c *CompositeClientSet) PatchDeployment(namespace, name, jsonPatch string) (*appsv1.Deployment, error) { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan deploymentResult { + stream := make(chan deploymentResult) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + x, err := client.PatchDeployment(namespace, name, jsonPatch) + if err == nil || apierrors.IsNotFound(err) { + stream <- deploymentResult{x, err} + return nil + } + lastError = err + return err + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case res := <-result: + return res.x, res.err + case <-time.After(c.timeout): + return nil, lastError + } + } +} + +type daemonsetResult struct { + x *appsv1.DaemonSet + err error +} + +// PatchDaemonSet applies a JSON patch to a daemonset in the provided namespace. +func (c *CompositeClientSet) PatchDaemonSet(namespace, name, jsonPatch string) (*appsv1.DaemonSet, error) { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan daemonsetResult { + stream := make(chan daemonsetResult) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + x, err := client.PatchDaemonSet(namespace, name, jsonPatch) + if err == nil || apierrors.IsNotFound(err) { + stream <- daemonsetResult{x, err} + return nil + } + lastError = err + return err + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case res := <-result: + return res.x, res.err + case <-time.After(c.timeout): + return nil, lastError + } + } +} + +// DeletePods deletes all pods in a namespace that match the option filters. +func (c *CompositeClientSet) DeletePods(namespace string, opts metav1.ListOptions) error { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan error { + stream := make(chan error) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + err := client.DeletePods(namespace, opts) + if err == nil || apierrors.IsNotFound(err) { + stream <- err + return nil + } + lastError = err + return err + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case err := <-result: + return err + case <-time.After(c.timeout): + return lastError + } + } +} + +// DeleteServiceAccount deletes the passed in service account. +func (c *CompositeClientSet) DeleteServiceAccount(serviceAccount *v1.ServiceAccount) error { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan error { + stream := make(chan error) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + err := client.DeleteServiceAccount(serviceAccount) + if err == nil || apierrors.IsNotFound(err) { + stream <- err + return nil + } + lastError = err + return err + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case err := <-result: + return err + case <-time.After(c.timeout): + return lastError + } + } +} + +// DeleteSecret deletes the passed in secret. 
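+//
+// Note that both underlying clients race the same deletion against a single
+// api server, so the slower of the two typically observes a NotFound once the
+// faster one has already removed the object; NotFound is therefore treated as
+// a final result rather than a retriable failure.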
+func (c *CompositeClientSet) DeleteSecret(secret *v1.Secret) error { + lastError := wait.ErrWaitTimeout + result := func(oldCAClient, newCAClient internal.Client) <-chan error { + stream := make(chan error) + exec := func(client internal.Client) { + _ = retry.OnError(c.backoff, c.retryFunc, func() error { + err := client.DeleteSecret(secret) + if err == nil || apierrors.IsNotFound(err) { + stream <- err + return nil + } + lastError = err + return err + }) + } + go exec(oldCAClient) + go exec(newCAClient) + return stream + }(c.oldCAClient, c.newCAClient) + for { + select { + case err := <-result: + return err + case <-time.After(c.timeout): + return lastError + } + } +} diff --git a/pkg/kubernetes/composite_client_test.go b/pkg/kubernetes/composite_client_test.go new file mode 100644 index 0000000000..3a571fd605 --- /dev/null +++ b/pkg/kubernetes/composite_client_test.go @@ -0,0 +1,639 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +package kubernetes + +import ( + "crypto/x509" + "errors" + "net/url" + "testing" + "time" + + mock "github.com/Azure/aks-engine/pkg/kubernetes/internal/mock_internal" + gomock "github.com/golang/mock/gomock" + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ( + errAPIGeneric = errors.New("generic api error") + errAPINotFound = &apierrors.StatusError{ErrStatus: metav1.Status{Reason: metav1.StatusReasonNotFound}} + unknownAuthorityError = &url.Error{Err: x509.UnknownAuthorityError{}} +) + +func TestListPods(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, opts := "ns", metav1.ListOptions{} + result := &v1.PodList{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().ListPodsByOptions(ns, opts).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().ListPodsByOptions(ns, opts).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListPodsByOptions(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListPods(ns, opts) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().ListPodsByOptions(ns, opts).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListPodsByOptions(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListPods(ns, opts) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestDeletePods(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, opts := "ns", metav1.ListOptions{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer 
mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().DeletePods(ns, opts).Return(errAPIGeneric), + oldCAClientMock.EXPECT().DeletePods(ns, opts).Return(nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeletePods(ns, opts).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeletePods(ns, opts) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("good client success after a 404, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().DeletePods(ns, opts).Return(errAPINotFound).MaxTimes(1) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeletePods(ns, opts).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeletePods(ns, opts) + g.Expect(err).To(HaveOccurred()) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().DeletePods(ns, opts).Return(errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeletePods(ns, opts).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeletePods(ns, opts) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestListNodes(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + result := &v1.NodeList{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().ListNodes().Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().ListNodes().Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListNodes().Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListNodes() + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().ListNodes().Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListNodes().Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListNodes() + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestListServiceAccounts(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, opts := "ns", 
metav1.ListOptions{} + result := &v1.ServiceAccountList{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().ListServiceAccountsByOptions(ns, opts).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().ListServiceAccountsByOptions(ns, opts).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListServiceAccountsByOptions(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListServiceAccounts(ns, opts) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().ListServiceAccountsByOptions(ns, opts).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListServiceAccountsByOptions(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListServiceAccounts(ns, opts) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestListDeployments(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, opts := "ns", metav1.ListOptions{} + result := &appsv1.DeploymentList{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().ListDeployments(ns, opts).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().ListDeployments(ns, opts).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListDeployments(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListDeployments(ns, opts) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().ListDeployments(ns, opts).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListDeployments(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListDeployments(ns, opts) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestGetDeployment(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, name := "ns", "name" + result := &appsv1.Deployment{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + 
mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().GetDeployment(ns, name).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().GetDeployment(ns, name).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().GetDeployment(ns, name).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.GetDeployment(ns, name) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("good client success after a 404, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().GetDeployment(ns, name).Return(nil, errAPINotFound).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().GetDeployment(ns, name).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.GetDeployment(ns, name) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPINotFound)) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().GetDeployment(ns, name).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().GetDeployment(ns, name).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.GetDeployment(ns, name) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestPatchDeployment(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, name, json := "ns", "name", "patch" + result := &appsv1.Deployment{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().PatchDeployment(ns, name, json).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().PatchDeployment(ns, name, json).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().PatchDeployment(ns, name, json).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.PatchDeployment(ns, name, json) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("good client success after a 404, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().PatchDeployment(ns, name, json).Return(nil, errAPINotFound).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().PatchDeployment(ns, name, json).Return(nil, 
unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.PatchDeployment(ns, name, json) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPINotFound)) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().PatchDeployment(ns, name, json).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().PatchDeployment(ns, name, json).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.PatchDeployment(ns, name, json) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestListDaemonSets(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, opts := "ns", metav1.ListOptions{} + result := &appsv1.DaemonSetList{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().ListDaemonSets(ns, opts).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().ListDaemonSets(ns, opts).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListDaemonSets(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListDaemonSets(ns, opts) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().ListDaemonSets(ns, opts).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListDaemonSets(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListDaemonSets(ns, opts) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestPatchDaemonSet(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, name, json := "ns", "name", "patch" + result := &appsv1.DaemonSet{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().PatchDaemonSet(ns, name, json).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().PatchDaemonSet(ns, name, json).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().PatchDaemonSet(ns, name, json).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, 
interval, timeout) + x, err := sut.PatchDaemonSet(ns, name, json) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("good client success after a 404, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().PatchDaemonSet(ns, name, json).Return(nil, errAPINotFound).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().PatchDaemonSet(ns, name, json).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.PatchDaemonSet(ns, name, json) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPINotFound)) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().PatchDaemonSet(ns, name, json).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().PatchDaemonSet(ns, name, json).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.PatchDaemonSet(ns, name, json) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestListSecrets(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + ns, opts := "ns", metav1.ListOptions{} + result := &v1.SecretList{} + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().ListSecrets(ns, opts).Return(nil, errAPIGeneric), + oldCAClientMock.EXPECT().ListSecrets(ns, opts).Return(result, nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListSecrets(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListSecrets(ns, opts) + g.Expect(x).To(Equal(result)) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().ListSecrets(ns, opts).Return(nil, errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().ListSecrets(ns, opts).Return(nil, unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + x, err := sut.ListSecrets(ns, opts) + g.Expect(x).To(BeNil()) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestDeleteServiceAccount(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) 
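+			// The first DeleteServiceAccount call fails with a retriable error and
+			// the second succeeds; InOrder asserts the composite client retried in sequence.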
+ gomock.InOrder( + oldCAClientMock.EXPECT().DeleteServiceAccount(gomock.Any()).Return(errAPIGeneric), + oldCAClientMock.EXPECT().DeleteServiceAccount(gomock.Any()).Return(nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeleteServiceAccount(gomock.Any()).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeleteServiceAccount(&v1.ServiceAccount{}) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("good client success after a 404, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().DeleteServiceAccount(gomock.Any()).Return(errAPINotFound).MaxTimes(1) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeleteServiceAccount(gomock.Any()).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeleteServiceAccount(&v1.ServiceAccount{}) + g.Expect(err).To(HaveOccurred()) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().DeleteServiceAccount(gomock.Any()).Return(errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeleteServiceAccount(gomock.Any()).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeleteServiceAccount(&v1.ServiceAccount{}) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} + +func TestDeleteSecret(t *testing.T) { + t.Parallel() + g := NewGomegaWithT(t) + + t.Run("good client success after a retry, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + gomock.InOrder( + oldCAClientMock.EXPECT().DeleteSecret(gomock.Any()).Return(errAPIGeneric), + oldCAClientMock.EXPECT().DeleteSecret(gomock.Any()).Return(nil), + ) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeleteSecret(gomock.Any()).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeleteSecret(&v1.Secret{}) + g.Expect(err).NotTo(HaveOccurred()) + }) + + t.Run("good client success after a 404, bad client tries only once", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().DeleteSecret(gomock.Any()).Return(errAPINotFound).MaxTimes(1) + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeleteSecret(gomock.Any()).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeleteSecret(&v1.Secret{}) + g.Expect(err).To(HaveOccurred()) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + 
}) + + t.Run("both clients fail, process times out", func(t *testing.T) { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + + oldCAClientMock := mock.NewMockClient(mockCtrl) + oldCAClientMock.EXPECT().DeleteSecret(gomock.Any()).Return(errAPIGeneric).AnyTimes() + newCAClientMock := mock.NewMockClient(mockCtrl) + newCAClientMock.EXPECT().DeleteSecret(gomock.Any()).Return(unknownAuthorityError).MaxTimes(1) + + interval, timeout := 1*time.Second, 5*time.Second + sut := NewCompositeClient(oldCAClientMock, newCAClientMock, interval, timeout) + err := sut.DeleteSecret(&v1.Secret{}) + g.Expect(err).To(HaveOccurred()) + g.Expect(err).To(Equal(errAPIGeneric)) + }) +} diff --git a/pkg/kubernetes/internal/interfaces.go b/pkg/kubernetes/internal/interfaces.go new file mode 100644 index 0000000000..99eae428bb --- /dev/null +++ b/pkg/kubernetes/internal/interfaces.go @@ -0,0 +1,38 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +package internal + +import ( + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Client interface models client for interacting with kubernetes api server +type Client interface { + // ListPodsByOptions returns Pods based on the passed in list options. + ListPodsByOptions(namespace string, opts metav1.ListOptions) (*v1.PodList, error) + // ListNodes returns a list of Nodes registered in the api server. + ListNodes() (*v1.NodeList, error) + // ListServiceAccountsByOptions returns a list of Service Accounts in the provided namespace. + ListServiceAccountsByOptions(namespace string, opts metav1.ListOptions) (*v1.ServiceAccountList, error) + // ListDeployments returns a list of deployments in the provided namespace. + ListDeployments(namespace string, opts metav1.ListOptions) (*appsv1.DeploymentList, error) + // ListDaemonSets returns a list of daemonsets in the provided namespace. + ListDaemonSets(namespace string, opts metav1.ListOptions) (*appsv1.DaemonSetList, error) + // ListSecrets returns a list of secrets in the provided namespace. + ListSecrets(namespace string, opts metav1.ListOptions) (*v1.SecretList, error) + // PatchDeployment applies a JSON patch to a deployment in the provided namespace. + PatchDeployment(namespace, name, jsonPatch string) (*appsv1.Deployment, error) + // PatchDaemonSet applies a JSON patch to a daemonset in the provided namespace. + PatchDaemonSet(namespace, name, jsonPatch string) (*appsv1.DaemonSet, error) + // GetDeployment returns a given deployment in a namespace. + GetDeployment(namespace, name string) (*appsv1.Deployment, error) + // DeletePods deletes all pods in a namespace that match the option filters. + DeletePods(namespace string, opts metav1.ListOptions) error + // DeleteSecret deletes the passed in secret. + DeleteSecret(secret *v1.Secret) error + // DeleteServiceAccount deletes the passed in service account. + DeleteServiceAccount(sa *v1.ServiceAccount) error +} diff --git a/pkg/kubernetes/internal/mock_internal/client_mock.go b/pkg/kubernetes/internal/mock_internal/client_mock.go new file mode 100644 index 0000000000..e896972da7 --- /dev/null +++ b/pkg/kubernetes/internal/mock_internal/client_mock.go @@ -0,0 +1,216 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +// Code generated by MockGen. DO NOT EDIT. +// Source: ../interfaces.go + +// Package mock_internal is a generated GoMock package. 
+package mock_internal + +import ( + gomock "github.com/golang/mock/gomock" + v1 "k8s.io/api/apps/v1" + v10 "k8s.io/api/core/v1" + v11 "k8s.io/apimachinery/pkg/apis/meta/v1" + reflect "reflect" +) + +// MockClient is a mock of Client interface +type MockClient struct { + ctrl *gomock.Controller + recorder *MockClientMockRecorder +} + +// MockClientMockRecorder is the mock recorder for MockClient +type MockClientMockRecorder struct { + mock *MockClient +} + +// NewMockClient creates a new mock instance +func NewMockClient(ctrl *gomock.Controller) *MockClient { + mock := &MockClient{ctrl: ctrl} + mock.recorder = &MockClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use +func (m *MockClient) EXPECT() *MockClientMockRecorder { + return m.recorder +} + +// ListPodsByOptions mocks base method +func (m *MockClient) ListPodsByOptions(namespace string, opts v11.ListOptions) (*v10.PodList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListPodsByOptions", namespace, opts) + ret0, _ := ret[0].(*v10.PodList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListPodsByOptions indicates an expected call of ListPodsByOptions +func (mr *MockClientMockRecorder) ListPodsByOptions(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListPodsByOptions", reflect.TypeOf((*MockClient)(nil).ListPodsByOptions), namespace, opts) +} + +// ListNodes mocks base method +func (m *MockClient) ListNodes() (*v10.NodeList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListNodes") + ret0, _ := ret[0].(*v10.NodeList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListNodes indicates an expected call of ListNodes +func (mr *MockClientMockRecorder) ListNodes() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListNodes", reflect.TypeOf((*MockClient)(nil).ListNodes)) +} + +// ListServiceAccountsByOptions mocks base method +func (m *MockClient) ListServiceAccountsByOptions(namespace string, opts v11.ListOptions) (*v10.ServiceAccountList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListServiceAccountsByOptions", namespace, opts) + ret0, _ := ret[0].(*v10.ServiceAccountList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListServiceAccountsByOptions indicates an expected call of ListServiceAccountsByOptions +func (mr *MockClientMockRecorder) ListServiceAccountsByOptions(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListServiceAccountsByOptions", reflect.TypeOf((*MockClient)(nil).ListServiceAccountsByOptions), namespace, opts) +} + +// ListDeployments mocks base method +func (m *MockClient) ListDeployments(namespace string, opts v11.ListOptions) (*v1.DeploymentList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListDeployments", namespace, opts) + ret0, _ := ret[0].(*v1.DeploymentList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListDeployments indicates an expected call of ListDeployments +func (mr *MockClientMockRecorder) ListDeployments(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListDeployments", reflect.TypeOf((*MockClient)(nil).ListDeployments), namespace, opts) +} + +// ListDaemonSets mocks base method +func (m *MockClient) ListDaemonSets(namespace string, opts v11.ListOptions) (*v1.DaemonSetList, error) { 
+ m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListDaemonSets", namespace, opts) + ret0, _ := ret[0].(*v1.DaemonSetList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListDaemonSets indicates an expected call of ListDaemonSets +func (mr *MockClientMockRecorder) ListDaemonSets(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListDaemonSets", reflect.TypeOf((*MockClient)(nil).ListDaemonSets), namespace, opts) +} + +// ListSecrets mocks base method +func (m *MockClient) ListSecrets(namespace string, opts v11.ListOptions) (*v10.SecretList, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ListSecrets", namespace, opts) + ret0, _ := ret[0].(*v10.SecretList) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ListSecrets indicates an expected call of ListSecrets +func (mr *MockClientMockRecorder) ListSecrets(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListSecrets", reflect.TypeOf((*MockClient)(nil).ListSecrets), namespace, opts) +} + +// PatchDeployment mocks base method +func (m *MockClient) PatchDeployment(namespace, name, jsonPatch string) (*v1.Deployment, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PatchDeployment", namespace, name, jsonPatch) + ret0, _ := ret[0].(*v1.Deployment) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// PatchDeployment indicates an expected call of PatchDeployment +func (mr *MockClientMockRecorder) PatchDeployment(namespace, name, jsonPatch interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PatchDeployment", reflect.TypeOf((*MockClient)(nil).PatchDeployment), namespace, name, jsonPatch) +} + +// PatchDaemonSet mocks base method +func (m *MockClient) PatchDaemonSet(namespace, name, jsonPatch string) (*v1.DaemonSet, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "PatchDaemonSet", namespace, name, jsonPatch) + ret0, _ := ret[0].(*v1.DaemonSet) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// PatchDaemonSet indicates an expected call of PatchDaemonSet +func (mr *MockClientMockRecorder) PatchDaemonSet(namespace, name, jsonPatch interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PatchDaemonSet", reflect.TypeOf((*MockClient)(nil).PatchDaemonSet), namespace, name, jsonPatch) +} + +// GetDeployment mocks base method +func (m *MockClient) GetDeployment(namespace, name string) (*v1.Deployment, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetDeployment", namespace, name) + ret0, _ := ret[0].(*v1.Deployment) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetDeployment indicates an expected call of GetDeployment +func (mr *MockClientMockRecorder) GetDeployment(namespace, name interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetDeployment", reflect.TypeOf((*MockClient)(nil).GetDeployment), namespace, name) +} + +// DeletePods mocks base method +func (m *MockClient) DeletePods(namespace string, opts v11.ListOptions) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeletePods", namespace, opts) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeletePods indicates an expected call of DeletePods +func (mr *MockClientMockRecorder) DeletePods(namespace, opts interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, 
"DeletePods", reflect.TypeOf((*MockClient)(nil).DeletePods), namespace, opts) +} + +// DeleteSecret mocks base method +func (m *MockClient) DeleteSecret(secret *v10.Secret) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteSecret", secret) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteSecret indicates an expected call of DeleteSecret +func (mr *MockClientMockRecorder) DeleteSecret(secret interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteSecret", reflect.TypeOf((*MockClient)(nil).DeleteSecret), secret) +} + +// DeleteServiceAccount mocks base method +func (m *MockClient) DeleteServiceAccount(sa *v10.ServiceAccount) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteServiceAccount", sa) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteServiceAccount indicates an expected call of DeleteServiceAccount +func (mr *MockClientMockRecorder) DeleteServiceAccount(sa interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteServiceAccount", reflect.TypeOf((*MockClient)(nil).DeleteServiceAccount), sa) +} diff --git a/pkg/kubernetes/internal/mock_internal/doc.go b/pkg/kubernetes/internal/mock_internal/doc.go new file mode 100644 index 0000000000..efaec5352d --- /dev/null +++ b/pkg/kubernetes/internal/mock_internal/doc.go @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT license. + +//go:generate mockgen -destination client_mock.go --package mock_internal --source ../interfaces.go Client +//go:generate /usr/bin/env bash -c "cat ../../../../scripts/copyright.txt client_mock.go > _client_mock.go && mv _client_mock.go client_mock.go" + +package mock_internal //nolint diff --git a/test/e2e/cluster.sh b/test/e2e/cluster.sh index f95c688b04..601df38bee 100755 --- a/test/e2e/cluster.sh +++ b/test/e2e/cluster.sh @@ -73,16 +73,42 @@ function rotateCertificates { ${DEV_IMAGE} \ ./bin/aks-engine rotate-certs \ --api-model _output/${RESOURCE_GROUP}/apimodel.json \ - --apiserver ${API_SERVER} \ - --azure-env ${AZURE_ENV} \ - --client-id ${AZURE_CLIENT_ID} \ - --client-secret ${AZURE_CLIENT_SECRET} \ - --debug \ - --identity-system ${IDENTITY_SYSTEM} \ + --ssh-host ${API_SERVER} \ --location ${REGION} \ + --linux-ssh-private-key _output/${RESOURCE_GROUP}-ssh \ --resource-group ${RESOURCE_GROUP} \ - --ssh _output/${RESOURCE_GROUP}-ssh \ - --subscription-id ${AZURE_SUBSCRIPTION_ID} || exit 1 + --client-id ${AZURE_CLIENT_ID} \ + --client-secret ${AZURE_CLIENT_SECRET} \ + --subscription-id ${AZURE_SUBSCRIPTION_ID} \ + --debug + + # Retry if it fails the first time (validate --certificate-profile instead of regenerating a new set of certs) + exit_code=$? 
+  if [ $exit_code -ne 0 ]; then
+    local CERTS_PATH=_output/${RESOURCE_GROUP}/certificateProfile.json
+    jq '.properties.certificateProfile' _output/${RESOURCE_GROUP}/_rotate_certs_output/apimodel.json > ${CERTS_PATH}
+
+    docker run --rm \
+      -v $(pwd):${WORK_DIR} \
+      -v /etc/ssl/certs:/etc/ssl/certs \
+      -w ${WORK_DIR} \
+      -e REGION=${REGION} \
+      -e RESOURCE_GROUP=${RESOURCE_GROUP} \
+      ${DEV_IMAGE} \
+      ./bin/aks-engine rotate-certs \
+      --api-model _output/${RESOURCE_GROUP}/apimodel.json \
+      --ssh-host ${API_SERVER} \
+      --location ${REGION} \
+      --linux-ssh-private-key _output/${RESOURCE_GROUP}-ssh \
+      --resource-group ${RESOURCE_GROUP} \
+      --client-id ${AZURE_CLIENT_ID} \
+      --client-secret ${AZURE_CLIENT_SECRET} \
+      --subscription-id ${AZURE_SUBSCRIPTION_ID} \
+      --certificate-profile ${CERTS_PATH} --force \
+      --debug
+
+    exit $?
+  fi
 }

 echo "Running E2E tests against a cluster built with the following API model:"
@@ -224,10 +250,6 @@ if [ "${UPGRADE_CLUSTER}" = "true" ] || [ "${SCALE_CLUSTER}" = "true" ] || [ -n
     REGION=$(ls -dt1 _output/* | head -n 1 | cut -d/ -f2 | cut -d- -f2)
     API_SERVER=${RESOURCE_GROUP}.${REGION}.${RESOURCE_MANAGER_VM_DNS_SUFFIX:-cloudapp.azure.com}

-    if [ "${ROTATE_CERTS}" = "true" ]; then
-      rotateCertificates
-    fi
-
     if [ "${GET_CLUSTER_LOGS}" = "true" ]; then
       docker run --rm \
         -v $(pwd):${WORK_DIR} \
@@ -272,6 +294,66 @@ else
   exit 0
 fi

+if [ "${ROTATE_CERTS}" = "true" ]; then
+  rotateCertificates
+
+  SKIP_AFTER_ROTATE_CERTS="should be able to autoscale"
+  SKIP_AFTER_SCALE_DOWN="${SKIP_AFTER_SCALE_DOWN}|should be able to autoscale"
+  SKIP_AFTER_SCALE_UP="${SKIP_AFTER_SCALE_UP}|should be able to autoscale"
+
+  docker run --rm \
+    -v $(pwd):${WORK_DIR} \
+    -v /etc/ssl/certs:/etc/ssl/certs \
+    -w ${WORK_DIR} \
+    -e CLIENT_ID=${AZURE_CLIENT_ID} \
+    -e CLIENT_SECRET=${AZURE_CLIENT_SECRET} \
+    -e CLIENT_OBJECTID=${CLIENT_OBJECTID} \
+    -e TENANT_ID=${AZURE_TENANT_ID} \
+    -e SUBSCRIPTION_ID=${AZURE_SUBSCRIPTION_ID} \
+    -e INFRA_RESOURCE_GROUP="${INFRA_RESOURCE_GROUP}" \
+    -e ORCHESTRATOR=kubernetes \
+    -e NAME=$RESOURCE_GROUP \
+    -e TIMEOUT=${E2E_TEST_TIMEOUT} \
+    -e LB_TIMEOUT=${LB_TEST_TIMEOUT} \
+    -e KUBERNETES_IMAGE_BASE=$KUBERNETES_IMAGE_BASE \
+    -e KUBERNETES_IMAGE_BASE_TYPE=$KUBERNETES_IMAGE_BASE_TYPE \
+    -e CLEANUP_ON_EXIT=false \
+    -e REGIONS=$REGION \
+    -e IS_JENKINS=${IS_JENKINS} \
+    -e SKIP_LOGS_COLLECTION=true \
+    -e GINKGO_FAIL_FAST="${GINKGO_FAIL_FAST}" \
+    -e GINKGO_SKIP="${SKIP_AFTER_ROTATE_CERTS}" \
+    -e GINKGO_FOCUS="${GINKGO_FOCUS}" \
+    -e TEST_PVC="${TEST_PVC}" \
+    -e SKIP_TEST=false \
+    -e ADD_NODE_POOL_INPUT=${ADD_NODE_POOL_INPUT} \
+    -e API_PROFILE="${API_PROFILE}" \
+    -e CUSTOM_CLOUD_NAME="${ENVIRONMENT_NAME}" \
+    -e IDENTITY_SYSTEM="${IDENTITY_SYSTEM}" \
+    -e AUTHENTICATION_METHOD="${AUTHENTICATION_METHOD}" \
+    -e LOCATION="${LOCATION}" \
+    -e CUSTOM_CLOUD_CLIENT_ID="${CUSTOM_CLOUD_CLIENT_ID}" \
+    -e CUSTOM_CLOUD_SECRET="${CUSTOM_CLOUD_SECRET}" \
+    -e PORTAL_ENDPOINT="${PORTAL_ENDPOINT}" \
+    -e SERVICE_MANAGEMENT_ENDPOINT="${SERVICE_MANAGEMENT_ENDPOINT}" \
+    -e RESOURCE_MANAGER_ENDPOINT="${RESOURCE_MANAGER_ENDPOINT}" \
+    -e STORAGE_ENDPOINT_SUFFIX="${STORAGE_ENDPOINT_SUFFIX}" \
+    -e KEY_VAULT_DNS_SUFFIX="${KEY_VAULT_DNS_SUFFIX}" \
+    -e ACTIVE_DIRECTORY_ENDPOINT="${ACTIVE_DIRECTORY_ENDPOINT}" \
+    -e GALLERY_ENDPOINT="${GALLERY_ENDPOINT}" \
+    -e GRAPH_ENDPOINT="${GRAPH_ENDPOINT}" \
+    -e SERVICE_MANAGEMENT_VM_DNS_SUFFIX="${SERVICE_MANAGEMENT_VM_DNS_SUFFIX}" \
+    -e RESOURCE_MANAGER_VM_DNS_SUFFIX="${RESOURCE_MANAGER_VM_DNS_SUFFIX}" \
+    -e 
STABILITY_ITERATIONS=${STABILITY_ITERATIONS} \ + -e STABILITY_TIMEOUT_SECONDS=${STABILITY_TIMEOUT_SECONDS} \ + -e ARC_CLIENT_ID=${ARC_CLIENT_ID:-$AZURE_CLIENT_ID} \ + -e ARC_CLIENT_SECRET=${ARC_CLIENT_SECRET:-$AZURE_CLIENT_SECRET} \ + -e ARC_SUBSCRIPTION_ID=${ARC_SUBSCRIPTION_ID:-$AZURE_SUBSCRIPTION_ID} \ + -e ARC_LOCATION=${ARC_LOCATION:-$LOCATION} \ + -e ARC_TENANT_ID=${ARC_TENANT_ID:-$AZURE_TENANT_ID} \ + ${DEV_IMAGE} make test-kubernetes || tryExit && renameResultsFile "rotate-certs" +fi + if [ -n "$ADD_NODE_POOL_INPUT" ]; then for pool in $(echo ${ADD_NODE_POOL_INPUT} | jq -c '.[]'); do echo $pool > ${TMP_DIR}/addpool-input.json @@ -299,10 +381,6 @@ if [ -n "$ADD_NODE_POOL_INPUT" ]; then CLEANUP_AFTER_ADD_NODE_POOL="false" fi - if [ "${ROTATE_CERTS}" = "true" ]; then - rotateCertificates - fi - docker run --rm \ -v $(pwd):${WORK_DIR} \ -v /etc/ssl/certs:/etc/ssl/certs \ @@ -417,10 +495,6 @@ if [ "${SCALE_CLUSTER}" = "true" ]; then --client-secret ${AZURE_CLIENT_SECRET} || exit 1 done - if [ "${ROTATE_CERTS}" = "true" ]; then - rotateCertificates - fi - docker run --rm \ -v $(pwd):${WORK_DIR} \ -v /etc/ssl/certs:/etc/ssl/certs \ @@ -510,10 +584,6 @@ if [ "${UPGRADE_CLUSTER}" = "true" ]; then --client-id ${AZURE_CLIENT_ID} \ --client-secret ${AZURE_CLIENT_SECRET} || exit 1 - if [ "${ROTATE_CERTS}" = "true" ]; then - rotateCertificates - fi - docker run --rm \ -v $(pwd):${WORK_DIR} \ -v /etc/ssl/certs:/etc/ssl/certs \ @@ -592,10 +662,6 @@ if [ "${SCALE_CLUSTER}" = "true" ]; then --client-secret ${AZURE_CLIENT_SECRET} || exit 1 done - if [ "${ROTATE_CERTS}" = "true" ]; then - rotateCertificates - fi - docker run --rm \ -v $(pwd):${WORK_DIR} \ -v /etc/ssl/certs:/etc/ssl/certs \ From 8f305b72e394b72103ba967087017806df65186b Mon Sep 17 00:00:00 2001 From: jadarsie Date: Mon, 1 Feb 2021 14:31:07 -0800 Subject: [PATCH 2/2] cluster.sh tweak --- test/e2e/cluster.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/e2e/cluster.sh b/test/e2e/cluster.sh index 601df38bee..efc8f7c3a2 100755 --- a/test/e2e/cluster.sh +++ b/test/e2e/cluster.sh @@ -85,8 +85,12 @@ function rotateCertificates { # Retry if it fails the first time (validate --certificate-profile instead of regenerating a new set of certs) exit_code=$? if [ $exit_code -ne 0 ]; then - local CERTS_PATH=_output/${RESOURCE_GROUP}/certificateProfile.json - jq '.properties.certificateProfile' _output/${RESOURCE_GROUP}/_rotate_certs_output/apimodel.json > ${CERTS_PATH} + docker run --rm \ + -v $(pwd):${WORK_DIR} \ + -w ${WORK_DIR} \ + -e RESOURCE_GROUP=$RESOURCE_GROUP \ + ${DEV_IMAGE} \ + /bin/bash -c "jq '.properties.certificateProfile' _output/${RESOURCE_GROUP}/_rotate_certs_output/apimodel.json > _output/${RESOURCE_GROUP}/certificateProfile.json" || exit 1 docker run --rm \ -v $(pwd):${WORK_DIR} \ @@ -104,7 +108,7 @@ function rotateCertificates { --client-id ${AZURE_CLIENT_ID} \ --client-secret ${AZURE_CLIENT_SECRET} \ --subscription-id ${AZURE_SUBSCRIPTION_ID} \ - --certificate-profile ${CERTS_PATH} --force \ + --certificate-profile _output/${RESOURCE_GROUP}/certificateProfile.json --force \ --debug exit $?