Skip to content

Commit

Permalink
Merge pull request kubernetes#16798 from justinsb/metal_test_4
Browse files Browse the repository at this point in the history
metal: support `kops toolbox enroll` on a control-plane machine
  • Loading branch information
k8s-ci-robot committed Aug 31, 2024
2 parents 1901437 + a65bb16 commit 6262087
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 51 deletions.
170 changes: 124 additions & 46 deletions pkg/commands/toolbox_enroll.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"net"
"os"
"path"
"path/filepath"
"sort"
"strconv"
"strings"
Expand All @@ -40,6 +41,7 @@ import (
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/yaml"

"k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/pkg/apis/kops/v1alpha2"
Expand All @@ -50,11 +52,9 @@ import (
"k8s.io/kops/pkg/model"
"k8s.io/kops/pkg/model/resources"
"k8s.io/kops/pkg/nodemodel"
"k8s.io/kops/pkg/nodemodel/wellknownassets"
"k8s.io/kops/pkg/wellknownservices"
"k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/cloudup"
"k8s.io/kops/util/pkg/architectures"
"k8s.io/kops/util/pkg/vfs"
)

Expand Down Expand Up @@ -92,12 +92,16 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
if err != nil {
return err
}

if cluster == nil {
return fmt.Errorf("cluster not found %q", options.ClusterName)
}

ig, err := clientset.InstanceGroupsFor(cluster).Get(ctx, options.InstanceGroup, metav1.GetOptions{})
channel, err := cloudup.ChannelForCluster(clientset.VFSContext(), cluster)
if err != nil {
return fmt.Errorf("getting channel for cluster %q: %w", options.ClusterName, err)
}

instanceGroupList, err := clientset.InstanceGroupsFor(cluster).List(ctx, metav1.ListOptions{})
if err != nil {
return err
}
Expand All @@ -107,10 +111,62 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
return err
}

wellKnownAddresses := make(model.WellKnownAddresses)
// The assetBuilder is used primarily to remap images.
var assetBuilder *assets.AssetBuilder
{
// ApplyClusterCmd is used to get the assets.
// We use DryRun and GetAssets to do this without applying any changes.
apply := &cloudup.ApplyClusterCmd{
Cloud: cloud,
Cluster: cluster,
Clientset: clientset,
DryRun: true,
GetAssets: true,
TargetName: cloudup.TargetDryRun,
}
applyResults, err := apply.Run(ctx)
if err != nil {
return fmt.Errorf("error during apply: %w", err)
}
assetBuilder = applyResults.AssetBuilder
}

// Populate the full cluster and instanceGroup specs.
var fullInstanceGroup *kops.InstanceGroup
var fullCluster *kops.Cluster
{
var instanceGroups []*kops.InstanceGroup
for i := range instanceGroupList.Items {
instanceGroup := &instanceGroupList.Items[i]
instanceGroups = append(instanceGroups, instanceGroup)
}

populatedCluster, err := cloudup.PopulateClusterSpec(ctx, clientset, cluster, instanceGroups, cloud, assetBuilder)
if err != nil {
return fmt.Errorf("building full cluster spec: %w", err)
}
fullCluster = populatedCluster

// Build full IG spec to ensure we end up with a valid IG
for _, ig := range instanceGroups {
if ig.Name != options.InstanceGroup {
continue
}
populated, err := cloudup.PopulateInstanceGroupSpec(fullCluster, ig, cloud, channel)
if err != nil {
return err
}
fullInstanceGroup = populated
}
}
if fullInstanceGroup == nil {
return fmt.Errorf("instance group %q not found", options.InstanceGroup)
}

// Determine the well-known addresses for the cluster.
wellKnownAddresses := make(model.WellKnownAddresses)
{
ingresses, err := cloud.GetApiIngressStatus(cluster)
ingresses, err := cloud.GetApiIngressStatus(fullCluster)
if err != nil {
return fmt.Errorf("error getting ingress status: %v", err)
}
Expand All @@ -125,24 +181,24 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
}
}
}

if len(wellKnownAddresses[wellknownservices.KubeAPIServer]) == 0 {
// TODO: Should we support DNS?
return fmt.Errorf("unable to determine IP address for kube-apiserver")
}

for k := range wellKnownAddresses {
sort.Strings(wellKnownAddresses[k])
}

scriptBytes, err := buildBootstrapData(ctx, clientset, cluster, ig, wellKnownAddresses)
// Build the bootstrap data for this node.
bootstrapData, err := buildBootstrapData(ctx, clientset, fullCluster, fullInstanceGroup, wellKnownAddresses)
if err != nil {
return err
return fmt.Errorf("building bootstrap data: %w", err)
}

// Enroll the node over SSH.
if options.Host != "" {
// TODO: This is the pattern we use a lot, but should we try to access it directly?
contextName := cluster.ObjectMeta.Name
contextName := fullCluster.ObjectMeta.Name
clientGetter := genericclioptions.NewConfigFlags(true)
clientGetter.Context = &contextName

Expand All @@ -151,14 +207,15 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
return fmt.Errorf("cannot load kubecfg settings for %q: %w", contextName, err)
}

if err := enrollHost(ctx, options, string(scriptBytes), restConfig); err != nil {
if err := enrollHost(ctx, fullInstanceGroup, options, bootstrapData, restConfig); err != nil {
return err
}
}

return nil
}

func enrollHost(ctx context.Context, options *ToolboxEnrollOptions, nodeupScript string, restConfig *rest.Config) error {
func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnrollOptions, bootstrapData *bootstrapData, restConfig *rest.Config) error {
scheme := runtime.NewScheme()
if err := v1alpha2.AddToScheme(scheme); err != nil {
return fmt.Errorf("building kubernetes scheme: %w", err)
Expand Down Expand Up @@ -211,19 +268,29 @@ func enrollHost(ctx context.Context, options *ToolboxEnrollOptions, nodeupScript
return err
}

if err := createHost(ctx, options, hostname, publicKeyBytes, kubeClient); err != nil {
return err
// We can't create the host resource in the API server for control-plane nodes,
// because the API server (likely) isn't running yet.
if !ig.IsControlPlane() {
if err := createHostResourceInAPIServer(ctx, options, hostname, publicKeyBytes, kubeClient); err != nil {
return err
}
}

for k, v := range bootstrapData.configFiles {
if err := host.writeFile(ctx, k, bytes.NewReader(v)); err != nil {
return fmt.Errorf("writing file %q over SSH: %w", k, err)
}
}

if len(nodeupScript) != 0 {
if _, err := host.runScript(ctx, nodeupScript, ExecOptions{Sudo: sudo, Echo: true}); err != nil {
if len(bootstrapData.nodeupScript) != 0 {
if _, err := host.runScript(ctx, string(bootstrapData.nodeupScript), ExecOptions{Sudo: sudo, Echo: true}); err != nil {
return err
}
}
return nil
}

func createHost(ctx context.Context, options *ToolboxEnrollOptions, nodeName string, publicKey []byte, client client.Client) error {
func createHostResourceInAPIServer(ctx context.Context, options *ToolboxEnrollOptions, nodeName string, publicKey []byte, client client.Client) error {
host := &v1alpha2.Host{}
host.Namespace = "kops-system"
host.Name = nodeName
Expand Down Expand Up @@ -317,6 +384,11 @@ func (s *SSHHost) readFile(ctx context.Context, path string) ([]byte, error) {
return p.ReadFile(ctx)
}

// writeFile writes data to the given remote path on the host over SSH,
// using sudo when the connection was configured with it.
func (s *SSHHost) writeFile(ctx context.Context, path string, data io.ReadSeeker) error {
	dest := vfs.NewSSHPath(s.sshClient, s.hostname, path, s.sudo)
	return dest.WriteFile(ctx, data, nil)
}

func (s *SSHHost) runScript(ctx context.Context, script string, options ExecOptions) (*CommandOutput, error) {
var tempDir string
{
Expand Down Expand Up @@ -398,10 +470,14 @@ func (s *SSHHost) getHostname(ctx context.Context) (string, error) {
return hostname, nil
}

func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster *kops.Cluster, ig *kops.InstanceGroup, wellknownAddresses model.WellKnownAddresses) ([]byte, error) {
if cluster.Spec.KubeAPIServer == nil {
cluster.Spec.KubeAPIServer = &kops.KubeAPIServerConfig{}
}
// bootstrapData holds the data needed to bootstrap a node:
// the nodeup script to run, plus any configuration files that must be
// written to the node before the script is executed.
type bootstrapData struct {
	// nodeupScript is the script executed on the node (via runScript)
	// to start the bootstrap process.
	nodeupScript []byte
	// configFiles maps a remote file path to the contents that should be
	// written there (via writeFile) before nodeupScript runs.
	configFiles map[string][]byte
}

func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster *kops.Cluster, ig *kops.InstanceGroup, wellknownAddresses model.WellKnownAddresses) (*bootstrapData, error) {
bootstrapData := &bootstrapData{}
bootstrapData.configFiles = make(map[string][]byte)

getAssets := false
assetBuilder := assets.NewAssetBuilder(clientset.VFSContext(), cluster.Spec.Assets, cluster.Spec.KubernetesVersion, getAssets)
Expand All @@ -423,17 +499,12 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster
// encryptionConfigSecretHash = base64.URLEncoding.EncodeToString(hashBytes[:])
// }

nodeUpAssets := make(map[architectures.Architecture]*assets.MirroredAsset)
for _, arch := range architectures.GetSupported() {
asset, err := wellknownassets.NodeUpAsset(assetBuilder, arch)
if err != nil {
return nil, err
}
nodeUpAssets[arch] = asset
fileAssets := &nodemodel.FileAssets{Cluster: cluster}
if err := fileAssets.AddFileAssets(assetBuilder); err != nil {
return nil, err
}

assets := make(map[architectures.Architecture][]*assets.MirroredAsset)
configBuilder, err := nodemodel.NewNodeUpConfigBuilder(cluster, assetBuilder, assets, encryptionConfigSecretHash)
configBuilder, err := nodemodel.NewNodeUpConfigBuilder(cluster, assetBuilder, fileAssets.Assets, encryptionConfigSecretHash)
if err != nil {
return nil, err
}
Expand All @@ -445,7 +516,8 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster
return nil, err
}

for _, keyName := range []string{"kubernetes-ca"} {
keyNames := model.KeypairNamesForInstanceGroup(cluster, ig)
for _, keyName := range keyNames {
keyset, err := keystore.FindKeyset(ctx, keyName)
if err != nil {
return nil, fmt.Errorf("getting keyset %q: %w", keyName, err)
Expand All @@ -458,23 +530,13 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster
keysets[keyName] = keyset
}

_, bootConfig, err := configBuilder.BuildConfig(ig, wellknownAddresses, keysets)
nodeupConfig, bootConfig, err := configBuilder.BuildConfig(ig, wellknownAddresses, keysets)
if err != nil {
return nil, err
}

bootConfig.CloudProvider = "metal"

// TODO: Should we / can we specify the node config hash?
// configData, err := utils.YamlMarshal(config)
// if err != nil {
// return nil, fmt.Errorf("error converting nodeup config to yaml: %v", err)
// }
// sum256 := sha256.Sum256(configData)
// bootConfig.NodeupConfigHash = base64.StdEncoding.EncodeToString(sum256[:])

var nodeupScript resources.NodeUpScript
nodeupScript.NodeUpAssets = nodeUpAssets
nodeupScript.NodeUpAssets = fileAssets.NodeUpAssets
nodeupScript.BootConfig = bootConfig

nodeupScript.WithEnvironmentVariables(cluster, ig)
Expand All @@ -483,15 +545,31 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster

nodeupScript.CloudProvider = string(cluster.GetCloudProvider())

bootConfig.ConfigBase = fi.PtrTo("file:///etc/kubernetes/kops/config")

nodeupScriptResource, err := nodeupScript.Build()
if err != nil {
return nil, err
}

b, err := fi.ResourceAsBytes(nodeupScriptResource)
if bootConfig.InstanceGroupRole == kops.InstanceGroupRoleControlPlane {
nodeupConfigBytes, err := yaml.Marshal(nodeupConfig)
if err != nil {
return nil, fmt.Errorf("error converting nodeup config to yaml: %w", err)
}
// Not much reason to hash this, since we're reading it from the local file system
// sum256 := sha256.Sum256(nodeupConfigBytes)
// bootConfig.NodeupConfigHash = base64.StdEncoding.EncodeToString(sum256[:])

p := filepath.Join("/etc/kubernetes/kops/config", "igconfig", bootConfig.InstanceGroupRole.ToLowerString(), ig.Name, "nodeupconfig.yaml")
bootstrapData.configFiles[p] = nodeupConfigBytes
}

nodeupScriptBytes, err := fi.ResourceAsBytes(nodeupScriptResource)
if err != nil {
return nil, err
}
bootstrapData.nodeupScript = nodeupScriptBytes

return b, nil
return bootstrapData, nil
}
18 changes: 13 additions & 5 deletions pkg/model/bootstrapscript.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,9 @@ func (b *BootstrapScript) kubeEnv(ig *kops.InstanceGroup, c *fi.CloudupContext)
return bootConfig, nil
}

// ResourceNodeUp generates and returns a nodeup (bootstrap) script from a
// template file, substituting in specific env vars & cluster spec configuration
func (b *BootstrapScriptBuilder) ResourceNodeUp(c *fi.CloudupModelBuilderContext, ig *kops.InstanceGroup) (fi.Resource, error) {
func KeypairNamesForInstanceGroup(cluster *kops.Cluster, ig *kops.InstanceGroup) []string {
keypairs := []string{"kubernetes-ca", "etcd-clients-ca"}
for _, etcdCluster := range b.Cluster.Spec.EtcdClusters {
for _, etcdCluster := range cluster.Spec.EtcdClusters {
k := etcdCluster.Name
keypairs = append(keypairs, "etcd-manager-ca-"+k, "etcd-peers-ca-"+k)
if k != "events" && k != "main" {
Expand All @@ -142,15 +140,25 @@ func (b *BootstrapScriptBuilder) ResourceNodeUp(c *fi.CloudupModelBuilderContext

if ig.IsBastion() {
keypairs = nil
}

return keypairs
}

// ResourceNodeUp generates and returns a nodeup (bootstrap) script from a
// template file, substituting in specific env vars & cluster spec configuration
func (b *BootstrapScriptBuilder) ResourceNodeUp(c *fi.CloudupModelBuilderContext, ig *kops.InstanceGroup) (fi.Resource, error) {
keypairNames := KeypairNamesForInstanceGroup(b.Cluster, ig)

if ig.IsBastion() {
// Bastions can have AdditionalUserData, but if there isn't any skip this part
if len(ig.Spec.AdditionalUserData) == 0 {
return nil, nil
}
}

caTasks := map[string]*fitasks.Keypair{}
for _, keypair := range keypairs {
for _, keypair := range keypairNames {
caTaskObject, found := c.Tasks["Keypair/"+keypair]
if !found {
return nil, fmt.Errorf("keypair/%s task not found", keypair)
Expand Down
10 changes: 10 additions & 0 deletions tests/e2e/scenarios/bare-metal/run-test
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 r

cd ${REPO_ROOT}

# Enable feature flag for bare metal
export KOPS_FEATURE_FLAGS=Metal

# Set up the AWS credentials
export AWS_SECRET_ACCESS_KEY=secret
export AWS_ACCESS_KEY_ID=accesskey
Expand Down Expand Up @@ -90,4 +93,11 @@ ${KOPS} get ig --name metal.k8s.local -oyaml
${KOPS} update cluster metal.k8s.local
${KOPS} update cluster metal.k8s.local --yes --admin

# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
eval $(ssh-agent)
ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519

# Enroll the control-plane VM
${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host 10.123.45.10 --v=8

echo "Test successful"
3 changes: 3 additions & 0 deletions upup/pkg/fi/cloudup/new_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,9 @@ func NewCluster(opt *NewClusterOptions, clientset simple.Clientset) (*NewCluster
case api.CloudProviderScaleway:
cluster.Spec.CloudProvider.Scaleway = &api.ScalewaySpec{}
case api.CloudProviderMetal:
if !featureflag.Metal.Enabled() {
return nil, fmt.Errorf("bare-metal support requires the Metal feature flag to be enabled")
}
if cluster.Labels == nil {
cluster.Labels = make(map[string]string)
}
Expand Down

0 comments on commit 6262087

Please sign in to comment.