Skip to content

Commit

Permalink
metal: support kops toolbox enroll on a control-plane machine
Browse files Browse the repository at this point in the history
In particular, we want to build the full cluster and instance group.

The control plane does not yet start, because etcd is not configured correctly.
  • Loading branch information
justinsb committed Aug 31, 2024
1 parent 1901437 commit a65bb16
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 51 deletions.
170 changes: 124 additions & 46 deletions pkg/commands/toolbox_enroll.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"net"
"os"
"path"
"path/filepath"
"sort"
"strconv"
"strings"
Expand All @@ -40,6 +41,7 @@ import (
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/yaml"

"k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/pkg/apis/kops/v1alpha2"
Expand All @@ -50,11 +52,9 @@ import (
"k8s.io/kops/pkg/model"
"k8s.io/kops/pkg/model/resources"
"k8s.io/kops/pkg/nodemodel"
"k8s.io/kops/pkg/nodemodel/wellknownassets"
"k8s.io/kops/pkg/wellknownservices"
"k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/cloudup"
"k8s.io/kops/util/pkg/architectures"
"k8s.io/kops/util/pkg/vfs"
)

Expand Down Expand Up @@ -92,12 +92,16 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
if err != nil {
return err
}

if cluster == nil {
return fmt.Errorf("cluster not found %q", options.ClusterName)
}

ig, err := clientset.InstanceGroupsFor(cluster).Get(ctx, options.InstanceGroup, metav1.GetOptions{})
channel, err := cloudup.ChannelForCluster(clientset.VFSContext(), cluster)
if err != nil {
return fmt.Errorf("getting channel for cluster %q: %w", options.ClusterName, err)
}

instanceGroupList, err := clientset.InstanceGroupsFor(cluster).List(ctx, metav1.ListOptions{})
if err != nil {
return err
}
Expand All @@ -107,10 +111,62 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
return err
}

wellKnownAddresses := make(model.WellKnownAddresses)
// The assetBuilder is used primarily to remap images.
var assetBuilder *assets.AssetBuilder
{
// ApplyClusterCmd is used to get the assets.
// We use DryRun and GetAssets to do this without applying any changes.
apply := &cloudup.ApplyClusterCmd{
Cloud: cloud,
Cluster: cluster,
Clientset: clientset,
DryRun: true,
GetAssets: true,
TargetName: cloudup.TargetDryRun,
}
applyResults, err := apply.Run(ctx)
if err != nil {
return fmt.Errorf("error during apply: %w", err)
}
assetBuilder = applyResults.AssetBuilder
}

// Populate the full cluster and instanceGroup specs.
var fullInstanceGroup *kops.InstanceGroup
var fullCluster *kops.Cluster
{
var instanceGroups []*kops.InstanceGroup
for i := range instanceGroupList.Items {
instanceGroup := &instanceGroupList.Items[i]
instanceGroups = append(instanceGroups, instanceGroup)
}

populatedCluster, err := cloudup.PopulateClusterSpec(ctx, clientset, cluster, instanceGroups, cloud, assetBuilder)
if err != nil {
return fmt.Errorf("building full cluster spec: %w", err)
}
fullCluster = populatedCluster

// Build full IG spec to ensure we end up with a valid IG
for _, ig := range instanceGroups {
if ig.Name != options.InstanceGroup {
continue
}
populated, err := cloudup.PopulateInstanceGroupSpec(fullCluster, ig, cloud, channel)
if err != nil {
return err
}
fullInstanceGroup = populated
}
}
if fullInstanceGroup == nil {
return fmt.Errorf("instance group %q not found", options.InstanceGroup)
}

// Determine the well-known addresses for the cluster.
wellKnownAddresses := make(model.WellKnownAddresses)
{
ingresses, err := cloud.GetApiIngressStatus(cluster)
ingresses, err := cloud.GetApiIngressStatus(fullCluster)
if err != nil {
return fmt.Errorf("error getting ingress status: %v", err)
}
Expand All @@ -125,24 +181,24 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
}
}
}

if len(wellKnownAddresses[wellknownservices.KubeAPIServer]) == 0 {
// TODO: Should we support DNS?
return fmt.Errorf("unable to determine IP address for kube-apiserver")
}

for k := range wellKnownAddresses {
sort.Strings(wellKnownAddresses[k])
}

scriptBytes, err := buildBootstrapData(ctx, clientset, cluster, ig, wellKnownAddresses)
// Build the bootstrap data for this node.
bootstrapData, err := buildBootstrapData(ctx, clientset, fullCluster, fullInstanceGroup, wellKnownAddresses)
if err != nil {
return err
return fmt.Errorf("building bootstrap data: %w", err)
}

// Enroll the node over SSH.
if options.Host != "" {
// TODO: This is the pattern we use a lot, but should we try to access it directly?
contextName := cluster.ObjectMeta.Name
contextName := fullCluster.ObjectMeta.Name
clientGetter := genericclioptions.NewConfigFlags(true)
clientGetter.Context = &contextName

Expand All @@ -151,14 +207,15 @@ func RunToolboxEnroll(ctx context.Context, f commandutils.Factory, out io.Writer
return fmt.Errorf("cannot load kubecfg settings for %q: %w", contextName, err)
}

if err := enrollHost(ctx, options, string(scriptBytes), restConfig); err != nil {
if err := enrollHost(ctx, fullInstanceGroup, options, bootstrapData, restConfig); err != nil {
return err
}
}

return nil
}

func enrollHost(ctx context.Context, options *ToolboxEnrollOptions, nodeupScript string, restConfig *rest.Config) error {
func enrollHost(ctx context.Context, ig *kops.InstanceGroup, options *ToolboxEnrollOptions, bootstrapData *bootstrapData, restConfig *rest.Config) error {
scheme := runtime.NewScheme()
if err := v1alpha2.AddToScheme(scheme); err != nil {
return fmt.Errorf("building kubernetes scheme: %w", err)
Expand Down Expand Up @@ -211,19 +268,29 @@ func enrollHost(ctx context.Context, options *ToolboxEnrollOptions, nodeupScript
return err
}

if err := createHost(ctx, options, hostname, publicKeyBytes, kubeClient); err != nil {
return err
// We can't create the host resource in the API server for control-plane nodes,
// because the API server (likely) isn't running yet.
if !ig.IsControlPlane() {
if err := createHostResourceInAPIServer(ctx, options, hostname, publicKeyBytes, kubeClient); err != nil {
return err
}
}

for k, v := range bootstrapData.configFiles {
if err := host.writeFile(ctx, k, bytes.NewReader(v)); err != nil {
return fmt.Errorf("writing file %q over SSH: %w", k, err)
}
}

if len(nodeupScript) != 0 {
if _, err := host.runScript(ctx, nodeupScript, ExecOptions{Sudo: sudo, Echo: true}); err != nil {
if len(bootstrapData.nodeupScript) != 0 {
if _, err := host.runScript(ctx, string(bootstrapData.nodeupScript), ExecOptions{Sudo: sudo, Echo: true}); err != nil {
return err
}
}
return nil
}

func createHost(ctx context.Context, options *ToolboxEnrollOptions, nodeName string, publicKey []byte, client client.Client) error {
func createHostResourceInAPIServer(ctx context.Context, options *ToolboxEnrollOptions, nodeName string, publicKey []byte, client client.Client) error {
host := &v1alpha2.Host{}
host.Namespace = "kops-system"
host.Name = nodeName
Expand Down Expand Up @@ -317,6 +384,11 @@ func (s *SSHHost) readFile(ctx context.Context, path string) ([]byte, error) {
return p.ReadFile(ctx)
}

// writeFile copies data to the given path on the remote host over SSH,
// elevating with sudo when the host connection is configured for it.
func (s *SSHHost) writeFile(ctx context.Context, path string, data io.ReadSeeker) error {
	remotePath := vfs.NewSSHPath(s.sshClient, s.hostname, path, s.sudo)
	return remotePath.WriteFile(ctx, data, nil)
}

func (s *SSHHost) runScript(ctx context.Context, script string, options ExecOptions) (*CommandOutput, error) {
var tempDir string
{
Expand Down Expand Up @@ -398,10 +470,14 @@ func (s *SSHHost) getHostname(ctx context.Context) (string, error) {
return hostname, nil
}

func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster *kops.Cluster, ig *kops.InstanceGroup, wellknownAddresses model.WellKnownAddresses) ([]byte, error) {
if cluster.Spec.KubeAPIServer == nil {
cluster.Spec.KubeAPIServer = &kops.KubeAPIServerConfig{}
}
// bootstrapData holds the artifacts needed to bootstrap a node:
// the nodeup script to execute, plus any config files that must be
// written to the machine before the script runs.
type bootstrapData struct {
	// nodeupScript is the contents of the script to run on the node to bootstrap it.
	nodeupScript []byte
	// configFiles maps destination file paths on the node to the file contents.
	configFiles map[string][]byte
}

func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster *kops.Cluster, ig *kops.InstanceGroup, wellknownAddresses model.WellKnownAddresses) (*bootstrapData, error) {
bootstrapData := &bootstrapData{}
bootstrapData.configFiles = make(map[string][]byte)

getAssets := false
assetBuilder := assets.NewAssetBuilder(clientset.VFSContext(), cluster.Spec.Assets, cluster.Spec.KubernetesVersion, getAssets)
Expand All @@ -423,17 +499,12 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster
// encryptionConfigSecretHash = base64.URLEncoding.EncodeToString(hashBytes[:])
// }

nodeUpAssets := make(map[architectures.Architecture]*assets.MirroredAsset)
for _, arch := range architectures.GetSupported() {
asset, err := wellknownassets.NodeUpAsset(assetBuilder, arch)
if err != nil {
return nil, err
}
nodeUpAssets[arch] = asset
fileAssets := &nodemodel.FileAssets{Cluster: cluster}
if err := fileAssets.AddFileAssets(assetBuilder); err != nil {
return nil, err
}

assets := make(map[architectures.Architecture][]*assets.MirroredAsset)
configBuilder, err := nodemodel.NewNodeUpConfigBuilder(cluster, assetBuilder, assets, encryptionConfigSecretHash)
configBuilder, err := nodemodel.NewNodeUpConfigBuilder(cluster, assetBuilder, fileAssets.Assets, encryptionConfigSecretHash)
if err != nil {
return nil, err
}
Expand All @@ -445,7 +516,8 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster
return nil, err
}

for _, keyName := range []string{"kubernetes-ca"} {
keyNames := model.KeypairNamesForInstanceGroup(cluster, ig)
for _, keyName := range keyNames {
keyset, err := keystore.FindKeyset(ctx, keyName)
if err != nil {
return nil, fmt.Errorf("getting keyset %q: %w", keyName, err)
Expand All @@ -458,23 +530,13 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster
keysets[keyName] = keyset
}

_, bootConfig, err := configBuilder.BuildConfig(ig, wellknownAddresses, keysets)
nodeupConfig, bootConfig, err := configBuilder.BuildConfig(ig, wellknownAddresses, keysets)
if err != nil {
return nil, err
}

bootConfig.CloudProvider = "metal"

// TODO: Should we / can we specify the node config hash?
// configData, err := utils.YamlMarshal(config)
// if err != nil {
// return nil, fmt.Errorf("error converting nodeup config to yaml: %v", err)
// }
// sum256 := sha256.Sum256(configData)
// bootConfig.NodeupConfigHash = base64.StdEncoding.EncodeToString(sum256[:])

var nodeupScript resources.NodeUpScript
nodeupScript.NodeUpAssets = nodeUpAssets
nodeupScript.NodeUpAssets = fileAssets.NodeUpAssets
nodeupScript.BootConfig = bootConfig

nodeupScript.WithEnvironmentVariables(cluster, ig)
Expand All @@ -483,15 +545,31 @@ func buildBootstrapData(ctx context.Context, clientset simple.Clientset, cluster

nodeupScript.CloudProvider = string(cluster.GetCloudProvider())

bootConfig.ConfigBase = fi.PtrTo("file:///etc/kubernetes/kops/config")

nodeupScriptResource, err := nodeupScript.Build()
if err != nil {
return nil, err
}

b, err := fi.ResourceAsBytes(nodeupScriptResource)
if bootConfig.InstanceGroupRole == kops.InstanceGroupRoleControlPlane {
nodeupConfigBytes, err := yaml.Marshal(nodeupConfig)
if err != nil {
return nil, fmt.Errorf("error converting nodeup config to yaml: %w", err)
}
// Not much reason to hash this, since we're reading it from the local file system
// sum256 := sha256.Sum256(nodeupConfigBytes)
// bootConfig.NodeupConfigHash = base64.StdEncoding.EncodeToString(sum256[:])

p := filepath.Join("/etc/kubernetes/kops/config", "igconfig", bootConfig.InstanceGroupRole.ToLowerString(), ig.Name, "nodeupconfig.yaml")
bootstrapData.configFiles[p] = nodeupConfigBytes
}

nodeupScriptBytes, err := fi.ResourceAsBytes(nodeupScriptResource)
if err != nil {
return nil, err
}
bootstrapData.nodeupScript = nodeupScriptBytes

return b, nil
return bootstrapData, nil
}
18 changes: 13 additions & 5 deletions pkg/model/bootstrapscript.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,9 @@ func (b *BootstrapScript) kubeEnv(ig *kops.InstanceGroup, c *fi.CloudupContext)
return bootConfig, nil
}

// ResourceNodeUp generates and returns a nodeup (bootstrap) script from a
// template file, substituting in specific env vars & cluster spec configuration
func (b *BootstrapScriptBuilder) ResourceNodeUp(c *fi.CloudupModelBuilderContext, ig *kops.InstanceGroup) (fi.Resource, error) {
func KeypairNamesForInstanceGroup(cluster *kops.Cluster, ig *kops.InstanceGroup) []string {
keypairs := []string{"kubernetes-ca", "etcd-clients-ca"}
for _, etcdCluster := range b.Cluster.Spec.EtcdClusters {
for _, etcdCluster := range cluster.Spec.EtcdClusters {
k := etcdCluster.Name
keypairs = append(keypairs, "etcd-manager-ca-"+k, "etcd-peers-ca-"+k)
if k != "events" && k != "main" {
Expand All @@ -142,15 +140,25 @@ func (b *BootstrapScriptBuilder) ResourceNodeUp(c *fi.CloudupModelBuilderContext

if ig.IsBastion() {
keypairs = nil
}

return keypairs
}

// ResourceNodeUp generates and returns a nodeup (bootstrap) script from a
// template file, substituting in specific env vars & cluster spec configuration
func (b *BootstrapScriptBuilder) ResourceNodeUp(c *fi.CloudupModelBuilderContext, ig *kops.InstanceGroup) (fi.Resource, error) {
keypairNames := KeypairNamesForInstanceGroup(b.Cluster, ig)

if ig.IsBastion() {
// Bastions can have AdditionalUserData, but if there isn't any skip this part
if len(ig.Spec.AdditionalUserData) == 0 {
return nil, nil
}
}

caTasks := map[string]*fitasks.Keypair{}
for _, keypair := range keypairs {
for _, keypair := range keypairNames {
caTaskObject, found := c.Tasks["Keypair/"+keypair]
if !found {
return nil, fmt.Errorf("keypair/%s task not found", keypair)
Expand Down
10 changes: 10 additions & 0 deletions tests/e2e/scenarios/bare-metal/run-test
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ ssh -o StrictHostKeyChecking=accept-new -i ${REPO_ROOT}/.build/.ssh/id_ed25519 r

cd ${REPO_ROOT}

# Enable feature flag for bare metal
export KOPS_FEATURE_FLAGS=Metal

# Set up the AWS credentials
export AWS_SECRET_ACCESS_KEY=secret
export AWS_ACCESS_KEY_ID=accesskey
Expand Down Expand Up @@ -90,4 +93,11 @@ ${KOPS} get ig --name metal.k8s.local -oyaml
${KOPS} update cluster metal.k8s.local
${KOPS} update cluster metal.k8s.local --yes --admin

# Start an SSH agent; enroll assumes SSH connectivity to the VMs with the key in the agent
eval $(ssh-agent)
ssh-add ${REPO_ROOT}/.build/.ssh/id_ed25519

# Enroll the control-plane VM
${KOPS} toolbox enroll --cluster metal.k8s.local --instance-group control-plane-main --host 10.123.45.10 --v=8

echo "Test successful"
3 changes: 3 additions & 0 deletions upup/pkg/fi/cloudup/new_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,9 @@ func NewCluster(opt *NewClusterOptions, clientset simple.Clientset) (*NewCluster
case api.CloudProviderScaleway:
cluster.Spec.CloudProvider.Scaleway = &api.ScalewaySpec{}
case api.CloudProviderMetal:
if !featureflag.Metal.Enabled() {
return nil, fmt.Errorf("bare-metal support requires the Metal feature flag to be enabled")
}
if cluster.Labels == nil {
cluster.Labels = make(map[string]string)
}
Expand Down

0 comments on commit a65bb16

Please sign in to comment.