Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions e2e/test_operator_plugin/operator_plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"context"
"fmt"
"log"
"strings"
"time"

fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/v2/api/v1beta2"
Expand Down Expand Up @@ -249,6 +250,11 @@ var _ = Describe("Operator Plugin", Label("e2e", "pr"), func() {
false,
)
log.Println("stdout:", stdout, "stderr:", stderr)
if strings.Contains(stderr, "Error determining public address") {
Skip(
"plugin was not able to determine public address, this means that all coordinators are probably gone",
)
}
Expect(err).NotTo(HaveOccurred())

// Ensure the cluster is available again.
Expand Down
24 changes: 24 additions & 0 deletions kubectl-fdb/cmd/recover_multi_region_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,30 @@ func checkIfClusterIsUnavailableAndMajorityOfCoordinatorsAreUnreachable(
}

log.Println("Getting the status from:", clientPod.Name)
for retry := 0; retry < 5; retry++ {
err = getStatusAndCheckIfClusterShouldBeRecovered(ctx, kubeClient, config, clientPod)
if err == nil {
break
}

time.Sleep(5 * time.Second)
}

// If DNS is used for the cluster file, we could hit cases where no DNS entry can be resolved. In this case we could
// assume that the cluster is also down. The error from the client side is the following:
// Error: error getting status: Error determining public address.
// ERROR: Unable to bind to network (1512)
if err != nil && strings.Contains(err.Error(), "Error determining public address") {
return err
}

return err
}

func getStatusAndCheckIfClusterShouldBeRecovered(ctx context.Context,
kubeClient client.Client,
config *rest.Config,
clientPod *corev1.Pod) error {
status, err := getStatus(ctx, kubeClient, config, clientPod)
if err != nil {
return err
Expand Down