Skip to content

Commit

Permalink
br/pdutils: retry when encountered dns error (#53005)
Browse files Browse the repository at this point in the history
close #53029
  • Loading branch information
YuJuncen committed May 6, 2024
1 parent 9075922 commit fe6837a
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 2 deletions.
4 changes: 4 additions & 0 deletions br/pkg/lightning/common/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ func isSingleRetryableError(err error) bool {

switch nerr := err.(type) {
case net.Error:
var dErr *net.DNSError
if goerrors.As(nerr, &dErr) {
return true
}
if nerr.Timeout() {
return true
}
Expand Down
2 changes: 1 addition & 1 deletion br/pkg/lightning/common/retry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func TestIsRetryableError(t *testing.T) {
require.True(t, IsRetryableError(ErrWriteTooSlow))
require.False(t, IsRetryableError(io.EOF))
require.False(t, IsRetryableError(&net.AddrError{}))
require.False(t, IsRetryableError(&net.DNSError{}))
require.True(t, IsRetryableError(&net.DNSError{}))
require.True(t, IsRetryableError(&net.DNSError{IsTimeout: true}))

// kv errors
Expand Down
6 changes: 5 additions & 1 deletion br/pkg/pdutil/pd.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ const (
pauseTimeout = 5 * time.Minute

// number of times a pd request is retried when the connection fails
pdRequestRetryTime = 10
pdRequestRetryTime = 120

// set max-pending-peer-count to a large value to avoid scatter-region failures.
maxPendingPeerUnlimited uint64 = math.MaxInt32
Expand Down Expand Up @@ -174,6 +174,10 @@ func pdRequestWithCode(
if err != nil {
return 0, nil, errors.Trace(err)
}
failpoint.Inject("DNSError", func() {
req.Host = "nosuchhost"
req.URL.Host = "nosuchhost"
})
resp, err = cli.Do(req) //nolint:bodyclose
count++
failpoint.Inject("InjectClosed", func(v failpoint.Value) {
Expand Down
2 changes: 2 additions & 0 deletions tests/realtikvtest/brietest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ go_test(
"flashback_test.go",
"main_test.go",
"operator_test.go",
"pdutil_test.go",
],
flaky = True,
race = "on",
deps = [
"//br/pkg/pdutil",
"//br/pkg/task",
"//br/pkg/task/operator",
"//config",
Expand Down
33 changes: 33 additions & 0 deletions tests/realtikvtest/brietest/pdutil_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package brietest

import (
"context"
"testing"

"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/br/pkg/pdutil"
"github.com/stretchr/testify/require"
pd "github.com/tikv/pd/client"
)

// TestCreateClient checks that a PD controller can still be created when the
// first PD requests fail with a DNS resolution error.
//
// The DNSError failpoint rewrites the request host to a non-resolvable name;
// it is armed for 119 hits ("119*return") and then stops firing, so a later
// attempt is sent to the real address. The test requires a PD endpoint
// listening on 127.0.0.1:2379.
//
// NOTE(review): FastRetry presumably shortens the wait between retries so the
// test finishes quickly — confirm against the failpoint's definition in pdutil.
func TestCreateClient(t *testing.T) {
	const (
		dnsErrorFailpoint  = "github.com/pingcap/tidb/br/pkg/pdutil/DNSError"
		fastRetryFailpoint = "github.com/pingcap/tidb/br/pkg/pdutil/FastRetry"
	)

	// Failpoints are process-global: always disable them when the test ends so
	// they cannot leak into other tests that run in the same binary.
	require.NoError(t, failpoint.Enable(dnsErrorFailpoint, "119*return"))
	t.Cleanup(func() {
		require.NoError(t, failpoint.Disable(dnsErrorFailpoint))
	})
	require.NoError(t, failpoint.Enable(fastRetryFailpoint, "return(true)"))
	t.Cleanup(func() {
		require.NoError(t, failpoint.Disable(fastRetryFailpoint))
	})

	ctl, err := pdutil.NewPdController(context.Background(), "127.0.0.1:2379", nil, pd.SecurityOption{})
	require.NoError(t, err)
	ctl.Close()
}

0 comments on commit fe6837a

Please sign in to comment.