Skip to content

Commit

Permalink
feat: add support for debug logging
Browse files Browse the repository at this point in the history
  • Loading branch information
enocom committed Mar 11, 2024
1 parent e9f63a3 commit 805e66a
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 21 deletions.
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,39 @@ func main() {
[Cloud Monitoring]: https://cloud.google.com/monitoring
[Cloud Trace]: https://cloud.google.com/trace

### Debug Logging

The Go Connector supports optional debug logging to help diagnose problems with
the background certificate refresh. To enable it, provide a logger that
implements the `debug.Logger` interface when initializing the Dialer.

For example:

``` go
import (
"context"
"net"

"cloud.google.com/go/alloydbconn"
)

type myLogger struct{}

func (l *myLogger) Debugf(format string, args ...interface{}) {
// Log as you like here
}

func connect() {
l := &myLogger{}

d, err := NewDialer(
context.Background(),
alloydbconn.WithDebugLogger(l),
)
// use dialer as usual...
}
```

## Support policy

### Major version lifecycle
Expand Down
21 changes: 21 additions & 0 deletions debug/debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package debug

// Logger is the interface used for debug logging. By default, it is unused.
type Logger interface {
// Debugf is for reporting information about internal operations.
Debugf(format string, args ...interface{})
}
63 changes: 48 additions & 15 deletions dialer.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (

alloydbadmin "cloud.google.com/go/alloydb/apiv1alpha"
"cloud.google.com/go/alloydb/connectors/apiv1alpha/connectorspb"
"cloud.google.com/go/alloydbconn/debug"
"cloud.google.com/go/alloydbconn/errtype"
"cloud.google.com/go/alloydbconn/internal/alloydb"
"cloud.google.com/go/alloydbconn/internal/trace"
Expand Down Expand Up @@ -91,6 +92,7 @@ type Dialer struct {
refreshTimeout time.Duration

client *alloydbadmin.AlloyDBAdminClient
logger debug.Logger

// defaultDialCfg holds the constructor level DialOptions, so that it can
// be copied and mutated by the Dial function.
Expand All @@ -111,6 +113,10 @@ type Dialer struct {
buffer *buffer
}

type nullLogger struct{}

func (nullLogger) Debugf(string, ...interface{}) {}

// NewDialer creates a new Dialer.
//
// Initial calls to NewDialer make take longer than normal because generation of an
Expand All @@ -120,6 +126,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
cfg := &dialerConfig{
refreshTimeout: alloydb.RefreshTimeout,
dialFunc: proxy.Dial,
logger: nullLogger{},
userAgents: []string{userAgent},
}
for _, opt := range opts {
Expand Down Expand Up @@ -171,6 +178,7 @@ func NewDialer(ctx context.Context, opts ...Option) (*Dialer, error) {
key: cfg.rsaKey,
refreshTimeout: cfg.refreshTimeout,
client: client,
logger: cfg.logger,
defaultDialCfg: dialCfg,
dialerID: uuid.New().String(),
dialFunc: cfg.dialFunc,
Expand Down Expand Up @@ -216,6 +224,11 @@ func (d *Dialer) Dial(ctx context.Context, instance string, opts ...DialOption)
if err != nil {
d.lock.Lock()
defer d.lock.Unlock()
d.logger.Debugf(
"[%v] Removing connection info from cache: %v",
inst.String(),
err,
)
// Stop all background refreshes
i.Close()
delete(d.instances, inst)
Expand All @@ -229,13 +242,19 @@ func (d *Dialer) Dial(ctx context.Context, instance string, opts ...DialOption)
// The TLS handshake will not fail on an expired client certificate. It's
// not until the first read where the client cert error will be surfaced.
// So check that the certificate is valid before proceeding.
if invalidClientCert(tlsCfg) {
if invalidClientCert(inst, d.logger, tlsCfg) {
d.logger.Debugf("[%v] Refreshing certificate now", inst.String())
i.ForceRefresh()
// Block on refreshed connection info
addr, tlsCfg, err = i.ConnectInfo(ctx, cfg.ipType)
if err != nil {
d.lock.Lock()
defer d.lock.Unlock()
d.logger.Debugf(
"[%v] Removing connection info from cache: %v",
inst.String(),
err,
)
// Stop all background refreshes
i.Close()
delete(d.instances, inst)
Expand All @@ -251,8 +270,10 @@ func (d *Dialer) Dial(ctx context.Context, instance string, opts ...DialOption)
if cfg.dialFunc != nil {
f = cfg.dialFunc
}
d.logger.Debugf("[%v] Dialing %v", inst.String(), addr)
conn, err = f(ctx, "tcp", addr)
if err != nil {
d.logger.Debugf("[%v] Dialing %v failed: %v", inst.String(), addr, err)
// refresh the instance info in case it caused the connection failure
i.ForceRefresh()
return nil, errtype.NewDialError("failed to dial", inst.String(), err)
Expand All @@ -268,6 +289,7 @@ func (d *Dialer) Dial(ctx context.Context, instance string, opts ...DialOption)

tlsConn := tls.Client(conn, tlsCfg)
if err := tlsConn.HandshakeContext(ctx); err != nil {
d.logger.Debugf("[%v] TLS handshake failed: %v", inst.String(), err)
// refresh the instance info in case it caused the handshake failure
i.ForceRefresh()
_ = tlsConn.Close() // best effort close attempt
Expand Down Expand Up @@ -295,11 +317,20 @@ func (d *Dialer) Dial(ctx context.Context, instance string, opts ...DialOption)
}), nil
}

func invalidClientCert(c *tls.Config) bool {
// The following conditions should be impossible (no certs, nil leaf), but
// just in case there's an unknown edge case, check assumptions before
// proceeding.
return time.Now().After(c.Certificates[0].Leaf.NotAfter)
func invalidClientCert(
inst alloydb.InstanceURI, l debug.Logger, c *tls.Config,
) bool {
now := time.Now().UTC()
notAfter := c.Certificates[0].Leaf.NotAfter.UTC()
invalid := now.After(notAfter)
l.Debugf(
"[%v] Now = %v, Current cert expiration = %v",
inst.String(),
now.Format(time.RFC3339),
notAfter.Format(time.RFC3339),
)
l.Debugf("[%v] Cert is valid = %v", inst.String(), !invalid)
return invalid
}

// metadataExchange sends metadata about the connection prior to the database
Expand Down Expand Up @@ -457,25 +488,27 @@ func (d *Dialer) Close() error {
}

func (d *Dialer) instance(instance alloydb.InstanceURI) (connectionInfoCache, error) {
// Check instance cache
d.lock.RLock()
i, ok := d.instances[instance]
d.lock.RUnlock()
if !ok {
d.lock.Lock()
defer d.lock.Unlock()
// Recheck to ensure instance wasn't created between locks
i, ok = d.instances[instance]
if !ok {
// Create a new instance
var err error
i = alloydb.NewInstance(instance, d.client, d.key, d.refreshTimeout, d.dialerID)
if err != nil {
d.lock.Unlock()
return nil, err
}
i = alloydb.NewInstance(
instance,
d.logger,
d.client, d.key,
d.refreshTimeout, d.dialerID,
)
d.logger.Debugf(
"[%v] Connection info added to cache",
instance.String(),
)
d.instances[instance] = i
}
d.lock.Unlock()
}
return i, nil
}
36 changes: 36 additions & 0 deletions internal/alloydb/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"

alloydbadmin "cloud.google.com/go/alloydb/apiv1alpha"
"cloud.google.com/go/alloydbconn/debug"
"cloud.google.com/go/alloydbconn/errtype"
"golang.org/x/time/rate"
)
Expand Down Expand Up @@ -131,6 +132,7 @@ type Instance struct {
openConns uint64

instanceURI InstanceURI
logger debug.Logger
key *rsa.PrivateKey
// refreshTimeout sets the maximum duration a refresh cycle can run
// for.
Expand All @@ -157,6 +159,7 @@ type Instance struct {
// NewInstance initializes a new Instance given an instance URI
func NewInstance(
instance InstanceURI,
l debug.Logger,
client *alloydbadmin.AlloyDBAdminClient,
key *rsa.PrivateKey,
refreshTimeout time.Duration,
Expand All @@ -165,6 +168,7 @@ func NewInstance(
ctx, cancel := context.WithCancel(context.Background())
i := &Instance{
instanceURI: instance,
logger: l,
key: key,
l: rate.NewLimiter(rate.Every(refreshInterval), refreshBurst),
r: newRefresher(client, dialerID),
Expand Down Expand Up @@ -281,10 +285,18 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {
r.timer = time.AfterFunc(d, func() {
// instance has been closed, don't schedule anything
if err := i.ctx.Err(); err != nil {
i.logger.Debugf(
"[%v] Instance is closed, stopping refresh operations",
i.instanceURI.String(),
)
r.err = err
close(r.ready)
return
}
i.logger.Debugf(
"[%v] Connection info refresh operation started",
i.instanceURI.String(),
)

ctx, cancel := context.WithTimeout(i.ctx, i.refreshTimeout)
defer cancel()
Expand All @@ -296,8 +308,22 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {
i.instanceURI.String(),
nil,
)
i.logger.Debugf(
"[%v] Connection info refresh operation failed, err = %v",
i.instanceURI.String(),
r.err,
)
} else {
r.result, r.err = i.r.performRefresh(i.ctx, i.instanceURI, i.key)
i.logger.Debugf(
"[%v] Connection info refresh operation complete",
i.instanceURI.String(),
)
i.logger.Debugf(
"[%v] Current certificate expiration = %v",
i.instanceURI.String(),
r.result.expiry.UTC().Format(time.RFC3339),
)
}

close(r.ready)
Expand All @@ -309,6 +335,10 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {

// if failed, scheduled the next refresh immediately
if r.err != nil {
i.logger.Debugf(
"[%v] Connection info refresh operation scheduled immediately",
i.instanceURI.String(),
)
i.next = i.scheduleRefresh(0)
// If the latest result is bad, avoid replacing the
// used result while it's still valid and potentially
Expand All @@ -325,6 +355,12 @@ func (i *Instance) scheduleRefresh(d time.Duration) *refreshOperation {
// the future
i.cur = r
t := refreshDuration(time.Now(), i.cur.result.expiry)
i.logger.Debugf(
"[%v] Connection info refresh operation scheduled at %v (now + %v)",
i.instanceURI.String(),
time.Now().Add(t).UTC().Format(time.RFC3339),
t.Round(time.Minute),
)
i.next = i.scheduleRefresh(t)
})
return r
Expand Down
13 changes: 10 additions & 3 deletions internal/alloydb/instance_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// Copyright 2020 Google LLC

//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//
// https://www.apache.org/licenses/LICENSE-2.0

//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -29,6 +29,10 @@ import (
"google.golang.org/api/option"
)

type nullLogger struct{}

func (nullLogger) Debugf(string, ...interface{}) {}

// genRSAKey generates an RSA key used for test.
func genRSAKey() *rsa.PrivateKey {
key, err := rsa.GenerateKey(rand.Reader, 2048)
Expand Down Expand Up @@ -150,6 +154,7 @@ func TestConnectInfo(t *testing.T) {

i := NewInstance(
testInstanceURI(),
nullLogger{},
c, RSAKey, 30*time.Second, "dialer-id",
)
if err != nil {
Expand Down Expand Up @@ -184,6 +189,7 @@ func TestConnectInfoErrors(t *testing.T) {
// Use a timeout that should fail instantly
i := NewInstance(
testInstanceURI(),
nullLogger{},
c, RSAKey, 0, "dialer-id",
)
if err != nil {
Expand Down Expand Up @@ -213,6 +219,7 @@ func TestClose(t *testing.T) {
// Set up an instance and then close it immediately
i := NewInstance(
testInstanceURI(),
nullLogger{},
c, RSAKey, 30, "dialer-ider",
)
if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions internal/alloydb/refresh.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// Copyright 2020 Google LLC

//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//
// https://www.apache.org/licenses/LICENSE-2.0

//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand Down
Loading

0 comments on commit 805e66a

Please sign in to comment.