-
Notifications
You must be signed in to change notification settings - Fork 65
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Interrupt Ghostferry via signals and resume from SerializedState #67
Changes from all commits
3943a4f
4252d9e
b2b38cb
8a77101
4398c6f
03d9484
c501639
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,8 +15,9 @@ import ( | |
const caughtUpThreshold = 10 * time.Second | ||
|
||
type BinlogStreamer struct { | ||
Db *sql.DB | ||
Config *Config | ||
DB *sql.DB | ||
DBConfig DatabaseConfig | ||
MyServerId uint32 | ||
ErrorHandler ErrorHandler | ||
Filter CopyFilter | ||
|
||
|
@@ -35,37 +36,37 @@ type BinlogStreamer struct { | |
eventListeners []func([]DMLEvent) error | ||
} | ||
|
||
func (s *BinlogStreamer) Initialize() error { | ||
s.logger = logrus.WithField("tag", "binlog_streamer") | ||
s.stopRequested = false | ||
return nil | ||
func (s *BinlogStreamer) ensureLogger() { | ||
if s.logger == nil { | ||
s.logger = logrus.WithField("tag", "binlog_streamer") | ||
} | ||
} | ||
|
||
func (s *BinlogStreamer) createBinlogSyncer() error { | ||
var err error | ||
var tlsConfig *tls.Config | ||
|
||
if s.Config.Source.TLS != nil { | ||
tlsConfig, err = s.Config.Source.TLS.BuildConfig() | ||
if s.DBConfig.TLS != nil { | ||
tlsConfig, err = s.DBConfig.TLS.BuildConfig() | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
|
||
if s.Config.MyServerId == 0 { | ||
s.Config.MyServerId, err = s.generateNewServerId() | ||
if s.MyServerId == 0 { | ||
s.MyServerId, err = s.generateNewServerId() | ||
if err != nil { | ||
s.logger.WithError(err).Error("could not generate unique server_id") | ||
return err | ||
} | ||
} | ||
|
||
syncerConfig := replication.BinlogSyncerConfig{ | ||
ServerID: s.Config.MyServerId, | ||
Host: s.Config.Source.Host, | ||
Port: s.Config.Source.Port, | ||
User: s.Config.Source.User, | ||
Password: s.Config.Source.Pass, | ||
ServerID: s.MyServerId, | ||
Host: s.DBConfig.Host, | ||
Port: s.DBConfig.Port, | ||
User: s.DBConfig.User, | ||
Password: s.DBConfig.Pass, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
TLSConfig: tlsConfig, | ||
UseDecimal: true, | ||
TimestampStringLocation: time.UTC, | ||
|
@@ -76,22 +77,31 @@ func (s *BinlogStreamer) createBinlogSyncer() error { | |
} | ||
|
||
func (s *BinlogStreamer) ConnectBinlogStreamerToMysql() error { | ||
err := s.createBinlogSyncer() | ||
s.ensureLogger() | ||
|
||
currentPosition, err := ShowMasterStatusBinlogPosition(s.DB) | ||
if err != nil { | ||
s.logger.WithError(err).Error("failed to read current binlog position") | ||
return err | ||
} | ||
|
||
s.logger.Info("reading current binlog position") | ||
s.lastStreamedBinlogPosition, err = ShowMasterStatusBinlogPosition(s.Db) | ||
return s.ConnectBinlogStreamerToMysqlFrom(currentPosition) | ||
} | ||
|
||
func (s *BinlogStreamer) ConnectBinlogStreamerToMysqlFrom(startFromBinlogPosition mysql.Position) error { | ||
s.ensureLogger() | ||
|
||
err := s.createBinlogSyncer() | ||
if err != nil { | ||
s.logger.WithError(err).Error("failed to read current binlog position") | ||
return err | ||
} | ||
|
||
s.lastStreamedBinlogPosition = startFromBinlogPosition | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. feels like this should only be set after the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Having it set there would be kind of redundant as this is an internal tracking variable for the main BinlogStreamer loop. Basically, it would have to look like:
Without setting lastStreamedBinlogPosition before we start that loop, the conditional would be more complex. Note that this variable has nothing to do with the StateTracker binlog position, which is set in BinlogWriter and once in Ferry during initial connection. |
||
|
||
s.logger.WithFields(logrus.Fields{ | ||
"file": s.lastStreamedBinlogPosition.Name, | ||
"pos": s.lastStreamedBinlogPosition.Pos, | ||
}).Info("found binlog position, starting synchronization") | ||
}).Info("starting binlog streaming") | ||
|
||
s.binlogStreamer, err = s.binlogSyncer.StartSync(s.lastStreamedBinlogPosition) | ||
if err != nil { | ||
|
@@ -103,6 +113,8 @@ func (s *BinlogStreamer) ConnectBinlogStreamerToMysql() error { | |
} | ||
|
||
func (s *BinlogStreamer) Run() { | ||
s.ensureLogger() | ||
|
||
defer func() { | ||
s.logger.Info("exiting binlog streamer") | ||
s.binlogSyncer.Close() | ||
|
@@ -192,7 +204,7 @@ func (s *BinlogStreamer) FlushAndStop() { | |
// passed the initial target position. | ||
err := WithRetries(100, 600*time.Millisecond, s.logger, "read current binlog position", func() error { | ||
var err error | ||
s.targetBinlogPosition, err = ShowMasterStatusBinlogPosition(s.Db) | ||
s.targetBinlogPosition, err = ShowMasterStatusBinlogPosition(s.DB) | ||
return err | ||
}) | ||
|
||
|
@@ -290,7 +302,7 @@ func (s *BinlogStreamer) generateNewServerId() (uint32, error) { | |
for { | ||
id = randomServerId() | ||
|
||
exists, err := idExistsOnServer(id, s.Db) | ||
exists, err := idExistsOnServer(id, s.DB) | ||
if err != nil { | ||
return 0, err | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,9 +237,19 @@ type Config struct { | |
// Optional: defaults to false | ||
AutomaticCutover bool | ||
|
||
// This specifies whether or not Ferry.Run will handle SIGINT and SIGTERM | ||
// by dumping the current state to stdout. | ||
// This state can be used to resume Ghostferry. | ||
DumpStateToStdoutOnSignal bool | ||
|
||
// Config for the ControlServer | ||
ServerBindAddr string | ||
WebBasedir string | ||
|
||
// The state to resume from as dumped by the PanicErrorHandler. | ||
// If this is null, a new Ghostferry run will be started. Otherwise, the | ||
// reconciliation process will start and Ghostferry will resume after that. | ||
StateToResumeFrom *SerializableState | ||
} | ||
|
||
func (c *Config) ValidateConfig() error { | ||
|
@@ -255,6 +265,10 @@ func (c *Config) ValidateConfig() error { | |
return fmt.Errorf("Table filter function must be provided") | ||
} | ||
|
||
if c.StateToResumeFrom != nil && c.StateToResumeFrom.GhostferryVersion != VersionString { | ||
return fmt.Errorf("StateToResumeFrom version mismatch: resume = %s, current = %s", c.StateToResumeFrom.GhostferryVersion, VersionString) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this just being defensive or there's a legitimate reason? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Defensive for now as there's no guarantee that we won't change things. If we want to force resume, we can just set the version passed to Ghostferry to the same one as the Ghostferry instance that's launched. |
||
|
||
if c.DBWriteRetries == 0 { | ||
c.DBWriteRetries = 5 | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,23 +40,21 @@ type CursorConfig struct { | |
} | ||
|
||
// returns a new Cursor with an embedded copy of itself | ||
func (c *CursorConfig) NewCursor(table *schema.Table, maxPk uint64) *Cursor { | ||
func (c *CursorConfig) NewCursor(table *schema.Table, startPk, maxPk uint64) *Cursor { | ||
return &Cursor{ | ||
CursorConfig: *c, | ||
Table: table, | ||
MaxPrimaryKey: maxPk, | ||
RowLock: true, | ||
CursorConfig: *c, | ||
Table: table, | ||
MaxPrimaryKey: maxPk, | ||
RowLock: true, | ||
lastSuccessfulPrimaryKey: startPk, | ||
} | ||
} | ||
|
||
// returns a new Cursor with an embedded copy of itself | ||
func (c *CursorConfig) NewCursorWithoutRowLock(table *schema.Table, maxPk uint64) *Cursor { | ||
return &Cursor{ | ||
CursorConfig: *c, | ||
Table: table, | ||
MaxPrimaryKey: maxPk, | ||
RowLock: false, | ||
} | ||
func (c *CursorConfig) NewCursorWithoutRowLock(table *schema.Table, startPk, maxPk uint64) *Cursor { | ||
cursor := c.NewCursor(table, startPk, maxPk) | ||
cursor.RowLock = false | ||
return cursor | ||
} | ||
|
||
type Cursor struct { | ||
|
@@ -72,7 +70,6 @@ type Cursor struct { | |
} | ||
|
||
func (c *Cursor) Each(f func(*RowBatch) error) error { | ||
c.lastSuccessfulPrimaryKey = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what was this here for to begin with? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was to ensure that subsequent calls to We only ever call this method once. |
||
c.logger = logrus.WithFields(logrus.Fields{ | ||
"table": c.Table.String(), | ||
"tag": "cursor", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,8 +2,8 @@ package ghostferry | |
|
||
import ( | ||
"database/sql" | ||
"errors" | ||
"fmt" | ||
"math" | ||
"sync" | ||
|
||
"github.com/siddontang/go-mysql/schema" | ||
|
@@ -25,20 +25,18 @@ type DataIterator struct { | |
logger *logrus.Entry | ||
} | ||
|
||
func (d *DataIterator) Initialize() error { | ||
func (d *DataIterator) Run() { | ||
d.logger = logrus.WithField("tag", "data_iterator") | ||
d.targetPKs = &sync.Map{} | ||
|
||
// If a state tracker is not provided, then the caller doesn't care about | ||
// tracking state. However, some methods are still useful so we initialize | ||
// a minimal local instance. | ||
if d.StateTracker == nil { | ||
return errors.New("StateTracker must be defined") | ||
d.StateTracker = NewStateTracker(0) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (d *DataIterator) Run() { | ||
d.logger.WithField("tablesCount", len(d.Tables)).Info("starting data iterator run") | ||
|
||
tablesWithData, emptyTables, err := MaxPrimaryKeys(d.DB, d.Tables, d.logger) | ||
if err != nil { | ||
d.ErrorHandler.Fatal("data_iterator", err) | ||
|
@@ -49,7 +47,14 @@ func (d *DataIterator) Run() { | |
} | ||
|
||
for table, maxPk := range tablesWithData { | ||
d.targetPKs.Store(table.String(), maxPk) | ||
tableName := table.String() | ||
if d.StateTracker.IsTableComplete(tableName) { | ||
// In a previous run, the table may have been completed. | ||
// We don't need to reiterate those tables as it has already been done. | ||
delete(tablesWithData, table) | ||
} else { | ||
d.targetPKs.Store(table.String(), maxPk) | ||
} | ||
} | ||
|
||
tablesQueue := make(chan *schema.Table) | ||
|
@@ -76,7 +81,15 @@ func (d *DataIterator) Run() { | |
return | ||
} | ||
|
||
cursor := d.CursorConfig.NewCursor(table, targetPKInterface.(uint64)) | ||
startPk := d.StateTracker.LastSuccessfulPK(table.String()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's |
||
if startPk == math.MaxUint64 { | ||
err := fmt.Errorf("%v has been marked as completed but a table iterator has been spawned, this is likely a programmer error which resulted in the inconsistent starting state", table.String()) | ||
logger.WithError(err).Error("this is definitely a bug") | ||
d.ErrorHandler.Fatal("data_iterator", err) | ||
return | ||
} | ||
|
||
cursor := d.CursorConfig.NewCursor(table, startPk, targetPKInterface.(uint64)) | ||
err := cursor.Each(func(batch *RowBatch) error { | ||
metrics.Count("RowEvent", int64(batch.Size()), []MetricTag{ | ||
MetricTag{"table", table.Name}, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the reasoning for removing the
Initialize
method?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a legacy of the original Ghostferry design. This method doesn't do much and just complicates the API. Anything it does can be done directly in the
Run
method, or within theFerry.NewBinlogStreamer
methods.