Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(dot/state/epoch, lib/babe): enable block production through epochs without rely on finalization #2593

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
3fc7d75
fix: block production to be independant of finalized blocks
EclesioMeloJunior Jun 9, 2022
960dd15
chore: fix
EclesioMeloJunior Jun 9, 2022
ed5b41b
chore: remove not need check functions
EclesioMeloJunior Jun 9, 2022
bbf4b15
chore: inserting locks
EclesioMeloJunior Jun 9, 2022
6e87289
Merge branch 'development' into eclesio/fix/block-production-for-epochs
EclesioMeloJunior Jun 9, 2022
46bde58
chore: remove deadcode
EclesioMeloJunior Jun 9, 2022
697abcc
chore: fix wrapped error
EclesioMeloJunior Jun 13, 2022
719a724
chore: remove not needed diffs
EclesioMeloJunior Jun 13, 2022
246e442
chore: remove not needed diffs
EclesioMeloJunior Jun 13, 2022
2ea979c
chore: simplify the conditions and fix the get config method from epo…
EclesioMeloJunior Jun 16, 2022
4f0f701
chore: fixing a infof->debugf
EclesioMeloJunior Jun 16, 2022
2a637d4
Merge branch 'development' into eclesio/fix/block-production-for-epochs
EclesioMeloJunior Jun 22, 2022
5eac5bb
chore: pass `*BlockState` instead of `*EpochState`
EclesioMeloJunior Jun 22, 2022
5e57512
Merge branch 'development' into eclesio/fix/block-production-for-epochs
EclesioMeloJunior Jun 23, 2022
07a1d9c
chore: remove `chain/dev/chain-spec` diffs
EclesioMeloJunior Jun 27, 2022
1f3e18d
chore: fix mocks expected calls
EclesioMeloJunior Jun 27, 2022
cf0d28d
Merge branch 'development' into eclesio/fix/block-production-for-epochs
EclesioMeloJunior Jun 29, 2022
f196dbb
Merge branch 'development' into eclesio/fix/block-production-for-epochs
EclesioMeloJunior Jun 30, 2022
9b69ffb
chore: fix test `getEpochDataAndStartSlot`
EclesioMeloJunior Jun 30, 2022
257c2bb
create nextEpochMap type (#2633)
timwu20 Jul 4, 2022
612a293
update mockery version to 2.14
EclesioMeloJunior Jul 4, 2022
89d544e
Merge branch 'development' into eclesio/fix/block-production-for-epochs
EclesioMeloJunior Jul 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
228 changes: 114 additions & 114 deletions chain/dev/genesis-spec.json

Large diffs are not rendered by default.

170 changes: 72 additions & 98 deletions dot/state/epoch.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@ import (

"github.com/ChainSafe/chaindb"
"github.com/ChainSafe/gossamer/dot/types"
"github.com/ChainSafe/gossamer/lib/blocktree"
"github.com/ChainSafe/gossamer/lib/common"
"github.com/ChainSafe/gossamer/pkg/scale"
)

var (
ErrEpochNotInMemory = errors.New("epoch not found in memory map")
errHashNotInMemory = errors.New("hash not found in memory map")
errHashNotPersisted = errors.New("hash with next epoch not found in database")
ErrEpochNotInMemory = errors.New("epoch not found in memory map")
errHashNotInMemory = errors.New("hash not found in memory map")
errEpochNotInDatabase = errors.New("epoch data not found in the database")
errConfigNotFound = errors.New("config data not found")
errHashNotPersisted = errors.New("hash with next epoch not found in database")
)

var (
Expand Down Expand Up @@ -251,14 +254,26 @@ func (s *EpochState) GetEpochData(epoch uint64, header *types.Header) (*types.Ep

if err != nil && !errors.Is(err, chaindb.ErrKeyNotFound) {
return nil, fmt.Errorf("failed to get epoch data from database: %w", err)
} else if header == nil {
// if no header is given then skip the lookup in-memory
return epochData, nil
}

epochData, err = s.getEpochDataFromMemory(epoch, header)
if err != nil {
return nil, fmt.Errorf("failed to get epoch data from memory: %w", err)
// lookup in-memory only if header is given
if header != nil && errors.Is(err, chaindb.ErrKeyNotFound) {
s.nextEpochDataLock.RLock()
defer s.nextEpochDataLock.RUnlock()

inMemoryEpochData, err := retrieveFromMemory(s.nextEpochData, s, epoch, header)
if err != nil {
return nil, fmt.Errorf("failed to get epoch data from memory: %w", err)
}

epochData, err = inMemoryEpochData.ToEpochData()
if err != nil {
return nil, fmt.Errorf("cannot transform into epoch data: %w", err)
}
}

if epochData == nil {
return nil, errEpochNotInDatabase
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
}

return epochData, nil
Expand All @@ -280,32 +295,6 @@ func (s *EpochState) getEpochDataFromDatabase(epoch uint64) (*types.EpochData, e
return raw.ToEpochData()
}

// getEpochDataFromMemory retrieves the right epoch data that belongs to the header parameter
func (s *EpochState) getEpochDataFromMemory(epoch uint64, header *types.Header) (*types.EpochData, error) {
s.nextEpochDataLock.RLock()
defer s.nextEpochDataLock.RUnlock()

atEpoch, has := s.nextEpochData[epoch]
if !has {
return nil, fmt.Errorf("%w: %d", ErrEpochNotInMemory, epoch)
}

headerHash := header.Hash()

for hash, value := range atEpoch {
isDescendant, err := s.blockState.IsDescendantOf(hash, headerHash)
if err != nil {
return nil, fmt.Errorf("cannot verify the ancestry: %w", err)
}

if isDescendant {
return value.ToEpochData()
}
}

return nil, fmt.Errorf("%w: %s", errHashNotInMemory, headerHash)
}

// GetLatestEpochData returns the EpochData for the current epoch
func (s *EpochState) GetLatestEpochData() (*types.EpochData, error) {
curr, err := s.GetCurrentEpoch()
Expand All @@ -316,26 +305,6 @@ func (s *EpochState) GetLatestEpochData() (*types.EpochData, error) {
return s.GetEpochData(curr, nil)
}

// HasEpochData returns whether epoch data exists for a given epoch
func (s *EpochState) HasEpochData(epoch uint64) (bool, error) {
has, err := s.db.Has(epochDataKey(epoch))
if err == nil && has {
return has, nil
}

// we can have `has == false` and `err == nil`
// so ensure the error is not nil in the condition below.
if err != nil && !errors.Is(chaindb.ErrKeyNotFound, err) {
return false, fmt.Errorf("cannot check database for epoch key %d: %w", epoch, err)
}

s.nextEpochDataLock.Lock()
defer s.nextEpochDataLock.Unlock()

_, has = s.nextEpochData[epoch]
return has, nil
}

// SetConfigData sets the BABE config data for a given epoch
func (s *EpochState) SetConfigData(epoch uint64, info *types.ConfigData) error {
enc, err := scale.Marshal(*info)
Expand All @@ -357,28 +326,41 @@ func (s *EpochState) setLatestConfigData(epoch uint64) error {
return s.db.Put(latestConfigDataKey, buf)
}

// GetConfigData returns the config data for a given epoch persisted in database
// otherwise tries to get the data from the in-memory map using the header.
// If the header params is nil then it will search only in the database
func (s *EpochState) GetConfigData(epoch uint64, header *types.Header) (*types.ConfigData, error) {
configData, err := s.getConfigDataFromDatabase(epoch)
if err == nil && configData != nil {
return configData, nil
// GetConfigData returns the newest config data for a given epoch persisted in database
// otherwise tries to get the data from the in-memory map using the header. If we don't
// find any config data for the current epoch we lookup in the previous epochs, as the spec says:
// - The supplied configuration data are intended to be used from the next epoch onwards.
// If the header params is nil then it will search only in the database.
func (s *EpochState) GetConfigData(epoch uint64, header *types.Header) (configData *types.ConfigData, err error) {
if header != nil {
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
s.nextConfigDataLock.RLock()
defer s.nextConfigDataLock.RUnlock()
}

if err != nil && !errors.Is(err, chaindb.ErrKeyNotFound) {
return nil, fmt.Errorf("failed to get config data from database: %w", err)
} else if header == nil {
// if no header is given then skip the lookup in-memory
return configData, nil
}
for tryEpoch := epoch; tryEpoch >= 0; tryEpoch-- {
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
configData, err = s.getConfigDataFromDatabase(tryEpoch)
if err == nil && configData != nil {
return configData, nil
kishansagathiya marked this conversation as resolved.
Show resolved Hide resolved
}

configData, err = s.getConfigDataFromMemory(epoch, header)
if err != nil {
return nil, fmt.Errorf("failed to get config data from memory: %w", err)
if err != nil && !errors.Is(err, chaindb.ErrKeyNotFound) {
return nil, fmt.Errorf("failed to get config data from database: %w", err)
}

// lookup in-memory only if header is given
if header != nil && errors.Is(err, chaindb.ErrKeyNotFound) {
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
inMemoryConfigData, err := retrieveFromMemory(s.nextConfigData, s, epoch, header)
if errors.Is(err, ErrEpochNotInMemory) {
continue
} else if err != nil {
return nil, fmt.Errorf("failed to get config data from memory: %w", err)
}

return inMemoryConfigData.ToConfigData(), err
}
}

return configData, nil
return nil, errConfigNotFound
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
}

// getConfigDataFromDatabase returns the BABE config data for a given epoch persisted in database
Expand All @@ -397,26 +379,36 @@ func (s *EpochState) getConfigDataFromDatabase(epoch uint64) (*types.ConfigData,
return info, nil
}

// getConfigDataFromMemory retrieves the BABE config data for a given epoch that belongs to the header parameter
func (s *EpochState) getConfigDataFromMemory(epoch uint64, header *types.Header) (*types.ConfigData, error) {
s.nextConfigDataLock.RLock()
defer s.nextConfigDataLock.RUnlock()
func retrieveFromMemory[T types.NextEpochData | types.NextConfigData](nextEpochMap map[uint64]map[common.Hash]T,
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved
es *EpochState, epoch uint64, header *types.Header) (*T, error) {

atEpoch, has := s.nextConfigData[epoch]
atEpoch, has := nextEpochMap[epoch]
if !has {
return nil, fmt.Errorf("%w: %d", ErrEpochNotInMemory, epoch)
}

headerHash := header.Hash()

for hash, value := range atEpoch {
isDescendant, err := s.blockState.IsDescendantOf(hash, headerHash)
isDescendant, err := es.blockState.IsDescendantOf(hash, headerHash)

// sometimes while moving to the next epoch is possible the header
// is not fully imported by the blocktree, in this case we will use
// its parent header which migth be already imported.
if errors.Is(err, blocktree.ErrEndNodeNotFound) {
parentHeader, err := es.blockState.GetHeader(header.ParentHash)
if err != nil {
return nil, fmt.Errorf("cannot get parent header: %w", err)
}

return retrieveFromMemory(nextEpochMap, es, epoch, parentHeader)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what do you mean the header is not fully imported? a block can only be imported or not imported afaik

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem I notice is that at the block where we should change to the next epoch, I got the error blocktree.ErrEndNodeNotFound on function chainProcessor.processBlockData.

This method (processBlockData) calls the method handleHeader which calls the babeVerifier.VerifyBlock(header), and the variable header contains the header value of the first block in the next epoch. The VerifyBlock method (lib/babe/verify.go) calls the method getVerifierInfo which calls the epochState.GetEpochData using the variables header and epoch.

The epochState.GetEpochData did not find anything in the database for epoch 1 so we look up in the memory map using the header and that is when the problem happens, the block state is not aware of the header (first block in the next epoch) and return: end node does not exist.

The fully error is:

2022-06-08T18:30:12+05:30 EROR block data processing for block with hash 0xd671d24ff29cf990010bfc98a54981e06079ad71f900f86881e30b186ea5d158 failed: could not verify block: failed to get verifier info for block 23: failed to get epoch data for epoch 1: failed to get epoch data from memory: cannot verify the ancestry: end node does not exist	chain_processor.go:L84	pkg=sync

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that makes sense, this seems like a decent fix, but maybe we should just pass the parent hash into this function instead, since the parent would always be imported already (i think). what do you think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@noot yeah, it can work but we should use the parent header only if we got a problem with the current header, passing the parent header directly can cause the same problem leading us to use the parent of the parent. So keeping this implementation we can handle cases where the block is not in the block tree but some ancestor is and retrieves the right config/epoch data.


if err != nil {
return nil, fmt.Errorf("cannot verify the ancestry: %w", err)
}

if isDescendant {
return value.ToConfigData(), nil
return &value, nil
}
}

Expand All @@ -434,24 +426,6 @@ func (s *EpochState) GetLatestConfigData() (*types.ConfigData, error) {
return s.GetConfigData(epoch, nil)
}

// HasConfigData returns whether config data exists for a given epoch
func (s *EpochState) HasConfigData(epoch uint64) (bool, error) {
has, err := s.db.Has(configDataKey(epoch))
if err == nil && has {
return has, nil
}

if err != nil && !errors.Is(chaindb.ErrKeyNotFound, err) {
return false, fmt.Errorf("cannot check database for epoch key %d: %w", epoch, err)
}

s.nextConfigDataLock.Lock()
defer s.nextConfigDataLock.Unlock()

_, has = s.nextConfigData[epoch]
return has, nil
}

// GetStartSlotForEpoch returns the first slot in the given epoch.
// If 0 is passed as the epoch, it returns the start slot for the current epoch.
func (s *EpochState) GetStartSlotForEpoch(epoch uint64) (uint64, error) {
Expand Down
3 changes: 0 additions & 3 deletions dot/state/epoch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ func TestEpochState_CurrentEpoch(t *testing.T) {

func TestEpochState_EpochData(t *testing.T) {
s := newEpochStateFromGenesis(t)
has, err := s.HasEpochData(0)
require.NoError(t, err)
require.True(t, has)

keyring, err := keystore.NewSr25519Keyring()
require.NoError(t, err)
Expand Down
83 changes: 29 additions & 54 deletions lib/babe/epoch.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
// initiateEpoch sets the epochData for the given epoch, runs the lottery for the slots in the epoch,
// and stores updated EpochInfo in the database
func (b *Service) initiateEpoch(epoch uint64) (*epochData, error) {
logger.Debugf("initiating epoch %d", epoch)
logger.Infof("initiating epoch %d", epoch)
EclesioMeloJunior marked this conversation as resolved.
Show resolved Hide resolved

// if epoch == 1, check that first slot is still set correctly
// ie. that the start slot of the network is the same as the slot number of block 1
Expand All @@ -24,15 +24,25 @@ func (b *Service) initiateEpoch(epoch uint64) (*epochData, error) {
}
}

epochData, startSlot, err := b.getEpochDataAndStartSlot(epoch)
bestBlockHeader, err := b.blockState.BestBlockHeader()
if err != nil {
return nil, fmt.Errorf("cannot get the best block header: %w", err)
}

epochData, err := b.getEpochData(epoch, bestBlockHeader)
if err != nil {
return nil, fmt.Errorf("cannot get epoch data and start slot: %w", err)
}

startSlot, err := b.epochState.GetStartSlotForEpoch(epoch)
if err != nil {
return nil, fmt.Errorf("cannot get start slot for epoch %d: %w", epoch, err)
}

// if we're at genesis, we need to determine when the first slot of the network will be
// by checking when we will be able to produce block 1.
// note that this assumes there will only be one producer of block 1
if b.blockState.BestBlockHash() == b.blockState.GenesisHash() {
if bestBlockHeader.Hash() == b.blockState.GenesisHash() {
startSlot, err = b.getFirstAuthoringSlot(epoch, epochData)
if err != nil {
return nil, fmt.Errorf("cannot get first authoring slot: %w", err)
Expand Down Expand Up @@ -75,78 +85,43 @@ func (b *Service) checkAndSetFirstSlot() error {
return nil
}

func (b *Service) getEpochDataAndStartSlot(epoch uint64) (*epochData, uint64, error) {
func (b *Service) getEpochData(epoch uint64, bestBlock *types.Header) (*epochData, error) {
if epoch == 0 {
startSlot, err := b.epochState.GetStartSlotForEpoch(epoch)
if err != nil {
return nil, 0, fmt.Errorf("cannot get start slot for epoch %d: %w", epoch, err)
}

epochData, err := b.getLatestEpochData()
if err != nil {
return nil, 0, fmt.Errorf("cannot get latest epoch data: %w", err)
return nil, fmt.Errorf("cannot get latest epoch data: %w", err)
}

return epochData, startSlot, nil
}

has, err := b.epochState.HasEpochData(epoch)
if err != nil {
return nil, 0, fmt.Errorf("cannot check epoch state: %w", err)
}

if !has {
logger.Criticalf("%s number=%d", errNoEpochData, epoch)
return nil, 0, fmt.Errorf("%w: for epoch %d", errNoEpochData, epoch)
return epochData, nil
}

data, err := b.epochState.GetEpochData(epoch, nil)
currEpochData, err := b.epochState.GetEpochData(epoch, bestBlock)
if err != nil {
return nil, 0, fmt.Errorf("cannot get epoch data for epoch %d: %w", epoch, err)
return nil, fmt.Errorf("cannot get epoch data for epoch %d: %w", epoch, err)
}

idx, err := b.getAuthorityIndex(data.Authorities)
currentConfigData, err := b.epochState.GetConfigData(epoch, bestBlock)
if err != nil {
return nil, 0, fmt.Errorf("cannot get authority index: %w", err)
return nil, fmt.Errorf("cannot get config data for epoch %d: %w", epoch, err)
}

has, err = b.epochState.HasConfigData(epoch)
threshold, err := CalculateThreshold(currentConfigData.C1, currentConfigData.C2, len(currEpochData.Authorities))
if err != nil {
return nil, 0, fmt.Errorf("cannot check for config data for epoch %d: %w", epoch, err)
}

var cfgData *types.ConfigData
if has {
cfgData, err = b.epochState.GetConfigData(epoch, nil)
if err != nil {
return nil, 0, fmt.Errorf("cannot get config data for epoch %d: %w", epoch, err)
}
} else {
cfgData, err = b.epochState.GetLatestConfigData()
if err != nil {
return nil, 0, fmt.Errorf("cannot get latest config data from epoch state: %w", err)
}
return nil, fmt.Errorf("cannot calculate threshold: %w", err)
}

threshold, err := CalculateThreshold(cfgData.C1, cfgData.C2, len(data.Authorities))
idx, err := b.getAuthorityIndex(currEpochData.Authorities)
if err != nil {
return nil, 0, fmt.Errorf("cannot calculate threshold: %w", err)
return nil, fmt.Errorf("cannot get authority index: %w", err)
}

ed := &epochData{
randomness: data.Randomness,
authorities: data.Authorities,
return &epochData{
randomness: currEpochData.Randomness,
authorities: currEpochData.Authorities,
authorityIndex: idx,
threshold: threshold,
allowedSlots: types.AllowedSlots(cfgData.SecondarySlots),
}

startSlot, err := b.epochState.GetStartSlotForEpoch(epoch)
if err != nil {
return nil, 0, fmt.Errorf("cannot get start slot for epoch %d: %w", epoch, err)
}

return ed, startSlot, nil
allowedSlots: types.AllowedSlots(currentConfigData.SecondarySlots),
}, nil
}

func (b *Service) getLatestEpochData() (resEpochData *epochData, error error) {
Expand Down