-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: distinguish between normal node startup and snapshot loading #319
base: unstable
Are you sure you want to change the base?
Changes from all commits
dc6c861
610c28c
0e452d3
689eaa2
e38513c
42d7d0a
3f8ae41
dcdf232
6f9f6fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -122,7 +122,9 @@ void RaftNodeCmd::DoCmdRemove(PClient* client) { | |
} | ||
|
||
void RaftNodeCmd::DoCmdSnapshot(PClient* client) { | ||
auto s = PRAFT.DoSnapshot(); | ||
// @todo need to get self_snapshot_index | ||
// auto self_snapshot_index = PSTORE.GetBackend(client->GetCurrentDB())->GetStorage()->GetSmallestFlushedLogIndex(); | ||
auto s = PRAFT.DoSnapshot(); // self_snapshot_index | ||
Comment on lines
+125
to
+127
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Complete the implementation for `self_snapshot_index`. The commented-out code suggests that `self_snapshot_index` is meant to be obtained and passed to `PRAFT.DoSnapshot()`. |
||
if (s.ok()) { | ||
client->SetRes(CmdRes::kOK); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
|
||
#include <cassert> | ||
|
||
#include "braft/raft.h" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Tip: Codebase Verification. The verification process did not find any direct or indirect usage of entities from `braft/raft.h` in `src/praft/praft.cc`.
Analysis chain: Ensure the newly added `braft/raft.h` include is actually used. Let's further investigate if there are any indirect usages or dependencies that might require the inclusion of `braft/raft.h`. Scripts executed: The following scripts were executed for the analysis: Script: #!/bin/bash
# Description: Verify if the newly added include is used in the file.
# Test: Search for usage of entities from `braft/raft.h`. Expect: At least one usage.
ast-grep --lang cpp --pattern $'$_ { $$$ braft::raft::$$$ }' src/praft/praft.cc
Length of output: 80 Script: #!/bin/bash
# Description: Verify if there are any indirect usages or dependencies that might require the inclusion of `braft/raft.h`.
# Test 1: Search for any usage of `braft` namespace in the file.
ast-grep --lang cpp --pattern $'$_ { $$$ braft::$$$ }' src/praft/praft.cc
# Test 2: Search for any usage of `raft` keyword in the file.
rg --word-regexp 'raft' src/praft/praft.cc
Length of output: 343 |
||
#include "braft/snapshot.h" | ||
#include "braft/util.h" | ||
#include "brpc/server.h" | ||
|
@@ -150,7 +151,7 @@ butil::Status PRaft::Init(std::string& group_id, bool initial_conf_is_null) { | |
node_options_.fsm = this; | ||
node_options_.node_owns_fsm = false; | ||
node_options_.snapshot_interval_s = 0; | ||
std::string prefix = "local://" + g_config.db_path.ToString() + "_praft"; | ||
std::string prefix = "local://" + g_config.db_path.ToString() + std::to_string(db_id_) + "/_praft"; | ||
node_options_.log_uri = prefix + "/log"; | ||
node_options_.raft_meta_uri = prefix + "/raft_meta"; | ||
node_options_.snapshot_uri = prefix + "/snapshot"; | ||
|
@@ -241,6 +242,24 @@ butil::Status PRaft::GetListPeers(std::vector<braft::PeerId>* peers) { | |
return node_->list_peers(peers); | ||
} | ||
|
||
uint64_t PRaft::GetTerm(uint64_t log_index) { | ||
if (!node_) { | ||
ERROR("Node is not initialized"); | ||
return 0; | ||
} | ||
|
||
return node_->get_term(log_index); | ||
} | ||
|
||
uint64_t PRaft::GetLastLogIndex(bool is_flush) { | ||
if (!node_) { | ||
ERROR("Node is not initialized"); | ||
return 0; | ||
} | ||
|
||
return node_->get_last_log_index(is_flush); | ||
} | ||
|
||
void PRaft::SendNodeRequest(PClient* client) { | ||
assert(client); | ||
|
||
|
@@ -521,10 +540,17 @@ butil::Status PRaft::DoSnapshot(int64_t self_snapshot_index, bool is_sync) { | |
if (!node_) { | ||
return ERROR_LOG_AND_STATUS("Node is not initialized"); | ||
} | ||
braft::SynchronizedClosure done; | ||
node_->snapshot(&done, self_snapshot_index); | ||
done.wait(); | ||
return done.status(); | ||
|
||
if (is_sync) { | ||
braft::SynchronizedClosure done; | ||
node_->snapshot(&done, self_snapshot_index); | ||
done.wait(); | ||
return done.status(); | ||
} else { | ||
node_->snapshot(nullptr, self_snapshot_index); | ||
butil::Status status; | ||
return status; | ||
} | ||
} | ||
|
||
void PRaft::OnClusterCmdConnectionFailed([[maybe_unused]] EventLoop* loop, const char* peer_ip, int port) { | ||
|
@@ -629,10 +655,38 @@ void PRaft::on_snapshot_save(braft::SnapshotWriter* writer, braft::Closure* done | |
// Snapshot-load callback: restores this db from the snapshot exposed by `reader`.
//
// While `is_node_first_start_up_` is set, the call is treated as a node
// start-up: a replay point is handed to the raft node, the flag is cleared,
// and the checkpoint load is skipped if the node already has log entries.
// Otherwise (or when the log is empty) the db is loaded from the checkpoint
// found at the reader's path.
//
// @param reader  snapshot reader; must be non-null (asserted).
// @return 0 on every path visible in this function.
int PRaft::on_snapshot_load(braft::SnapshotReader* reader) {
  CHECK(!IsLeader()) << "Leader is not supposed to load snapshot";
  assert(reader);

  if (is_node_first_start_up_) {
    /*
      Pick the replay point for this db:
      1. After a normal shutdown all in-memory data has been flushed to disk and the
         snapshot is truncated to the latest entry, so the flush-index equals the
         apply-index at start-up and the maximum log index should be used.
      2. After an improper shutdown the minimum flush-index should be used as the
         starting point for fault recovery.
    */
    // @todo GetSmallestFlushedLogIndex
    uint64_t replay_point = PSTORE.GetBackend(db_id_)->GetStorage()->GetSmallestFlushedLogIndex();
    node_->set_self_playback_point(replay_point);
    is_node_first_start_up_ = false;
    INFO("set replay_point: {}", replay_point);

    /*
      A node that has just joined the cluster and holds no data does not load a
      local snapshot at start-up. In that case LoadDBFromCheckPoint is still
      required after the snapshot has been received from the leader, so only
      return early when the log is non-empty.
    */
    if (GetLastLogIndex() != 0) {
      return 0;
    }
  }

  // 3. When a snapshot is installed on a node, no playback point needs to be set.
  auto reader_path = reader->get_path();  // xx/snapshot_0000001
  TasksVector tasks(1, {TaskType::kLoadDBFromCheckpoint, db_id_, {{TaskArg::kCheckpointPath, reader_path}}, true});
  PSTORE.HandleTaskSpecificDB(tasks);
  INFO("load snapshot success!");
  return 0;
}
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,33 +11,44 @@ | |
#include "psnapshot.h" | ||
|
||
#include "braft/local_file_meta.pb.h" | ||
#include "braft/snapshot.h" | ||
#include "butil/files/file_path.h" | ||
|
||
#include "pstd/log.h" | ||
#include "pstd/pstd_string.h" | ||
|
||
#include "config.h" | ||
#include "praft.h" | ||
#include "store.h" | ||
|
||
namespace pikiwidb { | ||
|
||
extern PConfig g_config; | ||
|
||
braft::FileAdaptor* PPosixFileSystemAdaptor::open(const std::string& path, int oflag, | ||
const ::google::protobuf::Message* file_meta, butil::File::Error* e) { | ||
if ((oflag & IS_RDONLY) == 0) { // This is a read operation | ||
bool snapshots_exists = false; | ||
std::string snapshot_path; | ||
int db_id = -1; | ||
|
||
// parse snapshot path | ||
butil::FilePath parse_snapshot_path(path); | ||
std::vector<std::string> components; | ||
bool is_find_db = false; | ||
parse_snapshot_path.GetComponents(&components); | ||
for (auto component : components) { | ||
for (const auto& component : components) { | ||
snapshot_path += component + "/"; | ||
|
||
if (is_find_db && pstd::String2int(component, &db_id)) { | ||
is_find_db = false; | ||
} | ||
|
||
if (component.find("snapshot_") != std::string::npos) { | ||
break; | ||
} else if (component == "db") { | ||
is_find_db = true; | ||
} | ||
} | ||
|
||
// check whether snapshots have been created | ||
std::lock_guard<braft::raft_mutex_t> guard(mutex_); | ||
if (!snapshot_path.empty()) { | ||
|
@@ -55,6 +66,8 @@ braft::FileAdaptor* PPosixFileSystemAdaptor::open(const std::string& path, int o | |
|
||
// Snapshot generation | ||
if (!snapshots_exists) { | ||
assert(db_id >= 0); | ||
|
||
braft::LocalSnapshotMetaTable snapshot_meta_memtable; | ||
std::string meta_path = snapshot_path + "/" PRAFT_SNAPSHOT_META_FILE; | ||
INFO("start to generate snapshot in path {}", snapshot_path); | ||
|
@@ -66,6 +79,14 @@ braft::FileAdaptor* PPosixFileSystemAdaptor::open(const std::string& path, int o | |
PSTORE.HandleTaskSpecificDB(tasks); | ||
AddAllFiles(snapshot_path, &snapshot_meta_memtable, snapshot_path); | ||
|
||
// update snapshot last log index and last_log_term | ||
auto& new_meta = const_cast<braft::SnapshotMeta&>(snapshot_meta_memtable.meta()); | ||
auto last_log_index = 30000; // @todo PSTORE.GetBackend(db_id)->GetStorage()->GetSmallestFlushedLogIndex(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这行是不是忘换了 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 嗯嗯 忘记换了 |
||
new_meta.set_last_included_index(last_log_index); | ||
auto last_log_term = PRAFT.GetTerm(last_log_index); | ||
new_meta.set_last_included_term(last_log_term); | ||
INFO("Succeed to fix snapshot meta: {}, {}", last_log_index, last_log_term); | ||
|
||
auto rc = snapshot_meta_memtable.save_to_file(fs, meta_path); | ||
if (rc == 0) { | ||
INFO("Succeed to save snapshot in path {}", snapshot_path); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这里既然设为1了,要不要把上边 databases 数量也调整成默认为1?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
可以