Skip to content

Commit

Permalink
implement removal of files after merging/deletion
Browse files Browse the repository at this point in the history
  • Loading branch information
hendrikmuhs committed Jan 20, 2018
1 parent 812c0c1 commit 52fb4d3
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 5 deletions.
2 changes: 1 addition & 1 deletion keyvi/include/keyvi/index/internal/index_writer_worker.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ class IndexWriterWorker final {
// delete old segment files
for (const segment_t& s : p.Segments()) {
TRACE("delete old file: %s", s->GetFilename().c_str());
std::remove(s->GetPath().string().c_str());
s->RemoveFiles();
}

p.SetMerged();
Expand Down
16 changes: 15 additions & 1 deletion keyvi/include/keyvi/index/internal/segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#ifndef KEYVI_INDEX_INTERNAL_SEGMENT_H_
#define KEYVI_INDEX_INTERNAL_SEGMENT_H_

#include <cstdio>
#include <memory>
#include <set>
#include <string>
Expand Down Expand Up @@ -110,12 +111,25 @@ class Segment final {
new_delete_ = true;
Persist();
deleted_keys_during_merge_.clear();
// todo: remove dkm file
// remove dkm file
std::string deleted_keys_file_merge{GetPath().string() + ".dkm"};
std::remove(deleted_keys_file_merge.c_str());
}
}

bool MarkedForMerge() const { return in_merge_; }

void RemoveFiles() {
// delete files, not all files might exist, therefore ignore the output
std::remove(GetPath().c_str());

std::string deleted_keys_file_merge{GetPath().string() + ".dkm"};
std::remove(deleted_keys_file_merge.c_str());

std::string deleted_keys_file{GetPath().string() + ".dk"};
std::remove(deleted_keys_file.c_str());
}

void DeleteKey(const std::string& key) {
if (!GetDictionary()->Contains(key)) {
return;
Expand Down
1 change: 1 addition & 0 deletions keyvi/include/keyvi/testing/temp_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#ifndef KEYVI_TESTING_TEMP_DICTIONARY_H_
#define KEYVI_TESTING_TEMP_DICTIONARY_H_

#include <cstdio>
#include <string>
#include <utility>
#include <vector>
Expand Down
36 changes: 33 additions & 3 deletions keyvi/tests/keyvi/index/internal/segment_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
//

#include <algorithm>

#include <boost/filesystem.hpp>
#include <boost/test/unit_test.hpp>

#include <msgpack.hpp>
Expand Down Expand Up @@ -50,7 +52,8 @@ BOOST_AUTO_TEST_CASE(deletekey) {
{"abc", "{a:1}"}, {"abbc", "{b:2}"}, {"cde", "{c:2}"}, {"fgh", "{g:6}"}, {"tyc", "{o:2}"}};
testing::TempDictionary dictionary = testing::TempDictionary::makeTempDictionaryFromJson(&test_data);

segment_t segment(new Segment(dictionary.GetFileName()));
// do with lazy load
segment_t segment(new Segment(dictionary.GetFileName(), false));

// delete a key
segment->DeleteKey("abc");
Expand Down Expand Up @@ -99,6 +102,9 @@ BOOST_AUTO_TEST_CASE(deletekey) {
BOOST_CHECK_EQUAL("abc", deleted_keys[0]);
BOOST_CHECK_EQUAL("fgh", deleted_keys[1]);
BOOST_CHECK_EQUAL("tyc", deleted_keys[2]);

segment->RemoveFiles();
BOOST_CHECK(!boost::filesystem::exists(dictionary.GetFileName() + ".dk"));
}

BOOST_AUTO_TEST_CASE(deletekeyDuringMerge) {
Expand Down Expand Up @@ -139,11 +145,17 @@ BOOST_AUTO_TEST_CASE(deletekeyDuringMerge) {
// simulate a merge failure
segment->MergeFailed();

// the "dkm" file should be gone
BOOST_CHECK(!boost::filesystem::exists(dictionary.GetFileName() + ".dkm"));
LoadDeletedKeys(dictionary.GetFileName() + ".dk", &deleted_keys);

BOOST_CHECK_EQUAL(2, deleted_keys.size());
BOOST_CHECK_EQUAL("abc", deleted_keys[0]);
BOOST_CHECK_EQUAL("tyc", deleted_keys[1]);

segment->RemoveFiles();
BOOST_CHECK(!boost::filesystem::exists(dictionary.GetFileName() + ".dk"));
BOOST_CHECK(!boost::filesystem::exists(dictionary.GetFileName() + ".dkm"));
}

BOOST_AUTO_TEST_CASE(deletekeyMerging) {
Expand All @@ -161,7 +173,8 @@ BOOST_AUTO_TEST_CASE(deletekeyMerging) {
{"efg", "{g:1}"}, {"hij", "{b:2}"}, {"lmn", "{c:2}"}, {"q", "{w:6}"}};
testing::TempDictionary dictionary2 = testing::TempDictionary::makeTempDictionaryFromJson(&test_data_segment2);

segment_t segment2(new Segment(dictionary2.GetFileName()));
// mixin lazy load
segment_t segment2(new Segment(dictionary2.GetFileName(), false));

// simulate a merge operation
segment1->ElectedForMerge();
Expand All @@ -174,7 +187,7 @@ BOOST_AUTO_TEST_CASE(deletekeyMerging) {
std::vector<std::pair<std::string, std::string>> test_data_segment_merged = test_data_segment1;
test_data_segment_merged.insert(test_data_segment_merged.end(), test_data_segment2.begin(), test_data_segment2.end());

// erase "abc", has been deleted before merge and create the merged dictionary
// erase "abc", because it has been deleted before merge and create the merged dictionary
test_data_segment_merged.erase(test_data_segment_merged.begin());
testing::TempDictionary dictionary3 = testing::TempDictionary::makeTempDictionaryFromJson(&test_data_segment_merged);

Expand All @@ -187,6 +200,23 @@ BOOST_AUTO_TEST_CASE(deletekeyMerging) {
BOOST_CHECK_EQUAL(2, deleted_keys.size());
BOOST_CHECK_EQUAL("cde", deleted_keys[0]);
BOOST_CHECK_EQUAL("lmn", deleted_keys[1]);

segment1->RemoveFiles();
BOOST_CHECK(!boost::filesystem::exists(dictionary1.GetFileName() + ".dk"));
BOOST_CHECK(!boost::filesystem::exists(dictionary1.GetFileName() + ".dkm"));
BOOST_CHECK(!boost::filesystem::exists(dictionary1.GetFileName()));


segment2->RemoveFiles();
BOOST_CHECK(!boost::filesystem::exists(dictionary2.GetFileName() + ".dk"));
BOOST_CHECK(!boost::filesystem::exists(dictionary2.GetFileName() + ".dkm"));
BOOST_CHECK(!boost::filesystem::exists(dictionary2.GetFileName()));


segment_merged->RemoveFiles();
BOOST_CHECK(!boost::filesystem::exists(dictionary3.GetFileName() + ".dk"));
BOOST_CHECK(!boost::filesystem::exists(dictionary3.GetFileName() + ".dkm"));
BOOST_CHECK(!boost::filesystem::exists(dictionary3.GetFileName()));
}

BOOST_AUTO_TEST_SUITE_END()
Expand Down

0 comments on commit 52fb4d3

Please sign in to comment.