Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cephfs-data-scan: scrub tag filtering (#12133 and #12145) #5685

Merged
merged 3 commits into from Dec 2, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
69 changes: 69 additions & 0 deletions src/cls/cephfs/cls_cephfs.cc
Expand Up @@ -123,6 +123,72 @@ static int accumulate_inode_metadata(cls_method_context_t hctx,
return 0;
}

// I want to select objects that have a name ending in ".00000000"
// and an xattr (scrub_tag) not equal to a specific value.
// This is such a special case that we can't really pretend it's
// generic, so just fess up and call this the cephfs filter.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine, but we could also have a generic "name and xattr" filter. Require it to match the name in a (definable, or just search-based) pattern, and to either match or not match an xattr name and value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we're writing a plugin for the osd, I don't feel the need to make it customizable like this: it's already a special purpose call, and anyone who needs something more generic can write their own object class.

I'm going to take you at the "I think this is fine" part :-)

class PGLSCephFSFilter : public PGLSFilter {
protected:
std::string scrub_tag;
public:
int init(bufferlist::iterator& params) {
try {
InodeTagFilterArgs args;
args.decode(params);
scrub_tag = args.scrub_tag;
} catch (buffer::error &e) {
return -EINVAL;
}

if (scrub_tag.empty()) {
xattr = "";
} else {
xattr = "_scrub_tag";
}

return 0;
}

virtual ~PGLSCephFSFilter() {}
virtual bool reject_empty_xattr() { return false; }
virtual bool filter(const hobject_t &obj, bufferlist& xattr_data,
bufferlist& outdata);
};

bool PGLSCephFSFilter::filter(const hobject_t &obj,
bufferlist& xattr_data, bufferlist& outdata)
{
const std::string need_ending = ".00000000";
const std::string &obj_name = obj.oid.name;

if (obj_name.length() < need_ending.length()) {
return false;
}

const bool match = obj_name.compare (obj_name.length() - need_ending.length(), need_ending.length(), need_ending) == 0;
if (!match) {
return false;
}

if (!scrub_tag.empty() && xattr_data.length() > 0) {
std::string tag_ondisk;
bufferlist::iterator q = xattr_data.begin();
try {
::decode(tag_ondisk, q);
if (tag_ondisk == scrub_tag)
return false;
} catch (const buffer::error &err) {
}
}

return true;
}

/**
 * Factory for the CephFS PGLS filter.
 *
 * @return a newly allocated PGLSCephFSFilter.
 *         NOTE(review): presumably the caller takes ownership of the
 *         returned object -- confirm against the filter registration API.
 */
PGLSFilter *inode_tag_filter()
{
  PGLSFilter *created = new PGLSCephFSFilter();
  return created;
}

/**
* initialize class
*
Expand All @@ -139,5 +205,8 @@ void __cls_init()
cls_register_cxx_method(h_class, "accumulate_inode_metadata",
CLS_METHOD_WR | CLS_METHOD_RD,
accumulate_inode_metadata, &h_accumulate_inode_metadata);

// A PGLS filter
cls_register_cxx_filter(h_class, "inode_tag", inode_tag_filter);
}

20 changes: 20 additions & 0 deletions src/cls/cephfs/cls_cephfs.h
Expand Up @@ -108,6 +108,26 @@ class AccumulateArgs
}
};

// Arguments for the "cephfs.inode_tag" object-listing filter.
// Encoded by ClsCephFSClient::build_tag_filter and decoded by the
// filter's init().
class InodeTagFilterArgs
{
public:
// Scrub tag to filter on: objects whose stored tag equals this value
// are excluded by the filter.  An empty string disables tag comparison.
std::string scrub_tag;

// Append the versioned encoding of this structure to bl.
// NOTE(review): the macro arguments look like struct version 1 /
// compat version 1 -- confirm against the ENCODE_START definition.
void encode(bufferlist &bl) const
{
ENCODE_START(1, 1, bl);
::encode(scrub_tag, bl);
ENCODE_FINISH(bl);
}

// Populate this structure from the versioned encoding read through bl.
// Fields must be decoded in the same order encode() writes them.
void decode(bufferlist::iterator &bl)
{
DECODE_START(1, bl);
::decode(scrub_tag, bl);
DECODE_FINISH(bl);
}
};

class AccumulateResult
{
public:
Expand Down
15 changes: 15 additions & 0 deletions src/cls/cephfs/cls_cephfs_client.cc
Expand Up @@ -144,3 +144,18 @@ int ClsCephFSClient::fetch_inode_accumulate_result(
return 0;
}

void ClsCephFSClient::build_tag_filter(
const std::string &scrub_tag,
bufferlist *out_bl)
{
assert(out_bl != NULL);

// Leading part of bl is un-versioned string naming the filter
::encode(std::string("cephfs.inode_tag"), *out_bl);

// Filter-specific part of the bl: in our case this is a versioned structure
InodeTagFilterArgs args;
args.scrub_tag = scrub_tag;
args.encode(*out_bl);
}

4 changes: 4 additions & 0 deletions src/cls/cephfs/cls_cephfs_client.h
Expand Up @@ -22,5 +22,9 @@ class ClsCephFSClient
inode_backtrace_t *backtrace,
ceph_file_layout *layout,
AccumulateResult *result);

static void build_tag_filter(
const std::string &scrub_tag,
bufferlist *out_bl);
};

73 changes: 66 additions & 7 deletions src/tools/cephfs/DataScan.cc
Expand Up @@ -52,14 +52,15 @@ bool DataScan::parse_kwarg(
}

const std::string arg(*i);
const std::string val(*(++i));
const std::string val(*(i + 1));

if (arg == std::string("--output-dir")) {
if (driver != NULL) {
derr << "Unexpected --output-dir: output already selected!" << dendl;
*r = -EINVAL;
return false;
}
dout(4) << "Using local file output to '" << val << "'" << dendl;
driver = new LocalFileDriver(val, data_io);
return true;
} else if (arg == std::string("-n")) {
Expand All @@ -80,6 +81,10 @@ bool DataScan::parse_kwarg(
return false;
}
return true;
} else if (arg == std::string("--filter-tag")) {
filter_tag = val;
dout(10) << "Applying tag filter: '" << filter_tag << "'" << dendl;
return true;
} else {
return false;
}
Expand Down Expand Up @@ -156,6 +161,7 @@ int DataScan::main(const std::vector<const char*> &args)
driver = new MetadataDriver();
driver->set_force_corrupt(force_corrupt);
driver->set_force_init(force_init);
dout(4) << "Using metadata pool output" << dendl;
}

dout(4) << "connecting to RADOS..." << dendl;
Expand Down Expand Up @@ -447,7 +453,28 @@ int DataScan::scan_inodes()
float progress = 0.0;
librados::NObjectIterator i = data_io.nobjects_begin(n, m);
#else
librados::NObjectIterator i = data_io.nobjects_begin();
librados::NObjectIterator i;
bool legacy_filtering = false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like you've broken the SHARDED_PGLS build here? legacy_filtering is referenced regardless below, and I imagine it also needs to include the filtering bits. ;)

(Although I'm not sure how interested you are in maintaining the two branches for this short period of time...nor if preprocessor #ifdefs are the right way to handle that, now that I'm seeing them again...)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these #ifdefs aren't functional, just aide memoires for me when the post-infernalis sharded pgls stuff goes in.


bufferlist filter_bl;
ClsCephFSClient::build_tag_filter(filter_tag, &filter_bl);

// try/catch to deal with older OSDs that don't support
// the cephfs pgls filtering mode
try {
i = data_io.nobjects_begin(filter_bl);
dout(4) << "OSDs accepted cephfs object filtering" << dendl;
} catch (const std::runtime_error &e) {
// A little unfriendly, librados raises std::runtime_error
// on pretty much any unhandled I/O return value, such as
// the OSD saying -EINVAL because of our use of a filter
// mode that it doesn't know about.
std::cerr << "OSDs do not support cephfs object filtering: using "
"(slower) fallback mode" << std::endl;
legacy_filtering = true;
i = data_io.nobjects_begin();
}

#endif
librados::NObjectIterator i_end = data_io.nobjects_end();

Expand Down Expand Up @@ -484,10 +511,38 @@ int DataScan::scan_inodes()
continue;
}

// We are only interested in 0th objects during this phase: we touched
// the other objects during scan_extents
if (obj_name_offset != 0) {
continue;
if (legacy_filtering) {
dout(20) << "Applying filter to " << oid << dendl;

// We are only interested in 0th objects during this phase: we touched
// the other objects during scan_extents
if (obj_name_offset != 0) {
dout(20) << "Non-zeroth object" << dendl;
continue;
}

bufferlist scrub_tag_bl;
int r = data_io.getxattr(oid, "scrub_tag", scrub_tag_bl);
if (r >= 0) {
std::string read_tag;
bufferlist::iterator q = scrub_tag_bl.begin();
try {
::decode(read_tag, q);
if (read_tag == filter_tag) {
dout(20) << "skipping " << oid << " because it has the filter_tag"
<< dendl;
continue;
}
} catch (const buffer::error &err) {
}
dout(20) << "read non-matching tag '" << read_tag << "'" << dendl;
} else {
dout(20) << "no tag read (" << r << ")" << dendl;
}

} else {
assert(obj_name_offset == 0);
dout(20) << "OSD matched oid " << oid << dendl;
}

AccumulateResult accum_res;
Expand All @@ -496,7 +551,11 @@ int DataScan::scan_inodes()
int r = ClsCephFSClient::fetch_inode_accumulate_result(
data_io, oid, &backtrace, &loaded_layout, &accum_res);

if (r < 0) {
if (r == -EINVAL) {
dout(4) << "Accumulated metadata missing from '"
<< oid << ", did you run scan_extents?" << dendl;
continue;
} else if (r < 0) {
dout(4) << "Unexpected error loading accumulated metadata from '"
<< oid << "': " << cpp_strerror(r) << dendl;
// FIXME: this creates situation where if a client has a corrupt
Expand Down
2 changes: 2 additions & 0 deletions src/tools/cephfs/DataScan.h
Expand Up @@ -237,6 +237,8 @@ class DataScan : public MDSUtility
bool force_corrupt;
// Overwrite root objects even if they exist
bool force_init;
// Only scan inodes without this scrub tag
string filter_tag;

/**
* @param r set to error on valid key with invalid value
Expand Down